美文网首页
MapReduce之多字段排序(String型)

MapReduce之多字段排序(String型)

作者: 还闹不闹 | 来源:发表于2020-04-02 09:56 被阅读0次

    1、需求描述:输入有3列,依次按第1列、第2列、第3列的字符串值进行倒序(从大到小)排序

    inputFile(各列之间以制表符 \t 分隔):
    ab d 1a
    b1 d1 1a
    c2 a1 1a
    d3 e3 1a
    d2 a4 1b
    b1 a2 1b
    c1 b2 1c

    2、代码

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.CharArrayWriter;
    import java.io.CharArrayReader;
    import java.util.StringTokenizer;
    
    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class TripleIntSortApp {
        /**
         * Entry point: configures and runs the three-column sort job.
         *
         * <p>The process exits with status 0 when the job succeeds, 1 when it fails,
         * and 2 when the required arguments are missing.
         *
         * @param args {@code args[0]} is the input path handed to
         *             {@code FileInputFormat.setInputPaths}; {@code args[1]} is the output path
         * @throws Exception if the job cannot be configured, submitted or monitored
         */
        public static void main(String[] args) throws Exception {
            if (args.length < 2) {
                System.err.println("Usage: " + TripleIntSortApp.class.getSimpleName() + " <input path> <output path>");
                System.exit(2);
            }

            // Create the job, named after this class.
            Job job = Job.getInstance(new Configuration(), TripleIntSortApp.class.getSimpleName());
            // Tell Hadoop which jar contains the job classes.
            job.setJarByClass(TripleIntSortApp.class);

            // Input location.
            FileInputFormat.setInputPaths(job, args[0]);

            // Map phase: the whole row is the key, there is no value payload.
            job.setMapperClass(TripleStringSortMapper.class);
            job.setMapOutputKeyClass(TripleString.class);
            job.setMapOutputValueClass(NullWritable.class);

            // Reduce phase: emits the already-sorted keys.
            job.setReducerClass(TripleStringSortReducer.class);
            job.setOutputKeyClass(TripleString.class);
            job.setOutputValueClass(NullWritable.class);

            // Output location.
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            // Submit, wait, and propagate the outcome through the exit status so that
            // a failed job is visible to the calling shell or scheduler.
            boolean succeeded = job.waitForCompletion(true);
            System.exit(succeeded ? 0 : 1);
        }
        
        /**
         * Turns each tab-separated input line into a {@link TripleString} key
         * (first three columns) paired with a {@link NullWritable} value.
         *
         * <p>Blank lines are skipped. A non-blank line with fewer than three
         * tab-separated columns fails the task with a descriptive {@link IOException}.
         */
        public static class TripleStringSortMapper extends Mapper<LongWritable, Text, TripleString, NullWritable>{
            /** Single key instance, refilled on every call to {@code map}. */
            private final TripleString k3 = new TripleString();

            /**
             * Parses one input line and emits it as a composite key.
             *
             * @param key     the input key supplied by the input format (only used in error messages)
             * @param value   one line of input text
             * @param context the task context used to emit the key
             * @throws IOException if the line has fewer than three columns or the write fails
             * @throws InterruptedException if the write is interrupted
             */
            @Override
            protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
                String line = value.toString();
                if (line.trim().isEmpty()) {
                    // Nothing to sort on this line.
                    return;
                }

                String[] fields = line.split("\t");
                if (fields.length < 3) {
                    throw new IOException("Expected at least 3 tab-separated columns but found "
                            + fields.length + " (input key " + key + "): " + line);
                }

                k3.set(fields[0], fields[1], fields[2]);
                context.write(k3, NullWritable.get());
                System.out.println("Mapper-----第一个数:"+k3.first+" 第二个数:"+k3.second+" 第三个数:"+k3.third);
            }
        }
        
        /**
         * Writes the sorted keys to the job output.
         *
         * <p>The key is written once for every value grouped under it, so rows that
         * occur several times in the input occur the same number of times in the output
         * instead of being collapsed into one.
         */
        public static class TripleStringSortReducer extends Reducer<TripleString, NullWritable, TripleString, NullWritable>{
            /** Number of the next {@code reduce} invocation on this instance (starts at 1); diagnostic only. */
            int i=1;

            /**
             * Emits {@code k3} once per grouped value and prints diagnostic output once per call.
             *
             * @param k3      the composite key of the current group
             * @param arg1    the values grouped under the key; only their count matters
             * @param context the task context used to emit output
             * @throws IOException if writing the output fails
             * @throws InterruptedException if writing the output is interrupted
             */
            @Override
            protected void reduce(TripleString k3, Iterable<NullWritable> arg1, Context context) throws IOException, InterruptedException {
                // One output record per input record: duplicates must survive the sort.
                for (NullWritable ignored : arg1) {
                    context.write(k3, NullWritable.get());
                }
                System.out.println("调用次数"+(i++));
                System.out.println("Reducer-----第一个数:"+k3.first+" 第二个数:"+k3.second+" 第三个数:"+k3.third);
            }
        }
        
        public static class TripleString implements WritableComparable<TripleString>{
            String first;
            String second;
            String third;
            public void write(DataOutput out) throws IOException {
                out.writeUTF(first);
                out.writeUTF(second);
                out.writeUTF(third);
            }
            
            public void set(String s1, String s2, String s3){
                this.first = s1;
                this.second = s2;
                this.third = s3;
            }
    
            public void readFields(DataInput in) throws IOException {
                this.first = in.readUTF();
                this.second = in.readUTF();
                this.third = in.readUTF();
            }
    
    //          正序:从小到大排序
    //        public int compareTo(TripleString o) {
    //            //int r1 = this.first - o.first;
    //            int r1 = (this.first).compareTo(o.first);
    //            if(r1 < 0){
    //                return -1;
    //            }else if(r1 > 0){
    //                return 1;
    //            }
    //            //int r2 = this.second - o.second;
    //          int r2 = (this.second).compareTo(o.second);
    //            if(r2 < 0){
    //                return -1;
    //            }else if(r2 > 0){
    //                return 1;
    //            }
    //            //int r3 = this.third - o.third;
    //          int r3 = (this.third).compareTo(o.third);
    //            return (r3 < 0 ? -1 : (r3 > 0 ? 1: 0));
    //        }
            
    //        倒序:从大到小排序
            public int compareTo(TripleString o) {
    //            int r1 = this.first - o.first;
                int r1 = (this.first).compareTo(o.first);
                if(r1 > 0){
                    return -1;
                }else if(r1 < 0){
                    return 1;
                }
    //            int r2 = this.second - o.second;
                int r2 = (this.second).compareTo(o.second);
                if(r2 > 0){
                    return -1;
                }else if(r2 < 0){
                    return 1;
                }
    //            int r3 = this.third - o.third;
                int r3 = (this.third).compareTo(o.third);
                return (r3 > 0 ? -1 : (r3 < 0 ? 1: 0));
            }
            
            @Override
            public String toString() {
                return this.first+"\t"+this.second+"\t"+this.third;
            }
        }
    }
    
    

    3、输出结果

    d3 e3 1a
    d2 a4 1b
    c2 a1 1a
    c1 b2 1c
    b1 d1 1a
    b1 a2 1b
    ab d 1a

    相关文章

      网友评论

          本文标题:MapReduce之多字段排序(String型)

          本文链接:https://www.haomeiwen.com/subject/xdptphtx.html