MapReduce Programming Example (3): Data Deduplication

Author: Mr_K_ | Published 2019-12-26 15:47

    This exercise is MapReduce programming example (3): data deduplication. The goal is to merge two input files so that every distinct line appears exactly once in the output. The Mapper emits each input line as the key with an empty value, the shuffle phase groups identical keys together, and the Reducer writes each distinct key exactly once.


    Input (two files):
    2013-11-01 aa
    2013-11-02 bb
    2013-11-03 cc
    2013-11-04 aa
    2013-11-05 dd
    2013-11-06 dd
    2013-11-07 aa
    2013-11-09 cc
    2013-11-10 ee

    2013-11-01 bb
    2013-11-02 33
    2013-11-03 cc
    2013-11-04 bb
    2013-11-05 23
    2013-11-06 dd
    2013-11-07 99
    2013-11-09 99
    2013-11-10 ee


    Imports:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    

    The rest of the code:

    public class Dedup {

        // Mapper: emit each input line as the key with an empty value.
        // Identical lines therefore collapse onto the same key during the shuffle.
        public static class MyMapper extends Mapper<Object, Text, Text, Text> {

            @Override
            protected void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                context.write(value, new Text(""));
            }
        }

        // Reducer: write each distinct key exactly once, ignoring the values.
        public static class MyReducer extends Reducer<Text, Text, Text, Text> {

            @Override
            protected void reduce(Text key, Iterable<Text> values, Context context)
                    throws IOException, InterruptedException {
                context.write(key, new Text(""));
            }
        }

        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

            if (otherArgs.length < 2) {
                System.err.println("Usage: Dedup <input path> <output path>");
                System.exit(2);
            }

            Job job = Job.getInstance(conf, "Dedup");
            job.setJarByClass(Dedup.class);
            job.setMapperClass(MyMapper.class);
            // Dedup is idempotent per key, so the reducer can also serve as the combiner.
            job.setCombinerClass(MyReducer.class);
            job.setReducerClass(MyReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
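
    The Mapper and Reducer above amount to computing the set of distinct input lines. As a quick sanity check outside Hadoop, the standalone sketch below (my addition, not part of the original article) applies the same logic to the sample records using a TreeSet, which also reproduces the sorted key order produced by the shuffle phase; the class name and the hard-coded records are purely illustrative.

    import java.util.Arrays;
    import java.util.List;
    import java.util.TreeSet;

    // Standalone sanity check for the dedup logic: keep one copy of each record
    // and print them in sorted order, mirroring what the MapReduce job writes.
    public class DedupLocalCheck {
        public static void main(String[] args) {
            List<String> records = Arrays.asList(
                    // records from the first sample file
                    "2013-11-01 aa", "2013-11-02 bb", "2013-11-03 cc",
                    "2013-11-04 aa", "2013-11-05 dd", "2013-11-06 dd",
                    "2013-11-07 aa", "2013-11-09 cc", "2013-11-10 ee",
                    // records from the second sample file
                    "2013-11-01 bb", "2013-11-02 33", "2013-11-03 cc",
                    "2013-11-04 bb", "2013-11-05 23", "2013-11-06 dd",
                    "2013-11-07 99", "2013-11-09 99", "2013-11-10 ee");

            // TreeSet discards duplicates and iterates in sorted order.
            for (String record : new TreeSet<>(records)) {
                System.out.println(record);
            }
        }
    }

    To run the real job, package the Dedup class into a jar and submit it with hadoop jar, passing the HDFS input and output directories as the two arguments; the deduplicated records end up in the part-r-00000 file(s) under the output directory.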
    
    

    Output:
    2013-11-01 aa
    2013-11-01 bb
    2013-11-02 33
    2013-11-02 bb
    2013-11-03 cc
    2013-11-04 aa
    2013-11-04 bb
    2013-11-05 23
    2013-11-05 dd
    2013-11-06 dd
    2013-11-07 99
    2013-11-07 aa
    2013-11-09 99
    2013-11-09 cc
    2013-11-10 ee
