Hadoop Learning, Day 2 (MapReduce Principles and WordCount)

Author: 风笑天2013 | Published 2018-07-02 15:42

    2018-07-01

    1. Analyzing how MapReduce runs, using the WordCount program

    [Figure: WordCount data processing flow]
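    To make the flow concrete, here is how a small two-line input would move through the stages. The sample text is purely illustrative:

    Input (two lines):       hello world
                             hello hadoop

    Map output:              <hello,1> <world,1> <hello,1> <hadoop,1>

    Shuffle/sort (group):    <hadoop,[1]> <hello,[1,1]> <world,[1]>

    Reduce output:           hadoop 1
                             hello  2
                             world  1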

    2. The WordCount program

    // WordCountMapper.java -- Map phase

    package cn.xia.java.wordcount;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

            // Reuse the output objects across calls to avoid per-record allocation
            private static final IntWritable one = new IntWritable(1);
            private static final Text word = new Text();

            @Override
            protected void map(LongWritable key, Text value, Context context)
                            throws IOException, InterruptedException {
                    // One input line per call; split it into words on spaces
                    String line = value.toString();
                    String[] words = line.split(" ");
                    for (String s : words) {
                            word.set(s);
                            context.write(word, one); // emit <word, 1>
                    }
            }
    }

    // WordCountReducer.java -- Reduce phase

    package cn.xia.java.wordcount;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

            private static final IntWritable num = new IntWritable();

            @Override
            protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                            throws IOException, InterruptedException {
                    // All counts for one word arrive together after the shuffle; sum them
                    int count = 0;
                    for (IntWritable val : values) {
                            count += val.get();
                    }
                    num.set(count);
                    context.write(key, num); // emit <word, total>
            }
    }
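    Because word-count summation is associative and commutative, this same reducer class could also be registered as a combiner in the driver, pre-aggregating map output locally before the shuffle. This is an optional addition, not part of the original program:

    // Optional line in the driver: pre-aggregate <word,1> pairs on the map side
    job.setCombinerClass(WordCountReducer.class);

    A combiner cuts down the volume of intermediate data sent across the network, which matters once the input grows beyond toy size.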

    // WordCount.java -- driver: assembles the pieces into a Job

    package cn.xia.java.wordcount;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class WordCount {

            public static void main(String[] args) throws Exception {
                    // Load the local Hadoop configuration
                    Configuration conf = new Configuration();
                    // Delete the reduce output directory if it already exists;
                    // otherwise the job fails because the output path is taken
                    FileSystem.get(conf).delete(new Path(args[1]), true);

                    // Assemble the Job
                    Job job = Job.getInstance(conf);
                    job.setJarByClass(WordCount.class);

                    // Map side
                    job.setMapperClass(WordCountMapper.class);
                    job.setMapOutputKeyClass(Text.class);
                    job.setMapOutputValueClass(IntWritable.class);
                    FileInputFormat.setInputPaths(job, new Path(args[0]));

                    // Reduce side
                    job.setReducerClass(WordCountReducer.class);
                    job.setOutputKeyClass(Text.class);
                    job.setOutputValueClass(IntWritable.class);
                    FileOutputFormat.setOutputPath(job, new Path(args[1]));

                    // Submit the job and wait; exit non-zero on failure
                    System.exit(job.waitForCompletion(true) ? 0 : 1);
            }
    }
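    Once the three classes are packaged into a jar, the job can be launched with the hadoop command. The jar name and HDFS paths below are assumptions for illustration; args[0] becomes the input path and args[1] the output path that the driver deletes first:

    hadoop jar wordcount.jar cn.xia.java.wordcount.WordCount /wc/input /wc/output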
