Hadoop学习-第二天（MapReduce原理及WordCou

作者: 风笑天2013 | 来源:发表于2018-07-02 15:42 被阅读14次

2018-07-01

1、结合WordCount程序分析MapReduce的运行原理

WordCount数据处理流程

2、WordCount程序

// WordCountMapper.java文件 Map阶段

package cn.xia.java.wordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper {

        private static IntWritable one = new IntWritable(1);

        private static Text word = new Text();

        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

                String v1 = value.toString();

                String output[] = v1.split(" ");

                for (String s : output) {

                        word.set(s);

                        context.write(word, one);

                }

        }

}

// WordCountReducer.java文件 Reduce阶段

package cn.xia.java.wordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer{

        private static IntWritable num = new IntWritable();

        protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException {

                int count = 0;

                for (IntWritable val : values) {

                        count += val.get();

                }

                num.set(count);

                context.write(key, num);

        }

}

// WordCount.java文件 主程序，组装成Job

package cn.xia.java.wordcount;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

        public static void main(String[] args) throws Exception {

                // 删除Reduce的输出文件夹

                Configuration conf = new Configuration(); // 读取本地配置文件，读取本地配置

                FileSystem.get(conf).delete(new Path(args[1]), true);

                // Job

                Job job = Job.getInstance(conf);

                job.setJarByClass(WordCount.class);

                job.setMapperClass(WordCountMapper.class);

                job.setMapOutputKeyClass(Text.class);

                job.setMapOutputValueClass(IntWritable.class);

                FileInputFormat.setInputPaths(job, new Path(args[0]));

                job.setReducerClass(WordCountReducer.class);

                job.setOutputKeyClass(Text.class);

                job.setOutputValueClass(IntWritable.class);

                FileOutputFormat.setOutputPath(job, new Path(args[1]));

                job.waitForCompletion(true);

        }

}

网友评论

本文标题：Hadoop学习-第二天（MapReduce原理及WordCou

本文链接：https://www.haomeiwen.com/subject/ghehuftx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

Hadoop学习-第二天（MapReduce原理及WordCou

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读