美文网首页
31.hbase和MapReduce整合

31.hbase和MapReduce整合

作者: 文茶君 | 来源:发表于2020-01-02 15:38 被阅读0次

    这里我们仍以 WordCount 为例——在大数据领域,WordCount 的地位就相当于 Hello World(笑)。下面逐步分析代码。
    配置连接。



    这里所说的“修改 Hadoop 源码”,是指修改 org.apache.hadoop 包下的类。

     // Client-side settings: the ZooKeeper quorum hosts for HBase and the default filesystem URI.
     Configuration conf = new Configuration();
            conf.set("hbase.zookeeper.quorum", "node1,node2,node3");
            conf.set("fs.defaultFS", "hdfs://node1:8020");// use the hostname of your active NameNode
    

    创建job类

      // Build the job from the configuration prepared above.
      Job job = Job.getInstance(conf);
    // Identify the job jar by the class that it contains.
    job.setJarByClass(WCRunner.class);
    

    设置mapreduce

    // Map side: the mapper class and the key/value types it emits.
            job.setMapperClass(WCMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
    // Map-side setup is complete.
    // Reduce side. Arguments in order: the table to write to ("wc"), the TableReducer subclass,
    // the job, four values passed as null here, and a final boolean (addDependencyJars in the
    // HBase API) which the author states must be false for this setup.
            TableMapReduceUtil.initTableReducerJob("wc", WCReducer.class, job, null, null, null, null, false);
            FileInputFormat.addInputPath(job, new Path("/usr/wc"));// input path: where the files are read from
            // key and value types output by the reduce side
            job.setOutputKeyClass(NullWritable.class);
            job.setOutputValueClass(Put.class);// the HBase client Put (org.apache.hadoop.hbase.client.Put), not an HDFS type
    // job.setOutputFormatClass(cls); -- these two commented-out calls would select the sink to write to
            // job.setInputFormatClass(cls); -- ...and the source to read from
           job.waitForCompletion(true);
    

    TableMapReduceUtil.initTableReducerJob("wc", WCReducer.class, job, null, null, null, null, false);
    该方法各参数的详细说明可参考:https://blog.csdn.net/shudaqi2010/article/details/88653797

    WCRunner全部代码

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    
    
    /**
     * Driver for the word-count job: reads text files from HDFS, counts words with
     * {@link WCMapper}, and writes the totals into the HBase table "wc" through
     * {@link WCReducer}.
     */
    public class WCRunner {

        /**
         * Configures and submits the job, then blocks until it finishes.
         *
         * <p>The process exit status reflects the job outcome (0 on success, 1 on failure)
         * so that shell scripts and schedulers can detect a failed run.
         *
         * @param args command-line arguments (unused)
         * @throws Exception if the job cannot be configured or submitted
         */
        public static void main(String[] args) throws Exception {
            // Cluster connection settings: ZooKeeper quorum for HBase and the default filesystem.
            Configuration conf = new Configuration();
            conf.set("hbase.zookeeper.quorum", "node1,node2,node3");
            conf.set("fs.defaultFS", "hdfs://node1:8020");

            Job job = Job.getInstance(conf);
            job.setJarByClass(WCRunner.class);

            // Map side: mapper class and the key/value types it emits.
            job.setMapperClass(WCMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            // Reduce side: write into table "wc" via WCReducer. The optional arguments are
            // left null and the final boolean (addDependencyJars) is false.
            TableMapReduceUtil.initTableReducerJob("wc", WCReducer.class, job, null, null, null, null, false);

            // Input files are read from this HDFS directory.
            FileInputFormat.addInputPath(job, new Path("/usr/wc"));

            // Key and value types produced by the reduce side.
            job.setOutputKeyClass(NullWritable.class);
            job.setOutputValueClass(Put.class);

            // waitForCompletion returns false when the job fails; surface that as a
            // non-zero exit status instead of silently discarding it.
            boolean succeeded = job.waitForCompletion(true);
            System.exit(succeeded ? 0 : 1);
        }
    }
    
    

    WCMapper全部代码

    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    /**
     * Word-count mapper: emits {@code (word, 1)} for every whitespace-separated word
     * in an input line.
     */
    public class WCMapper extends Mapper<LongWritable,Text,Text,IntWritable>{

        /** Constant count emitted with every word; shared because its value never changes. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key, refilled for each word to avoid one allocation per token. */
        private final Text word = new Text();

        /**
         * Splits one input line into words and writes each word with a count of 1.
         *
         * @param key     the input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context the context used to emit {@code (word, 1)} pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split on runs of whitespace so repeated spaces and tabs do not produce
            // empty tokens between words.
            String[] tokens = value.toString().split("\\s+");
            for (String token : tokens) {
                // A blank line or leading whitespace still yields one empty token; skip it,
                // because an empty word would become a zero-length row key in the reducer.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
            }
        }
    }
    
    

    WCReducer全部代码

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableReducer;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    
    /**
     * Word-count reducer: sums the counts of one word and emits a {@link Put} whose row key
     * is the word and whose cell {@code cf:cf} holds the total as a decimal string.
     */
    public class WCReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable>{

        /** Column family name, encoded once with an explicit charset. */
        private static final byte[] FAMILY = "cf".getBytes(StandardCharsets.UTF_8);

        /** Column qualifier name, encoded once with an explicit charset. */
        private static final byte[] QUALIFIER = "cf".getBytes(StandardCharsets.UTF_8);

        /**
         * Adds up all counts for {@code key} and writes the total as one row.
         *
         * @param key     the word; used as the row key
         * @param iter    the counts emitted for this word
         * @param context the context used to emit the resulting {@link Put}
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> iter,
                Context context)
                throws IOException, InterruptedException {
            // An empty word cannot serve as a row key (HBase rejects zero-length rows),
            // so skip it rather than fail the whole task.
            if (key.getLength() == 0) {
                return;
            }

            // Accumulate in a long so very frequent words cannot overflow an int.
            long sum = 0;
            for (IntWritable count : iter) {
                sum += count.get();
            }

            // Encode with UTF-8 explicitly: bare getBytes() uses the platform default
            // charset, which can differ between cluster nodes.
            Put put = new Put(key.toString().getBytes(StandardCharsets.UTF_8));//rowkey
            put.add(FAMILY, QUALIFIER, String.valueOf(sum).getBytes(StandardCharsets.UTF_8));
            // The output key is passed as null, as in the original code; only the Put is supplied.
            context.write(null, put);
        }
    }
    
    

    建表



    run后


    相关文章

      网友评论

          本文标题:31.hbase和MapReduce整合

          本文链接:https://www.haomeiwen.com/subject/kyegoctx.html