美文网首页
hadoop入门系列--使用hbase过滤器(一篇全掌握)

hadoop入门系列--使用hbase过滤器(一篇全掌握)

作者: 微生活_小阿楠 | 来源:发表于2020-04-15 14:37 被阅读0次

    传送门
    hadoop入门系列--hbase基础知识点
    hadoop入门系列--从本地把数据导入Hbase
    hadoop入门系列--用java代码实现创建hbase表
    hadoop入门系列--使用hbase过滤器(一篇全掌握)
    传送门

    1)BinaryComparator()与SubstringComparator()的区别

    概念简述:

    • BinaryComparator按字节索引顺序比较指定字节数组,采用Bytes.compareTo(byte[])
    • SubstringComparator判断提供的子串是否出现在value中

    总结:BinaryComparator一般用于内容已经确定、需要完全一致的精确匹配(比如列名、列限定符);SubstringComparator一般用于模糊匹配,即只要值中包含给定子串就算匹配(比如单元格的值)。

    注意:下面的代码全部由作者亲自验证过,题目由易到难。基本上每一道题都用到了两个及以上的过滤器,属于多条件过滤。

    练习: 请按照以下要求完成查询任务:

    • (1)请查询出属于“互联网”产业的公司的职位名称;
    • (2)请查询出学历要求是“硕士”的职位信息;
    • (3)请分页(每页2条)查询出职位为“机器学习”的职位信息(查2页);(注:下文代码未包含此题的实现)
    • (4)请查询出“北京”或“上海”薪水在“10k-20k”之间的职位信息;
    • (5)请查询出“北京”的公司规模在“100人”以上的职位信息;

    2)话不多说,直接上代码

    package hbase_put_scan;
    
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.util.List;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.NamespaceDescriptor;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Admin;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hadoop.hbase.client.Delete;
    import org.apache.hadoop.hbase.client.Get;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.ResultScanner;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.client.Table;
    import org.apache.hadoop.hbase.filter.BinaryComparator;
    import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
    import org.apache.hadoop.hbase.filter.CompareFilter;
    import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
    import org.apache.hadoop.hbase.filter.FamilyFilter;
    import org.apache.hadoop.hbase.filter.Filter;
    import org.apache.hadoop.hbase.filter.FilterList;
    import org.apache.hadoop.hbase.filter.PageFilter;
    import org.apache.hadoop.hbase.filter.QualifierFilter;
    import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
    import org.apache.hadoop.hbase.filter.SubstringComparator;
    import org.apache.hadoop.hbase.filter.ValueFilter;
    import org.apache.hadoop.hbase.util.Bytes;
     
    
    public class HbaseFilterTest {
        public static Configuration conf;
        public static Connection conn;
        
        static {
            //1.获取资源
            conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.property", "2181");
            conf.set("hbasae.zookeeper.quorum", "centos");
            conf.set("hbase.master", "centos:60000");
            try {
                //2.创建连接
                conn = ConnectionFactory.createConnection(conf);
            }catch(IOException e) {
                e.printStackTrace();
            }
        }
        //(1)请查询出属于“互联网”产业的公司的职位名称; 
        public static void scanColumnFamily(String Table,String Family,String company_industry,String qualifier,String job_name) throws IOException{
            Admin admin = conn.getAdmin();
            //3.依据指定表名建立table实例
            Table table = conn.getTable(TableName.valueOf(Table));
            //4.建立scan实例
            Scan scan = new Scan();
            //单列值过滤器
            SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                    Family.getBytes(),
                    company_industry.getBytes(),
                    CompareOp.EQUAL,
                    new SubstringComparator(qualifier)
                    );  
            //列过滤器
            Filter qualifierFilter = new QualifierFilter(CompareOp.EQUAL,new BinaryComparator(job_name.getBytes()));
            //把上面两个过滤器and
            FilterList filterList = new FilterList();
            filterList.addFilter(singleColumnValueFilter);
            filterList.addFilter(qualifierFilter);
            
            scan.setFilter(filterList);
            ResultScanner resultScanner = table.getScanner(scan);
            //5.遍历读取ResultScanner集中内容
            int a = 0;
            for(Result result : resultScanner) {
                //System.out.println(new String(result.getValue(Family.getBytes(), job_name.getBytes())));
                //遍历读取result集中的内容
                List<Cell> cells = result.listCells();
                for(Cell cell : cells) {a++;
                    System.out.print("行健:" + new String(CellUtil.cloneRow(cell) ));
                    System.out.print("列族:" + new String(CellUtil.cloneFamily(cell) ));
                    System.out.print("列:" + new String(CellUtil.cloneQualifier(cell) ));
                    System.out.println("值:" + new String(CellUtil.cloneValue(cell) ));
                }
                System.out.println("============================================="+ a);
            }
            //6.关闭打开的资源
            resultScanner.close();
            table.close();
            conn.close();
        }
        
        //(2)请查询出学历要求是“硕士”的职位信息;
        public static void job_info(String Table,String family,String job_edu_require,String qualifier,String job_info) throws IOException{
            Admin admin = conn.getAdmin();
            Table table = conn.getTable(TableName.valueOf(Table));
            
            Scan scan = new Scan();
            //单列值过滤器
            SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                    family.getBytes(),
                    job_edu_require.getBytes(),
                    CompareOp.EQUAL,
                    new SubstringComparator(qualifier)
                    );
    
            scan.setFilter(singleColumnValueFilter);
            ResultScanner resultScanner = table.getScanner(scan);
            int a = 0;  
            for(Result result : resultScanner) {a++; 
                //方法一
                System.out.println("RowKey:" + new String(result.getRow()) + " qualifier=" + new String(result.getValue(family.getBytes(),job_info.getBytes())));
                System.out.println("========================");
                //方法二   (参考第一道题)        
            }       System.out.println(a);
        }
                 
        //(3)请查询出“北京”或“上海”薪水在“10k-20k”之间的职位信息;
        public static void company_job_info() throws IOException{
            Admin admin = conn.getAdmin();
            //3.依据指定表名建立table实例
            Table table = conn.getTable(TableName.valueOf("jobs"));
            //4.建立scan实例
            Scan scan = new Scan();
            //单列值过滤器--薪水在“10k-20k”
            SingleColumnValueFilter singleColumnValueFilter_salary = new SingleColumnValueFilter("info".getBytes(),"job_salary".getBytes(),CompareOp.EQUAL,new SubstringComparator("10k-20k")); 
            //单列值过滤器--北京
            SingleColumnValueFilter singleColumnValueFilter1 = new SingleColumnValueFilter("info".getBytes(),"company_location".getBytes(),CompareOp.EQUAL,new SubstringComparator("北京")); 
            //单列值过滤器--上海
            SingleColumnValueFilter singleColumnValueFilter2 = new SingleColumnValueFilter("info".getBytes(),"company_location".getBytes(),CompareOp.EQUAL,new SubstringComparator("上海")); 
            //列过滤器--只获取所有职位信息
            Filter qualifierFilter = new QualifierFilter(CompareOp.EQUAL,new SubstringComparator("job_"));
            Filter qualifierFilter_companylocation = new QualifierFilter(CompareOp.EQUAL,new BinaryComparator("company_location".getBytes()));
            
            //公司地址的组合--北京or上海
            FilterList filterList_location = new FilterList(FilterList.Operator.MUST_PASS_ONE);
            filterList_location.addFilter(singleColumnValueFilter1);
            filterList_location.addFilter(singleColumnValueFilter2);
     
            //查询列的组合(可以把你想要输出的列写在这里)
            FilterList filterList_column = new FilterList(FilterList.Operator.MUST_PASS_ONE);
            filterList_column.addFilter(qualifierFilter);
            filterList_column.addFilter(qualifierFilter_companylocation);
            
            //把上面所有过滤器and
            FilterList filterList = new FilterList();
            filterList.addFilter(singleColumnValueFilter_salary);
            filterList.addFilter(filterList_location);    
            filterList.addFilter(filterList_column);
            
            scan.setFilter(filterList);
            ResultScanner resultScanner = table.getScanner(scan);
            //5.遍历读取ResultScanner集中内容   
            for(Result result : resultScanner) {
                //System.out.println(new String(result.getValue(Family.getBytes(), job_name.getBytes())));
                //遍历读取result集中的内容
                List<Cell> cells = result.listCells();
                for(Cell cell : cells) {
                    System.out.print("行健:" + new String(CellUtil.cloneRow(cell) ));
                    System.out.print("列族:" + new String(CellUtil.cloneFamily(cell) ));
                    System.out.print("列:" + new String(CellUtil.cloneQualifier(cell) ));
                    System.out.println("值:" + new String(CellUtil.cloneValue(cell) ));
                }
                System.out.println("=============================================");
            }
            //6.关闭打开的资源
            resultScanner.close();
            table.close();
            conn.close();
        }
        
        //(4)请查询出“北京”的公司规模在“100人”以上的职位信息;
        public static void company_people_job_info() throws IOException{
            Admin admin = conn.getAdmin();
            //3.依据指定表名建立table实例
            Table table = conn.getTable(TableName.valueOf("jobs"));
            //4.建立scan实例
            Scan scan = new Scan();
            //单列值过滤器--北京
            SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                    "info".getBytes(),
                    "company_location".getBytes(),
                    CompareOp.EQUAL,
                    new SubstringComparator("北京")
                    );  
            //单列值过滤器--公司规模在“100人”以上
            SingleColumnValueFilter singleColumnValueFilter_people = new SingleColumnValueFilter("info".getBytes(),"company_people".getBytes(),CompareOp.EQUAL,new SubstringComparator("00")); 
    
            
            //把上面两个过滤器and
            FilterList filterList = new FilterList();
            filterList.addFilter(singleColumnValueFilter);
            filterList.addFilter(singleColumnValueFilter_people);
            
            scan.setFilter(filterList);
            ResultScanner resultScanner = table.getScanner(scan);
            //5.遍历读取ResultScanner集中内容
            int a = 0;
            for(Result result : resultScanner) {
                //System.out.println(new String(result.getValue(Family.getBytes(), job_name.getBytes())));
                //遍历读取result集中的内容
                List<Cell> cells = result.listCells();
                for(Cell cell : cells) {a++;
                    System.out.print("行健:" + new String(CellUtil.cloneRow(cell) ));
                    System.out.print("列族:" + new String(CellUtil.cloneFamily(cell) ));
                    System.out.print("列:" + new String(CellUtil.cloneQualifier(cell) ));
                    System.out.println("值:" + new String(CellUtil.cloneValue(cell) ));
                }
                System.out.println("============================================="+ a);
            }
            //6.关闭打开的资源
            resultScanner.close();
            table.close();
            conn.close();
        }
        
        
        public static void main(String[] args) throws IOException{  
            //(1)请查询出属于“互联网”产业的公司的职位名称
            //scanColumnFamily("jobs","info","company_industry","互联网","job_name");
            
            //(2)请查询出学历要求是“硕士”的职位信息;(下面是做法一、做法二);   
            //job_info("jobs","info","job_edu_require","硕士","job_info");        
            //scanColumnFamily("jobs","info","job_edu_require","硕士","job_info");
            
            //(3)请查询出“北京”或“上海”薪水在“10k-20k”之间的职位信息;
            company_job_info();
            
            //(4)请查询出“北京”的公司规模在“100人”以上的职位信息;
            //company_people_job_info();
        }
        
    }
    
    
    image.png

    相关文章

      网友评论

          本文标题:hadoop入门系列--使用hbase过滤器(一篇全掌握)

          本文链接:https://www.haomeiwen.com/subject/ydwtvhtx.html