HBase-Spark-Read-Demo

Author: 步闲 | Published 2019-12-25 18:37
    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.hbase._
    import org.apache.hadoop.hbase.client.Scan
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat
    import org.apache.hadoop.hbase.protobuf.ProtobufUtil
    import org.apache.hadoop.hbase.util.{Base64, Bytes}
    import org.apache.spark.{SparkConf, SparkContext}
    
    
    object SparkReadHBaseDemo {
    
      val HBASE_ZOOKEEPER_QUORUM = "xxx1.com.cn,xxx2.com.cn,xxx3.com.cn"
    
      // Main entry point
      def main(args: Array[String]) {
    
        // Configure the Spark context
        val conf = new SparkConf().setAppName("SparkReadHBaseDemo")
          .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
          .setMaster("local") // local mode for debugging
        val sc = new SparkContext(conf)
        // Build an RDD over the HBase table via TableInputFormat
        val hbaseRDD = sc.newAPIHadoopRDD(getHbaseConf(), classOf[TableInputFormat],
          classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
          classOf[org.apache.hadoop.hbase.client.Result])
        hbaseRDD.map(_._2).map(getRes(_)).count() // count() forces evaluation of the lazy RDD
      }
    
    
      // Extract the row key and the f:addr value from a Result
      def getRes(result: org.apache.hadoop.hbase.client.Result): String = {
        val rowkey = Bytes.toString(result.getRow())
        val addr = Bytes.toString(result.getValue("f".getBytes, "addr".getBytes))
        println(rowkey+"---"+addr)
        addr
      }
      // Build the HBase configuration used by TableInputFormat
      def getHbaseConf(): Configuration = {
        val conf: Configuration = HBaseConfiguration.create()
        conf.set("hbase.zookeeper.property.clientPort", "2181")
        conf.set("zookeeper.znode.parent", "/hbase-unsecure")
        conf.set("hbase.zookeeper.quorum", HBASE_ZOOKEEPER_QUORUM)
        // Table to read
        conf.set(TableInputFormat.INPUT_TABLE, "test_shx")
        // Base64-encoded Scan consumed by TableInputFormat
        conf.set(TableInputFormat.SCAN, getScanStr())
        conf
      }
    
      // Serialize an (empty, full-table) Scan into the Base64 string TableInputFormat expects
      def getScanStr(): String = {
        val scan = new Scan()
        val proto = ProtobufUtil.toScan(scan)
        Base64.encodeBytes(proto.toByteArray())
      }
    }
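
The Scan above is empty, so the job reads the entire table. If only a subset is needed, the Scan can be narrowed before it is serialized. Below is a minimal sketch of a variant of getScanStr(), reusing the imports from the demo; the row-key bounds, the caching value, and the restriction to column f:addr are illustrative assumptions, not part of the original demo:

    // Hypothetical variant of getScanStr(): narrow the scan before serializing it.
    def getScanStr(startRow: String, stopRow: String): String = {
      val scan = new Scan()
      scan.addColumn("f".getBytes, "addr".getBytes) // fetch only the f:addr column
      scan.setStartRow(Bytes.toBytes(startRow))     // inclusive start row (assumed bound)
      scan.setStopRow(Bytes.toBytes(stopRow))       // exclusive stop row (assumed bound)
      scan.setCaching(500)                          // rows per RPC; tune for your cluster
      val proto = ProtobufUtil.toScan(scan)
      Base64.encodeBytes(proto.toByteArray())       // same Base64 encoding TableInputFormat expects
    }

The Maven pom.xml used to build the demo: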
    
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>com.xcar.etl</groupId>
        <artifactId>kafka.hbase.spark.hive</artifactId>
        <version>1.0-SNAPSHOT</version>
        <packaging>jar</packaging>
    
        <name>kafka.hbase.spark.hive</name>
        <url>http://maven.apache.org</url>
    
        <repositories>
            <repository>
                <id>cloudera</id>
                <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
            </repository>
        </repositories>
    
        <properties>
            <cdh.hbase.version>1.2.0-cdh5.7.0</cdh.hbase.version>
            <cdh.spark.version>1.6.0-cdh5.7.0</cdh.spark.version>
            <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        </properties>
    
        <dependencies>
            <dependency>
                <groupId>junit</groupId>
                <artifactId>junit</artifactId>
                <version>3.8.1</version>
                <scope>test</scope>
            </dependency>
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>1.2.62</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_2.10</artifactId>
                <version>${cdh.spark.version}</version>
                <!--<scope>provided</scope>-->
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-server</artifactId>
                <version>${cdh.hbase.version}</version>
            </dependency>
        </dependencies>
    </project>
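
Note that the pom above only declares dependencies; to compile the Scala source with Maven, a Scala compiler plugin is also needed in the build section. A minimal sketch using scala-maven-plugin (the version shown is an assumption; pick one compatible with your Scala 2.10 toolchain):

    <build>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <goals>
                            <!-- compile Scala sources under src/main/scala and src/test/scala -->
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>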
    
