Spark SQL: Connecting to the Hive Metastore Programmatically

By 尼小摩 | Published 2018-08-14 12:16, read 545 times

    Connecting to Hive from Spark SQL fails with a "table or view not found" error: Table or view not found: bp_ods.ods_bp_user_behavior

    Exception in thread "main" org.apache.spark.sql.AnalysisException: Table or view not found: bp_ods.ods_bp_user_behavior; line 1 pos 0
        at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
        at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveRelations$$lookupTableFromCatalog(Analyzer.scala:663)
        at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:640)
        at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:638)
        at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289)
        at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289)
        at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
        at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:288)
        at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:638)
        at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:584)
        at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
        at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
        at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
        at scala.collection.immutable.List.foldLeft(List.scala:84)
        at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
        at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
        at scala.collection.immutable.List.foreach(List.scala:381)
        at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
        at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:123)
        at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:117)
        at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:102)
        at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
        at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
        at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
        at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:74)
        at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:638)
        at com.aldb.HdfsToHiveLauncher$.main(HdfsToHiveLauncher.scala:63)
        at com.aldb.HdfsToHiveLauncher.main(HdfsToHiveLauncher.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
        at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:892)
        at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:197)
        at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:227)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:136)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
    18/07/09 18:17:44 INFO spark.SparkContext: Invoking stop() from shutdown hook
    

    Why this happens: when Spark cannot find any Hive Metastore configuration, it falls back to its built-in catalog (a local Derby-backed metastore), so tables that exist in Hive are invisible to the job.

    Solutions:

    1. Add the hive-site.xml configuration file to the project's classpath

    • Find the file on one of the cluster nodes: find / -name hive-site.xml (it usually lives under the Hive conf directory, e.g. /etc/hive/conf)
    • Copy hive-site.xml into the project's resources directory (e.g. src/main/resources) so that it lands on the classpath; a minimal sketch of the file follows below.
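
    For reference, a minimal hive-site.xml only needs to point at the metastore service. This is a sketch; thrift://METASTORE:9083 is the same placeholder used in the code below and must be replaced with the real metastore host and port:

    <?xml version="1.0"?>
    <configuration>
      <!-- Placeholder address; replace METASTORE:9083 with your metastore host:port -->
      <property>
        <name>hive.metastore.uris</name>
        <value>thrift://METASTORE:9083</value>
      </property>
    </configuration>

    When the job is launched through spark-submit, the file can also be shipped with the job via the --files option (e.g. --files /path/to/hive-site.xml).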

    2. Set the Hive Metastore parameters programmatically in code:

    2.1 Spark 1.x:
    // HiveContext comes from the spark-hive module
    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.hive.HiveContext

    val conf = new SparkConf()
    val sc = new SparkContext(conf)
    val hiveContext = new HiveContext(sc)
    // METASTORE:9083 is a placeholder for the real metastore host:port
    hiveContext.setConf("hive.metastore.uris", "thrift://METASTORE:9083")
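
    As a quick sanity check for the Spark 1.x setup (a minimal sketch, assuming the metastore address above is reachable), listing the databases should show the Hive ones rather than only default:

    // Should print the Hive databases (e.g. bp_ods), not just "default"
    hiveContext.sql("SHOW DATABASES").collect().foreach(println)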
    
    2.2 Spark 2.x:
    package com.aldb.bigdata.basic

    import com.aldb.bigdata.utils.CommonUtils
    import org.apache.spark.sql.SparkSession
    import org.slf4j.LoggerFactory

    /**
      * Parses geo-location tracking data and writes it to Hive.
      * Created by fc.w on 2018/07/10
      */
    object DeviceLocHdfsToHiveLauncher {

      val logger = LoggerFactory.getLogger(DeviceLocHdfsToHiveLauncher.getClass)

      def main(args: Array[String]): Unit = {
        // Name the session "spark" so the imports below resolve against it
        val spark = SparkSession.builder()
          .appName("DeviceLoc_Hdfs_To_Hive_Job")
          // METASTORE:9083 is a placeholder for the real metastore host:port
          .config("hive.metastore.uris", "thrift://METASTORE:9083")
          .config(CommonUtils.SPARK_SQL_DIR, CommonUtils.HIVE_WAREHOUSE)
          .enableHiveSupport()
          .getOrCreate()

        import spark.implicits._

        // Create an arbitrary frame
        val frame = Seq(("one", 1), ("two", 2), ("three", 3)).toDF("word", "count")

        // See the frame created
        frame.show()
        /**
          * +-----+-----+
          * | word|count|
          * +-----+-----+
          * |  one|    1|
          * |  two|    2|
          * |three|    3|
          * +-----+-----+
          */

        // Write the frame as a Hive table
        frame.write.mode("overwrite").saveAsTable("t4")

        spark.stop()
      }
    }
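
    With the metastore URI set this way, the table lookup that originally failed should now resolve. As a quick check (a sketch that assumes the bp_ods.ods_bp_user_behavior table really exists in the Hive warehouse), the query from the failing job can be run directly after getOrCreate():

    // The Hive table should now be visible through the session's catalog
    spark.sql("SELECT * FROM bp_ods.ods_bp_user_behavior LIMIT 10").show()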
    

    Recommendation: use approach 2 (the programmatic configuration).
