美文网首页
ubuntu16.04 spark连接hadoop配置

ubuntu16.04 spark连接hadoop配置

作者: WJXZ | 来源:发表于2018-09-10 11:42 被阅读0次
    1.为Spark配置History Server
    2.为Hadoop配置YARN
    3.为Hadoop添加配置
    cd /usr/share/hadoop/hadoop-2.7.7/etc/hadoop
    

    配置yarn-site.xml

    sudo vim yarn-site.xml
    <configuration>
    <!-- Site specific YARN configuration properties -->
            <property>
                    <name>yarn.nodemanager.aux-services</name>
                    <value>mapreduce_shuffle</value>
            </property>
            <property>
                    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
                    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
            </property>
    <!-- spark -->
    <property>
            <name>yarn.log-aggregation-enable</name>
            <value>true</value>
    </property>
    <property>
            <name>yarn.log.server.url</name>
            <value>http://你的ip:19888/jobhistory/logs</value>
    </property>
    <property>
            <name>yarn.nodemanager.pmem-check-enabled</name>
            <value>false</value>
    </property>
    <property>
            <name>yarn.nodemanager.vmem-check-enabled</name>
            <value>false</value>
    </property>
    </configuration>
    #保存退出:wq
    

    配置mapred-site.xml

    sudo vim mapred-site.xml
    <configuration>
            <property>
                    <name>mapreduce.framework.name</name>
                    <value>yarn</value>
            </property>
            <!-- spark-->
            <property>
                    <name>mapreduce.jobhistory.done-dir</name>
                    <value>/user/history/done</value>
            </property>
            <property>
                    <name>mapreduce.jobhistory.intermediate-done-dir</name>
                    <value>/user/history/done_intermediate</value>
            </property>
    </configuration>
    #保存退出:wq
    
    4.重新启动yarn
    cd /usr/share/hadoop/hadoop-2.7.7
    ./sbin/stop-yarn.sh
    ./sbin/mr-jobhistory-daemon.sh stop historyserver
    ./sbin/start-yarn.sh
    ./sbin/mr-jobhistory-daemon.sh start historyserver
    
    5.配置spark
    cd /usr/share/spark/spark-2.2.2-bin-hadoop2.7/conf
    sudo vim spark-defaults.conf
    spark.yarn.historyServer.address=你的ip:18080
    #以下几项是Spark History Server的相关配置
    spark.history.ui.port=18080
    spark.eventLog.enabled=true
    spark.eventLog.dir=hdfs:///tmp/spark/events
    spark.history.fs.logDirectory=hdfs:///tmp/spark/events
    
    6.在HDFS上创建spark事件日志目录
    hdfs dfs -mkdir -p /tmp/spark/events
    
    7.启动sparkhistory
    cd /usr/share/spark/spark-2.2.2-bin-hadoop2.7
    ./sbin/start-history-server.sh
    
    8.测试

    浏览器访问 http://localhost:18080 即可看到Spark History Server页面

    相关文章

      网友评论

          本文标题:ubuntu16.04 spark连接hadoop配置

          本文链接:https://www.haomeiwen.com/subject/gbxsgftx.html