Pseudo-Distributed Installation of Hadoop, Spark, and Hive on Ubuntu

Author: zfylin | Published 2018-12-18 11:13

    0. Preparation

    Download the packages

    1. Hadoop : http://apache.communilink.net/hadoop/common/hadoop-2.6.5/
    2. Spark: https://spark.apache.org/downloads.html
    3. Hive: http://ftp.cuhk.edu.hk/pub/packages/apache.org/hive/
    4. JDK: https://www.oracle.com/technetwork/cn/java/javase/downloads/jdk8-downloads-2133151-zhs.html
    5. Scala: https://www.scala-lang.org/download/
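
    For example, the tarballs can be fetched with wget; the mirror paths and file names below are illustrative and should be checked against the download pages above:

    wget http://apache.communilink.net/hadoop/common/hadoop-2.6.5/hadoop-2.6.5.tar.gz
    wget https://archive.apache.org/dist/spark/spark-2.2.1/spark-2.2.1-bin-hadoop2.6.tgz
    wget https://archive.apache.org/dist/hive/hive-1.2.2/apache-hive-1.2.2-bin.tar.gz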

    Install Java

    sudo tar -xzvf jdk-8u144-linux-x64.tar.gz -C /opt
    cd /opt
    sudo ln -s jdk1.8.0_144 jdk
    
    # Add environment variables
    sudo vim /etc/profile
        # add the lines below
        export JAVA_HOME=/opt/jdk
        export JRE_HOME=$JAVA_HOME/jre
        export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
        export PATH=$PATH:$JAVA_HOME/bin
    source /etc/profile
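
    A quick sanity check that the JDK resolves:

    java -version    # should report java version "1.8.0_144"
    echo $JAVA_HOME  # should print /opt/jdk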
    

    Install Scala

    sudo tar -zxvf $SCALA_FILE -C /opt
    cd /opt
    sudo ln -s scala-2.11.8 scala
    sudo vim /etc/profile
        # add
        export SCALA_HOME=/opt/scala
        export PATH=$PATH:$SCALA_HOME/bin
    source /etc/profile
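
    And confirm Scala is on the PATH:

    scala -version   # should report Scala code runner version 2.11.8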
    

    Install MySQL

    Install MySQL via the Ubuntu package manager, e.g. sudo apt-get install mysql-server (see any standard guide for details).

    Passwordless SSH login

    sudo apt-get install openssh-server
    ssh localhost                              # first login creates ~/.ssh; type exit afterwards
    ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
    cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
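
    If ssh localhost still prompts for a password, the usual culprit is file permissions on ~/.ssh; tightening them is a safe fix:

    chmod 700 ~/.ssh
    chmod 600 ~/.ssh/authorized_keys
    ssh localhost    # should now log in without a password prompt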
    

    1. Install Hadoop

    Installation and configuration

    tar -zxvf $YOUR_HADOOP_FILE -C /opt
    cd /opt
    sudo ln -s hadoop-2.6.5 hadoop
    sudo vim /etc/profile
        # add
        export HADOOP_HOME=/opt/hadoop
        export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin
    source /etc/profile
    
    vim /opt/hadoop/etc/hadoop/hadoop-env.sh
        # add
        export JAVA_HOME=/opt/jdk
    
    vim /opt/hadoop/etc/hadoop/hdfs-site.xml
    
    <configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
    <property>  
        <name>dfs.namenode.name.dir</name>  
        <value>file:///data/hadoopdata/hdfs/name</value>  
    </property> 
    <property>  
         <name>dfs.datanode.data.dir</name>  
         <value>file:///data/hadoopdata/hdfs/data</value>  
    </property>  
    <property>  
        <name>dfs.namenode.secondary.http-address</name>  
        <value>localhost:9001</value>  
    </property>  
    </configuration>
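
    The name and data directories referenced above must exist and be writable by the user running Hadoop, otherwise the NameNode will fail to start; assuming the paths from the config:

    sudo mkdir -p /data/hadoopdata/hdfs/name /data/hadoopdata/hdfs/data
    sudo chown -R $USER:$USER /data/hadoopdata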
    
    vim /opt/hadoop/etc/hadoop/core-site.xml 
    
    <configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/hadoop/tmp</value>
    </property>
    </configuration>
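
    Likewise, create the temp directory referenced by hadoop.tmp.dir:

    sudo mkdir -p /opt/hadoop/tmp
    sudo chown $USER:$USER /opt/hadoop/tmp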
    
    # mapred-site.xml does not exist by default; create it from the template first
    cp /opt/hadoop/etc/hadoop/mapred-site.xml.template /opt/hadoop/etc/hadoop/mapred-site.xml
    vim /opt/hadoop/etc/hadoop/mapred-site.xml
    
    <configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
    </property>
    <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
    </property>
    <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
    </property>
    </configuration>
    
    vim /opt/hadoop/etc/hadoop/yarn-site.xml
    
    <configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>  
        <name>yarn.nodemanager.vmem-check-enabled</name>  
        <value>false</value>  
    </property>  
    </configuration>
    

    Format the NameNode

    hdfs namenode -format
    

    Run

    /opt/hadoop/sbin/start-all.sh
    

    Verify

    jps
    # you should see processes like the following; once YARN is up,
    # ResourceManager and NodeManager should appear as well
    -----------------
    5939 Jps
    5636 DataNode
    5493 NameNode
    5814 SecondaryNameNode
    

    Browse to http://localhost:8088 (YARN ResourceManager UI) and http://localhost:50070 (HDFS NameNode UI).
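
    For an end-to-end smoke test, run the bundled MapReduce example (the jar name assumes Hadoop 2.6.5; adjust to your version):

    hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.5.jar pi 2 10
    # a successful run prints an estimated value of Pi at the end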

    2. Install Hive

    Installation and configuration

    tar -zxvf $HIVE_FILE -C /opt
    cd /opt
    sudo ln -s apache-hive-1.2.2-bin hive
    sudo vim /etc/profile
        # add
        export HIVE_HOME=/opt/hive
        export PATH=$PATH:$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$SPARK_HOME/bin
    source /etc/profile
    
    vim /opt/hive/conf/hive-site.xml
    
    <configuration>
            <!-- MySQL JDBC URL; metastore database "hive" on localhost -->
            <property>
                    <name>javax.jdo.option.ConnectionURL</name>
                    <value>jdbc:mysql://localhost:3306/hive</value>
            </property>
            <!-- MySQL JDBC driver -->
            <property>
                    <name>javax.jdo.option.ConnectionDriverName</name>
                    <value>com.mysql.jdbc.Driver</value>
            </property>
            <!-- MySQL user name -->
            <property>
                    <name>javax.jdo.option.ConnectionUserName</name>
                    <value>root</value>
            </property>
            <!-- MySQL password -->
            <property>
                    <name>javax.jdo.option.ConnectionPassword</name>
                    <value>123456</value>
            </property>
    
            <property>
                    <name>hive.metastore.schema.verification</name>
                    <value>false</value>
            </property>
    </configuration>
    
    # Download mysql-connector-java-5.1.44.jar (the MySQL JDBC driver)
    cp mysql-connector-java-5.1.44.jar /opt/hive/lib
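
    The metastore database named in hive-site.xml must exist before Hive first connects; a minimal sketch, assuming the root user and password from the config above:

    mysql -uroot -p123456 -e "CREATE DATABASE IF NOT EXISTS hive DEFAULT CHARACTER SET latin1;"
    # latin1 avoids index-length issues with some Hive metastore schema versions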
    

    Verify

    hive
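
    Once the CLI starts, a one-line smoke test can also be run from the shell (the table name is illustrative):

    hive -e "CREATE TABLE IF NOT EXISTS smoke_test (id INT); SHOW TABLES;"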
    

    3. Install Spark

    Installation and configuration

    tar -zxvf $SPARK_FILE -C /opt
    cd /opt
    sudo ln -s spark-2.2.1-bin-hadoop2.6 spark
    sudo vim /etc/profile
        # add
        export SPARK_HOME=/opt/spark
        export PATH=$PATH:$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$SPARK_HOME/bin
    source /etc/profile
    
    # spark-env.sh does not exist by default; create it from the template first
    cp /opt/spark/conf/spark-env.sh.template /opt/spark/conf/spark-env.sh
    vim /opt/spark/conf/spark-env.sh
        # add
        export JAVA_HOME=/opt/jdk
        export SPARK_MASTER_IP=localhost
        export SCALA_HOME=/opt/scala
        export SPARK_WORKER_MEMORY=4G
    

    Verify

    spark-shell
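
    For a fuller check than just launching the shell, run the bundled SparkPi example (the jar name assumes Spark 2.2.1 built against Scala 2.11):

    spark-submit --class org.apache.spark.examples.SparkPi \
        /opt/spark/examples/jars/spark-examples_2.11-2.2.1.jar 10
    # the output should contain a line like "Pi is roughly 3.14..."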
    

    Hive integration

    # copy the Hive configuration (Spark needs hive-site.xml)
    cp /opt/hive/conf/*.xml /opt/spark/conf
    # copy the MySQL JDBC driver jar
    cp /opt/hive/lib/mysql-connector-java-5.1.44.jar /opt/spark/jars
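
    With the config and driver in place, Spark should be able to reach the Hive metastore; a quick check:

    spark-sql -e "SHOW DATABASES;"
    # should list at least the default database from the Hive metastore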
    
