1. Download the binary package
# Download
wget http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.7.6/hadoop-2.7.6.tar.gz
# Extract
tar zxvf hadoop-2.7.6.tar.gz
# Create a symlink so you can switch Hadoop versions later without editing the environment variables
ln -s hadoop-2.7.6 hadoop
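The Tsinghua mirror only keeps recent releases, so if the link above has gone stale, the Apache release archive keeps every version; I'm assuming it follows Apache's standard archive layout. It's also worth comparing the tarball against the checksum published next to it:
# Fallback: fetch from the Apache release archive
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.6/hadoop-2.7.6.tar.gz
# Compare against the checksum file published alongside the tarball
sha256sum hadoop-2.7.6.tar.gz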
2. Add environment variables
Add the following to your shell's init script (~/.zshrc or ~/.bashrc, depending on which shell you use):
export HADOOP_HOME=$HOME/hadoop
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
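After saving, reload the init script and check that the variables took effect; hadoop version should print the release banner if PATH and HADOOP_HOME are right:
# Reload the shell config (use ~/.zshrc if zsh is your shell)
source ~/.bashrc
hadoop version
echo $HADOOP_CONF_DIR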
Add the following to $HADOOP_HOME/etc/hadoop/hadoop-env.sh:
export HADOOP_IDENT_STRING=$USER
export HADOOP_HOME=$HOME/hadoop
export PATH=$PATH:$HADOOP_HOME/sbin
export PATH=$PATH:$HADOOP_HOME/bin
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
# Path to your actual JDK installation
export JAVA_HOME=/usr/java/jdk1.8.0_144
# export TEZ_CONF_DIR=$HADOOP_HOME/etc/hadoop/tez-site.xml
# export TEZ_JARS=$HOME/tez
# export HADOOP_CLASSPATH=${CLASSPATH}:${TEZ_CONF_DIR}:${TEZ_JARS}/*:${TEZ_JARS}/lib/*
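If you are unsure of the exact JDK path, you can derive JAVA_HOME from whatever java is on the PATH instead of hard-coding it. A sketch, assuming GNU readlink is available (i.e. Linux):
# Resolve the java binary through its symlinks, then strip the trailing /bin/java
export JAVA_HOME=$(dirname $(dirname $(readlink -f $(which java))))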
Add the following to $HADOOP_HOME/etc/hadoop/yarn-env.sh:
export HADOOP_HOME=$HOME/hadoop
export JAVA_HOME=/usr/java/jdk1.8.0_144
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
3. Edit the configuration files in $HADOOP_CONF_DIR: yarn-site.xml, core-site.xml, mapred-site.xml, and hdfs-site.xml
hbase-01 is my server's hostname; replace it with your own wherever it appears.
yarn-site.xml
<configuration>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.timeline-service.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.timeline-service.hostname</name>
    <value>hbase-01</value>
  </property>
  <property>
    <name>yarn.timeline-service.http-cross-origin.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.timeline-service.generic-application-history.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.timeline-service.address</name>
    <value>${yarn.timeline-service.hostname}:10200</value>
  </property>
  <property>
    <name>yarn.timeline-service.webapp.address</name>
    <value>${yarn.timeline-service.hostname}:8188</value>
  </property>
  <property>
    <name>yarn.timeline-service.webapp.https.address</name>
    <value>${yarn.timeline-service.hostname}:8190</value>
  </property>
  <property>
    <description>Handler thread count to serve the client RPC requests.</description>
    <name>yarn.timeline-service.handler-thread-count</name>
    <value>10</value>
  </property>
  <property>
    <name>yarn.timeline-service.generic-application-history.store-class</name>
    <value>org.apache.hadoop.yarn.server.applicationhistoryservice.FileSystemApplicationHistoryStore</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://hbase-01:19888/jobhistory/logs/</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>4</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
  </property>
</configuration>
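Note that start-yarn.sh does not launch the timeline server, and yarn.log.server.url above points at the MapReduce JobHistory server; in Hadoop 2.7.x both are started separately:
# Start the YARN timeline server (the 10200/8188/8190 addresses configured above)
yarn-daemon.sh start timelineserver
# Start the JobHistory server that backs yarn.log.server.url (port 19888)
mr-jobhistory-daemon.sh start historyserver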
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>hadoop.proxyuser.mark.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.mark.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>fs.name.dir</name>
    <value>/tmp/mark/hadoop</value>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://hbase-01:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/tmp/mark</value>
  </property>
</configuration>
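mark in the proxyuser keys (and in the paths throughout) is my username; substitute the user that will run Hadoop. Once HDFS is up in step 5, a quick way to confirm the fs.default.name address is reachable:
# List the HDFS root through the address configured above
hdfs dfs -ls hdfs://hbase-01:9000/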
mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hbase-01:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hbase-01:19888</value>
  </property>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.done-dir</name>
    <value>/history/done</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.intermediate-done-dir</name>
    <value>/history/done_intermediate</value>
  </property>
  <property>
    <name>mapreduce.system.dir</name>
    <value>/tmp/mark/hadoop/mapred_system</value>
  </property>
  <property>
    <name>mapreduce.local.dir</name>
    <value>/tmp/mark/hadoop/mapred_local</value>
  </property>
</configuration>
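The two history paths are HDFS directories, not local ones. The JobHistory server normally creates them at startup, but if it exits with a permissions error you can create them by hand; a sketch (the 1777 mode on the intermediate directory lets every job's user write into it):
hdfs dfs -mkdir -p /history/done /history/done_intermediate
hdfs dfs -chmod 1777 /history/done_intermediate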
hdfs-site.xml
<configuration>
  <!--
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/mark/tmp</value>
  </property>
  -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.name.dir</name>
    <value>/home/mark/hadoopdata/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>/home/mark/hadoopdata/hdfs/datanode</value>
  </property>
</configuration>
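dfs.name.dir and dfs.data.dir are local filesystem paths. Hadoop will usually create them itself, but pre-creating them as the user that runs Hadoop avoids permission surprises:
mkdir -p /home/mark/hadoopdata/hdfs/namenode /home/mark/hadoopdata/hdfs/datanode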
4. If you have multiple servers and want a cluster, set up passwordless SSH between them so every machine can reach the others without a password (any SSH key tutorial covers this; a sketch follows below). Then edit the slaves file under $HADOOP_CONF_DIR: remove localhost and add each server's hostname, one per line. Finally, use scp to copy the entire hadoop directory to the same path on every other server.
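A minimal sketch of the key exchange and copy, run from the master node; hbase-02 stands in for one worker's hostname and mark for the user, so adjust both:
# Generate a key pair if this machine doesn't have one (accept the defaults)
ssh-keygen -t rsa
# Install the public key on the worker so ssh no longer asks for a password
ssh-copy-id mark@hbase-02
# Copy the Hadoop directory and recreate the symlink on the worker
scp -r ~/hadoop-2.7.6 mark@hbase-02:~/
ssh mark@hbase-02 'ln -s hadoop-2.7.6 hadoop'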
5. Start the services
# Format the NameNode (first run only; reformatting wipes HDFS metadata)
hdfs namenode -format
# Start the HDFS and YARN daemons
start-dfs.sh && start-yarn.sh
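jps makes a quick sanity check; on a single-node setup all five Hadoop daemons should be listed:
# Expect NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager
jps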
6. Visit ports 50070 (NameNode web UI), 50075 (DataNode web UI), and 8088 (ResourceManager web UI). In my case a spark-sql job was already running.
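The ResourceManager also exposes a REST API, handy for checking cluster state without a browser; cluster/info is part of the standard YARN REST API:
curl http://hbase-01:8088/ws/v1/cluster/info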
[Screenshots: the NameNode, DataNode, and ResourceManager web UIs]