1. Start three containers
Container layout

| | hadoop-namenode | hadoop-second-namenode | hadoop-datanode |
|---|---|---|---|
| hdfs | NameNode/DataNode | SecondaryNameNode/DataNode | DataNode |
| yarn | NodeManager | NodeManager | ResourceManager |
- Create an hdfs Docker network so the containers can reach each other by name
# Create the network
docker network create hdfs
# List the networks to confirm it exists
docker network ls
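To double-check the network and, later on, see each container's address on it, docker network inspect prints the details:
# Show the hdfs network's subnet and the containers attached to it
docker network inspect hdfs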
- Start the namenode
docker run -d -it -p 37764 -p 50090 -p 38832 -p 44026 -p 50020 -p 50475 -p 50075 -p 50010 -p 50470 -p 50070 -p 9000 --network hdfs --hostname hadoop-namenode --name hadoop-namenode centos:6.6 /bin/bash
- Start the second namenode (it also doubles as a datanode)
docker run -d -it -p 37764 -p 50090 -p 38832 -p 44026 -p 50020 -p 50475 -p 50075 -p 50010 -p 50470 -p 50070 -p 9000 --network hdfs --hostname hadoop-second-namenode --name hadoop-second-namenode centos:6.6 /bin/bash
- Start the datanode
docker run -d -it -p 37764 -p 50090 -p 38832 -p 44026 -p 50020 -p 50475 -p 50075 -p 50010 -p 50470 -p 50070 -p 9000 --network hdfs --hostname hadoop-datanode --name hadoop-datanode centos:6.6 /bin/bash
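The three docker run commands differ only in --hostname and --name; each bare -p publishes that container port on a random host port. A small sketch that starts all three in one loop, assuming the same port list as above:
# Sketch: start all three containers with identical port publishing
PORTS="-p 9000 -p 37764 -p 38832 -p 44026 -p 50010 -p 50020 -p 50070 -p 50075 -p 50090 -p 50470 -p 50475"
for NODE in hadoop-namenode hadoop-second-namenode hadoop-datanode; do
  docker run -d -it $PORTS --network hdfs --hostname $NODE --name $NODE centos:6.6 /bin/bash
done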
2. Passwordless SSH login
- Install the SSH server and client in all three containers; the namenode is shown as the example (a host-side loop covering all three follows these commands)
# Install the SSH server
yum install -y openssh-server
# Install the SSH client
yum install -y openssh-clients
# Set the root password for SSH logins
echo 'root:123456' | chpasswd
sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
echo "export VISIBLE=now" >> /etc/profile
# Start the SSH daemon
/etc/init.d/sshd start
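The same setup is needed in all three containers; a sketch that drives it from the host with docker exec (assuming the containers from step 1 are up):
# Sketch: install and start sshd in every container from the host
for NODE in hadoop-namenode hadoop-second-namenode hadoop-datanode; do
  docker exec $NODE /bin/bash -c "
    yum install -y openssh-server openssh-clients &&
    echo 'root:123456' | chpasswd &&
    sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config &&
    sed -i 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' /etc/pam.d/sshd &&
    /etc/init.d/sshd start"
done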
- Log in to the namenode
# Enter the container
docker exec -it hadoop-namenode /bin/bash
# Generate a public/private key pair
ssh-keygen -t rsa
# Copy the public key to the second namenode and the datanode (and to this host itself, which start-dfs.sh also reaches over SSH)
ssh-copy-id localhost
ssh-copy-id 0.0.0.0
ssh-copy-id hadoop-namenode
ssh-copy-id hadoop-second-namenode
ssh-copy-id hadoop-datanode
- Log in to hadoop-second-namenode
# Enter the container
docker exec -it hadoop-second-namenode /bin/bash
# Generate a public/private key pair
ssh-keygen -t rsa
# Copy the public key to the namenode and the datanode (and to this node itself)
ssh-copy-id localhost
ssh-copy-id hadoop-namenode
ssh-copy-id hadoop-second-namenode
ssh-copy-id hadoop-datanode
- Log in to hadoop-datanode
# Enter the container
docker exec -it hadoop-datanode /bin/bash
# Generate a public/private key pair
ssh-keygen -t rsa
# Copy the public key to the namenode and the second namenode (and to this node itself)
ssh-copy-id localhost
ssh-copy-id hadoop-namenode
ssh-copy-id hadoop-second-namenode
ssh-copy-id hadoop-datanode
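ssh-keygen prompts a few times (the defaults are fine), and each ssh-copy-id asks once for the root password set earlier (123456). Once all three containers are done, this check, run inside any of them, should print every hostname without a password prompt:
# Verify passwordless SSH to every node
for NODE in localhost hadoop-namenode hadoop-second-namenode hadoop-datanode; do
  ssh -o StrictHostKeyChecking=no $NODE hostname
done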
3. Download and configure Hadoop
- Download the Hadoop binary tarball; I chose version 2.8.5
cd /home
yum install -y tar wget which
wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz
tar -zxvf hadoop-2.8.5.tar.gz
rm -rf hadoop-2.8.5.tar.gz
- Install the JDK with yum
yum -y install java-1.8.0-openjdk
yum -y install java-1.8.0-openjdk-devel
- Configure environment variables in ~/.bashrc
Set JAVA_HOME to the version actually installed; the build I got may not match yours (a sketch for discovering the path follows this block).
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.242.b07-1.el6_10.x86_64
export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib
export PATH=$JAVA_HOME/bin:$PATH:$HOME/bin
export HADOOP_HOME=/home/hadoop-2.8.5
export PATH=$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$PATH
export CLASSPATH=$HADOOP_HOME/lib:$CLASSPATH
source ~/.bashrc
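If you would rather not hard-code the OpenJDK build number, the JDK path can be derived from the javac that yum installed; a minimal sketch:
# Resolve the alternatives symlinks behind javac to the real JDK directory,
# e.g. /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.242.b07-1.el6_10.x86_64
export JAVA_HOME=$(readlink -f $(which javac) | sed 's:/bin/javac::')
echo $JAVA_HOME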
- Edit core-site.xml
Note: on the hadoop-namenode container, set fs.defaultFS to hdfs://0.0.0.0:9000 as shown below; on the other two nodes it must be hdfs://hadoop-namenode:9000.
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://0.0.0.0:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/tmp/hadoop-2.8.5</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
</configuration>
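With the environment variables from above in place, you can confirm the value each node actually picked up:
# Prints hdfs://0.0.0.0:9000 on hadoop-namenode
# and hdfs://hadoop-namenode:9000 on the other two nodes
hdfs getconf -confKey fs.defaultFS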
- Edit hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop-second-namenode:50090</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/tmp/hadoop-2.8.5</value>
</property>
</configuration>
- Edit slaves (one worker hostname per line)
hadoop-namenode
hadoop-second-namenode
hadoop-datanode
- Edit yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop-datanode</value>
</property>
</configuration>
- Edit mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
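All of these files live in $HADOOP_HOME/etc/hadoop and, apart from fs.defaultFS, should be identical everywhere. Since passwordless SSH is already set up, one option (a sketch, assuming Hadoop was unpacked to /home/hadoop-2.8.5 on every node) is to edit once on hadoop-namenode, push the directory out, then fix fs.defaultFS on the receivers:
# Sketch: copy the finished configuration to the other two nodes
for NODE in hadoop-second-namenode hadoop-datanode; do
  scp -r /home/hadoop-2.8.5/etc/hadoop/* $NODE:/home/hadoop-2.8.5/etc/hadoop/
done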
4. Start the cluster
Enter the hadoop-namenode container and run:
hdfs namenode -format
start-dfs.sh
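If the format and startup succeeded, each container runs the daemons from the layout table, and the NameNode should report three live DataNodes:
# List the Java daemons on this node (expect NameNode and DataNode here)
jps
# Ask the NameNode how many DataNodes registered; expect "Live datanodes (3)"
hdfs dfsadmin -report | grep 'Live datanodes'
To bring up YARN as planned in the table, start-yarn.sh would be run on hadoop-datanode, since yarn.resourcemanager.hostname points there.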
Then check the port mappings for hadoop-namenode; mine, for example:
570b3fd120ae centos:6.6 "/bin/bash" 40 minutes ago Up 40 minutes 0.0.0.0:32945->9000/tcp, 0.0.0.0:32944->37764/tcp, 0.0.0.0:32943->38832/tcp, 0.0.0.0:32942->44026/tcp, 0.0.0.0:32941->50010/tcp, 0.0.0.0:32940->50020/tcp, 0.0.0.0:32939->50070/tcp, 0.0.0.0:32938->50075/tcp, 0.0.0.0:32937->50090/tcp, 0.0.0.0:32936->50470/tcp, 0.0.0.0:32935->50475/tcp hadoop-namenode
So the NameNode web UI (container port 50070) is reached at http://ip:32939.
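Rather than reading the whole docker ps line, docker port looks up a single mapping:
# Print the host port mapped to the NameNode web UI (container port 50070)
docker port hadoop-namenode 50070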
5. Pitfalls
- In Docker, put environment variables in ~/.bashrc; anything written to /etc/profile or ~/.bash_profile stops taking effect once you re-enter the container, because docker exec opens a non-login shell.
- After a container restarts, /etc/init.d/sshd start has to be run again; don't forget (a host-side loop for this follows).
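A sketch for that second pitfall, bringing sshd back up in all three containers after a restart:
# Run after docker restart, before start-dfs.sh
for NODE in hadoop-namenode hadoop-second-namenode hadoop-datanode; do
  docker exec $NODE /etc/init.d/sshd start
done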