制作镜像
# --- On the Docker host: start a privileged CentOS 7 container with
# /usr/sbin/init as its main process, then open a shell inside it.
docker run -itd --name centos7 --privileged=true centos:7 /usr/sbin/init
docker exec -it centos7 /bin/bash
# --- Inside the container: install the build toolchain plus the SSH server
# and client (the client package is "openssh-clients").
yum install -y wget make gcc gcc-c++ perl bind-utils openssl openssh-server openssh-clients
# Download, build and install Open MPI 3.1.0 under /usr/local/openmpi.
cd /usr/local/src
wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.0.tar.gz
tar -zxvf openmpi-3.1.0.tar.gz
cd openmpi-3.1.0/
./configure --prefix="/usr/local/openmpi"
make && make install
# Append the three lines that follow the vi command to /etc/profile,
# then reload the profile in the current shell.
vi /etc/profile
# OPENMPI
export PATH=$PATH:/usr/local/openmpi/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/openmpi/lib
source /etc/profile
# Build the example programs shipped with the Open MPI sources.
cd /usr/local/src/openmpi-3.1.0/examples
make
# Set the root password interactively and restart the SSH daemon.
passwd
systemctl restart sshd
# --- Back on the Docker host: snapshot the container as an image and push it.
docker commit centos7 mrwangwei/centos-openmpi:v1
docker login
docker push mrwangwei/centos-openmpi:v1
手动创建容器
# Create a user-defined network so that each container can be given a fixed IP.
# The subnet is written in canonical form: with a /16 prefix the network
# address is 192.168.0.0 (192.168.10.0/16 has host bits set). The address
# range covered is unchanged, so the --ip values below remain valid.
docker network create --subnet=192.168.0.0/16 my_network
docker network ls
# Start three containers (master, node1, node2) from the same image. Each one
# gets a static IP, its own hostname, and /etc/hosts entries for the other two.
docker run -itd --name master -h master --ip 192.168.10.30 --net my_network --add-host node1:192.168.10.31 --add-host node2:192.168.10.32 --privileged=true mrwangwei/centos-openmpi:v1
docker run -itd --name node1 -h node1 --ip 192.168.10.31 --net my_network --add-host master:192.168.10.30 --add-host node2:192.168.10.32 --privileged=true mrwangwei/centos-openmpi:v1
docker run -itd --name node2 -h node2 --ip 192.168.10.32 --net my_network --add-host master:192.168.10.30 --add-host node1:192.168.10.31 --privileged=true mrwangwei/centos-openmpi:v1
使用docker-compose创建容器
# docker-compose definition of a three-node Open MPI cluster.
# All services run the same image, get a static address on my_network, and
# receive /etc/hosts entries for every node so they can reach each other by name.
version: '3'
services:
  master:
    image: mrwangwei/centos-openmpi:v1
    container_name: master
    restart: always
    tty: true
    privileged: true
    networks:
      my_network:
        ipv4_address: 192.168.10.132
    extra_hosts:
      - "master:192.168.10.132"
      - "node1:192.168.10.133"
      - "node2:192.168.10.134"
  node1:
    image: mrwangwei/centos-openmpi:v1
    container_name: node1
    restart: always
    tty: true
    privileged: true
    networks:
      my_network:
        ipv4_address: 192.168.10.133
    extra_hosts:
      - "master:192.168.10.132"
      - "node1:192.168.10.133"
      - "node2:192.168.10.134"
  node2:
    image: mrwangwei/centos-openmpi:v1
    container_name: node2
    restart: always
    tty: true
    privileged: true
    networks:
      my_network:
        ipv4_address: 192.168.10.134
    extra_hosts:
      - "master:192.168.10.132"
      - "node1:192.168.10.133"
      - "node2:192.168.10.134"
networks:
  my_network:
    ipam:
      config:
        # Canonical CIDR form: a /16 network address ends in .0.0. The range
        # still contains the 192.168.10.x addresses assigned above.
        - subnet: 192.168.0.0/16
配置ssh无密码访问并运行mpi测试程序
# Open a shell inside the master container.
docker exec -it master /bin/bash
# Generate an RSA key pair and copy the public key to node1 and node2.
ssh-keygen -t rsa
ssh-copy-id node1
ssh-copy-id node2
# Reload /etc/profile in the current shell.
source /etc/profile
# Create a file named "machines" whose content is the three host names
# listed after the vi command (one per line).
vi machines
master
node1
node2
# Run hello_c with 10 processes on the hosts listed in "machines".
mpirun -np 10 --oversubscribe --allow-run-as-root --machinefile machines --prefix /usr/local/openmpi hello_c
question1:
--------------------------------------------------------------------------
There are not enough slots available in the system to satisfy the 10 slots
that were requested by the application:
hello_c
Either request fewer slots for your application, or make more slots available
for use.
--------------------------------------------------------------------------
--oversubscribe 允许在节点上启动多于可用 slots 数的进程(超额订阅,并非超线程)
question2:
--------------------------------------------------------------------------
mpirun has detected an attempt to run as root.
Running as root is *strongly* discouraged as any mistake (e.g., in
defining TMPDIR) or bug can result in catastrophic damage to the OS
file system, leaving your system in an unusable state.
We strongly suggest that you run mpirun as a non-root user.
You can override this protection by adding the --allow-run-as-root
option to your command line. However, we reiterate our strong advice
against doing so - please do so at your own risk.
--------------------------------------------------------------------------
--allow-run-as-root 允许root跑
question3:
--machinefile 和 --hostfile 同义,用于指定host文件
host文件内容
master slots=2
node1 slots=2
node2 slots=2
slots 表示该节点上最多可调度的进程数(通常设为该节点的核心数)。若 -np 大于所有节点 slots 之和,mpirun 会报 question1 中的 "not enough slots" 错误,此时需要减小 -np、增大 slots,或加上 --oversubscribe。
question4:
bash: orted: command not found
--------------------------------------------------------------------------
ORTE was unable to reliably start one or more daemons.
This usually is caused by:
* not finding the required libraries and/or binaries on
one or more nodes. Please check your PATH and LD_LIBRARY_PATH
settings, or configure OMPI with --enable-orterun-prefix-by-default
* lack of authority to execute on one or more specified nodes.
Please verify your allocation and authorities.
* the inability to write startup files into /tmp (--tmpdir/orte_tmpdir_base).
Please check with your sys admin to determine the correct location to use.
* compilation of the orted with dynamic libraries when static are required
(e.g., on Cray). Please check your configure cmd line and consider using
one of the contrib/platform definitions for your system type.
* an inability to create a connection back to mpirun due to a
lack of common network interfaces and/or no route found between
them. Please check network connectivity (including firewalls
and network routing requirements).
--------------------------------------------------------------------------
--prefix /usr/local/openmpi 需要指定远程节点的openmpi目录
# CentOS 7 image with Open MPI 3.1.0 installed under /usr/local/openmpi and an
# SSH daemon as the main process.
FROM centos:7
LABEL maintainer="mrwangwei"

# Put the Open MPI binaries and libraries on the search paths used by the
# build steps below and by the container's main process.
# LD_LIBRARY_PATH is set outright instead of appending to the previous value,
# which would leave an empty leading entry whenever that value is unset.
ENV PATH=$PATH:/usr/local/openmpi/bin \
    LD_LIBRARY_PATH=/usr/local/openmpi/lib

# Install the toolchain, build Open MPI and its examples, keep a copy of the
# built examples in /root/mpi_hello_world, and delete the sources in the SAME
# layer so they do not remain in the image.
RUN yum install -y \
        bind-utils \
        gcc \
        gcc-c++ \
        make \
        openssh-clients \
        openssh-server \
        openssl \
        perl \
        wget \
    && yum clean all \
    && cd /usr/local/src \
    && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.0.tar.gz \
    && tar -zxf openmpi-3.1.0.tar.gz \
    && cd openmpi-3.1.0 \
    && ./configure --prefix="/usr/local/openmpi" \
    && make -j "$(nproc)" install \
    && make -C examples \
    && cp -r examples /root/mpi_hello_world \
    && cd / \
    && rm -rf /usr/local/src/*

# Set the root password and create the sshd host keys.
# -N '' gives each key an empty passphrase so ssh-keygen never prompts during
# the build; each key file is generated with the type its name states.
# WARNING: the root password is hard-coded in the image; change it before
# exposing a container to an untrusted network.
# NOTE: host keys generated at build time are identical in every container
# started from this image.
RUN echo "root:1234qwer" | chpasswd \
    && ssh-keygen -q -N '' -t rsa -b 2048 -f /etc/ssh/ssh_host_rsa_key \
    && ssh-keygen -q -N '' -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key \
    && ssh-keygen -q -N '' -t ed25519 -f /etc/ssh/ssh_host_ed25519_key \
    && mkdir -p /var/run/sshd

# sshd listens on its default port.
EXPOSE 22

# Run sshd in the foreground as the container's main process.
CMD ["/usr/sbin/sshd", "-D"]
###############################################################################
# nvidia/cuda:10.0, OPENMPI 3.1.0, and OSU MPI Benchmarks
#
# Build with:
# sudo docker build -t openmpi:3.1.0 . --build-arg ROOT_PASSWD=root_passwd
#
# Run with:
# sudo docker run -it openmpi:3.1.0
# mpirun --allow-run-as-root --prefix /usr/local/openmpi -H node1,node2 -np 2 osu_latency
#
###############################################################################
###############################################################################
# Build stage
###############################################################################
FROM nvidia/cuda:10.0-cudnn7-devel-centos7
LABEL maintainer="mrwangwei"

# Root password for SSH logins; override with --build-arg ROOT_PASSWD=...
# NOTE: build arguments are recorded in the image history, so this value is
# not secret.
ARG ROOT_PASSWD=1234qwer

# Open MPI is installed under /usr/local/openmpi so that the
# "--prefix /usr/local/openmpi" option in the run command above points at the
# real installation. The OSU benchmark directories are added to PATH so that
# programs such as osu_latency can be started by name.
# NOTE(review): the OSU directories assume the default install layout of
# osu-micro-benchmarks 5.7 under /usr/local/libexec - confirm after a build.
ENV PATH=$PATH:/usr/local/openmpi/bin:/usr/local/libexec/osu-micro-benchmarks/mpi/pt2pt:/usr/local/libexec/osu-micro-benchmarks/mpi/collective:/usr/local/libexec/osu-micro-benchmarks/mpi/one-sided:/usr/local/libexec/osu-micro-benchmarks/mpi/startup \
    LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/openmpi/lib

WORKDIR /opt

# Install required packages (the cuda and nvidia-ml repositories are disabled
# first, as in the original recipe).
RUN yum-config-manager --disable cuda && \
    yum-config-manager --disable nvidia-ml && \
    yum install -y wget make gcc gcc-c++ perl bind-utils openssl openssh-server openssh-clients && \
    yum clean all

# Install openmpi: build, install, build the examples, keep a copy of them in
# /root/mpi_hello_world, then remove sources and tarball in the same layer.
# NOTE(review): Open MPI is configured without CUDA support while the OSU
# benchmarks below are built with --enable-cuda - confirm this is intended.
RUN curl -fSL -o openmpi-3.1.0.tar.gz https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.0.tar.gz && \
    tar -zxf openmpi-3.1.0.tar.gz && \
    cd openmpi-3.1.0 && \
    ./configure --prefix=/usr/local/openmpi && \
    make -j "$(nproc)" install && \
    make -C examples && \
    cp -r examples /root/mpi_hello_world && \
    cd /opt && \
    rm -rf openmpi-3.1.0 openmpi-3.1.0.tar.gz

# Install OSU MPI Benchmarks with the Open MPI compiler wrappers, then remove
# sources and tarball in the same layer.
RUN wget http://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-5.7.tar.gz && \
    tar -zxf osu-micro-benchmarks-5.7.tar.gz && \
    cd osu-micro-benchmarks-5.7 && \
    ./configure CC=mpicc CXX=mpicxx --enable-cuda --with-cuda-include=/usr/local/cuda/include --with-cuda-libpath=/usr/local/cuda/lib64 && \
    make && make install && \
    cd /opt && \
    rm -rf osu-micro-benchmarks-5.7 osu-micro-benchmarks-5.7.tar.gz

# start sshd: set the root password and create the host keys.
# -N '' gives each key an empty passphrase so ssh-keygen never prompts during
# the build; each key file is generated with the type its name states.
RUN echo "root:${ROOT_PASSWD}" | chpasswd && \
    ssh-keygen -q -N '' -t rsa -b 2048 -f /etc/ssh/ssh_host_rsa_key && \
    ssh-keygen -q -N '' -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key && \
    ssh-keygen -q -N '' -t ed25519 -f /etc/ssh/ssh_host_ed25519_key && \
    mkdir -p /var/run/sshd

# sshd listens on its default port.
EXPOSE 22

# Run sshd in the foreground as the container's main process.
CMD ["/usr/sbin/sshd", "-D"]
网友评论