美文网首页
单机使用docker部署多个容器并行openmpi

单机使用docker部署多个容器并行openmpi

作者: 全村滴希望 | 来源:发表于2020-11-03 16:12 被阅读0次

    制作镜像

    # Start a privileged CentOS 7 container whose main process is
    # /usr/sbin/init, so services such as sshd can be managed with systemctl.
    docker run -itd --name centos7 --privileged=true centos:7 /usr/sbin/init
    # Open an interactive shell in that container; the following image-build
    # commands are typed inside this shell.
    docker exec -it centos7 /bin/bash
    # Build toolchain plus the SSH server and client packages. The client
    # package is named "openssh-clients"; it provides ssh and ssh-copy-id.
    yum install -y wget make gcc gcc-c++ perl bind-utils openssl openssh-server openssh-clients
    
    # Download and unpack the Open MPI 3.1.0 source tarball under /usr/local/src.
    cd /usr/local/src
    wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.0.tar.gz
    tar -zxvf openmpi-3.1.0.tar.gz
    cd openmpi-3.1.0/
    # Install into /usr/local/openmpi; the PATH / LD_LIBRARY_PATH entries and
    # the mpirun --prefix value used in this guide point at this directory.
    ./configure --prefix="/usr/local/openmpi"
    # Only install if the build succeeded.
    make && make install
    
    # Open /etc/profile in an editor and append the "# OPENMPI" marker and the
    # two export lines shown below (they are file content, not commands).
    vi /etc/profile
    # OPENMPI
    export PATH=$PATH:/usr/local/openmpi/bin
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/openmpi/lib
    # Reload the profile so the current shell picks up the new variables.
    source /etc/profile
    # Build the example programs shipped in the Open MPI source tree.
    cd /usr/local/src/openmpi-3.1.0/examples
    make
    
    # Set the current user's (root's) password interactively; presumably this
    # is the password entered later for password-based SSH logins such as the
    # ssh-copy-id step.
    passwd
    # Restart the SSH daemon via systemd.
    systemctl restart sshd
    
    
    # Run on the Docker host: snapshot the prepared container as an image.
    docker commit centos7 mrwangwei/centos-openmpi:v1
    # Authenticate against the registry, then publish the image.
    docker login
    docker push mrwangwei/centos-openmpi:v1
    

    手动创建容器

    # Create a user-defined bridge network so containers can be given fixed
    # IPs. The subnet must be a proper network address: 192.168.10.0 with a
    # /16 mask has host bits set, so use /24, which covers every
    # 192.168.10.x address assigned below.
    docker network create --subnet=192.168.10.0/24 my_network
    docker network ls
    # Start three containers (master, node1, node2) from the prepared image.
    # Each gets a static IP on my_network and --add-host entries for the other
    # two, so the nodes can resolve each other by name.
    docker run -itd --name master -h master --ip 192.168.10.30 --net my_network --add-host node1:192.168.10.31 --add-host node2:192.168.10.32 --privileged=true mrwangwei/centos-openmpi:v1
    docker run -itd --name node1 -h node1 --ip 192.168.10.31 --net my_network  --add-host master:192.168.10.30 --add-host node2:192.168.10.32 --privileged=true mrwangwei/centos-openmpi:v1
    docker run -itd --name node2 -h node2 --ip 192.168.10.32 --net my_network  --add-host master:192.168.10.30 --add-host node1:192.168.10.31 --privileged=true mrwangwei/centos-openmpi:v1
    

    使用docker-compose创建容器

    # docker-compose definition of the same three-node cluster (master, node1,
    # node2). Every service uses the prepared image, a static IPv4 address on
    # my_network, and extra_hosts entries so all nodes resolve each other by
    # name.
    version: '3'
    services:
       master:
          image: mrwangwei/centos-openmpi:v1
          container_name: master
          restart: always
          tty: true
          privileged: true
          networks:
             my_network:
                ipv4_address: 192.168.10.132
          extra_hosts:
             - "master:192.168.10.132"
             - "node1:192.168.10.133"
             - "node2:192.168.10.134"
       node1:
          image: mrwangwei/centos-openmpi:v1
          container_name: node1
          restart: always
          tty: true
          privileged: true
          networks:
             my_network:
                ipv4_address: 192.168.10.133
          extra_hosts:
             - "master:192.168.10.132"
             - "node1:192.168.10.133"
             - "node2:192.168.10.134"
       node2:
          image: mrwangwei/centos-openmpi:v1
          container_name: node2
          restart: always
          tty: true
          privileged: true
          networks:
             my_network:
                ipv4_address: 192.168.10.134
          extra_hosts:
             - "master:192.168.10.132"
             - "node1:192.168.10.133"
             - "node2:192.168.10.134"
    networks:
       my_network:
          ipam:
             config:
             # Must be a network address: 192.168.10.0 with a /16 mask has
             # host bits set. /24 covers all 192.168.10.x addresses above.
             - subnet: 192.168.10.0/24
    

    配置ssh无密码访问并运行mpi测试程序

    # Open a shell in the master container.
    docker exec -it master /bin/bash
    # Generate an RSA key pair (interactive prompts), then install the public
    # key on both worker nodes so master can log in without a password.
    ssh-keygen -t rsa
    ssh-copy-id node1
    ssh-copy-id node2
    
    # Load the Open MPI PATH / LD_LIBRARY_PATH settings into this shell.
    source /etc/profile
    
    # Create a hostfile named "machines" in an editor; the three host names
    # below are the file's content (one node per line), not commands.
    vi machines
    master
    node1
    node2
    
    # Launch 10 processes of hello_c across the hosts listed in "machines".
    #   --oversubscribe      allow more processes than available slots
    #   --allow-run-as-root  override mpirun's refusal to run as root
    #   --machinefile        hostfile to use (synonym of --hostfile)
    #   --prefix             Open MPI install directory on the remote nodes
    mpirun -np 10 --oversubscribe --allow-run-as-root --machinefile machines --prefix /usr/local/openmpi hello_c
    

    question1:

    --------------------------------------------------------------------------
    There are not enough slots available in the system to satisfy the 10 slots
    that were requested by the application:
      hello_c
    
    Either request fewer slots for your application, or make more slots available
    for use.
    --------------------------------------------------------------------------
    

    --oversubscribe 允许启动的进程数超过可用的slots数(超额订阅)

    question2:

    --------------------------------------------------------------------------
    mpirun has detected an attempt to run as root.
    
    Running as root is *strongly* discouraged as any mistake (e.g., in
    defining TMPDIR) or bug can result in catastrophic damage to the OS
    file system, leaving your system in an unusable state.
    
    We strongly suggest that you run mpirun as a non-root user.
    
    You can override this protection by adding the --allow-run-as-root
    option to your command line.  However, we reiterate our strong advice
    against doing so - please do so at your own risk.
    --------------------------------------------------------------------------
    

    --allow-run-as-root 允许root跑

    question3:
    --machinefile 和 --hostfile 同义,用于指定host文件
    host文件内容

    master    slots=2
    node1     slots=2
    node2     slots=2
    

    slots表示每个节点可用的槽位数(通常对应核心数)。如果加上slots,当-np的进程数不超过各节点slots的总和时,进程按slots分配到各节点;超过总和时必须加上 --oversubscribe,否则会出现question1中的报错。

    question4:

    bash: orted: command not found
    --------------------------------------------------------------------------
    ORTE was unable to reliably start one or more daemons.
    This usually is caused by:
    
    * not finding the required libraries and/or binaries on
      one or more nodes. Please check your PATH and LD_LIBRARY_PATH
      settings, or configure OMPI with --enable-orterun-prefix-by-default
    
    * lack of authority to execute on one or more specified nodes.
      Please verify your allocation and authorities.
    
    * the inability to write startup files into /tmp (--tmpdir/orte_tmpdir_base).
      Please check with your sys admin to determine the correct location to use.
    
    *  compilation of the orted with dynamic libraries when static are required
      (e.g., on Cray). Please check your configure cmd line and consider using
      one of the contrib/platform definitions for your system type.
    
    * an inability to create a connection back to mpirun due to a
      lack of common network interfaces and/or no route found between
      them. Please check network connectivity (including firewalls
      and network routing requirements).
    --------------------------------------------------------------------------
    

    --prefix /usr/local/openmpi 需要指定远程节点的openmpi目录

    # CentOS 7 image with Open MPI 3.1.0 built from source and sshd as the
    # container's main process.
    FROM centos:7

    MAINTAINER mrwangwei
    # Expose the Open MPI install prefix to every process in the container.
    ENV PATH $PATH:/usr/local/openmpi/bin
    ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/openmpi/lib
    # Install the build toolchain and SSH packages, then build and install
    # Open MPI into /usr/local/openmpi.
    RUN  yum install -y wget make gcc gcc-c++ perl bind-utils openssl openssh-server openssh-clients \
         && cd /usr/local/src \
         && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.0.tar.gz \
         && tar -zxvf openmpi-3.1.0.tar.gz \
         && cd openmpi-3.1.0/ \
         && ./configure --prefix="/usr/local/openmpi" \
         &&  make -j $(nproc) install 
    # Build the bundled examples and keep a copy in /root/mpi_hello_world,
    # remove the sources, set the root password, and pre-generate the SSH host
    # keys. -N "" gives each key an empty passphrase so the build never waits
    # on a prompt; each key type matches the file name it is written to.
    RUN  cd /usr/local/src/openmpi-3.1.0/examples \
         && make \
         && cp -r /usr/local/src/openmpi-3.1.0/examples /root/mpi_hello_world \
         && rm -rf /usr/local/src/* \
         && echo "root:1234qwer" | chpasswd \
         && ssh-keygen -t rsa -b 2048 -N "" -f /etc/ssh/ssh_host_rsa_key \
         && ssh-keygen -t ecdsa -N "" -f /etc/ssh/ssh_host_ecdsa_key \
         && ssh-keygen -t ed25519 -N "" -f /etc/ssh/ssh_host_ed25519_key \
         && mkdir -p /var/run/sshd
    # Run sshd in the foreground (-D) so the container stays alive.
    CMD ["/usr/sbin/sshd", "-D"]
    
    ###############################################################################
    # nvidia/cuda:10.0, OPENMPI 3.1.0, and OSU MPI Benchmarks
    #
    # Build with:
    # sudo docker build -t openmpi:3.1.0 . --build-arg ROOT_PASSWD=root_passwd
    #
    # Run with:
    # sudo docker run -it openmpi:3.1.0
    # mpirun --allow-run-as-root --prefix /usr/local -H node1,node2 -np 2 /usr/local/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_latency
    #
    ###############################################################################
    
    ###############################################################################
    # Build stage
    ###############################################################################
    FROM nvidia/cuda:10.0-cudnn7-devel-centos7

    MAINTAINER mrwangwei

    # Root password for SSH logins; override with --build-arg ROOT_PASSWD=...
    ARG ROOT_PASSWD=1234qwer

    WORKDIR /opt

    # Install required packages (the cuda and nvidia-ml yum repositories are
    # disabled first)
    RUN yum-config-manager --disable cuda && \
        yum-config-manager --disable nvidia-ml && \
        yum install -y wget make gcc gcc-c++ perl bind-utils openssl openssh-server openssh-clients

    # Install openmpi. configure is run without --prefix, so the default
    # install prefix (/usr/local) is used. The bundled examples are built and
    # copied to /root/mpi_hello_world before the sources are removed.
    RUN cd /opt && curl -o openmpi-3.1.0.tar.gz https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.0.tar.gz && \
        tar -zxvf /opt/openmpi-3.1.0.tar.gz && cd /opt/openmpi-3.1.0 && \
        ./configure && make -j $(nproc) install && \
        cd /opt/openmpi-3.1.0/examples && make && \
        cp -r /opt/openmpi-3.1.0/examples /root/mpi_hello_world && \
        rm -rf /opt/openmpi-3.1.0 /opt/openmpi-3.1.0.tar.gz

    # Install OSU MPI Benchmarks, compiled with the Open MPI wrapper compilers
    # and CUDA support enabled
    RUN cd /opt && wget http://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-5.7.tar.gz && \
        tar -zxvf /opt/osu-micro-benchmarks-5.7.tar.gz && cd /opt/osu-micro-benchmarks-5.7 && \
        ./configure CC=mpicc CXX=mpicxx --enable-cuda --with-cuda-include=/usr/local/cuda/include --with-cuda-libpath=/usr/local/cuda/lib64 && \
        make && make install

    # Prepare sshd: set the root password and pre-generate the SSH host keys.
    # -N "" gives each key an empty passphrase so the build never waits on a
    # prompt; each key type matches the file name it is written to.
    RUN echo "root:${ROOT_PASSWD}" | chpasswd && \
        ssh-keygen -t rsa -b 2048 -N "" -f /etc/ssh/ssh_host_rsa_key && \
        ssh-keygen -t ecdsa -N "" -f /etc/ssh/ssh_host_ecdsa_key && \
        ssh-keygen -t ed25519 -N "" -f /etc/ssh/ssh_host_ed25519_key && \
        mkdir -p /var/run/sshd

    # Run sshd in the foreground (-D) so the container stays alive.
    CMD ["/usr/sbin/sshd", "-D"]
    
    

    相关文章

      网友评论

          本文标题:单机使用docker部署多个容器并行openmpi

          本文链接:https://www.haomeiwen.com/subject/mwxivktx.html