美文网首页
多机使用dockerswarm部署多个容器并行openmpi

多机使用dockerswarm部署多个容器并行openmpi

作者: 全村滴希望 | 来源:发表于2020-11-07 22:56 被阅读0次

    将镜像导出并复制到各 node 节点,然后配置并开启 NFS 共享

    # Save the image to a tarball and copy it to node1 and node2.
    docker save > openmpi.tar mrwangwei/centos-openmpi:v1
    scp openmpi.tar node1:/usr/local/src/code/
    scp openmpi.tar node2:/usr/local/src/code/
    # NOTE(review): presumably run on node1 and node2 after the copy - confirm.
    docker load < openmpi.tar
    
    # Install the NFS utilities.
    yum install nfs-utils -y
    
    # NOTE(review): the line after `vi` is presumably the entry to put into
    # /etc/exports, not a shell command - confirm.
    vi /etc/exports
    # NOTE(review): a /16 mask on 192.168.220.0 looks inconsistent - confirm whether /24 was intended.
    /usr/local/src/code 192.168.220.0/16(rw,no_root_squash)
    # Restart rpcbind and nfs.
    systemctl restart rpcbind
    systemctl restart nfs
    

    初始化docker swarm环境

    # Initialize the swarm on this host, advertising 192.168.220.132 to the other nodes.
    docker swarm init --advertise-addr 192.168.220.132
    
    --------------------------------------------------------------------------
    Swarm initialized: current node (dxn1zf6l61qsb1josjja83ngz) is now a manager.
    
    To add a worker to this swarm, run the following command:
    
    docker swarm join \
    --token SWMTKN-1-49nj1cmql0jkz5s954yi3oex3nedyz0fb0xx14ie39trti4wxv-8vxv8rssmk743ojnwacrr2e7c \
    192.168.220.132:2377
    
    To add a manager to this swarm, run 'docker swarm join-token manager' and follow the instructions.
    --------------------------------------------------------------------------
    
    # Join command as printed by `docker swarm init`
    # (presumably run on each worker node - confirm).
    docker swarm join \
    --token SWMTKN-1-49nj1cmql0jkz5s954yi3oex3nedyz0fb0xx14ie39trti4wxv-8vxv8rssmk743ojnwacrr2e7c \
    192.168.220.132:2377
    
    # List the nodes in the swarm.
    docker node list
    

    使用docker-compose文件创建容器

    vi docker-compose.yaml
    
    # Swarm stack: one MPI master on the manager node plus two MPI workers.
    # Every container runs sshd and mounts the same NFS-backed code directory.
    version: "3.2"
    services:
      mpi_master:
        image: mrwangwei/centos-openmpi:v1
        # sshd -D stays in the foreground, which keeps the container running.
        command: /bin/bash -c "/usr/sbin/sshd -D"
        deploy:
          replicas: 1
          placement:
            constraints:
              - node.role == manager
        networks:
          - mpi_overlay
        volumes:
          - "mpi_code:/usr/local/src/code"
        ports:
          # Only the container port is given; no fixed host port is requested.
          - "22"

      mpi_node:
        image: mrwangwei/centos-openmpi:v1
        command: /bin/bash -c "/usr/sbin/sshd -D"
        deploy:
          replicas: 2
          placement:
            constraints:
              - node.role == worker
        networks:
          - mpi_overlay
        volumes:
          - "mpi_code:/usr/local/src/code"

    networks:
      mpi_overlay:
        # `docker stack deploy` uses the overlay driver by default; it is
        # spelled out here instead of leaving the key with an empty value.
        driver: overlay

    volumes:
      mpi_code:
        # Local volume driver mounting the NFS export at 192.168.220.132.
        driver: local
        driver_opts:
          type: "nfs"
          o: "addr=192.168.220.132,rw"
          device: ":/usr/local/src/code"
    

    启动部署docker swarm

    # Deploy the stack under the name "example" and inspect its services and tasks.
    docker stack deploy --compose-file docker-compose.yaml example

    docker service ls

    docker service ps --no-trunc {serviceName}

    # Open a shell in the mpi_master container. `docker exec` needs a container,
    # so look it up by name: stack containers are named <stack>_<service>.<slot>.<id>.
    # Run this on the manager node, where the placement constraint puts mpi_master.
    docker exec -it "$(docker ps -q -f name=example_mpi_master)" /bin/bash
    # The commands below run inside that container.
    source /etc/profile
    cd /usr/local/src/code/
    # tasks.<service> resolves to one address per task of the service;
    # +short prints only those addresses, one per line.
    dig +short tasks.mpi_node > machines
    cat machines
    # Generate a key without a passphrase and install it on every worker listed in machines.
    ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''
    for host in $(cat machines); do ssh-copy-id "$host"; done
    mpicc main.c
    mpirun -np 10 --oversubscribe --allow-run-as-root --machinefile machines --prefix /usr/local/openmpi a.out

    # Back on the manager node: remove the stack.
    docker stack rm example
    

    main.c

    #include <stdio.h>
    #include <mpi.h>
    
    int main(int argc, char *argv[])
    {
            int myrank, nprocs;
            char name[10];
            int name_len;
            int i,j,k,sum=0;
            MPI_Init(&argc, &argv);
            MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
            MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
            MPI_Get_processor_name(name, &name_len);
            printf("core[%3d] of [%3d] in {%s},  in dir[ ~/托腮etMPI ]\n", myrank, nprocs, name);
            /* for(i =0 ; i<10000; i++) */
                    /* for(j=0 ;j< myrank ; j++) */
                            /* sum +=j; */
            /* printf("core[%3d], sum= %12d\n", myrank,sum ); */
    
            MPI_Finalize();
    
            return 0;
    }
    

    LAMMPS 测试
    docker-compose.yaml

    ```yaml
    # Swarm stack for the LAMMPS test: one master on the manager node plus two
    # workers. Every container runs sshd and mounts the same NFS-backed data directory.
    version: "3"
    services:
      mpi_master:
        image: lammps-gpu:deepmd
        # sshd -D stays in the foreground, which keeps the container running.
        command: /bin/bash -c "/usr/sbin/sshd -D"
        deploy:
          replicas: 1
          placement:
            constraints:
              - node.role == manager
        networks:
          - mpi_overlay
        volumes:
          - "mpi_code:/root/lammps-data"
        working_dir: /root/lammps-data/

      mpi_node:
        image: lammps-gpu:deepmd
        command: /bin/bash -c "/usr/sbin/sshd -D"
        deploy:
          replicas: 2
          placement:
            constraints:
              - node.role == worker
        networks:
          - mpi_overlay
        volumes:
          - "mpi_code:/root/lammps-data"

    networks:
      mpi_overlay:
        # `docker stack deploy` uses the overlay driver by default; it is
        # spelled out here instead of leaving the key with an empty value.
        driver: overlay

    volumes:
      mpi_code:
        # Local volume driver mounting the NFS export at 192.168.220.132.
        driver: local
        driver_opts:
          type: "nfs"
          o: "addr=192.168.220.132,rw"
          device: ":/root/lammps-data"
    ```
    
    ```shell
    docker stack deploy --compose-file docker-compose.yaml lammps
    docker service ls
    # Enter the master container and do the configuration there. Stack containers
    # are named <stack>_<service>.<slot>.<id>; run this on the manager node.
    docker exec -it "$(docker ps -q -f name=lammps_mpi_master)" /bin/bash
    # The commands below run inside that container.
    source activate dpdev
    cd /root/lammps-data/CH.airebo
    # bind-utils provides dig.
    yum -y install bind-utils
    # tasks.<service> resolves to one address per task of the service;
    # +short prints only those addresses, one per line.
    dig +short tasks.mpi_node > machines
    ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''
    # Install the key on every worker listed in machines, whatever the replica count.
    for host in $(cat machines); do ssh-copy-id "$host"; done
    # Plain MPI run.
    mpirun --machinefile machines -np 56 /opt/lammps-3Mar20/src/lmp_mpi -in opt.in
    # Same binary with the LAMMPS GPU suffix and package options (-sf gpu -pk gpu 2).
    mpirun --machinefile machines -np 28 /opt/lammps-3Mar20/src/lmp_mpi -sf gpu -pk gpu 2 -in gpu-opt.in
    ```
    

    https://github.com/moby/moby/issues/37855
    https://my.oschina.net/u/1787735/blog/4374958

    相关文章

      网友评论

          本文标题:多机使用dockerswarm部署多个容器并行openmpi

          本文链接:https://www.haomeiwen.com/subject/tnravktx.html