扩展docker数据池
关闭docker并删除docker数据
systemctl stop docker
rm -rf /var/lib/docker
创建新的docker数据池
mkdir -p /var/lib/docker/devicemapper/devicemapper
dd if=/dev/zero of=/var/lib/docker/devicemapper/devicemapper/data bs=1G count=0 seek=600
重新启动容器之后,再查看池大小
systemctl restart docker
docker info
拉取centos7作为基础镜像
docker pull centos:7
启动容器
这里将容器空间设置为20G,其实最后打包的镜像大小不到10G
docker run -it --storage-opt size=20G centos:7 /bin/bash
进入容器后,更换yum源并安装依赖
mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
yum clean all
yum makecache
yum -y install gcc gcc-gfortran gcc-c++ flex tcsh zlib-devel \
bzip2-devel libXt-devel libXext-devel libXdmcp-devel \
tkinter openmpi openmpi-devel perl perl-ExtUtils-MakeMaker \
patch bison boost-devel libffi-devel
yum -y install which make
安装cuda
amber编译GPU版本需要使用cuda,这里采用rpm包安装cuda-9.2,需要到cuda官网下载rpm包,下载好后,使用docker cp [filename] [容器ID]:[容器目标目录]
命令将文件传输至容器内。
注意:下面命令中的rpm文件名是cuda-8.0.61的,与这里所说的cuda-9.2不一致;请以实际下载的版本为准,并保证后文CUDA_HOME指向同一版本的安装目录。
docker cp cuda-repo-rhel7-8-0-local-ga2-8.0.61-1.x86_64.rpm 7b99d8408200:/opt
安装cuda
rpm -i cuda-repo-rhel7-8-0-local-ga2-8.0.61-1.x86_64.rpm
yum clean all
yum install cuda
如果报如下错误:
Error: Package: 1:nvidia-kmod-375.26-2.el7.x86_64 (cuda-8-0-local-ga2) Requires: dkms You could try using --skip-broken to work around the problem You could try running: rpm -Va --nofiles --nodigest
解决办法:
缺少2个包,装第一个:vi /etc/yum.repos.d/linuxtech.testing.repo
输入:
[linuxtech-testing]
name=LinuxTECH Testing
baseurl=http://pkgrepo.linuxtech.net/el6/testing/
enabled=0
gpgcheck=1
gpgkey=http://pkgrepo.linuxtech.net/el6/release/RPM-GPG-KEY-LinuxTECH.NET
保存后执行:
yum --enablerepo=linuxtech-testing install libvdpau
装第二个:
yum -y install epel-release
yum -y install --enablerepo=epel dkms
再次执行install安装cuda
安装python3
使用yum安装python3(采用编译安装python3会报错,目前未解决)
yum -y install python3
先在容器外将Amber18的两个安装包解压缩,然后将解压后的amber18目录传入容器中
# 需要 bzip2
tar jxvf AmberTools18.tar.bz2
tar jxvf Amber18.tar.bz2
docker cp amber18/ 7b99d8408200:/opt
然后在容器内使用pip3安装编译amber需要的依赖
pip3 install -r /opt/amber18/AmberTools/src/python_requirement.txt
安装amber18
在~/.bashrc中添加环境变量
#AMBER
test -f /opt/amber18/amber.sh && source /opt/amber18/amber.sh
export AMBERHOME=/opt/amber18
#CUDA
export CUDA_HOME=/usr/local/cuda-9.2
export PATH=$PATH:$CUDA_HOME/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_HOME/lib64
#OPENMPI
export PATH=$PATH:/usr/lib64/openmpi/bin/
编译顺序为:串行版本 -> 并行版本 -> GPU版本
串行版本:
cd $AMBERHOME
./configure --with-python /usr/bin/python3 gnu
make install
make test
并行版本:
cd $AMBERHOME
./configure --with-python /usr/bin/python3 -mpi gnu
make install
export DO_PARALLEL="mpirun --allow-run-as-root -np 4"
./configure --with-python /usr/bin/python3 -openmp gnu
make openmp
GPU版本:
cd $AMBERHOME
./configure --with-python /usr/bin/python3 -cuda gnu
make install
./configure --with-python /usr/bin/python3 -cuda -mpi gnu
make install
# 此时test耗时很长,最终卡住无法进行,原因未知
make test.cuda
make test.cuda_parallel
打包镜像
退出容器后先将容器重启一下
docker restart 7b99d8408200
然后将容器打包成镜像,-a参数注明作者,-m参数注明提交时的说明文字
docker commit -a "mrwangwei" -m "amber18" 7b99d8408200 amber:18
将镜像打tag后上传至hub
docker login
docker tag amber:18 mrwangwei/amber:18
docker push mrwangwei/amber:18
dockerfile
# syntax=docker/dockerfile:experimental
###############################################################################
# CentOS 7, CUDA, INTEL MPI, and AMBER.
#
# Requirements:
# * amber18 directory in build directory with INTEL development kit.
#
# Build with (BuildKit must be enabled because the build step below uses
# `RUN --mount`):
# export DOCKER_BUILDKIT=1
# sudo docker build -t amber:18-cuda10.2 . \
# --build-arg IMPI_VERSION=impi_version \
# --build-arg INTEL_VERSION=intel_version
#
# Run with:
# sudo docker run -it \
# --mount type=bind,source=/path/to/build-amber/intel/,target=/opt/intel/ \
# amber:18-cuda10.2
#
# Before making the image you need two installation packages
# to get the amber18 installation directory.
# Use tar jxf AmberTools18.tar.bz2, tar jxf Amber18.tar.bz2
# two commands to decompress the two compressed files,
# then you need to copy the amber18 installation directory
# into the image file system in the dockerfile.
#
# To compile amber18 using the intel compiler,
# you must use the intel compiler version 12-17,
# and amber17 only supports the version before cuda-9.1,
# so you need to update amber before compiling,
# you cannot add the '--no-updates' parameter to the configure command
#
# Test with:
# sudo docker run -it \
# --mount type=bind,source=/path/to/build-amber/intel/,target=/opt/intel/ \
# amber:18-cuda10.2
# cd $AMBERHOME/test
# ./test_amber_serial.sh
# export DO_PARALLEL="mpirun -np $(nproc)"
# ./test_amber_parallel.sh
# ./test_amber_cuda_serial.sh
# ./test_amber_cuda_parallel.sh
###############################################################################
###############################################################################
# Build stage
###############################################################################
FROM nvidia/cuda:10.2-cudnn7-devel-centos7
# software versions (override with --build-arg); each ARG is declared once
ARG IMPI_VERSION=2017.4.256
ARG INTEL_VERSION=2017.6.256
# set environment variables
ENV AMBERHOME=/opt/amber18
ENV INTELLIB_HOME=/opt/intel/compilers_and_libraries_${INTEL_VERSION}/linux/
# MPI_HOME was referenced below but never defined, which left IMPI_VERSION
# unused and expanded the PATH entry to a bare "/bin".
# NOTE(review): the path assumes the standard Intel MPI install layout under
# /opt/intel/impi/<version>/intel64 - confirm against the mounted tree.
ENV MPI_HOME=/opt/intel/impi/${IMPI_VERSION}/intel64
ENV PATH=$PATH:$MPI_HOME/bin
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$MPI_HOME/lib
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$INTELLIB_HOME/mkl/lib/intel64
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$INTELLIB_HOME/compiler/lib/intel64
ENV CUDA_HOME=/usr/local/cuda-10.2
ENV PATH=$PATH:$CUDA_HOME/bin
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_HOME/lib64
# inject nvidia-smi tool and init nvidia devices needed by cuda
COPY ./nvidia-smi /usr/bin/
COPY init-nvidia-dev.sh /usr/local/bin/
RUN chmod +x /usr/bin/nvidia-smi && chmod +x /usr/local/bin/init-nvidia-dev.sh
# prepare the installation directory and install required packages
WORKDIR /opt
COPY ./amber18 /opt/amber18
# the yum cache is dropped in the same layer so it does not bloat the image
RUN yum install -y flex tcsh zlib-devel bzip2-devel \
    libXt-devel libXext-devel libXdmcp-devel perl patch \
    && yum install -y which make python3 \
    && yum clean all \
    && pip3 install -r /opt/amber18/AmberTools/src/python_requirement.txt
# mount Intel compiler (bind mount, only present during this step) and build AMBER
RUN --mount=target=/opt/intel,type=bind,source=./intel \
    . /opt/intel/bin/compilervars.sh intel64 \
    # amber.sh may not exist yet on a fresh source tree; sourcing it is optional,
    # so its absence must not short-circuit the whole && chain and fail the build
    && if [ -f /opt/amber18/amber.sh ]; then source /opt/amber18/amber.sh; fi \
    && cd $AMBERHOME \
    # build serial ('y' answers configure's interactive prompt)
    && echo 'y' | ./configure --with-python /usr/bin/python3 intel \
    && make clean && make install -j $(nproc) \
    # build parallel (mpi and openmp)
    && ./configure --with-python /usr/bin/python3 -intelmpi intel \
    && make clean && make install -j $(nproc) \
    && ./configure --with-python /usr/bin/python3 -openmp intel \
    && make clean && make openmp -j $(nproc) \
    # build cuda_serial and cuda_parallel
    && ./configure --with-python /usr/bin/python3 -cuda intel \
    && make clean && make install -j $(nproc) \
    && ./configure --with-python /usr/bin/python3 -cuda -intelmpi intel \
    && make clean && make install -j $(nproc) \
    # set environment variable for shells started inside the container
    && echo "# AMBER" >> /etc/profile \
    && echo "test -f /opt/amber18/amber.sh && source /opt/amber18/amber.sh" >> /etc/profile \
    && echo "source /opt/intel/bin/compilervars.sh intel64" >> /etc/profile \
    && echo "source /etc/profile" >> /etc/bashrc
如果出现磁盘已满的情况,
vi /usr/lib/systemd/system/docker.service
在ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock后面添加如下参数: --graph /opt/docker/data
注意:较新版本的Docker已弃用--graph参数,请改用 --data-root /opt/docker/data
systemctl daemon-reload
systemctl restart docker
nvidia-smi tool
wget https://cn.download.nvidia.com/XFree86/Linux-x86_64/${VERSION_NAME}/NVIDIA-Linux-x86_64-${VERSION_NAME}.run
sh NVIDIA-Linux-x86_64-${VERSION_NAME}.run -x
cd NVIDIA-Linux-x86_64-${VERSION_NAME}
cp nvidia-smi ../
init-nvidia-dev.sh
#!/bin/bash
# Initialise the NVIDIA devices needed by CUDA.
#
# Runs nvidia-smi once and makes sure the /dev/nvidia-uvm character device
# exists. The outcome of each step is appended to init-nvidia-dev.log in the
# current working directory.

log_file=init-nvidia-dev.log
touch "$log_file"

# Branch on the exit status itself. Capturing the command's stdout and
# comparing it with `-ne 0` never detects a failure: empty output evaluates
# to 0 and non-numeric output makes the test error out, so "success" was
# always logged.
if nvidia-smi >/dev/null; then
  echo " [$(date) ] run nvidia-smi command success" >> "$log_file"
else
  echo " [$(date) ] run nvidia-smi command error" >> "$log_file"
fi

if [[ ! -c /dev/nvidia-uvm ]]; then
  # The nvidia-uvm major number is assigned dynamically by the kernel, so
  # prefer the value registered in /proc/devices and fall back to the
  # previously hard-coded 245 when the driver is not listed there.
  uvm_major=$(awk '$2 == "nvidia-uvm" { print $1; exit }' /proc/devices 2>/dev/null)
  if mknod -m 666 /dev/nvidia-uvm c "${uvm_major:-245}" 0; then
    echo " [$(date) ] mknod success" >> "$log_file"
  else
    echo " [$(date) ] mknod error" >> "$log_file"
  fi
fi
网友评论