美文网首页
安装IB驱动

安装IB驱动

作者: 深度学习dlai | 来源:发表于2019-03-28 13:34 被阅读0次

环境

ubuntu 16.04
Mellanox Technologies MT26428 [ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE] (rev b0)

omnisky@omnisky:~$ lspci | grep Mell    
01:00.0 InfiniBand: Mellanox Technologies MT26428 [ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE] (rev b0)

下载驱动

http://cn.mellanox.com/page/products_dyn?product_family=26&mtag=linux_sw_drivers

安装驱动

官方文档:http://www.mellanox.com/related-docs/prod_software/Mellanox_OFED_Linux_User_Manual_v4_5.pdf

ISO格式

挂载镜像
mount -o ro,loop MLNX_OFED_LINUX-2.1-1.0.0-rhel6.4-x86_64.iso /mnt
在/mnt目录下执行如下命令:
./mlnxofedinstall

tgz格式

tar -zxvf MLNX_OFED_LINUX-4.5-1.0.1.0-ubuntu16.04-x86_64.tgz
# 进入解压后的目录
cd MLNX_OFED_LINUX-4.5-1.0.1.0-ubuntu16.04-x86_64
./mlnxofedinstall --force
#安装完毕,加载驱动
/etc/init.d/openibd restart

配置IB

成功安装IB驱动之后,使用命令ifconfig,可以看见ib0
vi /etc/network/interfaces,配置IB的IP,例如:

auto ib0
iface ib0 inet static
    address 11.11.11.11
    netmask 255.255.255.0


重启网络使配置生效,然后执行ifconfig查看:
ifconfig 
# 显示
ib0       Link encap:UNSPEC  HWaddr A0-00-02-20-FE-80-00-00-00-00-00-00-00-00-00-00  
          inet addr:11.11.11.11  Bcast:11.11.11.255  Mask:255.255.255.0
          inet6 addr: fe80::202:c903:52:bdd9/64 Scope:Link
          UP BROADCAST RUNNING MULTICAST  MTU:2044  Metric:1
          RX packets:31 errors:0 dropped:0 overruns:0 frame:0
          TX packets:47 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:256 
          RX bytes:3783 (3.7 KB)  TX bytes:5153 (5.1 KB)

启动相关服务

# 主节点启动
/etc/init.d/openibd restart
/etc/init.d/opensmd restart
# 其他节点
/etc/init.d/openibd restart

如果配置成功,则如下所示

root@master:/home/omnisky# ibstat
CA 'mlx4_0'
    CA type: MT26428
    Number of ports: 1
    Firmware version: 2.9.1200
    Hardware version: b0
    Node GUID: 0x0002c9030052bdd8
    System image GUID: 0x0002c9030052bddb
    Port 1:
        State: Active
        Physical state: LinkUp
        Rate: 40
        Base lid: 1
        LMC: 0
        SM lid: 1
        Capability mask: 0x0251086a
        Port GUID: 0x0002c9030052bdd9
        Link layer: InfiniBand

测试IB

参考:https://community.mellanox.com/s/article/how-to-create-a-docker-container-with-rdma-accelerated-applications-over-100gb-infiniband-network#jive_content_id_Equipment

# server 
ib_write_bw -a -d mlx4_0
# client
ib_write_bw -a -F $server_IP -d mlx4_0 --report_gbits
# 设备名(此处为 mlx4_0)可通过 ibstat 查询

如果正常

[root@mofed-test-pod1 /]# ib_write_bw -a -F 10.244.1.171 -d mlx4_0 --report_gbits
---------------------------------------------------------------------------------------
                    RDMA_Write BW Test
 Dual-port       : OFF      Device         : mlx4_0
 Number of qps   : 1        Transport type : IB
 Connection type : RC       Using SRQ      : OFF
 TX depth        : 128
 CQ Moderation   : 100
 Mtu             : 4096[B]
 Link type       : IB
 Max inline data : 0[B]
 rdma_cm QPs     : OFF
 Data ex. method : Ethernet
---------------------------------------------------------------------------------------
 local address: LID 0x01 QPN 0x0218 PSN 0xa65d9c RKey 0x001100 VAddr 0x007f392cbec000
 remote address: LID 0x03 QPN 0x0218 PSN 0xdad5f RKey 0x001100 VAddr 0x007ff453bc3000
---------------------------------------------------------------------------------------
 #bytes     #iterations    BW peak[Gb/sec]    BW average[Gb/sec]   MsgRate[Mpps]
 2          5000           0.054750            0.052885            3.305343
 4          5000             0.15               0.13           4.134340
 8          5000             0.30               0.30           4.673526
 16         5000             0.60               0.60           4.654721
 32         5000             1.20               1.18           4.594563
 64         5000             2.39               2.32           4.526180
 128        5000             4.79               4.78           4.669972
 256        5000             9.56               9.54           4.658179
 512        5000             19.00              18.76          4.580314
 1024       5000             23.97              23.93          2.921618
 2048       5000             25.29              25.23          1.539684
 4096       5000             26.32              26.31          0.803058
 8192       5000             26.85              26.84          0.409597
 16384      5000             27.09              27.09          0.206665
 32768      5000             27.21              27.21          0.103811
 65536      5000             27.28              27.28          0.052026
 131072     5000             27.37              27.28          0.026016
 262144     5000             27.28              27.28          0.013009
 524288     5000             27.38              27.30          0.006509
 1048576    5000             27.34              27.33          0.003258
 2097152    5000             27.34              27.33          0.001629
 4194304    5000             27.33              27.32          0.000814
 8388608    5000             27.32              27.32          0.000407
---------------------------------------------------------------------------------------

k8s rdma插件安装以及测试

环境

root@master:/home/omnisky# kubectl get nodes -o wide
NAME      STATUS   ROLES    AGE   VERSION   INTERNAL-IP       EXTERNAL-IP   OS-IMAGE             KERNEL-VERSION      CONTAINER-RUNTIME
master    Ready    master   48m   v1.13.3   192.168.207.122   <none>        Ubuntu 16.04.5 LTS   4.15.0-45-generic   docker://18.6.1
omnisky   Ready    <none>   46m   v1.13.3   192.168.207.124   <none>        Ubuntu 16.04.5 LTS   4.15.0-45-generic   docker://18.6.1

安装插件

git clone https://github.com/Mellanox/k8s-rdma-sriov-dev-plugin.git
# 进入该文件夹
cd k8s-rdma-sriov-dev-plugin
# Create config map to describe mode as "hca" mode. This is per node configuration.
kubectl create -f example/hca/rdma-hca-node-config.yaml
# Deploy device plugin
kubectl create -f example/device-plugin.yaml

启动容器测试

# Create test pod which requests 1 vhca resource.
kubectl create -f example/hca/test-hca-pod.yaml

将文件test-hca-pod.yaml复制为test_custom.yaml,
修改其中的metadata.name,同时指定nodeName,确保两个pod运行在不同节点

apiVersion: v1
kind: Pod
metadata:
  name: mofed-test-pod1
spec:
  restartPolicy: OnFailure
  nodeName: master
  containers:
  - image: mellanox/centos_7_4_mofed_4_2_1_2_0_0_60
    name: mofed-test-ctr
    securityContext:
      capabilities:
        add: [ "IPC_LOCK" ]
    resources:
      limits:
        rdma/hca: 1
    command:
    - sh
    - -c
    - |
      ls -l /dev/infiniband /sys/class/net
      sleep 1000000

# 使用修改后的文件创建第二个pod
kubectl create -f example/hca/test_custom.yaml
# 两个不同节点的pods
root@master:/home/omnisky/ty/k8s-rdma-sriov-dev-plugin/example/hca# kubectl get pods -o wide
NAME              READY   STATUS    RESTARTS   AGE   IP             NODE      NOMINATED NODE   READINESS GATES
mofed-test-pod    1/1     Running   0          49m   10.244.1.171   omnisky   <none>           <none>
mofed-test-pod1   1/1     Running   0          49m   10.244.0.4     master    <none>           <none>

分别进入两个容器
kubectl exec -it pod_name bash
ibstat的结果正常
一个做服务端,一个做客户端,测试

# server 
ib_write_bw -a -d mlx4_0
# client
ib_write_bw -a -F $server_IP -d mlx4_0 --report_gbits
# 设备名(此处为 mlx4_0)可通过 ibstat 查询

相关文章

网友评论

      本文标题:安装IB驱动

      本文链接:https://www.haomeiwen.com/subject/pfkxyqtx.html