1. ip netns
ip netns is the command used to manage network namespaces.
------------------------------terminal 01-----------------------------------
root@nicktming:~# ip netns help
Usage: ip netns list
ip netns add NAME
ip netns delete NAME
ip netns identify PID
ip netns pids NAME
ip netns exec NAME cmd ...
ip netns monitor
// Add a network namespace: neta
root@nicktming:~# ls -l /var/run/netns
ls: cannot access /var/run/netns: No such file or directory
root@nicktming:~# ip netns add neta
root@nicktming:~# ls -l /var/run/netns
total 0
-r--r--r-- 1 root root 0 May 2 13:16 neta
root@nicktming:~# ip netns list
neta
// Enter the neta namespace
root@nicktming:~# ip netns exec neta sh
# ifconfig
// Check which namespace a given PID belongs to
# echo $$
28033
# ip netns identify 28033
neta
// List all process PIDs in the neta namespace
# ip netns pids neta
28033
28262
# exit
// Open a second terminal
------------------------------terminal 02-----------------------------------
root@nicktming:~# ip netns monitor
------------------------------terminal 01-----------------------------------
root@nicktming:~# ip netns delete neta
root@nicktming:~# ls -l /var/run/netns
total 0
root@nicktming:~# ip netns list
root@nicktming:~#
------------------------------terminal 02-----------------------------------
root@nicktming:~# ip netns monitor
delete neta
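The lifecycle above wraps naturally into a script. A minimal sketch, with demo as an arbitrary (hypothetical) namespace name:
#!/bin/sh
set -e
NS=demo                        # hypothetical name
ip netns add "$NS"             # creates and bind-mounts /var/run/netns/demo
ip netns exec "$NS" ip link    # the new namespace contains only a DOWN loopback device
ip netns delete "$NS"          # unmounts and removes the file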
Manually creating a network namespace
// Show the current shell's network namespace
root@nicktming:~# readlink /proc/$$/ns/net
net:[4026531956]
// No named network namespaces exist yet
root@nicktming:~# ls -l /var/run/netns/
total 0
// Manually create a file named neta
root@nicktming:~# touch /var/run/netns/neta
root@nicktming:~# ls -i /var/run/netns/neta
4668465 /var/run/netns/neta
root@nicktming:~# ip netns list
neta
// Trying to enter the neta namespace fails:
// merely creating a file under /var/run/netns does not create a network namespace;
// some extra wiring is still needed
root@nicktming:~# ip netns exec neta sh
seting the network namespace "neta" failed: Invalid argument
// Use unshare to start a sh process with an isolated network namespace
root@nicktming:~# unshare -n sh
# ifconfig
# ip addr
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
# ip link
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN mode DEFAULT group default
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
# echo $$
31802
// Inspect the current network namespace
# readlink /proc/$$/ns/net
net:[4026532167]
// Bind-mount this network namespace onto the file created earlier
# mount --bind /proc/$$/ns/net /var/run/netns/neta
// The neta file's inode now matches the network namespace's inode
# ls -i /var/run/netns/neta
4026532167 /var/run/netns/neta
# exit
// With the bind mount in place, enter the neta namespace again
root@nicktming:~# ip netns exec neta sh
# ip netns pids neta
32210
32264
# exit
root@nicktming:~#
The whole sequence above is equivalent to a single command:
ip netns add neta
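Newer util-linux versions can fold the touch and bind mount into one step with unshare's --net=FILE option, which creates a persistent namespace directly. A minimal sketch, assuming a recent unshare and using netb as an arbitrary name:
touch /var/run/netns/netb
unshare --net=/var/run/netns/netb true   # true exits immediately; the bind mount keeps the namespace alive
ip netns exec netb ip link               # usable like any ip-netns-managed namespace
ip netns delete netb                     # unmounts and removes the file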
2. veth
veth devices are virtual network devices that always come in pairs: a packet sent into one end of the pair is emitted from the other end.
2.1 veth communication within the same network namespace
root@nicktming:~# ip link add veth0 type veth peer name veth1
root@nicktming:~# ip addr add 192.168.2.11/24 dev veth0
root@nicktming:~# ip addr add 192.168.2.12/24 dev veth1
root@nicktming:~# ip link set veth0 up
root@nicktming:~# ip link set veth1 up
// If the ping fails, relax reverse-path filtering (rp_filter drops packets
// whose source address fails the reverse-path check on the receiving interface)
root@nicktming:~# echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
root@nicktming:~# echo 0 > /proc/sys/net/ipv4/conf/veth0/rp_filter
root@nicktming:~# echo 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter
// Current host routing table
root@nicktming:~# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
// Default gateway 172.19.16.1
0.0.0.0 172.19.16.1 0.0.0.0 UG 0 0 0 eth0
// Docker's route
172.17.0.0 0.0.0.0 255.255.0.0 U 0 0 0 docker0
// Host LAN traffic goes out via eth0
172.19.16.0 0.0.0.0 255.255.240.0 U 0 0 0 eth0
// Traffic to 192.168.2.0/24 goes via veth0 / veth1
192.168.2.0 0.0.0.0 255.255.255.0 U 0 0 0 veth0
192.168.2.0 0.0.0.0 255.255.255.0 U 0 0 0 veth1
// Ping veth1 via veth0
root@nicktming:~# ping -c 5 192.168.2.12 -I veth0
PING 192.168.2.12 (192.168.2.12) from 192.168.2.11 veth0: 56(84) bytes of data.
64 bytes from 192.168.2.12: icmp_seq=1 ttl=64 time=0.027 ms
64 bytes from 192.168.2.12: icmp_seq=2 ttl=64 time=0.039 ms
64 bytes from 192.168.2.12: icmp_seq=3 ttl=64 time=0.038 ms
64 bytes from 192.168.2.12: icmp_seq=4 ttl=64 time=0.041 ms
64 bytes from 192.168.2.12: icmp_seq=5 ttl=64 time=0.041 ms
--- 192.168.2.12 ping statistics ---
5 packets transmitted, 5 received, 0% packet loss, time 3997ms
rtt min/avg/max/mdev = 0.027/0.037/0.041/0.006 ms
root@nicktming:~#
[figure: veth-0.png]
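A handy way to identify which interface is a veth's peer is the peer_ifindex statistic exposed by the veth driver. A quick sketch, assuming ethtool is installed:
ethtool -S veth0    # prints "peer_ifindex: N"; N is veth1's index in ip link output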
2.2 veth communication across network namespaces
// First create two namespaces, then move one end of the pair into each
// (moving an interface into a namespace clears its IP configuration, so it must be reconfigured there)
root@nicktming:~# ip netns add ns1
root@nicktming:~# ip netns add ns2
root@nicktming:~# ip link set veth0 netns ns1
root@nicktming:~# ip link set veth1 netns ns2
// Configure ns1
root@nicktming:~# ip netns exec ns1 ip link
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN mode DEFAULT group default
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
5: veth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether be:eb:f2:ee:19:0b brd ff:ff:ff:ff:ff:ff
root@nicktming:~# ip netns exec ns1 ifconfig
root@nicktming:~# ip netns exec ns1 ip addr add 192.168.2.11/24 dev veth0
root@nicktming:~# ip netns exec ns1 ip link set veth0 up
root@nicktming:~# ip netns exec ns1 ifconfig
veth0 Link encap:Ethernet HWaddr be:eb:f2:ee:19:0b
inet addr:192.168.2.11 Bcast:0.0.0.0 Mask:255.255.255.0
UP BROADCAST MULTICAST MTU:1500 Metric:1
RX packets:1 errors:0 dropped:0 overruns:0 frame:0
TX packets:15 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:42 (42.0 B) TX bytes:1078 (1.0 KB)
root@nicktming:~#
// Configure ns2
root@nicktming:~# ip netns exec ns2 ifconfig
root@nicktming:~# ip netns exec ns2 ip addr add 192.168.2.12/24 dev veth1
root@nicktming:~# ip netns exec ns2 ip link set veth1 up
root@nicktming:~# ip netns exec ns2 ifconfig
veth1 Link encap:Ethernet HWaddr 5e:74:92:21:47:42
inet addr:192.168.2.12 Bcast:0.0.0.0 Mask:255.255.255.0
inet6 addr: fe80::5c74:92ff:fe21:4742/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:21 errors:0 dropped:0 overruns:0 frame:0
TX packets:7 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:1586 (1.5 KB) TX bytes:550 (550.0 B)
root@nicktming:~#
// From ns1, ping ns2's address
root@nicktming:~# ip netns exec ns1 ping -c 2 192.168.2.12
PING 192.168.2.12 (192.168.2.12) 56(84) bytes of data.
64 bytes from 192.168.2.12: icmp_seq=1 ttl=64 time=0.045 ms
64 bytes from 192.168.2.12: icmp_seq=2 ttl=64 time=0.041 ms
--- 192.168.2.12 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 999ms
rtt min/avg/max/mdev = 0.041/0.043/0.045/0.002 ms
root@nicktming:~#
[figure: veth-1.png]
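For reference, the whole of 2.1/2.2 condenses into a short script. A minimal sketch of the cross-namespace setup, reusing the same names and addresses as above:
ip netns add ns1
ip netns add ns2
ip link add veth0 type veth peer name veth1
ip link set veth0 netns ns1
ip link set veth1 netns ns2
ip netns exec ns1 ip addr add 192.168.2.11/24 dev veth0
ip netns exec ns2 ip addr add 192.168.2.12/24 dev veth1
ip netns exec ns1 ip link set veth0 up
ip netns exec ns2 ip link set veth1 up
ip netns exec ns1 ping -c 2 192.168.2.12
# Cleanup (deleting a namespace destroys its veth end, and with it the peer):
# ip netns delete ns1 && ip netns delete ns2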
3. bridge
A bridge is a virtual network device and has the usual device properties (it can be assigned an IP address, a MAC address, and so on).
An ordinary network device has only two ends: data coming in one end goes out the other. A physical NIC, for example, hands frames received from the wire to the kernel protocol stack, and sends frames coming from the stack out onto the wire.
A bridge instead attaches slave devices (physical NICs, virtual devices, VLAN devices, ...), much like a real-world switch with terminals plugged into its ports. The bridge itself can also be assigned an IP (see the notes on Linux bridge MAC-address behaviour), which lets the host communicate with other hosts on the network through the bridge device.
3.1 Creating a bridge
Starting from the setup in 2.1 (veth in the same network namespace), add a bridge and attach veth0 to it.
// Create bridge br0
root@nicktming:~# ip link add name br0 type bridge
root@nicktming:~# ip link set br0 up
root@nicktming:~# bridge link
// Create the veth pair
root@nicktming:~# ip link add veth0 type veth peer name veth1
root@nicktming:~# ip addr add 192.168.2.11/24 dev veth0
root@nicktming:~# ip addr add 192.168.2.12/24 dev veth1
root@nicktming:~# ip link set veth0 up
root@nicktming:~# ip link set veth1 up
root@nicktming:~# echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
root@nicktming:~# echo 0 > /proc/sys/net/ipv4/conf/veth0/rp_filter
root@nicktming:~# echo 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter
root@nicktming:~#
root@nicktming:~# bridge link
[figure: bridge-1.png]
Attach veth0 to br0.
root@nicktming:~# ip link set dev veth0 master br0
// veth0's MAC address is now the same as br0's
root@nicktming:~# ifconfig | grep br0
br0 Link encap:Ethernet HWaddr d6:2b:7b:1d:0e:b0
root@nicktming:~# ifconfig | grep veth0
veth0 Link encap:Ethernet HWaddr d6:2b:7b:1d:0e:b0
root@nicktming:~# bridge link
13: veth0 state UP : <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 master br0 state forwarding priority 32 cost 2
[figure: bridge-2.png]
Ping veth1 via veth0 again.
// The ping from veth0 to veth1 now fails
root@nicktming:~# ping -c 5 192.168.2.12 -I veth0
PING 192.168.2.12 (192.168.2.12) from 192.168.2.11 veth0: 56(84) bytes of data.
From 192.168.2.11 icmp_seq=1 Destination Host Unreachable
From 192.168.2.11 icmp_seq=2 Destination Host Unreachable
From 192.168.2.11 icmp_seq=3 Destination Host Unreachable
From 192.168.2.11 icmp_seq=4 Destination Host Unreachable
From 192.168.2.11 icmp_seq=5 Destination Host Unreachable
--- 192.168.2.12 ping statistics ---
5 packets transmitted, 0 received, +5 errors, 100% packet loss, time 4022ms
pipe 3
root@nicktming:~#
Packet captures while the ping runs:
root@nicktming:~# tcpdump -n -i veth1
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on veth1, link-type EN10MB (Ethernet), capture size 262144 bytes
19:55:52.894057 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:52.894064 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:53.891010 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:53.891029 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:54.891001 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:54.891016 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:55.908200 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:55.908208 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:56.907002 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:56.907019 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:57.907025 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:57.907054 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
--------------------------------------------------------------------------------------------
root@nicktming:~# tcpdump -n -i veth0
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on veth0, link-type EN10MB (Ethernet), capture size 262144 bytes
19:55:52.894051 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:52.894065 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:53.891001 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:53.891030 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:54.890992 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:54.891017 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:55.908194 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:55.908208 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:56.906994 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:56.907019 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:57.907012 ARP, Request who-has 192.168.2.12 tell 192.168.2.11, length 28
19:55:57.907055 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
--------------------------------------------------------------------------------------------
root@nicktming:~# tcpdump -n -i br0
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on br0, link-type EN10MB (Ethernet), capture size 262144 bytes
19:55:52.894065 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:53.891030 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:54.891017 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:55.908208 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:56.907019 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
19:55:57.907055 ARP, Reply 192.168.2.12 is-at ae:68:a3:2c:73:30, length 28
veth0 receives the ARP replies, but instead of passing them up to the protocol stack it hands them to br0 (note the replies showing up in the br0 capture). The stack therefore never learns veth1's MAC address, and the ping fails.
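One way to watch the bridge at work is its forwarding database (FDB), the MAC-address-to-port table a switch builds as it learns. A quick sketch using the bridge tool from iproute2 (exact output varies per machine):
bridge fdb show br br0    # lists the MAC addresses br0 has learned on its attached ports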
3.2 Assigning an IP to the bridge
Since whatever veth0 receives now goes to br0 rather than to the protocol stack, move the IP address from veth0 to br0:
root@nicktming:~# ip addr del 192.168.2.11/24 dev veth0
root@nicktming:~# ip addr add 192.168.2.11/24 dev br0
[figure: bridge-3.png]
root@nicktming:~# ping -c 1 -I br0 192.168.2.12
PING 192.168.2.12 (192.168.2.12) from 192.168.2.11 br0: 56(84) bytes of data.
64 bytes from 192.168.2.12: icmp_seq=1 ttl=64 time=0.059 ms
--- 192.168.2.12 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.059/0.059/0.059/0.000 ms
root@nicktming:~#
// There is no longer a route via veth0
root@nicktming:~# route
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
default 172.19.16.1 0.0.0.0 UG 0 0 0 eth0
172.17.0.0 * 255.255.0.0 U 0 0 0 docker0
172.19.16.0 * 255.255.240.0 U 0 0 0 eth0
192.168.2.0 * 255.255.255.0 U 0 0 0 veth1
192.168.2.0 * 255.255.255.0 U 0 0 0 br0
root@nicktming:~#
Because veth0 no longer carries an IP, the protocol stack will not choose it when routing packets; and even if a packet is forced out through veth0, whatever comes back in from the other end is handed straight to br0. At this point veth0 is effectively just a network cable.
[figure: bridge-5.png]
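The routing decision can be confirmed directly with ip route get, which reports the route and device the kernel would pick for a given destination. A quick sketch; after the change above it should select br0:
ip route get 192.168.2.12    # expected output along the lines of: 192.168.2.12 dev br0 src 192.168.2.11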
3.3 Adding a network namespace
Create a network namespace and move veth1 into it.
root@nicktming:~# ip netns list
root@nicktming:~#
root@nicktming:~# ip netns add ns1
root@nicktming:~# ip netns list
ns1
root@nicktming:~# ip link set veth1 netns ns1
// veth1 has disappeared from the routing table
root@nicktming:~# route
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
default 172.19.16.1 0.0.0.0 UG 0 0 0 eth0
172.17.0.0 * 255.255.0.0 U 0 0 0 docker0
172.19.16.0 * 255.255.240.0 U 0 0 0 eth0
192.168.2.0 * 255.255.255.0 U 0 0 0 br0
// Enter namespace ns1 and configure it
root@nicktming:~# ip netns exec ns1 sh
# ifconfig
# ip link
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN mode DEFAULT group default
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
12: veth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether ae:68:a3:2c:73:30 brd ff:ff:ff:ff:ff:ff
# ip addr add 192.168.2.12/24 dev veth1
# ip link set veth1 up
# ifconfig
veth1 Link encap:Ethernet HWaddr ae:68:a3:2c:73:30
inet addr:192.168.2.12 Bcast:0.0.0.0 Mask:255.255.255.0
inet6 addr: fe80::ac68:a3ff:fe2c:7330/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:26 errors:0 dropped:0 overruns:0 frame:0
TX packets:31 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:1148 (1.1 KB) TX bytes:1558 (1.5 KB)
# ip link set lo up
# ifconfig
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
inet6 addr: ::1/128 Scope:Host
UP LOOPBACK RUNNING MTU:65536 Metric:1
RX packets:0 errors:0 dropped:0 overruns:0 frame:0
TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:0 (0.0 B) TX bytes:0 (0.0 B)
veth1 Link encap:Ethernet HWaddr ae:68:a3:2c:73:30
inet addr:192.168.2.12 Bcast:0.0.0.0 Mask:255.255.255.0
inet6 addr: fe80::ac68:a3ff:fe2c:7330/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:26 errors:0 dropped:0 overruns:0 frame:0
TX packets:33 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:1148 (1.1 KB) TX bytes:1698 (1.6 KB)
// The routing table shows that traffic to 192.168.2.0/24 leaves via veth1,
// which delivers it to br0
# route
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
192.168.2.0 * 255.255.255.0 U 0 0 0 veth1
#
// Ping the bridge at 192.168.2.11
# ping -c 1 192.168.2.11
PING 192.168.2.11 (192.168.2.11) 56(84) bytes of data.
64 bytes from 192.168.2.11: icmp_seq=1 ttl=64 time=0.048 ms
--- 192.168.2.11 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.048/0.048/0.048/0.000 ms
[figure: bridge-6.png]
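The same pattern scales to more namespaces: each new namespace gets its own veth pair, one end inside the namespace and the other attached to br0. A minimal sketch for a second namespace (ns2, veth2, veth2-br and 192.168.2.13 are arbitrary names and an arbitrary address, not from the steps above):
ip netns add ns2
ip link add veth2 type veth peer name veth2-br
ip link set veth2 netns ns2
ip link set veth2-br master br0
ip link set veth2-br up
ip netns exec ns2 ip addr add 192.168.2.13/24 dev veth2
ip netns exec ns2 ip link set veth2 up
ip netns exec ns2 ping -c 1 192.168.2.12    # ns2 -> ns1 through br0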
4. Routing table
The routing table is maintained by the Linux kernel. The routes defined inside a network namespace decide where packets flow, i.e. which network device a given request is sent out on.
This test continues from 3.3 (adding a network namespace).
root@nicktming:~# ip netns exec ns1 sh
# ifconfig
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
inet6 addr: ::1/128 Scope:Host
UP LOOPBACK RUNNING MTU:65536 Metric:1
RX packets:0 errors:0 dropped:0 overruns:0 frame:0
TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:0 (0.0 B) TX bytes:0 (0.0 B)
veth1 Link encap:Ethernet HWaddr ae:68:a3:2c:73:30
inet addr:192.168.2.12 Bcast:0.0.0.0 Mask:255.255.255.0
inet6 addr: fe80::ac68:a3ff:fe2c:7330/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:26 errors:0 dropped:0 overruns:0 frame:0
TX packets:33 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:1148 (1.1 KB) TX bytes:1698 (1.6 KB)
// Current routing table
# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
192.168.2.0 0.0.0.0 255.255.255.0 U 0 0 0 veth1
// Pinging the host's own IP fails
# ping -c 1 172.19.16.7
connect: Network is unreachable
#
// Add a default gateway to ns1: when no route matches a destination, the default gateway is used.
// E.g. to reach 172.19.16.7, packets take the default gateway 192.168.2.11: they are sent out via veth1 to 192.168.2.11 first.
# route add default gw 192.168.2.11
# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.2.11 0.0.0.0 UG 0 0 0 veth1
192.168.2.0 0.0.0.0 255.255.255.0 U 0 0 0 veth1
// With the route in place, 172.19.16.7 is reachable
# ping -c 1 172.19.16.7
PING 172.19.16.7 (172.19.16.7) 56(84) bytes of data.
64 bytes from 172.19.16.7: icmp_seq=1 ttl=64 time=0.050 ms
--- 172.19.16.7 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.050/0.050/0.050/0.000 ms
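route and route add are legacy net-tools commands; the same default route can be managed with iproute2. A sketch of the equivalent, run from the host side rather than inside the namespace shell (only one of the two ways of adding the route is needed):
ip netns exec ns1 ip route add default via 192.168.2.11
ip netns exec ns1 ip route show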
5. iptables
iptables is the userspace tool for viewing and manipulating the kernel's netfilter module; it manages how packets flow and are forwarded. iptables defines a chain-based processing model in which, at each stage of a packet's traversal through the network stack, different policies can rewrite, forward, or drop it.
For a series of articles on using iptables, see http://www.zsythink.net/archives/1199
For MASQUERADE and DNAT in particular, see http://www.zsythink.net/archives/1764
The tests below need address translation, so IP forwarding must be enabled on the host first, otherwise the rules will have no effect: echo 1 > /proc/sys/net/ipv4/ip_forward
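The echo into /proc only lasts until reboot. A minimal sketch of making the setting persistent via sysctl (these are the standard paths, though distributions differ in where they keep sysctl configuration):
sysctl -w net.ipv4.ip_forward=1             # same effect as the echo above
echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf
sysctl -p                                   # reload /etc/sysctl.conf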
This test continues from section 4 (routing table).
root@nicktming:~# ip netns exec ns1 sh
# cat /etc/resolv.conf
nameserver 183.60.83.19
nameserver 183.60.82.98
# ping -c 1 baidu.com
ping: unknown host baidu.com
5.1 The MASQUERADE rule
The network device in namespace ns1 has the address 192.168.2.12. On the host this address routes to the br0 bridge, but once a packet has left the host, the outside network has no way of routing replies back to this IP. So to request external addresses, a MASQUERADE rule must first translate this IP into the IP of the host's egress NIC.
Add a MASQUERADE rule to iptables on the host:
root@nicktming:~# iptables -t nat -vnL POSTROUTING --line
Chain POSTROUTING (policy ACCEPT 1 packets, 60 bytes)
num pkts bytes target prot opt in out source destination
1 0 0 MASQUERADE all -- * !docker0 172.17.0.0/16 0.0.0.0/0
// Add a rule: packets from the 192.168.2.0/24 network leaving the host via eth0
// get their source IP (192.168.2.12) rewritten to eth0's address (172.19.16.7)
root@nicktming:~# iptables -t nat -A POSTROUTING -s 192.168.2.0/24 -o eth0 -j MASQUERADE
root@nicktming:~# iptables -t nat -vnL POSTROUTING --line
Chain POSTROUTING (policy ACCEPT 2 packets, 120 bytes)
num pkts bytes target prot opt in out source destination
1 0 0 MASQUERADE all -- * !docker0 172.17.0.0/16 0.0.0.0/0
2 0 0 MASQUERADE all -- * eth0 192.168.2.0/24 0.0.0.0/0
Now retry the ping inside ns1:
# ping -c 1 www.baidu.com
PING www.wshifen.com (119.63.197.151) 56(84) bytes of data.
64 bytes from 119.63.197.151: icmp_seq=1 ttl=51 time=54.7 ms
--- www.wshifen.com ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 54.746/54.746/54.746/0.000 ms
When ns1 requests an address outside the host, the source address from ns1 (192.168.2.12) is translated into the host's address (172.19.16.7), and networks beyond the host become reachable from ns1.
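The counters shown by iptables -t nat -vnL POSTROUTING --line are a handy way to confirm the rule matched: pkts/bytes on the MASQUERADE entry increase with each ping. When the rule is no longer needed, -D deletes it using the same specification as -A. A quick sketch:
iptables -t nat -vnL POSTROUTING --line                                  # counters on rule 2 should now be non-zero
iptables -t nat -D POSTROUTING -s 192.168.2.0/24 -o eth0 -j MASQUERADE   # remove when done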
5.2 The DNAT rule
What about the opposite direction: if ns1 runs a service, how can hosts on the Internet reach it? ns1's IP is a private address, not routable on the Internet, whereas the host's IP is public and globally unique, so the service can be exposed through an iptables DNAT rule.
iptables -t nat -A PREROUTING -p tcp -m tcp --dport 80 -j DNAT --to-destination 192.168.2.12:80
This rule forwards traffic arriving at port 80 on the host to the service on port 80 at 192.168.2.12.
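One caveat: PREROUTING only sees packets arriving from outside; traffic generated on the host itself traverses the OUTPUT chain instead, so this DNAT cannot be tested from the host. A sketch of an analogous OUTPUT rule for host-local testing (an extra step, not part of the original walkthrough; 150.109.72.88 is the host's public IP used below):
iptables -t nat -A OUTPUT -p tcp -d 150.109.72.88 --dport 80 -j DNAT --to-destination 192.168.2.12:80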
This test continues from section 4 (routing table).
Start a service listening on port 80 inside ns1:
# nc -lp 80
The host's public address is 150.109.72.88. From another machine, try to reach the port-80 service at 150.109.72.88:
Tingzhangs-MacBook-Pro:~ tingzhangming$ telnet 150.109.72.88 80
Trying 150.109.72.88...
telnet: connect to address 150.109.72.88: Connection refused
telnet: Unable to connect to remote host
The connection is refused because no iptables translation is in place yet. Add a DNAT rule on the host:
root@nicktming:~# iptables -t nat -vnL PREROUTING
Chain PREROUTING (policy ACCEPT 1354 packets, 56640 bytes)
pkts bytes target prot opt in out source destination
693K 26M DOCKER all -- * * 0.0.0.0/0 0.0.0.0/0 ADDRTYPE match dst-type LOCAL
root@nicktming:~# iptables -t nat -A PREROUTING -p tcp -m tcp --dport 80 -j DNAT --to-destination 192.168.2.12:80
root@nicktming:~# iptables -t nat -vnL PREROUTING
Chain PREROUTING (policy ACCEPT 2 packets, 56 bytes)
pkts bytes target prot opt in out source destination
693K 26M DOCKER all -- * * 0.0.0.0/0 0.0.0.0/0 ADDRTYPE match dst-type LOCAL
0 0 DNAT tcp -- * * 0.0.0.0/0 0.0.0.0/0 tcp dpt:80 to:192.168.2.12:80
From the other machine, connect to port 80 at 150.109.72.88 again and type hello world:
Tingzhangs-MacBook-Pro:~ tingzhangming$ telnet 150.109.72.88 80
Trying 150.109.72.88...
Connected to 150.109.72.88.
Escape character is '^]'.
hello world
Check what arrived in ns1:
# nc -lp 80
hello world
This demonstrates iptables DNAT in action.
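To restore the host afterwards, everything built in sections 3 through 5 tears down in a few commands. A minimal sketch (the -D specifications must match the rules added above exactly):
iptables -t nat -D PREROUTING -p tcp -m tcp --dport 80 -j DNAT --to-destination 192.168.2.12:80
iptables -t nat -D POSTROUTING -s 192.168.2.0/24 -o eth0 -j MASQUERADE
ip netns delete ns1    # destroys veth1, and with it its peer veth0
ip link del br0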
6. References
1. http://www.cnblogs.com/sparkdev/p/9253409.html
2. https://segmentfault.com/a/1190000009251098
3. https://blog.csdn.net/sld880311/article/details/77840343
4. https://segmentfault.com/a/1190000009491002
5. http://www.cnblogs.com/zmkeil/archive/2013/04/21/3034733.html
6. http://www.zsythink.net/archives/1199
7. Full series
[figure: mydocker.png]
1. [mydocker]---Environment setup
2. [mydocker]---Understanding urfave cli
3. [mydocker]---Linux Namespace
4. [mydocker]---Linux Cgroup
5. [mydocker]---Building a container 01: implementing the run command
6. [mydocker]---Building a container 02: resource limits 01
7. [mydocker]---Building a container 02: resource limits 02
8. [mydocker]---Building a container 03: adding pipes
9. [mydocker]---Understanding the AUFS storage driver by example
10. [mydocker]---Understanding chroot and pivot_root by example
11. [mydocker]---Step by step: creating a container with busybox
12. [mydocker]---Step by step: wrapping busybox with AUFS
13. [mydocker]---Step by step: implementing volume operations
14. [mydocker]---Implementing image saving
15. [mydocker]---Running containers in the background
16. [mydocker]---Listing running containers
17. [mydocker]---Viewing container logs
18. [mydocker]---Entering a container's namespaces
19. [mydocker]---Stopping containers
20. [mydocker]---Deleting containers
21. [mydocker]---Container layer isolation
22. [mydocker]---Building images from containers
23. [mydocker]---Implementing the cp operation
24. [mydocker]---Setting container environment variables
25. [mydocker]---The Internet Protocol (IP)
26. [mydocker]---Virtual network devices: veth, bridge, iptables
27. [mydocker]---Docker's four network models and how they work (1)
28. [mydocker]---Docker's four network models and how they work (2)
29. [mydocker]---Container address allocation
30. [mydocker]---Using the net/netlink API
31. [mydocker]---Implementing the network
32. [mydocker]---Testing the network implementation