美文网首页
ebpf学习(5)

ebpf学习(5)

作者: android小奉先 | 来源:发表于2024-10-03 21:56 被阅读0次

    本篇介绍

    bpf在网络中可以用来对网络包进行捕获和过滤,接下来看下是如何做到的。

    Packet Filtering

    Filtering 主要可以用于如下三个场景:

    1. 实时数据包丢弃
    2. 观察实时数据包
    3. 数据包分析,主要是pcap格式

    tcpdump

    tcpdump是流量分析和观察中常用的一个工具,实际上tcpdump也是从网络接口上读取数据并且将对应包的内容提供给我们,同时也可以使用pcap过滤语法进行过滤。
    比如我们想看端口443(https)的数据,命令如下:

    # tcpdump -n 'ip and tcp port 443'
    tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
    listening on wlp4s0, link-type EN10MB (Ethernet), snapshot length 262144 bytes
    13:06:23.124480 IP 192.168.31.63.35806 > 142.251.215.234.443: Flags [S], seq 2161845746, win 64240, options [mss 1460,sackOK,TS val 4275232445 ecr 0,nop,wscale 7], length 0
    13:06:23.380465 IP 192.168.31.63.51158 > 23.216.153.92.443: Flags [.], ack 1565733590, win 456, options [nop,nop,TS val 4010187404 ecr 1943807953], length 0
    13:06:23.380507 IP 192.168.31.63.52146 > 114.250.70.41.443: Flags [.], ack 3097876363, win 1257, options [nop,nop,TS val 3161263685 ecr 2071104007], length 0
    13:06:23.385979 IP 114.250.70.41.443 > 192.168.31.63.52146: Flags [.], ack 1, win 285, options [nop,nop,TS val 2071152322 ecr 3161215372], length 0
    13:06:23.448311 IP 23.216.153.92.443 > 192.168.31.63.51158: Flags [.], ack 1, win 501, options [nop,nop,TS val 1943856427 ecr 4010139037], length 0
    13:06:24.014502 IP 114.250.65.34.443 > 192.168.31.63.34102: Flags [P.], seq 1667579479:1667579552, ack 2774828657, win 301, options [nop,nop,TS val 1564142983 ecr 3366717057], length 73
    13:06:24.015094 IP 192.168.31.63.34102 > 114.250.65.34.443: Flags [F.], seq 1, ack 73, win 488, options [nop,nop,TS val 3366957171 ecr 1564142983], length 0
    13:06:24.022775 IP 114.250.65.34.443 > 192.168.31.63.34102: Flags [F.], seq 73, ack 2, win 301, options [nop,nop,TS val 1564143099 ecr 3366957171], length 0
    13:06:24.022857 IP 192.168.31.63.34102 > 114.250.65.34.443: Flags [.], ack 74, win 488, options [nop,nop,TS val 3366957179 ecr 1564143099], length 0
    13:06:24.148403 IP 192.168.31.63.35806 > 142.251.215.234.443: Flags [S], seq 2161845746, win 64240, options [mss 1460,sackOK,TS val 4275233469 ecr 0,nop,wscale 7], length 0
    13:06:26.196385 IP 192.168.31.63.35806 > 142.251.215.234.443: Flags [S], seq 2161845746, win 64240, options [mss 1460,sackOK,TS val 4275235517 ecr 0,nop,wscale 7], length 0
    13:06:26.573162 IP 110.242.68.4.443 > 192.168.31.63.59242: Flags [.], ack 1765924645, win 1108, length 0
    13:06:26.573216 IP 192.168.31.63.59242 > 110.242.68.4.443: Flags [.], ack 1, win 501, length 0
    13:06:26.964926 IP 192.168.31.63.44972 > 142.251.215.234.443: Flags [S], seq 261160313, win 64240, options [mss 1460,sackOK,TS val 4275236285 ecr 0,nop,wscale 7], length 0
    13:06:27.188234 IP 110.242.69.113.443 > 192.168.31.63.56366: Flags [P.], seq 1359899758:1359899789, ack 4057787616, win 1208, length 31
    13:06:27.188236 IP 110.242.69.113.443 > 192.168.31.63.56366: Flags [F.], seq 31, ack 1, win 1208, length 0
    13:06:27.188237 IP 110.242.69.113.443 > 192.168.31.63.56366: Flags [F.], seq 31, ack 1, win 1208, length 0
    13:06:27.188343 IP 192.168.31.63.56366 > 110.242.69.113.443: Flags [.], ack 32, win 428, options [nop,nop,sack 1 {31:32}], length 0
    

    上述指令介绍如下:

    -n: 不需要转换地址,这样可以方便看dst和src的地址
    ip and tcp port 443: ip表示ipv4;tcp port 443表示tcp包,并且是来自或者去往端口443的数据包
    

    这儿我们用到了pcap filters,实际上pcap filter会被编译成bpf指令,并加载到系统中用来过滤包。也就是我们在使用tcpdump的时候实际上就是在加载并使用bpf程序,可以添加-d看下:

    tcpdump -n -d 'ip and tcp port 443'
    Warning: assuming Ethernet
    (000) ldh      [12]  // 在偏移12的地方读取半字,也就是2个字节
    (001) jeq      #0x800           jt 2    jf 12 // 如果等于0x800(Ethertype为IPv4),就跳到2,否则跳到12,也就是直接退出
    (002) ldb      [23] // 在偏移23的地方读取一个字节,对应的是网络协议字段
    (003) jeq      #0x6             jt 4    jf 12 // 如果是6,就跳到4,否则就到12, 6表示的是tcp
    (004) ldh      [20] // 从偏移20的地方读取2字节,对应的是IP头中的flags和fragment offset字段
    (005) jset     #0x1fff          jt 12   jf 6 // 查看低13位(fragment offset),如果不为0(说明不是第一个分片,里面没有TCP头),则跳到12,否则就到6
    (006) ldxb     4*([14]&0xf) // 读取偏移14处的一个字节,取低4位(IHL)再乘以4,得到IP header length(字节数),存入x寄存器
    (007) ldh      [x + 14] // 从x +14的地方读取2个字节,也就是源端口号
    (008) jeq      #0x1bb           jt 11   jf 9 // 如果是443,就跳到11,否则跳到9
    (009) ldh      [x + 16] // 从x + 16 的地方读取2个字节,也就是目的端口号
    (010) jeq      #0x1bb           jt 11   jf 12 // 如果是443,就跳到11,否则跳到12
    (011) ret      #262144 // 满足查询条件,返回对应的长度,这儿是个默认值
    (012) ret      #0 // 不满足查询条件
    

    如果需要看懂上述的指令,需要先了解下网络包格式:
    Destination Mac: 6 bytes
    Source Mac: 6 bytes
    Ethertype: 2 bytes
    Data(payload):46-1500 bytes
    Frame check sequence(CRC): 4 bytes

    基于裸socket的包过滤

    接下来用代码来看看如何将bpf程序attach到一个裸socket上,此时该socket收到的所有数据都会移交给我们的bpf程序,这时候我们的程序就可以决定是否需要丢弃。接下来的例子只是统计下该socket上不同协议的报文数量,代码如下:

    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <linux/if_packet.h>
    #include <linux/in.h>
    #include <linux/ip.h>
    #include <linux/string.h>
    #include <linux/tcp.h>
    #include <linux/types.h>
    #include <linux/udp.h>
    
    /*
     * Fallback offsetof for builds where no header has provided one yet.
     * The compiler builtin is used instead of taking a member address through a
     * null pointer, so the macro does not depend on size_t being declared and
     * always yields a constant expression.
     */
    #ifndef offsetof
    #define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
    #endif

    /*
     * Places the annotated object or function in the ELF section NAME.
     * `used` keeps it in the object file even when nothing references it.
     */
    #define SEC(NAME) __attribute__((section(NAME), used))
    
    /*
     * Legacy-style description of a BPF map: five consecutive unsigned fields.
     * The only instance in this listing (countmap) fills the first four with
     * designated initializers and leaves map_flags zero-initialized.
     */
    struct bpf_map_def {
      unsigned type;        /* a BPF_MAP_TYPE_* constant */
      unsigned key_size;    /* set via sizeof() of the key type */
      unsigned value_size;  /* set via sizeof() of the value type */
      unsigned max_entries; /* number of slots in the map */
      unsigned map_flags;   /* not set anywhere in this listing */
    };
    
    /*
     * Helper stubs: each function pointer is initialised with the numeric
     * BPF_FUNC_* id of a helper, cast to a pointer. Calls made through these
     * pointers in the program below are intended to become calls to the
     * corresponding kernel helper.
     *
     * bpf_map_update_elem(map, key, value, flags): store *value under *key.
     * bpf_map_lookup_elem(map, key): returns a pointer to the stored value;
     * the caller below tests the result against NULL before using it.
     */
    static int (*bpf_map_update_elem)(struct bpf_map_def *map, void *key,
                                      void *value, __u64 flags) = (void *)
        BPF_FUNC_map_update_elem;
    static void *(*bpf_map_lookup_elem)(struct bpf_map_def *map, void *key) =
        (void *)BPF_FUNC_map_lookup_elem;
    
    /*
     * Declared with an asm label so that the symbol name is the LLVM intrinsic
     * "llvm.bpf.load.byte". socket_prog calls it with the skb and a byte offset
     * into the packet and uses the result as a single byte value.
     */
    unsigned long long load_byte(void *skb,
                                 unsigned long long off) asm("llvm.bpf.load.byte");
    // Note: this way of defining a map is no longer supported by libbpf v1.0+.
    // Per-protocol packet counters. socket_prog uses the byte it reads from the
    // IP header's protocol field as the key and an int counter as the value.
    struct bpf_map_def SEC("maps") countmap = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(int),
        .value_size = sizeof(int),
        .max_entries = 256,  // one slot for every value a single byte can take
    };
    
    /*
     * Socket filter that counts packets per IP protocol number.
     *
     * Reads the one-byte protocol field at ETH_HLEN + offsetof(struct iphdr,
     * protocol) and bumps the matching slot of countmap.
     *
     * The increment is done in place with an atomic add: the pointer returned by
     * the lookup refers to the stored value, so no write-back through
     * bpf_map_update_elem is needed, and concurrent CPUs cannot lose updates.
     * bpf_map_update_elem is only used to create the entry when the lookup
     * finds nothing.
     *
     * Always returns 0, exactly like the original listing.
     * NOTE(review): the EtherType is not checked, so for non-IPv4 frames the
     * byte at that offset is counted as if it were a protocol number.
     */
    SEC("socket")
    int socket_prog(struct __sk_buff *skb) {
      int proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
      int *count = bpf_map_lookup_elem(&countmap, &proto);

      if (count) {
        __sync_fetch_and_add(count, 1);
      } else {
        int first = 1;
        bpf_map_update_elem(&countmap, &proto, &first, BPF_ANY);
      }
      return 0;
    }
    
    /* License string emitted into the "license" section of the object file. */
    char _license[] SEC("license") = "GPL";
    

    该代码需要在内核代码树中编译,并且编译target是bpf。
    接下来是加载的代码:

    #include <arpa/inet.h>
    #include <assert.h>
    #include <bpf/bpf.h>
    #include <bpf/bpf_load.h>
    #include <bpf/sock_example.h>
    #include <errno.h>
    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>
    
    /*
     * Log buffer that main() prints when load_bpf_file() fails.
     * NOTE(review): nothing in this listing writes to it; presumably the loader
     * helper from bpf_load.h fills it - confirm it is the same object.
     */
    char bpf_log_buf[BPF_LOG_BUF_SIZE];
    
    /*
     * Reads the int counter stored under key `proto` in the map behind `fd`.
     * Returns whatever bpf_map_lookup_elem returns (main treats non-zero as
     * failure, as the original assert did).
     */
    static int read_count(int fd, int proto, int *count) {
      return bpf_map_lookup_elem(fd, &proto, count);
    }

    /*
     * Loads the BPF object file named by argv[1], attaches its first program to
     * a raw socket opened on "lo", then prints the TCP/UDP/ICMP counters from
     * the first map once per second, ten times.
     *
     * Returns 0 on success and 1 on any failure (missing argument, load error,
     * socket error, attach error, map lookup error).
     */
    int main(int argc, char **argv) {
      int sock, i;
      int tcp_cnt, udp_cnt, icmp_cnt;

      if (argc < 2) {
        fprintf(stderr, "usage: %s <bpf-object-file>\n",
                argc > 0 ? argv[0] : "loader");
        return 1;
      }

      /* Pass the path through untouched instead of truncating it to a buffer. */
      if (load_bpf_file(argv[1])) {
        printf("%s", bpf_log_buf);
        return 1;
      }

      sock = open_raw_sock("lo");
      /* Assumes a negative return means failure - confirm in sock_example.h. */
      if (sock < 0) {
        fprintf(stderr, "open_raw_sock failed\n");
        return 1;
      }

      if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
                     sizeof(prog_fd[0]))) {
        printf("setsockopt %s\n", strerror(errno));
        close(sock);
        return 1;
      }

      for (i = 0; i < 10; i++) {
        /* Done outside assert() so the lookups survive a build with NDEBUG. */
        if (read_count(map_fd[0], IPPROTO_TCP, &tcp_cnt) ||
            read_count(map_fd[0], IPPROTO_UDP, &udp_cnt) ||
            read_count(map_fd[0], IPPROTO_ICMP, &icmp_cnt)) {
          fprintf(stderr, "bpf_map_lookup_elem: %s\n", strerror(errno));
          close(sock);
          return 1;
        }

        printf("TCP %d UDP %d ICMP %d packets\n", tcp_cnt, udp_cnt, icmp_cnt);
        sleep(1);
      }

      close(sock);
      return 0;
    }
    

    按照如上操作后,如果执行ping 127.0.0.1, 就会得到如下结果:

    TCP 0 UDP 0 ICMP 0 packets
    TCP 0 UDP 0 ICMP 4 packets
    TCP 0 UDP 0 ICMP 8 packets
    TCP 0 UDP 0 ICMP 12 packets
    TCP 0 UDP 0 ICMP 16 packets
    TCP 0 UDP 0 ICMP 20 packets
    TCP 0 UDP 0 ICMP 24 packets
    TCP 0 UDP 0 ICMP 28 packets
    TCP 0 UDP 0 ICMP 32 packets
    TCP 0 UDP 0 ICMP 36 packets
    

    基于BPF 的TC

    TC(traffic control) 是内核包调度子系统,可以决定包如何流动和如何被接收。
    为了了解tc,可以先了解几个术语。
    Queueing disciplines:qdisc, 用来决定数据包在网络接口上的发送顺序,默认的是pfifo_fast, 拥有3个优先级队列,按照优先级先进先出。用如下命令就可以看到当前设备上的qdisc:

    ip a
    1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
        inet 127.0.0.1/8 scope host lo
           valid_lft forever preferred_lft forever
        inet6 ::1/128 scope host noprefixroute 
           valid_lft forever preferred_lft forever
    2: enp0s31f6: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc fq_codel state DOWN group default qlen 1000
        link/ether c8:5b:76:3f:0e:74 brd ff:ff:ff:ff:ff:ff
    3: wlp4s0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
        link/ether e4:a4:71:b3:62:17 brd ff:ff:ff:ff:ff:ff
        inet 192.168.0.112/24 brd 192.168.0.255 scope global dynamic noprefixroute wlp4s0
           valid_lft 5077sec preferred_lft 5077sec
        inet6 fe80::a7fa:5c7f:99ae:bd31/64 scope link noprefixroute 
           valid_lft forever preferred_lft forever
    4: docker0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN group default 
        link/ether 02:42:c0:45:54:ec brd ff:ff:ff:ff:ff:ff
        inet 172.17.0.1/16 brd 172.17.255.255 scope global docker0
           valid_lft forever preferred_lft forever
    

    可以看到如上用到的主要是noqueue和fq_codel,前者就是收到包后就立即发,没有其他规则,后者是fair queue controlled delay,以一个随机模型分类数据包,实现公平发送。

    Classful qdiscs: 允许为不同的包定义不同的类别,这样就可以使用不同的规则。
    Filters:用来给数据包指定一个特定的类别。
    Classless qdiscs:没有任何关联的类别,这意味着没法指定filters。

    接下来就看一个例子, 如果发现是http包,就打印下log:

    #pragma clang diagnostic ignored "-Wcompare-distinct-pointer-types"
    
    #include <bits/types.h>
    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <linux/in.h>
    #include <linux/ip.h>
    #include <linux/pkt_cls.h>
    #include <linux/tcp.h>
    
    /*
     * Places the annotated object or function in the ELF section NAME.
     * `used` keeps it in the object file even when nothing references it.
     */
    #define SEC(NAME) __attribute__((section(NAME), used))
    
    /*
     * 16-bit host-to-network conversion, selected at compile time from the
     * compiler's __BYTE_ORDER__:
     *   - little-endian: the two bytes are swapped (__builtin_bswap16 for
     *     run-time values, ___constant_swab16 for compile-time constants);
     *   - big-endian: the value is passed through unchanged;
     *   - anything else: the build stops with an #error.
     * NOTE(review): ___constant_swab16 is not defined in this listing;
     * presumably it comes from one of the included linux/ headers - confirm.
     */
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    #define __bpf_htons(x) __builtin_bswap16(x)
    #define __bpf_constant_htons(x) ___constant_swab16(x)
    #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    #define __bpf_htons(x) (x)
    #define __bpf_constant_htons(x) (x)
    #else
    #error "Fix your compiler's __BYTE_ORDER__?!"
    #endif
    
    /* Uses the constant variant when x is a compile-time constant, else the run-time one. */
    #define bpf_htons(x) \
      (__builtin_constant_p(x) ? __bpf_constant_htons(x) : __bpf_htons(x))
    
    /*
     * Helper stub: function pointer initialised with the numeric id
     * BPF_FUNC_trace_printk. Takes a format string, its size in bytes, and
     * optional further arguments.
     */
    static int (*bpf_trace_printk)(const char *fmt, int fmt_size,
                                   ...) = (void *)BPF_FUNC_trace_printk;
    
    /*
     * Convenience wrapper around bpf_trace_printk: copies the format literal
     * into a local char array and passes that array together with its sizeof
     * (which includes the terminating NUL). `fmt` must therefore be a string
     * literal, since it is used as an array initializer.
     */
    #define trace_printk(fmt, ...)                                                 \
      do {                                                                         \
        char _fmt[] = fmt;                                                         \
        bpf_trace_printk(_fmt, sizeof(_fmt), ##__VA_ARGS__);                       \
      } while (0)
    
    /*
     * Declared with an asm label so that the symbol name is the LLVM intrinsic
     * "llvm.bpf.load.byte". is_http() calls it with the skb and a byte offset
     * into the packet to fetch single payload bytes.
     */
    unsigned long long load_byte(void *skb,
                                 unsigned long long off) asm("llvm.bpf.load.byte");
    
    /* NOTE(review): not referenced anywhere in the visible listing. */
    struct http_payload {
      int method;
    };
    
    /* Forward declaration; the definition follows classification(). */
    static inline int is_http(struct __sk_buff *skb, __u64 nh_off);
    
    /*
     * Fixed-width aliases built on the double-underscore types.
     * NOTE(review): none of these aliases is used in the visible listing.
     */
    typedef __uint8_t uint8_t;
    typedef __uint16_t uint16_t;
    typedef __uint32_t uint32_t;
    typedef __uint64_t uint64_t;
    
    /*
     * Classifier entry point. Inspects the Ethernet header of the packet and,
     * for IPv4 frames that is_http() recognises, emits a trace message.
     * Returns TC_ACT_OK on every path.
     */
    SEC("classifier")
    static inline int classification(struct __sk_buff *skb) {
      void *pkt_start = (void *)(long)skb->data;
      void *pkt_end = (void *)(long)skb->data_end;
      struct ethhdr *eth = pkt_start;
      __u64 l3_off = sizeof(*eth);

      /* Not even a full Ethernet header in the linear data: nothing to inspect. */
      if (pkt_start + l3_off > pkt_end) {
        return TC_ACT_OK;
      }

      /* Only IPv4 frames are handed to the HTTP check. */
      if (eth->h_proto != bpf_htons(ETH_P_IP)) {
        return TC_ACT_OK;
      }

      if (is_http(skb, l3_off) == 1) {
        trace_printk("Yes! It is HTTP!\n");
      }

      return TC_ACT_OK;
    }
    
    /*
     * Reports whether the IPv4 packet whose header starts nh_off bytes into the
     * frame carries a TCP payload that begins with the four bytes "HTTP".
     *
     * skb:    packet being classified.
     * nh_off: byte offset of the IPv4 header from the start of the packet data.
     *
     * Returns 1 on a match and 0 otherwise (not TCP, truncated or malformed
     * headers, non-first fragment, or payload shorter than 7 bytes).
     *
     * Fixes relative to the original listing:
     *   - tot_len is converted from network byte order before it is used;
     *   - the TCP header is located with the real IP header length (ihl), so
     *     packets carrying IP options are parsed correctly;
     *   - header lengths are validated before the subtraction, so the unsigned
     *     payload length can no longer wrap around;
     *   - non-first fragments, which do not contain a TCP header, are rejected;
     *   - only the four bytes that are compared are loaded.
     */
    static inline int is_http(struct __sk_buff *skb, __u64 nh_off) {
      enum { MIN_PAYLOAD = 7, IP_FRAG_OFFSET_MASK = 0x1fff };

      void *data_end = (void *)(long)skb->data_end;
      void *data = (void *)(long)skb->data;
      struct iphdr *iph = data + nh_off;

      if ((void *)(iph + 1) > data_end) {
        return 0;
      }

      if (iph->protocol != IPPROTO_TCP) {
        return 0;
      }

      /* A non-zero fragment offset means this fragment has no TCP header. */
      if (iph->frag_off & bpf_htons(IP_FRAG_OFFSET_MASK)) {
        return 0;
      }

      __u32 ip_hlen = iph->ihl << 2;
      if (ip_hlen < sizeof(*iph)) {
        return 0;
      }

      /* A 16-bit byte swap is its own inverse, so bpf_htons also converts
       * network order to host order. */
      __u32 ip_total_length = bpf_htons(iph->tot_len);

      struct tcphdr *tcph = data + nh_off + ip_hlen;
      if ((void *)(tcph + 1) > data_end) {
        return 0;
      }

      __u32 tcp_hlen = tcph->doff << 2;
      if (tcp_hlen < sizeof(*tcph)) {
        return 0;
      }

      /* Reject inconsistent lengths before subtracting unsigned values. */
      if (ip_total_length < ip_hlen + tcp_hlen) {
        return 0;
      }

      __u32 plength = ip_total_length - ip_hlen - tcp_hlen;
      if (plength < MIN_PAYLOAD) {
        return 0;
      }

      __u32 poffset = (__u32)nh_off + ip_hlen + tcp_hlen;

      return load_byte(skb, poffset) == 'H' &&
             load_byte(skb, poffset + 1) == 'T' &&
             load_byte(skb, poffset + 2) == 'T' &&
             load_byte(skb, poffset + 3) == 'P';
    }
    
    /* License string emitted into the "license" section of the object file. */
    char _license[] SEC("license") = "GPL";
    

    将上面文件编译成bpf的目标文件。
    使用如下命令进行加载:

    # tc qdisc add dev eth0 handle 0: ingress
    # tc filter add dev eth0 ingress bpf obj classifier.o flowid 0:
    # tc exec bpf dbg
    

    如果有http数据,就会看到对应的打印。

    相关文章

      网友评论

          本文标题:ebpf学习(5)

          本文链接:https://www.haomeiwen.com/subject/efbkrjtx.html