美文网首页
netfilter之conntrack连接跟踪

netfilter之conntrack连接跟踪

作者: 分享放大价值 | 来源:发表于2020-07-02 00:16 被阅读0次

    连接跟踪conntrack是状态防火墙和NAT的基础,每个经过conntrack处理的数据包的skb->nfctinfo都会设置如下值之一,后续流程中NAT模块根据此值做不同的处理,filter模块可以在扩展匹配中指定state进行不同的处理。

    /*
     * Per-packet connection tracking state stored in skb->nfctinfo.
     * Values below IP_CT_IS_REPLY describe the original direction; adding
     * IP_CT_IS_REPLY to one of them gives its reply-direction counterpart.
     */
    enum ip_conntrack_info {
        /* Part of an established connection (either direction).
           Set on original-direction packets once traffic has been seen
           in both directions. */
        IP_CT_ESTABLISHED = 0,

        /* Like NEW, but related to an existing connection, or ICMP error
           (in either direction). */
        IP_CT_RELATED = 1,

        /* Started a new connection to track (only IP_CT_DIR_ORIGINAL);
           may be a retransmission. Original-direction packet seen, but
           the connection is not established yet. */
        IP_CT_NEW = 2,

        /* >= this indicates reply direction, i.e. a reply-direction packet
           was received. */
        IP_CT_IS_REPLY = 3,

        IP_CT_ESTABLISHED_REPLY = IP_CT_IS_REPLY + IP_CT_ESTABLISHED,
        IP_CT_RELATED_REPLY     = IP_CT_IS_REPLY + IP_CT_RELATED,
        IP_CT_NEW_REPLY         = IP_CT_IS_REPLY + IP_CT_NEW,

        /* Number of distinct IP_CT types (no NEW in reply dirn). */
        IP_CT_NUMBER = 2 * IP_CT_IS_REPLY - 1
    };
    

    nf hook函数注册

    跟连接跟踪相关的hook函数包含下面两个:重组相关的和conntrack处理相关的。

    注册报文重组hook函数。

    /*
     * Defragmentation hooks for IPv4: the same handler is attached at the
     * PRE_ROUTING and LOCAL_OUT hook points, both with the
     * NF_IP_PRI_CONNTRACK_DEFRAG priority.
     */
    static struct nf_hook_ops ipv4_defrag_ops[] = {
        {   /* PRE_ROUTING hook point */
            .pf       = NFPROTO_IPV4,
            .hooknum  = NF_INET_PRE_ROUTING,
            .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
            .hook     = ipv4_conntrack_defrag,
            .owner    = THIS_MODULE,
        },
        {   /* LOCAL_OUT hook point */
            .pf       = NFPROTO_IPV4,
            .hooknum  = NF_INET_LOCAL_OUT,
            .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
            .hook     = ipv4_conntrack_defrag,
            .owner    = THIS_MODULE,
        },
    };
    /*
     * Module entry point: registers every entry of ipv4_defrag_ops with
     * netfilter and hands the registration result back to the module loader.
     */
    static int __init nf_defrag_init(void)
    {
        const unsigned int nr_hooks = ARRAY_SIZE(ipv4_defrag_ops);

        return nf_register_hooks(ipv4_defrag_ops, nr_hooks);
    }

    module_init(nf_defrag_init);
    

    注册conntrack hook函数。

    /*
     * Connection tracking may drop packets, but never alters them, so
     * make it the first hook.
     *
     * Table layout: the tracking entry points sit at PRE_ROUTING and
     * LOCAL_OUT; the helper and confirm handlers sit at POST_ROUTING and
     * LOCAL_IN.
     */
    static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
        {   /* PRE_ROUTING: tracking entry point */
            .pf       = NFPROTO_IPV4,
            .hooknum  = NF_INET_PRE_ROUTING,
            .priority = NF_IP_PRI_CONNTRACK,
            .hook     = ipv4_conntrack_in,
            .owner    = THIS_MODULE,
        },
        {   /* LOCAL_OUT: tracking entry point */
            .pf       = NFPROTO_IPV4,
            .hooknum  = NF_INET_LOCAL_OUT,
            .priority = NF_IP_PRI_CONNTRACK,
            .hook     = ipv4_conntrack_local,
            .owner    = THIS_MODULE,
        },
        {   /* POST_ROUTING: run the connection's helper */
            .pf       = NFPROTO_IPV4,
            .hooknum  = NF_INET_POST_ROUTING,
            .priority = NF_IP_PRI_CONNTRACK_HELPER,
            .hook     = ipv4_helper,
            .owner    = THIS_MODULE,
        },
        {   /* POST_ROUTING: confirm the connection */
            .pf       = NFPROTO_IPV4,
            .hooknum  = NF_INET_POST_ROUTING,
            .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
            .hook     = ipv4_confirm,
            .owner    = THIS_MODULE,
        },
        {   /* LOCAL_IN: run the connection's helper */
            .pf       = NFPROTO_IPV4,
            .hooknum  = NF_INET_LOCAL_IN,
            .priority = NF_IP_PRI_CONNTRACK_HELPER,
            .hook     = ipv4_helper,
            .owner    = THIS_MODULE,
        },
        {   /* LOCAL_IN: confirm the connection */
            .pf       = NFPROTO_IPV4,
            .hooknum  = NF_INET_LOCAL_IN,
            .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
            .hook     = ipv4_confirm,
            .owner    = THIS_MODULE,
        },
    };
    /*
     * Module init, shown as an excerpt: each "..." marks code the article
     * left out. The visible step registers the ipv4_conntrack_ops hook table
     * with netfilter and stores the result in ret.
     */
    static int __init nf_conntrack_l3proto_ipv4_init(void)
    {
        ...
        ret = nf_register_hooks(ipv4_conntrack_ops,
                    ARRAY_SIZE(ipv4_conntrack_ops));
        ...
    }
    

    上面的hook函数会注册到二维数组nf_hooks中,第一维是协议族(pf),第二维是hook点(hooknum)。

    /*
     * Netfilter hook points on the IP packet path. The value is stored in
     * nf_hook_ops.hooknum and used as the second index into nf_hooks.
     */
    enum nf_inet_hooks {
        NF_INET_PRE_ROUTING  = 0,
        NF_INET_LOCAL_IN     = 1,
        NF_INET_FORWARD      = 2,
        NF_INET_LOCAL_OUT    = 3,
        NF_INET_POST_ROUTING = 4,
        /* Number of hook points above; not a hook point itself. */
        NF_INET_NUMHOOKS     = 5
    };
    /*
     * Netfilter protocol families. The value is stored in nf_hook_ops.pf and
     * used as the first index into nf_hooks. The numbering is sparse, so the
     * trailing NFPROTO_NUMPROTO (one past NFPROTO_DECNET, i.e. 13) sizes the
     * array rather than counting the families.
     */
    enum {
        NFPROTO_UNSPEC =  0,
        NFPROTO_INET   =  1,
        NFPROTO_IPV4   =  2,
        NFPROTO_ARP    =  3,
        NFPROTO_BRIDGE =  7,
        NFPROTO_IPV6   = 10,
        NFPROTO_DECNET = 12,
        NFPROTO_NUMPROTO,
    };
    /* One hook list per (protocol family, hook point) pair; nf_register_hook()
       indexes it as nf_hooks[reg->pf][reg->hooknum]. */
    extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
    
    /*
     * Register the n hook descriptors in reg[], in array order.
     *
     * Returns 0 when every descriptor was registered, otherwise the first
     * non-zero value returned by nf_register_hook(); registration stops at
     * that point.
     *
     * NOTE(review): descriptors registered before a failure are left in place
     * here. A complete implementation should unregister reg[0..i-1] before
     * returning the error -- confirm against the kernel's own version.
     */
    int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
    {
        unsigned int i;
        int err = 0;

        for (i = 0; i < n; i++) {
            err = nf_register_hook(&reg[i]);
            if (err)
                break;
        }

        /* Callers such as nf_defrag_init() return this value directly, so the
           function must always produce one. */
        return err;
    }
    
    /*
     * Insert one hook descriptor into nf_hooks[reg->pf][reg->hooknum], keeping
     * that list ordered by ascending priority value. Always returns 0.
     */
    int nf_register_hook(struct nf_hook_ops *reg)
    {
        struct nf_hook_ops *elem;
    
        /* Writers are serialised by nf_hook_mutex. */
        mutex_lock(&nf_hook_mutex);
        /* Stop at the first entry whose priority value is strictly larger than
           ours, so entries of equal priority keep their registration order. */
        list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
            if (reg->priority < elem->priority)
                break;
        }
        /* Link reg after elem's predecessor, i.e. immediately before elem.
           NOTE(review): when the loop runs to completion this relies on the
           iterator ending up at the list head, which yields a tail insert. */
        list_add_rcu(&reg->list, elem->list.prev);
        mutex_unlock(&nf_hook_mutex);
    #ifdef HAVE_JUMP_LABEL
        /* Bump the static key that belongs to this (pf, hooknum) slot. */
        static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
    #endif
        return 0;
    }
    

    重组和conntrack hook注册成功后,nf_hooks的内容如下图标黄部分所示,这也是ipv4的连接跟踪模块用到的hook函数,小括号中的数字是hook函数的优先级。在同一个hook点上,数字越小优先级越高,越先被调用。


    image.png

    发送给本机的数据会经过 NF_INET_PRE_ROUTING 和 NF_INET_LOCAL_IN 两个hook点,所以hook函数调用顺序为:
    ipv4_conntrack_defrag -> ipv4_conntrack_in -> ipv4_helper -> ipv4_confirm。

    由本机转发的数据会经过 NF_INET_PRE_ROUTING 和 NF_INET_FORWARD 和 NF_INET_POST_ROUTING 三个hook点,所以hook函数调用顺序为:
    ipv4_conntrack_defrag -> ipv4_conntrack_in -> ipv4_helper -> ipv4_confirm。

    本机发送的数据会经过 NF_INET_LOCAL_OUT 和NF_INET_POST_ROUTING 两个hook点,所以hook函数调用顺序为:
    ipv4_conntrack_defrag -> ipv4_conntrack_local -> ipv4_helper -> ipv4_confirm。

    可看到不管数据包从哪来到哪去,经过的连接跟踪模块处理基本是一样的,唯一的区别是ipv4_conntrack_in和ipv4_conntrack_local,后者增加了对数据包长度的校验,即只有从本机发出去的报文才需要校验长度。

    /*
     * Tracking entry point registered at PRE_ROUTING: forwards the packet to
     * nf_conntrack_in(), taking the network namespace from the input device.
     * Returns whatever verdict nf_conntrack_in() produces.
     */
    static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
                          struct sk_buff *skb,
                          const struct net_device *in,
                          const struct net_device *out,
                          int (*okfn)(struct sk_buff *))
    {
        struct net *net = dev_net(in);

        return nf_conntrack_in(net, PF_INET, ops->hooknum, skb);
    }
    
    /*
     * Tracking entry point registered at LOCAL_OUT. Packets too short to hold
     * an IPv4 header, or whose header-length field is smaller than the
     * minimal header, are accepted untracked; everything else goes to
     * nf_conntrack_in() with the network namespace of the output device.
     */
    static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
                         struct sk_buff *skb,
                         const struct net_device *in,
                         const struct net_device *out,
                         int (*okfn)(struct sk_buff *))
    {
        if (skb->len >= sizeof(struct iphdr) &&
            ip_hdrlen(skb) >= sizeof(struct iphdr))
            return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb);

        /* root is playing with raw sockets. */
        return NF_ACCEPT;
    }
    

    hook函数执行

    下面分别分析这四个hook函数。

    1. ipv4_conntrack_defrag
      重组分片报文。重组完整前不让数据包进行下一步
    /*
     * Defragmentation hook. Non-fragments pass straight through. Fragments
     * are handed to nf_ct_ipv4_gather_frags(); while that call reports a
     * non-zero result the hook returns NF_STOLEN, so the packet does not
     * continue down the hook chain.
     */
    static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
                          struct sk_buff *skb,
                          const struct net_device *in,
                          const struct net_device *out,
                          int (*okfn)(struct sk_buff *))
    {
        struct sock *sk = skb->sk;
        struct inet_sock *inet = inet_sk(skb->sk);

        /* A PF_INET socket with nodefrag set asked for no reassembly. */
        if (sk && (sk->sk_family == PF_INET) && inet->nodefrag)
            return NF_ACCEPT;

    #if IS_ENABLED(CONFIG_NF_CONNTRACK) && !IS_ENABLED(CONFIG_NF_NAT)
        /* Previously seen (loopback)?  Ignore.  Do this before
           fragment check. The skb already carries a conntrack that is not
           a template (the article attributes this to a notrack rule in the
           raw table). */
        if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
            return NF_ACCEPT;
    #endif

        /* Only fragments need gathering. */
        if (!ip_is_fragment(ip_hdr(skb)))
            return NF_ACCEPT;

        /* The defrag "user" identifies which subsystem/stage performs the
           reassembly. A non-zero result from the gather call means the
           datagram is not complete yet (skb queued) or reassembly failed
           (skb released) -- per the article; the callee is not shown here. */
        if (nf_ct_ipv4_gather_frags(skb, nf_ct_defrag_user(ops->hooknum, skb)))
            return NF_STOLEN;

        return NF_ACCEPT;
    }
    
    2. nf_conntrack_in
      连接跟踪处理的主函数nf_conntrack_in,其会用到l3proto和l4proto函数,先看一下这两组函数的注册。

    注册l3proto Ipv4的处理函数。

    /*
     * Layer-3 (IPv4) tracker descriptor: identity, tuple callbacks, optional
     * netlink/sysctl glue, and per-namespace init.
     */
    struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
        /* identity */
        .l3proto     = PF_INET,
        .name        = "ipv4",
        .me      = THIS_MODULE,

        /* tuple handling */
        .pkt_to_tuple    = ipv4_pkt_to_tuple, /* extract source/destination IP */
        .invert_tuple    = ipv4_invert_tuple, /* swap source/destination IP */
        .print_tuple     = ipv4_print_tuple,  /* print source/destination IP */
        /* nf_conntrack_in() calls this to obtain the layer-4 data offset
           and the layer-4 protocol number */
        .get_l4proto     = ipv4_get_l4proto,

        .init_net    = ipv4_init_net,
    #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
        .tuple_to_nlattr = ipv4_tuple_to_nlattr,
        .nlattr_tuple_size = ipv4_nlattr_tuple_size,
        .nlattr_to_tuple = ipv4_nlattr_to_tuple,
        .nla_policy  = ipv4_nla_policy,
    #endif
    #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
        .ctl_table_path  = "net/ipv4/netfilter",
    #endif
    };
    static int __init nf_conntrack_l3proto_ipv4_init(void)
        ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
    
    /*
     * Install proto as the layer-3 tracker for its address family by
     * publishing it in nf_ct_l3protos[proto->l3proto].
     *
     * Returns 0 on success, -EBUSY when proto->l3proto is not below AF_MAX,
     * or -EINVAL when tuple_to_nlattr is provided without nlattr_tuple_size.
     */
    int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
    {
        int ret = 0;
        struct nf_conntrack_l3proto *old;

        if (proto->l3proto >= AF_MAX)
            return -EBUSY;

        if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
            return -EINVAL;

        mutex_lock(&nf_ct_proto_mutex);
        /* NOTE(review): the current occupant is fetched but not inspected in
           this excerpt; the complete kernel function presumably refuses to
           overwrite a slot that is already taken -- TODO confirm. */
        old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
                        lockdep_is_held(&nf_ct_proto_mutex));

        /* Pre-compute the netlink attribute space: three tuples' worth. */
        if (proto->nlattr_tuple_size)
            proto->nla_size = 3 * proto->nlattr_tuple_size();

        rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);

        /* Release the lock taken above and report the result; without these
           two lines the mutex stays held and the return value is undefined. */
        mutex_unlock(&nf_ct_proto_mutex);
        return ret;
    }
    

    注册l4proto处理函数

    int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
        rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
    
    //ipv4的l4注册了tcp,udp和icmp这三种协议
    static int __init nf_conntrack_l3proto_ipv4_init(void)
        ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
        ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
        ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
    
    /*
     * UDP is used as the worked example of a layer-4 tracker descriptor.
     * It is registered under (PF_INET, IPPROTO_UDP).
     */
    struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
    {
        /* identity */
        .l3proto        = PF_INET,
        .l4proto        = IPPROTO_UDP,
        .name           = "udp",

        /* tuple handling */
        .pkt_to_tuple       = udp_pkt_to_tuple, /* extract source/destination port */
        .invert_tuple       = udp_invert_tuple, /* swap source/destination port */
        .print_tuple        = udp_print_tuple,  /* print source/destination port */

        /* per-connection / per-packet callbacks */
        .error          = udp_error,        /* invoked by nf_conntrack_in() before the lookup */
        .new            = udp_new,          /* invoked when a new conntrack is created */
        .packet         = udp_packet,       /* per the article: refreshes the conntrack timeout and updates counters */
        .get_timeouts       = udp_get_timeouts, /* returns the conntrack timeout table */

        /* per-namespace state */
        .init_net       = udp_init_net,
        .get_net_proto      = udp_get_net_proto,
    #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
        .tuple_to_nlattr    = nf_ct_port_tuple_to_nlattr,
        .nlattr_to_tuple    = nf_ct_port_nlattr_to_tuple,
        .nlattr_tuple_size  = nf_ct_port_nlattr_tuple_size,
        .nla_policy     = nf_ct_port_nla_policy,
    #endif
    #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
        .ctnl_timeout       = {
            .nlattr_to_obj  = udp_timeout_nlattr_to_obj,
            .obj_to_nlattr  = udp_timeout_obj_to_nlattr,
            .nlattr_max = CTA_TIMEOUT_UDP_MAX,
            .obj_size   = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
            .nla_policy = udp_timeout_nla_policy,
        },
    #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
    };
    

    nf_conntrack_in 会用到上面注册的l3proto和l4proto函数

    /*
     * Main connection tracking routine, called from the IPv4 tracking hooks.
     *
     * Finds (or creates) the conntrack entry for skb, attaches it to the
     * packet and lets the layer-4 tracker process the packet.
     *
     * net:     network namespace the packet belongs to
     * pf:      protocol family used to pick the l3/l4 trackers
     * hooknum: hook point the caller is running at
     * skb:     the packet
     *
     * Returns a netfilter verdict (NF_ACCEPT, NF_DROP, NF_REPEAT, ...).
     * Negative/zero results from the protocol callbacks are negated to
     * obtain the verdict.
     */
    unsigned int
    nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
            struct sk_buff *skb)
    {
        struct nf_conn *ct, *tmpl = NULL;
        enum ip_conntrack_info ctinfo;
        struct nf_conntrack_l3proto *l3proto;
        struct nf_conntrack_l4proto *l4proto;
        unsigned int *timeouts;
        unsigned int dataoff;
        u_int8_t protonum;
        int set_reply = 0;
        int ret;
        /* The skb may already carry a conntrack:
           - if it is NOT a template, the packet was seen before (loopback)
             or is untracked (the article attributes this to a notrack rule
             in the raw table); count it and accept without tracking;
           - if it IS a template, remember it in tmpl and clear skb->nfct so
             that a real conntrack can be attached further down. */
        if (skb->nfct) {
            /* Previously seen (loopback or untracked)?  Ignore. */
            tmpl = (struct nf_conn *)skb->nfct;
            if (!nf_ct_is_template(tmpl)) {
                NF_CT_STAT_INC_ATOMIC(net, ignore);
                return NF_ACCEPT;
            }
            skb->nfct = NULL;
        }
    
        /* rcu_read_lock()ed by nf_hook_slow */
        /* Pick the layer-3 tracker by pf. For IPv4 this is presumably
           nf_conntrack_l3proto_ipv4, which knows how to read the source and
           destination addresses. */
        l3proto = __nf_ct_l3proto_find(pf);
        ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
                       &dataoff, &protonum);
        if (ret <= 0) {
            pr_debug("not prepared to track yet or error occurred\n");
            NF_CT_STAT_INC_ATOMIC(net, error);
            NF_CT_STAT_INC_ATOMIC(net, invalid);
            ret = -ret;
            goto out;
        }
        /* Pick the layer-4 tracker by pf and protocol number. For UDP this
           is presumably nf_conntrack_l4proto_udp4, which knows how to read
           the source and destination ports. */
        l4proto = __nf_ct_l4proto_find(pf, protonum);
    
        /* It may be an special packet, error, unclean...
         * inverse of the return code tells to the netfilter
         * core what to do with the packet. */
        if (l4proto->error != NULL) {
            ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
                         pf, hooknum);
            if (ret <= 0) {
                NF_CT_STAT_INC_ATOMIC(net, error);
                NF_CT_STAT_INC_ATOMIC(net, invalid);
                ret = -ret;
                goto out;
            }
            /* ICMP[v6] protocol trackers may assign one conntrack. */
            if (skb->nfct)
                goto out;
        }
    
        ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
                       l3proto, l4proto, &set_reply, &ctinfo);
        if (!ct) {
            /* Not valid part of a connection */
            NF_CT_STAT_INC_ATOMIC(net, invalid);
            ret = NF_ACCEPT;
            goto out;
        }
    
        if (IS_ERR(ct)) {
            /* Too stressed to deal. */
            NF_CT_STAT_INC_ATOMIC(net, drop);
            ret = NF_DROP;
            goto out;
        }
    
        NF_CT_ASSERT(skb->nfct);
    
        /* Decide what timeout policy we want to apply to this flow. */
        /* Each layer-4 protocol supplies its own timeouts; per the article
           UDP provides these two: */
        //static unsigned int udp_timeouts[UDP_CT_MAX] = {
        //  [UDP_CT_UNREPLIED]  = 30*HZ,
        //  [UDP_CT_REPLIED]    = 180*HZ,
        //};
        timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
    
        /* Per the article: for UDP this ends up in udp_packet ->
           nf_ct_refresh_acct, which refreshes the conntrack timeout (so the
           entry survives as long as the flow keeps sending) and updates the
           accounting counters. */
        ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
        if (ret <= 0) {
            /* Invalid: inverse of the return code tells
             * the netfilter core what to do */
            pr_debug("nf_conntrack_in: Can't track with proto module\n");
            nf_conntrack_put(skb->nfct);
            skb->nfct = NULL;
            NF_CT_STAT_INC_ATOMIC(net, invalid);
            if (ret == -NF_DROP)
                NF_CT_STAT_INC_ATOMIC(net, drop);
            ret = -ret;
            goto out;
        }
    
        /* On the first reply-direction packet of a connection, set
           IPS_SEEN_REPLY_BIT in ct->status. Only when the bit was previously
           clear is an IPCT_REPLY event cached (per the article, consumed by
           the nfnetlink module). */
        if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
            nf_conntrack_event_cache(IPCT_REPLY, ct);
    out:
        if (tmpl) {
            /* Special case: we have to repeat this hook, assign the
             * template again to this packet. We assume that this packet
             * has no conntrack assigned. This is used by nf_ct_tcp. */
            if (ret == NF_REPEAT)
                skb->nfct = (struct nf_conntrack *)tmpl;
            else
                nf_ct_put(tmpl);
        }
    
        return ret;
    }
    
    /*
     * On success, returns conntrack ptr, sets skb->nfct and ctinfo.
     *
     * Builds the packet's tuple, looks it up and, when no entry exists,
     * creates one via init_conntrack(). *set_reply is set to 1 for
     * reply-direction packets and 0 otherwise.
     *
     * Returns NULL when the tuple cannot be extracted or init_conntrack()
     * returns NULL, and passes through an ERR_PTR value from
     * init_conntrack().
     */
    static inline struct nf_conn *
    resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
              struct sk_buff *skb,
              unsigned int dataoff,
              u_int16_t l3num,
              u_int8_t protonum,
              struct nf_conntrack_l3proto *l3proto,
              struct nf_conntrack_l4proto *l4proto,
              int *set_reply,
              enum ip_conntrack_info *ctinfo)
    {
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct;
        u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
        u32 hash;
        /* Extract the 5-tuple of this packet. */
        if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
                     dataoff, l3num, protonum, &tuple, l3proto,
                     l4proto)) {
            pr_debug("resolve_normal_ct: Can't get tuple\n");
            return NULL;
        }
    
        /* look for tuple match */
        hash = hash_conntrack_raw(&tuple, zone);
        /* Per the article: searches the global confirmed table
           (net->ct.hash) for an existing flow. */
        h = __nf_conntrack_find_get(net, zone, &tuple, hash);
        if (!h) {
            /* No existing entry: allocate a conntrack. init_conntrack()
               derives the reply tuple from this tuple, stores both in the
               conntrack and puts it on the unconfirmed list. */
            h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
                       skb, dataoff, hash);
            if (!h)
                return NULL;
            if (IS_ERR(h))
                return (void *)h;
        }
        ct = nf_ct_tuplehash_to_ctrack(h);
    
        /* Classification:
           - reply direction: ctinfo = IP_CT_ESTABLISHED_REPLY;
           - original direction, a reply has already been seen:
             ctinfo = IP_CT_ESTABLISHED;
           - original direction, no reply yet, connection was expected by
             another connection: ctinfo = IP_CT_RELATED;
           - original direction otherwise (first packet, or later packets
             while no reply has been seen): ctinfo = IP_CT_NEW. */
        /* It exists; we have (non-exclusive) reference. */
        if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
            *ctinfo = IP_CT_ESTABLISHED_REPLY;
            /* Please set reply bit if this packet OK */
            *set_reply = 1;
        } else {
            /* Once we've had two way comms, always ESTABLISHED. */
            if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
                pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
                *ctinfo = IP_CT_ESTABLISHED;
            } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
                pr_debug("nf_conntrack_in: related packet for %p\n",
                     ct);
                *ctinfo = IP_CT_RELATED;
            } else {
                pr_debug("nf_conntrack_in: new packet for %p\n", ct);
                *ctinfo = IP_CT_NEW;
            }
            *set_reply = 0;
        }
        /* Attach the conntrack and its state to the packet. */
        skb->nfct = &ct->ct_general;
        skb->nfctinfo = *ctinfo;
        return ct;
    }
    
    /*
     * Creates the conntrack for a packet that matched no existing entry; the
     * packet's tuple becomes the original direction of the new connection.
     *
     * Returns the original-direction tuplehash of the new conntrack, NULL
     * when the tuple cannot be inverted or the layer-4 tracker rejects the
     * packet, or the ERR_PTR value from __nf_conntrack_alloc().
     */
    /* Allocate a new conntrack: we return -ENOMEM if classification
       failed due to stress.  Otherwise it really is unclassifiable. */
    static struct nf_conntrack_tuple_hash *
    init_conntrack(struct net *net, struct nf_conn *tmpl,
               const struct nf_conntrack_tuple *tuple,
               struct nf_conntrack_l3proto *l3proto,
               struct nf_conntrack_l4proto *l4proto,
               struct sk_buff *skb,
               unsigned int dataoff, u32 hash)
    {
        struct nf_conn *ct;
        struct nf_conn_help *help;
        struct nf_conntrack_tuple repl_tuple;
        struct nf_conntrack_ecache *ecache;
        struct nf_conntrack_expect *exp = NULL;
        u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
        struct nf_conn_timeout *timeout_ext;
        unsigned int *timeouts;
        /* Derive the reply-direction tuple from the original tuple. */
        if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
            pr_debug("Can't invert tuple.\n");
            return NULL;
        }
    
        ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
                      hash);
        if (IS_ERR(ct))
            return (struct nf_conntrack_tuple_hash *)ct;
    
        if (tmpl && nfct_synproxy(tmpl)) {
            nfct_seqadj_ext_add(ct);
            nfct_synproxy_ext_add(ct);
        }
    
        /* Timeouts come from the template's timeout extension when there is
           one, otherwise from the layer-4 tracker. */
        timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
        if (timeout_ext)
            timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
        else
            timeouts = l4proto->get_timeouts(net);
    
        if (!l4proto->new(ct, skb, dataoff, timeouts)) {
            nf_conntrack_free(ct);
            pr_debug("init conntrack: can't track with proto module\n");
            return NULL;
        }
    
        if (timeout_ext)
            nf_ct_timeout_ext_add(ct, timeout_ext->timeout, GFP_ATOMIC);
    
        nf_ct_acct_ext_add(ct, GFP_ATOMIC);
        nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
        nf_ct_labels_ext_add(ct);
    
        ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
        nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
                     ecache ? ecache->expmask : 0,
                     GFP_ATOMIC);
    
        local_bh_disable();
        /* When expectations exist, check whether this tuple is one of them.
           If so, mark the conntrack with IPS_EXPECTED_BIT and point
           ct->master at the master connection. */
        if (net->ct.expect_count) {
            spin_lock(&nf_conntrack_expect_lock);
            exp = nf_ct_find_expectation(net, zone, tuple);
            if (exp) {
                pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
                     ct, exp);
                /* Welcome, Mr. Bond.  We've been expecting you... */
                __set_bit(IPS_EXPECTED_BIT, &ct->status);
                /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
                ct->master = exp->master;
                if (exp->helper) {
                    help = nf_ct_helper_ext_add(ct, exp->helper,
                                    GFP_ATOMIC);
                    if (help)
                        rcu_assign_pointer(help->helper, exp->helper);
                }
    
    #ifdef CONFIG_NF_CONNTRACK_MARK
                ct->mark = exp->master->mark;
    #endif
    #ifdef CONFIG_NF_CONNTRACK_SECMARK
                ct->secmark = exp->master->secmark;
    #endif
                NF_CT_STAT_INC(net, expect_new);
            }
            spin_unlock(&nf_conntrack_expect_lock);
        }
        /* Not an expected connection: try to assign a helper and count it
           as a plain new connection. */
        if (!exp) {
            __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
            NF_CT_STAT_INC(net, new);
        }
    
        /* Now it is inserted into the unconfirmed list, bump refcount */
        nf_conntrack_get(&ct->ct_general);
        /* Park the conntrack on the unconfirmed list for now (per the
           article, a per-CPU list). */
        nf_ct_add_to_unconfirmed_list(ct);
    
        local_bh_enable();
    
        /* Run the expectation's callback, if any, then drop the expectation
           reference. */
        if (exp) {
            if (exp->expectfn)
                exp->expectfn(ct, exp);
            nf_ct_expect_put(exp);
        }
    
        return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
    }
    
    3. ipv4_helper
      ipv4_helper主要是执行该连接匹配到的helper函数,进行一些扩展操作,比如ftp的数据通道建立,nat的转换。可参考ftp提供的helper:help
    /*
     * Helper hook: when the packet's conntrack has a helper attached, run
     * the helper's help() callback and return its verdict. Packets without
     * a conntrack, with ctinfo == IP_CT_RELATED_REPLY, or without a helper
     * are accepted unchanged.
     */
    static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
                    struct sk_buff *skb,
                    const struct net_device *in,
                    const struct net_device *out,
                    int (*okfn)(struct sk_buff *))
    {
        enum ip_conntrack_info ctinfo;
        /* This is where we call the helper: as the packet goes out. */
        struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

        if (ct && ctinfo != IP_CT_RELATED_REPLY) {
            /* Look for a helper in the conntrack's extension area. */
            const struct nf_conn_help *ext = nfct_help(ct);

            if (ext) {
                /* rcu_read_lock()ed by nf_hook_slow */
                const struct nf_conntrack_helper *hlp =
                    rcu_dereference(ext->helper);

                if (hlp)
                    return hlp->help(skb,
                             skb_network_offset(skb) + ip_hdrlen(skb),
                             ct, ctinfo);
            }
        }

        return NF_ACCEPT;
    }
    
    4. ipv4_confirm
      ipv4_confirm是优先级最低的hook函数,数据包能走到这里说明它没有被前面的hook函数丢弃,所以可以将它的ct从unconfirmed链表(per-cpu)转到confirmed链表(全局的)上。
    /*
     * Confirm hook: optionally adjusts sequence numbers, then confirms the
     * packet's conntrack via nf_conntrack_confirm(). Returns NF_DROP when the
     * sequence adjustment fails, otherwise the result of the confirm call.
     */
    static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
                     struct sk_buff *skb,
                     const struct net_device *in,
                     const struct net_device *out,
                     int (*okfn)(struct sk_buff *))
    {
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

        if (ct && ctinfo != IP_CT_RELATED_REPLY) {
            /* adjust seqs for loopback traffic only in outgoing direction */
            if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
                !nf_is_loopback_packet(skb) &&
                !nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
                NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
                return NF_DROP;
            }
        }

        /* We've seen it coming out the other side: confirm it */
        return nf_conntrack_confirm(skb);
    }
    
    /*
     * Confirm a connection: returns NF_DROP if packet must be dropped.
     * Packets without a conntrack, or with an untracked one, are accepted
     * without further work.
     */
    static inline int nf_conntrack_confirm(struct sk_buff *skb)
    {
        struct nf_conn *ct = (struct nf_conn *)skb->nfct;
        int verdict = NF_ACCEPT;

        if (!ct || nf_ct_is_untracked(ct))
            return NF_ACCEPT;

        /* Not confirmed yet: __nf_conntrack_confirm() (not shown here) is,
           per the article, what takes the original-direction tuplehash off
           the unconfirmed list and inserts both the original and reply
           tuplehashes into the global confirmed hash. */
        if (!nf_ct_is_confirmed(ct))
            verdict = __nf_conntrack_confirm(skb);

        /* Deliver the cached events (per the article, to the netlink
           module through the notifier chain). */
        if (likely(verdict == NF_ACCEPT))
            nf_ct_deliver_cached_events(ct);

        return verdict;
    }
    

    连接跟踪是个基础模块,总结如下图,其他利用它实现功能的模块在其他文章中记录。


    image.png

    相关文章

      网友评论

          本文标题:netfilter之conntrack连接跟踪

          本文链接:https://www.haomeiwen.com/subject/ixzaxktx.html