netfilter: nf_conncount: rework API to use sk_buff directly
author		Fernando Fernandez Mancera <fmancera@suse.de>
		Fri, 21 Nov 2025 00:14:30 +0000 (01:14 +0100)
committer	Pablo Neira Ayuso <pablo@netfilter.org>
		Fri, 28 Nov 2025 00:05:49 +0000 (00:05 +0000)
When using the nf_conncount infrastructure for connections that are not
yet confirmed, a duplicate tracking entry is possible due to an
optimization introduced by commit d265929930e2 ("netfilter:
nf_conncount: reduce unnecessary GC").

To fix this, introduce a new conncount API that takes an sk_buff struct
directly and fetches the tuple, the zone and the corresponding ct from
it. Both existing conncount variants are covered:
nf_conncount_count_skb() and nf_conncount_add_skb(). In addition,
remove the old API and adjust all users to the new one.

This way, for each sk_buff struct it is possible to check whether a ct
is present and already confirmed. If so, the add operation is skipped.
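
A minimal caller-side sketch (illustrative only, not taken verbatim
from this patch; net, skb and list stand for the caller's context and
NFPROTO_IPV4 for its L3 family):

	/* Tuple/zone extraction and the confirmed-ct check now happen
	 * inside nf_conncount; the caller only passes skb and family.
	 */
	err = nf_conncount_add_skb(net, skb, NFPROTO_IPV4, list);
	if (err) /* -ENOENT: no tuple from skb; -ENOMEM: allocation */
		return NF_DROP;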

Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC")
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
include/net/netfilter/nf_conntrack_count.h
net/netfilter/nf_conncount.c
net/netfilter/nft_connlimit.c
net/netfilter/xt_connlimit.c
net/openvswitch/conntrack.c

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 1b58b5b91ff6a48f2dba2608f53477b32c546c80..52a06de41aa0f9301e99e90e81845de0b455d455 100644
@@ -18,15 +18,14 @@ struct nf_conncount_list {
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen);
 void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data);
 
-unsigned int nf_conncount_count(struct net *net,
-                               struct nf_conncount_data *data,
-                               const u32 *key,
-                               const struct nf_conntrack_tuple *tuple,
-                               const struct nf_conntrack_zone *zone);
-
-int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
-                    const struct nf_conntrack_tuple *tuple,
-                    const struct nf_conntrack_zone *zone);
+unsigned int nf_conncount_count_skb(struct net *net,
+                                   const struct sk_buff *skb,
+                                   u16 l3num,
+                                   struct nf_conncount_data *data,
+                                   const u32 *key);
+
+int nf_conncount_add_skb(struct net *net, const struct sk_buff *skb,
+                        u16 l3num, struct nf_conncount_list *list);
 
 void nf_conncount_list_init(struct nf_conncount_list *list);
 
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 913ede2f57f9a917ce64adfec55b46c09db9a329..0ffc5ff78a71426a3b8581c24bc2a78da7cca17b 100644
@@ -122,15 +122,65 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
        return ERR_PTR(-EAGAIN);
 }
 
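+/* Fetch the original-direction tuple and zone for this skb, plus the
+ * conntrack entry when one is available. If the entry was taken from a
+ * conntrack lookup, *refcounted is set and the caller must release it
+ * via nf_ct_put(). Returns false if no tuple could be extracted from
+ * the skb.
+ */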
+static bool get_ct_or_tuple_from_skb(struct net *net,
+                                    const struct sk_buff *skb,
+                                    u16 l3num,
+                                    struct nf_conn **ct,
+                                    struct nf_conntrack_tuple *tuple,
+                                    const struct nf_conntrack_zone **zone,
+                                    bool *refcounted)
+{
+       const struct nf_conntrack_tuple_hash *h;
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *found_ct;
+
+       found_ct = nf_ct_get(skb, &ctinfo);
+       if (found_ct && !nf_ct_is_template(found_ct)) {
+               *tuple = found_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+               *zone = nf_ct_zone(found_ct);
+               *ct = found_ct;
+               return true;
+       }
+
+       if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num, net, tuple))
+               return false;
+
+       if (found_ct)
+               *zone = nf_ct_zone(found_ct);
+
+       h = nf_conntrack_find_get(net, *zone, tuple);
+       if (!h)
+               return true;
+
+       found_ct = nf_ct_tuplehash_to_ctrack(h);
+       *refcounted = true;
+       *ct = found_ct;
+
+       return true;
+}
+
 static int __nf_conncount_add(struct net *net,
-                             struct nf_conncount_list *list,
-                             const struct nf_conntrack_tuple *tuple,
-                             const struct nf_conntrack_zone *zone)
+                             const struct sk_buff *skb,
+                             u16 l3num,
+                             struct nf_conncount_list *list)
 {
+       const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
        const struct nf_conntrack_tuple_hash *found;
        struct nf_conncount_tuple *conn, *conn_n;
+       struct nf_conntrack_tuple tuple;
+       struct nf_conn *ct = NULL;
        struct nf_conn *found_ct;
        unsigned int collect = 0;
+       bool refcounted = false;
+
+       if (!get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted))
+               return -ENOENT;
+
+       if (ct && nf_ct_is_confirmed(ct)) {
+               if (refcounted)
+                       nf_ct_put(ct);
+               return 0;
+       }
 
        if ((u32)jiffies == list->last_gc)
                goto add_new_node;
@@ -144,10 +194,10 @@ static int __nf_conncount_add(struct net *net,
                if (IS_ERR(found)) {
                        /* Not found, but might be about to be confirmed */
                        if (PTR_ERR(found) == -EAGAIN) {
-                               if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+                               if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
                                    nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
                                    nf_ct_zone_id(zone, zone->dir))
-                                       return 0; /* already exists */
+                                       goto out_put; /* already exists */
                        } else {
                                collect++;
                        }
@@ -156,7 +206,7 @@ static int __nf_conncount_add(struct net *net,
 
                found_ct = nf_ct_tuplehash_to_ctrack(found);
 
-               if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+               if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
                    nf_ct_zone_equal(found_ct, zone, zone->dir)) {
                        /*
                         * We should not see tuples twice unless someone hooks
@@ -165,7 +215,7 @@ static int __nf_conncount_add(struct net *net,
                         * Attempt to avoid a re-add in this case.
                         */
                        nf_ct_put(found_ct);
-                       return 0;
+                       goto out_put;
                } else if (already_closed(found_ct)) {
                        /*
                         * we do not care about connections which are
@@ -188,31 +238,35 @@ static int __nf_conncount_add(struct net *net,
        if (conn == NULL)
                return -ENOMEM;
 
-       conn->tuple = *tuple;
+       conn->tuple = tuple;
        conn->zone = *zone;
        conn->cpu = raw_smp_processor_id();
        conn->jiffies32 = (u32)jiffies;
        list_add_tail(&conn->node, &list->head);
        list->count++;
        list->last_gc = (u32)jiffies;
+
+out_put:
+       if (refcounted)
+               nf_ct_put(ct);
        return 0;
 }
 
-int nf_conncount_add(struct net *net,
-                    struct nf_conncount_list *list,
-                    const struct nf_conntrack_tuple *tuple,
-                    const struct nf_conntrack_zone *zone)
+int nf_conncount_add_skb(struct net *net,
+                        const struct sk_buff *skb,
+                        u16 l3num,
+                        struct nf_conncount_list *list)
 {
        int ret;
 
        /* check the saved connections */
        spin_lock_bh(&list->list_lock);
-       ret = __nf_conncount_add(net, list, tuple, zone);
+       ret = __nf_conncount_add(net, skb, l3num, list);
        spin_unlock_bh(&list->list_lock);
 
        return ret;
 }
-EXPORT_SYMBOL_GPL(nf_conncount_add);
+EXPORT_SYMBOL_GPL(nf_conncount_add_skb);
 
 void nf_conncount_list_init(struct nf_conncount_list *list)
 {
@@ -309,19 +363,22 @@ static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
 
 static unsigned int
 insert_tree(struct net *net,
+           const struct sk_buff *skb,
+           u16 l3num,
            struct nf_conncount_data *data,
            struct rb_root *root,
            unsigned int hash,
-           const u32 *key,
-           const struct nf_conntrack_tuple *tuple,
-           const struct nf_conntrack_zone *zone)
+           const u32 *key)
 {
        struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
+       const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
+       bool do_gc = true, refcounted = false;
+       unsigned int count = 0, gc_count = 0;
        struct rb_node **rbnode, *parent;
-       struct nf_conncount_rb *rbconn;
+       struct nf_conntrack_tuple tuple;
        struct nf_conncount_tuple *conn;
-       unsigned int count = 0, gc_count = 0;
-       bool do_gc = true;
+       struct nf_conncount_rb *rbconn;
+       struct nf_conn *ct = NULL;
 
        spin_lock_bh(&nf_conncount_locks[hash]);
 restart:
@@ -340,7 +397,7 @@ insert_tree(struct net *net,
                } else {
                        int ret;
 
-                       ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
+                       ret = nf_conncount_add_skb(net, skb, l3num, &rbconn->list);
                        if (ret)
                                count = 0; /* hotdrop */
                        else
@@ -364,30 +421,35 @@ insert_tree(struct net *net,
                goto restart;
        }
 
-       /* expected case: match, insert new node */
-       rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
-       if (rbconn == NULL)
-               goto out_unlock;
+       if (get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted)) {
+               /* expected case: match, insert new node */
+               rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
+               if (rbconn == NULL)
+                       goto out_unlock;
 
-       conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
-       if (conn == NULL) {
-               kmem_cache_free(conncount_rb_cachep, rbconn);
-               goto out_unlock;
-       }
+               conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+               if (conn == NULL) {
+                       kmem_cache_free(conncount_rb_cachep, rbconn);
+                       goto out_unlock;
+               }
 
-       conn->tuple = *tuple;
-       conn->zone = *zone;
-       conn->cpu = raw_smp_processor_id();
-       conn->jiffies32 = (u32)jiffies;
-       memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
+               conn->tuple = tuple;
+               conn->zone = *zone;
+               conn->cpu = raw_smp_processor_id();
+               conn->jiffies32 = (u32)jiffies;
+               memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
+
+               nf_conncount_list_init(&rbconn->list);
+               list_add(&conn->node, &rbconn->list.head);
+               count = 1;
+               rbconn->list.count = count;
 
-       nf_conncount_list_init(&rbconn->list);
-       list_add(&conn->node, &rbconn->list.head);
-       count = 1;
-       rbconn->list.count = count;
+               rb_link_node_rcu(&rbconn->node, parent, rbnode);
+               rb_insert_color(&rbconn->node, root);
 
-       rb_link_node_rcu(&rbconn->node, parent, rbnode);
-       rb_insert_color(&rbconn->node, root);
+               if (refcounted)
+                       nf_ct_put(ct);
+       }
 out_unlock:
        spin_unlock_bh(&nf_conncount_locks[hash]);
        return count;
@@ -395,10 +457,10 @@ insert_tree(struct net *net,
 
 static unsigned int
 count_tree(struct net *net,
+          const struct sk_buff *skb,
+          u16 l3num,
           struct nf_conncount_data *data,
-          const u32 *key,
-          const struct nf_conntrack_tuple *tuple,
-          const struct nf_conntrack_zone *zone)
+          const u32 *key)
 {
        struct rb_root *root;
        struct rb_node *parent;
@@ -422,7 +484,7 @@ count_tree(struct net *net,
                } else {
                        int ret;
 
-                       if (!tuple) {
+                       if (!skb) {
                                nf_conncount_gc_list(net, &rbconn->list);
                                return rbconn->list.count;
                        }
@@ -437,7 +499,7 @@ count_tree(struct net *net,
                        }
 
                        /* same source network -> be counted! */
-                       ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
+                       ret = __nf_conncount_add(net, skb, l3num, &rbconn->list);
                        spin_unlock_bh(&rbconn->list.list_lock);
                        if (ret)
                                return 0; /* hotdrop */
@@ -446,10 +508,10 @@ count_tree(struct net *net,
                }
        }
 
-       if (!tuple)
+       if (!skb)
                return 0;
 
-       return insert_tree(net, data, root, hash, key, tuple, zone);
+       return insert_tree(net, skb, l3num, data, root, hash, key);
 }
 
 static void tree_gc_worker(struct work_struct *work)
@@ -511,18 +573,19 @@ static void tree_gc_worker(struct work_struct *work)
 }
 
 /* Count and return number of conntrack entries in 'net' with particular 'key'.
- * If 'tuple' is not null, insert it into the accounting data structure.
- * Call with RCU read lock.
+ * If 'skb' is not null, insert the corresponding tuple into the accounting
+ * data structure. Call with RCU read lock.
  */
-unsigned int nf_conncount_count(struct net *net,
-                               struct nf_conncount_data *data,
-                               const u32 *key,
-                               const struct nf_conntrack_tuple *tuple,
-                               const struct nf_conntrack_zone *zone)
+unsigned int nf_conncount_count_skb(struct net *net,
+                                   const struct sk_buff *skb,
+                                   u16 l3num,
+                                   struct nf_conncount_data *data,
+                                   const u32 *key)
 {
-       return count_tree(net, data, key, tuple, zone);
+       return count_tree(net, skb, l3num, data, key);
 }
-EXPORT_SYMBOL_GPL(nf_conncount_count);
+EXPORT_SYMBOL_GPL(nf_conncount_count_skb);
 
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen)
 {
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index fc35a11cdca2097e6cbaeb09ea1e1a814d6319e4..5df7134131d29f8bd0f1100ead0beca891f70e85 100644
@@ -24,26 +24,11 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
                                         const struct nft_pktinfo *pkt,
                                         const struct nft_set_ext *ext)
 {
-       const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
-       const struct nf_conntrack_tuple *tuple_ptr;
-       struct nf_conntrack_tuple tuple;
-       enum ip_conntrack_info ctinfo;
-       const struct nf_conn *ct;
        unsigned int count;
+       int err;
 
-       tuple_ptr = &tuple;
-
-       ct = nf_ct_get(pkt->skb, &ctinfo);
-       if (ct != NULL) {
-               tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-               zone = nf_ct_zone(ct);
-       } else if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
-                                     nft_pf(pkt), nft_net(pkt), &tuple)) {
-               regs->verdict.code = NF_DROP;
-               return;
-       }
-
-       if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) {
+       err = nf_conncount_add_skb(nft_net(pkt), pkt->skb, nft_pf(pkt), priv->list);
+       if (err) {
                regs->verdict.code = NF_DROP;
                return;
        }
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 0189f8b6b0bd15e7b0714de41359964b6e6990aa..848287ab79cfb1f875e92fa25b13c74e6d428ac1 100644
@@ -31,8 +31,6 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
        struct net *net = xt_net(par);
        const struct xt_connlimit_info *info = par->matchinfo;
-       struct nf_conntrack_tuple tuple;
-       const struct nf_conntrack_tuple *tuple_ptr = &tuple;
        const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
        enum ip_conntrack_info ctinfo;
        const struct nf_conn *ct;
@@ -40,13 +38,8 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
        u32 key[5];
 
        ct = nf_ct_get(skb, &ctinfo);
-       if (ct != NULL) {
-               tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+       if (ct)
                zone = nf_ct_zone(ct);
-       } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
-                                     xt_family(par), net, &tuple)) {
-               goto hotdrop;
-       }
 
        if (xt_family(par) == NFPROTO_IPV6) {
                const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -69,10 +62,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
                key[1] = zone->id;
        }
 
-       connections = nf_conncount_count(net, info->data, key, tuple_ptr,
-                                        zone);
+       connections = nf_conncount_count_skb(net, skb, xt_family(par), info->data, key);
        if (connections == 0)
-               /* kmalloc failed, drop it entirely */
+               /* kmalloc failed or tuple couldn't be found, drop it entirely */
                goto hotdrop;
 
        return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e573e92213029cf66873e63567174f006d837be2..a0811e1fba6563063e99c210c9ef09fe3943d1a0 100644
@@ -928,8 +928,8 @@ static u32 ct_limit_get(const struct ovs_ct_limit_info *info, u16 zone)
 }
 
 static int ovs_ct_check_limit(struct net *net,
-                             const struct ovs_conntrack_info *info,
-                             const struct nf_conntrack_tuple *tuple)
+                             const struct sk_buff *skb,
+                             const struct ovs_conntrack_info *info)
 {
        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
        const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
@@ -942,8 +942,9 @@ static int ovs_ct_check_limit(struct net *net,
        if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED)
                return 0;
 
-       connections = nf_conncount_count(net, ct_limit_info->data,
-                                        &conncount_key, tuple, &info->zone);
+       connections = nf_conncount_count_skb(net, skb, info->family,
+                                            ct_limit_info->data,
+                                            &conncount_key);
        if (connections > per_zone_limit)
                return -ENOMEM;
 
@@ -972,8 +973,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
 #if    IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
        if (static_branch_unlikely(&ovs_ct_limit_enabled)) {
                if (!nf_ct_is_confirmed(ct)) {
-                       err = ovs_ct_check_limit(net, info,
-                               &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+                       err = ovs_ct_check_limit(net, skb, info);
                        if (err) {
                                net_warn_ratelimited("openvswitch: zone: %u "
                                        "exceeds conntrack limit\n",
@@ -1770,8 +1770,8 @@ static int __ovs_ct_limit_get_zone_limit(struct net *net,
        zone_limit.limit = limit;
        nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0);
 
-       zone_limit.count = nf_conncount_count(net, data, &conncount_key, NULL,
-                                             &ct_zone);
+       zone_limit.count = nf_conncount_count_skb(net, NULL, 0, data,
+                                                 &conncount_key);
        return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
 }
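
The NULL-skb query mode used by openvswitch above reduces to the
following (illustrative sketch; net, data and key stand for the
caller's context):

	/* With skb == NULL, nf_conncount_count_skb() only garbage
	 * collects and returns the current count for 'key'; nothing
	 * is inserted (see count_tree() above).
	 */
	count = nf_conncount_count_skb(net, NULL, 0, data, key);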