net_sched: add Qdisc_read_mostly and Qdisc_write groups
author	Eric Dumazet <edumazet@google.com>
	Fri, 21 Nov 2025 08:32:49 +0000 (08:32 +0000)
committer	Paolo Abeni <pabeni@redhat.com>
	Tue, 25 Nov 2025 15:10:32 +0000 (16:10 +0100)
commit	ad50d5a3fc20327e133e2db849c6e67fc49650e6
tree	2a7afea577265da7209ed36c53b6021d356d9ecc
parent	c5d34f4583ea883b8d3441ded83e3a7207e0182d
net_sched: add Qdisc_read_mostly and Qdisc_write groups

It is possible to reorganize Qdisc to avoid always dirtying two cache lines
in the fast path, reducing this to a single dirtied cache line.

In the current layout, the fast path changes only four (sometimes six) fields in the first cache line:
 - q.spinlock
 - q.qlen
 - bstats.bytes
 - bstats.packets
 - some Qdiscs also change q.next/q.prev

In the second cache line, the fast path changes:
 - running
 - state
 - qstats.backlog

        /* --- cacheline 2 boundary (128 bytes) --- */
        struct sk_buff_head        gso_skb __attribute__((__aligned__(64))); /*  0x80  0x18 */
        struct qdisc_skb_head      q;                    /*  0x98  0x18 */
        struct gnet_stats_basic_sync bstats __attribute__((__aligned__(16))); /*  0xb0  0x10 */

        /* --- cacheline 3 boundary (192 bytes) --- */
        struct gnet_stats_queue    qstats;               /*  0xc0  0x14 */
        bool                       running;              /*  0xd4   0x1 */

        /* XXX 3 bytes hole, try to pack */

        unsigned long              state;                /*  0xd8   0x8 */
        struct Qdisc *             next_sched;           /*  0xe0   0x8 */
        struct sk_buff_head        skb_bad_txq;          /*  0xe8  0x18 */
        /* --- cacheline 4 boundary (256 bytes) --- */
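
As a hedged illustration (not kernel code): a minimal userspace model of the
write pattern above, with toy stand-ins for the Qdisc members, showing how the
per-packet stores hit two different 64-byte cache lines in this layout.

#include <stdalign.h>
#include <stddef.h>
#include <stdint.h>

/* Toy stand-ins only; field names mirror the dump above, types do not. */
struct toy_qdisc {
	/* first hot cache line */
	alignas(64) struct {
		void *head, *tail;
		uint32_t qlen;
		int lock;		/* stands in for q.lock */
	} q;
	struct {
		uint64_t bytes;
		uint64_t packets;
	} bstats;

	/* second hot cache line */
	alignas(64) struct {
		uint32_t backlog;
		uint32_t drops;
	} qstats;
	_Bool running;
	unsigned long state;
};

/* Toy per-packet accounting: every call dirties both cache lines. */
static inline void toy_enqueue_account(struct toy_qdisc *sch, unsigned int len)
{
	sch->q.qlen++;			/* cache line 1 */
	sch->bstats.bytes += len;	/* cache line 1 */
	sch->bstats.packets++;		/* cache line 1 */
	sch->qstats.backlog += len;	/* cache line 2 */
	sch->running = 1;		/* cache line 2 */
}

/* Compile-time check that the hot fields really straddle two lines here. */
_Static_assert(offsetof(struct toy_qdisc, bstats) / 64 !=
	       offsetof(struct toy_qdisc, qstats) / 64,
	       "bstats and qstats sit on different cache lines");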

Reorganize things so that the first cache line is mostly read,
and the next one mostly written.

This gives a ~3% performance increase under tx stress.

Note that there is an additional hole because @qstats now spills into a third cache line.

/* --- cacheline 2 boundary (128 bytes) --- */
__u8                       __cacheline_group_begin__Qdisc_read_mostly[0] __attribute__((__aligned__(64))); /*  0x80     0 */
struct sk_buff_head        gso_skb;              /*  0x80  0x18 */
struct Qdisc *             next_sched;           /*  0x98   0x8 */
struct sk_buff_head        skb_bad_txq;          /*  0xa0  0x18 */
__u8                       __cacheline_group_end__Qdisc_read_mostly[0]; /*  0xb8     0 */

/* XXX 8 bytes hole, try to pack */

/* --- cacheline 3 boundary (192 bytes) --- */
__u8                       __cacheline_group_begin__Qdisc_write[0] __attribute__((__aligned__(64))); /*  0xc0     0 */
struct qdisc_skb_head      q;                    /*  0xc0  0x18 */
unsigned long              state;                /*  0xd8   0x8 */
struct gnet_stats_basic_sync bstats __attribute__((__aligned__(16))); /*  0xe0  0x10 */
bool                       running;              /*  0xf0   0x1 */

/* XXX 3 bytes hole, try to pack */

struct gnet_stats_queue    qstats;               /*  0xf4  0x14 */
/* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
__u8                       __cacheline_group_end__Qdisc_write[0]; /* 0x108     0 */

/* XXX 56 bytes hole, try to pack */
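
The begin/end markers in the dump look like the cacheline-group helpers from
include/linux/cache.h; a hedged sketch of how the annotation might read in
struct Qdisc (abridged, member list taken from the dump above, not copied from
the actual patch):

/* Sketch only: assumes the __cacheline_group_begin()/__cacheline_group_end()
 * markers from include/linux/cache.h; the real hunk is in
 * include/net/sch_generic.h in this patch.
 */
struct Qdisc {
	/* ... read-mostly configuration fields ... */

	__cacheline_group_begin(Qdisc_read_mostly) __aligned(64);
	struct sk_buff_head	gso_skb;
	struct Qdisc		*next_sched;
	struct sk_buff_head	skb_bad_txq;
	__cacheline_group_end(Qdisc_read_mostly);

	__cacheline_group_begin(Qdisc_write) __aligned(64);
	struct qdisc_skb_head	q;
	unsigned long		state;
	struct gnet_stats_basic_sync bstats;
	bool			running;
	struct gnet_stats_queue	qstats;
	__cacheline_group_end(Qdisc_write);

	/* ... remaining fields ... */
};

Grouping this way keeps the per-packet stores (q.qlen, bstats, qstats.backlog,
state, running) inside the Qdisc_write line, while the Qdisc_read_mostly line
is only read in the fast path.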

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251121083256.674562-8-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
include/net/sch_generic.h