net: add net.core.qdisc_max_burst

In the blamed commit, I added a check against the temporary queue
built in __dev_xmit_skb(). The idea was to drop packets early,
before any spinlock was acquired.

if (unlikely(defer_count > READ_ONCE(q->limit))) {
	kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
	return NET_XMIT_DROP;
}

It turned out that the HTB qdisc has a zero q->limit, because HTB
limits packets on a per-class basis. With a zero limit, the very
first deferred packet already exceeds it and is dropped, which made
some of our tests flaky.
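
To illustrate the failure mode, here is a standalone userland sketch
(not kernel code; old_check() is a hypothetical stand-in for the
check quoted above):

#include <stdio.h>

/* Hypothetical stand-in for the old __dev_xmit_skb() check.
 * A zero limit models HTB, whose root qdisc leaves q->limit at 0
 * because HTB enforces limits per class.
 */
static int old_check(long defer_count, unsigned long q_limit)
{
	return defer_count > q_limit;	/* nonzero means drop */
}

int main(void)
{
	/* First deferred packet: defer_count == 1, HTB limit == 0,
	 * so every deferred packet gets dropped. Prints "drop = 1".
	 */
	printf("drop = %d\n", old_check(1, 0));
	return 0;
}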

Add a new sysctl, net.core.qdisc_max_burst, to control how many
packets can be stored in the temporary lockless queue.
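
For reference, reading the new knob back from userspace could look
like this (a sketch assuming the usual /proc/sys path for net.core
sysctls):

#include <stdio.h>

/* Read the current burst limit; the path mirrors the sysctl name
 * net.core.qdisc_max_burst under the standard /proc/sys layout.
 */
int main(void)
{
	FILE *f = fopen("/proc/sys/net/core/qdisc_max_burst", "r");
	int burst;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d", &burst) == 1)
		printf("qdisc_max_burst = %d\n", burst);
	fclose(f);
	return 0;
}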

Also add a new QDISC_BURST_DROP drop reason to better diagnose
future issues.
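
One possible way to spot these drops in the field (a sketch; it
assumes tracefs is mounted at /sys/kernel/tracing, that the
skb:kfree_skb tracepoint output names the drop reason, and root
access):

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Enable the skb:kfree_skb tracepoint, then filter its
	 * output for the new drop reason.
	 */
	FILE *en = fopen("/sys/kernel/tracing/events/skb/kfree_skb/enable", "w");
	FILE *tp;
	char line[512];

	if (en) {
		fputs("1\n", en);
		fclose(en);
	}
	tp = fopen("/sys/kernel/tracing/trace_pipe", "r");
	if (!tp) {
		perror("trace_pipe");
		return 1;
	}
	while (fgets(line, sizeof(line), tp))
		if (strstr(line, "QDISC_BURST_DROP"))
			fputs(line, stdout);
	fclose(tp);
	return 0;
}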

Thanks, Neal!

Fixes: 100dfa74ca ("net: dev_queue_xmit() llist adoption")
Reported-and-bisected-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Link: https://patch.msgid.link/20260107104159.3669285-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

Documentation/admin-guide/sysctl/net.rst

@@ -303,6 +303,14 @@ netdev_max_backlog
 Maximum number of packets, queued on the INPUT side, when the interface
 receives packets faster than kernel can process them.
 
+qdisc_max_burst
+------------------
+
+Maximum number of packets that can be temporarily stored before
+reaching qdisc.
+
+Default: 1000
+
 netdev_rss_key
 --------------

include/net/dropreason-core.h

@@ -67,6 +67,7 @@
 	FN(TC_EGRESS)			\
 	FN(SECURITY_HOOK)		\
 	FN(QDISC_DROP)			\
+	FN(QDISC_BURST_DROP)		\
 	FN(QDISC_OVERLIMIT)		\
 	FN(QDISC_CONGESTED)		\
 	FN(CAKE_FLOOD)			\
@@ -374,6 +375,11 @@ enum skb_drop_reason {
 	 * failed to enqueue to current qdisc)
 	 */
 	SKB_DROP_REASON_QDISC_DROP,
+	/**
+	 * @SKB_DROP_REASON_QDISC_BURST_DROP: dropped when net.core.qdisc_max_burst
+	 * limit is hit.
+	 */
+	SKB_DROP_REASON_QDISC_BURST_DROP,
 	/**
 	 * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc
 	 * instance exceeds its total buffer size limit.

include/net/hotdata.h

@@ -42,6 +42,7 @@ struct net_hotdata {
 	int			netdev_budget_usecs;
 	int			tstamp_prequeue;
 	int			max_backlog;
+	int			qdisc_max_burst;
 	int			dev_tx_weight;
 	int			dev_rx_weight;
 	int			sysctl_max_skb_frags;

net/core/dev.c

@@ -4203,8 +4203,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	do {
 		if (first_n && !defer_count) {
 			defer_count = atomic_long_inc_return(&q->defer_count);
-			if (unlikely(defer_count > READ_ONCE(q->limit))) {
-				kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
+			if (unlikely(defer_count > READ_ONCE(net_hotdata.qdisc_max_burst))) {
+				kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_BURST_DROP);
 				return NET_XMIT_DROP;
 			}
 		}
@@ -4222,7 +4222,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	ll_list = llist_del_all(&q->defer_list);
 	/* There is a small race because we clear defer_count not atomically
 	 * with the prior llist_del_all(). This means defer_list could grow
-	 * over q->limit.
+	 * over qdisc_max_burst.
 	 */
 	atomic_long_set(&q->defer_count, 0);

net/core/hotdata.c

@@ -17,6 +17,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
 	.tstamp_prequeue = 1,
 	.max_backlog = 1000,
+	.qdisc_max_burst = 1000,
 	.dev_tx_weight = 64,
 	.dev_rx_weight = 64,
 	.sysctl_max_skb_frags = MAX_SKB_FRAGS,

net/core/sysctl_net_core.c

@@ -429,6 +429,13 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "qdisc_max_burst",
+		.data		= &net_hotdata.qdisc_max_burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{
 		.procname	= "netdev_rss_key",
 		.data		= &netdev_rss_key,