net: add net.core.qdisc_max_burst

In the blamed commit, I added a check against the temporary queue
built in __dev_xmit_skb(). The idea was to drop packets early,
before any spinlock was acquired.

if (unlikely(defer_count > READ_ONCE(q->limit))) {
	kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
	return NET_XMIT_DROP;
}

It turned out that the HTB qdisc has a zero q->limit, because HTB
limits packets on a per-class basis. With a zero limit, the very
first deferred packet already exceeds it and is dropped, which made
some of our tests flaky.
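
To illustrate the failure mode, here is a standalone userland sketch
(not kernel code; old_check() is a hypothetical stand-in for the
check quoted above):

#include <stdio.h>

/* Hypothetical stand-in for the old __dev_xmit_skb() check.
 * A zero limit models HTB, whose root qdisc leaves q->limit at 0
 * because HTB enforces limits per class.
 */
static int old_check(long defer_count, unsigned long q_limit)
{
	return defer_count > q_limit;	/* nonzero means drop */
}

int main(void)
{
	/* First deferred packet: defer_count == 1, HTB limit == 0,
	 * so every deferred packet gets dropped. Prints "drop = 1".
	 */
	printf("drop = %d\n", old_check(1, 0));
	return 0;
}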

Add a new sysctl, net.core.qdisc_max_burst, to control how many
packets can be stored in the temporary lockless queue.
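
For reference, reading the new knob back from userspace could look
like this (a sketch assuming the usual /proc/sys path for net.core
sysctls):

#include <stdio.h>

/* Read the current burst limit; the path mirrors the sysctl name
 * net.core.qdisc_max_burst under the standard /proc/sys layout.
 */
int main(void)
{
	FILE *f = fopen("/proc/sys/net/core/qdisc_max_burst", "r");
	int burst;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d", &burst) == 1)
		printf("qdisc_max_burst = %d\n", burst);
	fclose(f);
	return 0;
}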

Also add a new QDISC_BURST_DROP drop reason to better diagnose
future issues.
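
One possible way to spot these drops in the field (a sketch; it
assumes tracefs is mounted at /sys/kernel/tracing, that the
skb:kfree_skb tracepoint output names the drop reason, and root
access):

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Enable the skb:kfree_skb tracepoint, then filter its
	 * output for the new drop reason.
	 */
	FILE *en = fopen("/sys/kernel/tracing/events/skb/kfree_skb/enable", "w");
	FILE *tp;
	char line[512];

	if (en) {
		fputs("1\n", en);
		fclose(en);
	}
	tp = fopen("/sys/kernel/tracing/trace_pipe", "r");
	if (!tp) {
		perror("trace_pipe");
		return 1;
	}
	while (fgets(line, sizeof(line), tp))
		if (strstr(line, "QDISC_BURST_DROP"))
			fputs(line, stdout);
	fclose(tp);
	return 0;
}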

Thanks, Neal!

Fixes: 100dfa74ca ("net: dev_queue_xmit() llist adoption")
Reported-and-bisected-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Link: https://patch.msgid.link/20260107104159.3669285-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

Documentation/admin-guide/sysctl/net.rst

@@ -303,6 +303,14 @@ netdev_max_backlog
 Maximum number of packets, queued on the INPUT side, when the interface
 receives packets faster than kernel can process them.
 
+qdisc_max_burst
+------------------
+
+Maximum number of packets that can be temporarily stored before
+reaching qdisc.
+
+Default: 1000
+
 netdev_rss_key
 --------------

include/net/dropreason-core.h

@@ -67,6 +67,7 @@
 	FN(TC_EGRESS)			\
 	FN(SECURITY_HOOK)		\
 	FN(QDISC_DROP)			\
+	FN(QDISC_BURST_DROP)		\
 	FN(QDISC_OVERLIMIT)		\
 	FN(QDISC_CONGESTED)		\
 	FN(CAKE_FLOOD)			\
@@ -374,6 +375,11 @@ enum skb_drop_reason {
 	 * failed to enqueue to current qdisc)
 	 */
 	SKB_DROP_REASON_QDISC_DROP,
+	/**
+	 * @SKB_DROP_REASON_QDISC_BURST_DROP: dropped when net.core.qdisc_max_burst
+	 * limit is hit.
+	 */
+	SKB_DROP_REASON_QDISC_BURST_DROP,
 	/**
 	 * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc
 	 * instance exceeds its total buffer size limit.

include/net/hotdata.h

@@ -42,6 +42,7 @@ struct net_hotdata {
 	int			netdev_budget_usecs;
 	int			tstamp_prequeue;
 	int			max_backlog;
+	int			qdisc_max_burst;
 	int			dev_tx_weight;
 	int			dev_rx_weight;
 	int			sysctl_max_skb_frags;

net/core/dev.c

@@ -4203,8 +4203,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	do {
 		if (first_n && !defer_count) {
 			defer_count = atomic_long_inc_return(&q->defer_count);
-			if (unlikely(defer_count > READ_ONCE(q->limit))) {
-				kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
+			if (unlikely(defer_count > READ_ONCE(net_hotdata.qdisc_max_burst))) {
+				kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_BURST_DROP);
 				return NET_XMIT_DROP;
 			}
 		}
@@ -4222,7 +4222,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	ll_list = llist_del_all(&q->defer_list);
 	/* There is a small race because we clear defer_count not atomically
 	 * with the prior llist_del_all(). This means defer_list could grow
-	 * over q->limit.
+	 * over qdisc_max_burst.
 	 */
 	atomic_long_set(&q->defer_count, 0);

net/core/hotdata.c

@@ -17,6 +17,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
 	.tstamp_prequeue = 1,
 	.max_backlog = 1000,
+	.qdisc_max_burst = 1000,
 	.dev_tx_weight = 64,
 	.dev_rx_weight = 64,
 	.sysctl_max_skb_frags = MAX_SKB_FRAGS,

net/core/sysctl_net_core.c

@@ -429,6 +429,13 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "qdisc_max_burst",
+		.data		= &net_hotdata.qdisc_max_burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{
 		.procname	= "netdev_rss_key",
 		.data		= &netdev_rss_key,