Skip to content

Commit 7de196a

Browse files
committed
Merge branch 'make-dsa-work-with-bonding-s-arp-monitor'
Vladimir Oltean says: ==================== Make DSA work with bonding's ARP monitor Since commit 2b86cb8 ("net: dsa: declare lockless TX feature for slave ports") in v5.7, DSA breaks the ARP monitoring logic from the bonding driver, fact which was pointed out by Brian Hutchinson who uses a linux-5.10.y stable kernel. Initially I got lured by other similar hacks introduced for other NETIF_F_LLTX drivers, which, inspired by the bonding documentation, update the trans_start of their TX queues by hand. However Jakub pointed out that this simply isn't a proper solution, and after coming to think more about it, I agree, and it doesn't work properly with DSA nor is it maintainable for the future changes I plan for it (multiple DSA masters in a LAG). I've tested these changes using a DSA-based setup and a veth-based setup, using the active-backup mode and ARP monitoring, with and without arp_validate. Link to v1: https://patchwork.kernel.org/project/netdevbpf/patch/[email protected]/ Link to v2: https://patchwork.kernel.org/project/netdevbpf/patch/[email protected]/ ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents f86d1fb + cba8d8f commit 7de196a

File tree

5 files changed

+34
-35
lines changed

5 files changed

+34
-35
lines changed

Documentation/networking/bonding.rst

-9
Original file line numberDiff line numberDiff line change
@@ -1982,15 +1982,6 @@ uses the response as an indication that the link is operating. This
19821982
gives some assurance that traffic is actually flowing to and from one
19831983
or more peers on the local network.
19841984

1985-
The ARP monitor relies on the device driver itself to verify
1986-
that traffic is flowing. In particular, the driver must keep up to
1987-
date the last receive time, dev->last_rx. Drivers that use NETIF_F_LLTX
1988-
flag must also update netdev_queue->trans_start. If they do not, then the
1989-
ARP monitor will immediately fail any slaves using that driver, and
1990-
those slaves will stay down. If networking monitoring (tcpdump, etc)
1991-
shows the ARP requests and replies on the network, then it may be that
1992-
your device driver is not updating last_rx and trans_start.
1993-
19941985
7.2 Configuring Multiple ARP Targets
19951986
------------------------------------
19961987

drivers/net/bonding/bond_main.c

+20-15
Original file line numberDiff line numberDiff line change
@@ -2001,6 +2001,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
20012001
for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
20022002
new_slave->target_last_arp_rx[i] = new_slave->last_rx;
20032003

2004+
new_slave->last_tx = new_slave->last_rx;
2005+
20042006
if (bond->params.miimon && !bond->params.use_carrier) {
20052007
link_reporting = bond_check_dev_link(bond, slave_dev, 1);
20062008

@@ -2884,8 +2886,11 @@ static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip,
28842886
return;
28852887
}
28862888

2887-
if (bond_handle_vlan(slave, tags, skb))
2889+
if (bond_handle_vlan(slave, tags, skb)) {
2890+
slave_update_last_tx(slave);
28882891
arp_xmit(skb);
2892+
}
2893+
28892894
return;
28902895
}
28912896

@@ -3074,8 +3079,7 @@ static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
30743079
curr_active_slave->last_link_up))
30753080
bond_validate_arp(bond, slave, tip, sip);
30763081
else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
3077-
bond_time_in_interval(bond,
3078-
dev_trans_start(curr_arp_slave->dev), 1))
3082+
bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
30793083
bond_validate_arp(bond, slave, sip, tip);
30803084

30813085
out_unlock:
@@ -3103,8 +3107,10 @@ static void bond_ns_send(struct slave *slave, const struct in6_addr *daddr,
31033107
}
31043108

31053109
addrconf_addr_solict_mult(daddr, &mcaddr);
3106-
if (bond_handle_vlan(slave, tags, skb))
3110+
if (bond_handle_vlan(slave, tags, skb)) {
3111+
slave_update_last_tx(slave);
31073112
ndisc_send_skb(skb, &mcaddr, saddr);
3113+
}
31083114
}
31093115

31103116
static void bond_ns_send_all(struct bonding *bond, struct slave *slave)
@@ -3246,8 +3252,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
32463252
curr_active_slave->last_link_up))
32473253
bond_validate_ns(bond, slave, saddr, daddr);
32483254
else if (curr_arp_slave &&
3249-
bond_time_in_interval(bond,
3250-
dev_trans_start(curr_arp_slave->dev), 1))
3255+
bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
32513256
bond_validate_ns(bond, slave, saddr, daddr);
32523257

32533258
out:
@@ -3335,12 +3340,12 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
33353340
* so it can wait
33363341
*/
33373342
bond_for_each_slave_rcu(bond, slave, iter) {
3338-
unsigned long trans_start = dev_trans_start(slave->dev);
3343+
unsigned long last_tx = slave_last_tx(slave);
33393344

33403345
bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
33413346

33423347
if (slave->link != BOND_LINK_UP) {
3343-
if (bond_time_in_interval(bond, trans_start, 1) &&
3348+
if (bond_time_in_interval(bond, last_tx, 1) &&
33443349
bond_time_in_interval(bond, slave->last_rx, 1)) {
33453350

33463351
bond_propose_link_state(slave, BOND_LINK_UP);
@@ -3365,7 +3370,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
33653370
* when the source ip is 0, so don't take the link down
33663371
* if we don't know our ip yet
33673372
*/
3368-
if (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) ||
3373+
if (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
33693374
!bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) {
33703375

33713376
bond_propose_link_state(slave, BOND_LINK_DOWN);
@@ -3431,7 +3436,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
34313436
*/
34323437
static int bond_ab_arp_inspect(struct bonding *bond)
34333438
{
3434-
unsigned long trans_start, last_rx;
3439+
unsigned long last_tx, last_rx;
34353440
struct list_head *iter;
34363441
struct slave *slave;
34373442
int commit = 0;
@@ -3482,9 +3487,9 @@ static int bond_ab_arp_inspect(struct bonding *bond)
34823487
* - (more than missed_max*delta since receive AND
34833488
* the bond has an IP address)
34843489
*/
3485-
trans_start = dev_trans_start(slave->dev);
3490+
last_tx = slave_last_tx(slave);
34863491
if (bond_is_active_slave(slave) &&
3487-
(!bond_time_in_interval(bond, trans_start, bond->params.missed_max) ||
3492+
(!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
34883493
!bond_time_in_interval(bond, last_rx, bond->params.missed_max))) {
34893494
bond_propose_link_state(slave, BOND_LINK_DOWN);
34903495
commit++;
@@ -3501,8 +3506,8 @@ static int bond_ab_arp_inspect(struct bonding *bond)
35013506
*/
35023507
static void bond_ab_arp_commit(struct bonding *bond)
35033508
{
3504-
unsigned long trans_start;
35053509
struct list_head *iter;
3510+
unsigned long last_tx;
35063511
struct slave *slave;
35073512

35083513
bond_for_each_slave(bond, slave, iter) {
@@ -3511,10 +3516,10 @@ static void bond_ab_arp_commit(struct bonding *bond)
35113516
continue;
35123517

35133518
case BOND_LINK_UP:
3514-
trans_start = dev_trans_start(slave->dev);
3519+
last_tx = slave_last_tx(slave);
35153520
if (rtnl_dereference(bond->curr_active_slave) != slave ||
35163521
(!rtnl_dereference(bond->curr_active_slave) &&
3517-
bond_time_in_interval(bond, trans_start, 1))) {
3522+
bond_time_in_interval(bond, last_tx, 1))) {
35183523
struct slave *current_arp_slave;
35193524

35203525
current_arp_slave = rtnl_dereference(bond->current_arp_slave);

drivers/net/veth.c

-4
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,6 @@ static bool veth_skb_is_eligible_for_gro(const struct net_device *dev,
312312
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
313313
{
314314
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
315-
struct netdev_queue *queue = NULL;
316315
struct veth_rq *rq = NULL;
317316
struct net_device *rcv;
318317
int length = skb->len;
@@ -330,7 +329,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
330329
rxq = skb_get_queue_mapping(skb);
331330
if (rxq < rcv->real_num_rx_queues) {
332331
rq = &rcv_priv->rq[rxq];
333-
queue = netdev_get_tx_queue(dev, rxq);
334332

335333
/* The napi pointer is available when an XDP program is
336334
* attached or when GRO is enabled
@@ -342,8 +340,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
342340

343341
skb_tx_timestamp(skb);
344342
if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
345-
if (queue)
346-
txq_trans_cond_update(queue);
347343
if (!use_napi)
348344
dev_lstats_add(dev, length);
349345
} else {

include/net/bonding.h

+12-1
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,9 @@ struct slave {
161161
struct net_device *dev; /* first - useful for panic debug */
162162
struct bonding *bond; /* our master */
163163
int delay;
164-
/* all three in jiffies */
164+
/* all 4 in jiffies */
165165
unsigned long last_link_up;
166+
unsigned long last_tx;
166167
unsigned long last_rx;
167168
unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
168169
s8 link; /* one of BOND_LINK_XXXX */
@@ -540,6 +541,16 @@ static inline unsigned long slave_last_rx(struct bonding *bond,
540541
return slave->last_rx;
541542
}
542543

544+
static inline void slave_update_last_tx(struct slave *slave)
545+
{
546+
WRITE_ONCE(slave->last_tx, jiffies);
547+
}
548+
549+
static inline unsigned long slave_last_tx(struct slave *slave)
550+
{
551+
return READ_ONCE(slave->last_tx);
552+
}
553+
543554
#ifdef CONFIG_NET_POLL_CONTROLLER
544555
static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave,
545556
struct sk_buff *skb)

net/sched/sch_generic.c

+2-6
Original file line numberDiff line numberDiff line change
@@ -427,14 +427,10 @@ void __qdisc_run(struct Qdisc *q)
427427

428428
unsigned long dev_trans_start(struct net_device *dev)
429429
{
430-
unsigned long val, res;
430+
unsigned long res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
431+
unsigned long val;
431432
unsigned int i;
432433

433-
if (is_vlan_dev(dev))
434-
dev = vlan_dev_real_dev(dev);
435-
else if (netif_is_macvlan(dev))
436-
dev = macvlan_dev_real_dev(dev);
437-
res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
438434
for (i = 1; i < dev->num_tx_queues; i++) {
439435
val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start);
440436
if (val && time_after(val, res))

0 commit comments

Comments
 (0)