Skip to content

Commit 37c3b9f

Browse files
Chris Mi, Saeed Mahameed
Chris Mi
authored and
Saeed Mahameed
committed
net/mlx5e: Prevent encap offload when neigh update is running
The cited commit adds a completion to remove the dependency on the rtnl lock. But it causes a deadlock for multiple encapsulations: crash> bt ffff8aece8a64000 PID: 1514557 TASK: ffff8aece8a64000 CPU: 3 COMMAND: "tc" #0 [ffffa6d14183f368] __schedule at ffffffffb8ba7f45 #1 [ffffa6d14183f3f8] schedule at ffffffffb8ba8418 #2 [ffffa6d14183f418] schedule_preempt_disabled at ffffffffb8ba8898 #3 [ffffa6d14183f428] __mutex_lock at ffffffffb8baa7f8 #4 [ffffa6d14183f4d0] mutex_lock_nested at ffffffffb8baabeb #5 [ffffa6d14183f4e0] mlx5e_attach_encap at ffffffffc0f48c17 [mlx5_core] #6 [ffffa6d14183f628] mlx5e_tc_add_fdb_flow at ffffffffc0f39680 [mlx5_core] #7 [ffffa6d14183f688] __mlx5e_add_fdb_flow at ffffffffc0f3b636 [mlx5_core] #8 [ffffa6d14183f6f0] mlx5e_tc_add_flow at ffffffffc0f3bcdf [mlx5_core] #9 [ffffa6d14183f728] mlx5e_configure_flower at ffffffffc0f3c1d1 [mlx5_core] #10 [ffffa6d14183f790] mlx5e_rep_setup_tc_cls_flower at ffffffffc0f3d529 [mlx5_core] #11 [ffffa6d14183f7a0] mlx5e_rep_setup_tc_cb at ffffffffc0f3d714 [mlx5_core] #12 [ffffa6d14183f7b0] tc_setup_cb_add at ffffffffb8931bb8 #13 [ffffa6d14183f810] fl_hw_replace_filter at ffffffffc0dae901 [cls_flower] #14 [ffffa6d14183f8d8] fl_change at ffffffffc0db5c57 [cls_flower] #15 [ffffa6d14183f970] tc_new_tfilter at ffffffffb8936047 #16 [ffffa6d14183fac8] rtnetlink_rcv_msg at ffffffffb88c7c31 #17 [ffffa6d14183fb50] netlink_rcv_skb at ffffffffb8942853 #18 [ffffa6d14183fbc0] rtnetlink_rcv at ffffffffb88c1835 #19 [ffffa6d14183fbd0] netlink_unicast at ffffffffb8941f27 #20 [ffffa6d14183fc18] netlink_sendmsg at ffffffffb8942245 #21 [ffffa6d14183fc98] sock_sendmsg at ffffffffb887d482 #22 [ffffa6d14183fcb8] ____sys_sendmsg at ffffffffb887d81a #23 [ffffa6d14183fd38] ___sys_sendmsg at ffffffffb88806e2 #24 [ffffa6d14183fe90] __sys_sendmsg at ffffffffb88807a2 #25 [ffffa6d14183ff28] __x64_sys_sendmsg at ffffffffb888080f 
#26 [ffffa6d14183ff38] do_syscall_64 at ffffffffb8b9b6a8 #27 [ffffa6d14183ff50] entry_SYSCALL_64_after_hwframe at ffffffffb8c0007c crash> bt 0xffff8aeb07544000 PID: 1110766 TASK: ffff8aeb07544000 CPU: 0 COMMAND: "kworker/u20:9" #0 [ffffa6d14e6b7bd8] __schedule at ffffffffb8ba7f45 #1 [ffffa6d14e6b7c68] schedule at ffffffffb8ba8418 #2 [ffffa6d14e6b7c88] schedule_timeout at ffffffffb8baef88 #3 [ffffa6d14e6b7d10] wait_for_completion at ffffffffb8ba968b #4 [ffffa6d14e6b7d60] mlx5e_take_all_encap_flows at ffffffffc0f47ec4 [mlx5_core] #5 [ffffa6d14e6b7da0] mlx5e_rep_update_flows at ffffffffc0f3e734 [mlx5_core] #6 [ffffa6d14e6b7df8] mlx5e_rep_neigh_update at ffffffffc0f400bb [mlx5_core] #7 [ffffa6d14e6b7e50] process_one_work at ffffffffb80acc9c #8 [ffffa6d14e6b7ed0] worker_thread at ffffffffb80ad012 #9 [ffffa6d14e6b7f10] kthread at ffffffffb80b615d #10 [ffffa6d14e6b7f50] ret_from_fork at ffffffffb8001b2f After the first encap is attached, the flow is added to the encap entry's flows list. If a neigh update is running at this time, the subsequent encaps of the flow can't take the encap_tbl_lock and go to sleep. If the neigh update thread is waiting for that flow's init_done, a deadlock happens. Fix it by holding the lock outside of the for loop. If a neigh update is running, prevent encap flows from offloading. Since the lock is held outside of the for loop, concurrent creation of encap entries is not allowed. So remove the unnecessary wait_for_completion call for res_ready. Fixes: 95435ad ("net/mlx5e: Only access fully initialized flows in neigh update") Signed-off-by: Chris Mi <[email protected]> Reviewed-by: Roi Dayan <[email protected]> Reviewed-by: Vlad Buslov <[email protected]> Signed-off-by: Saeed Mahameed <[email protected]>
1 parent e2ab5aa commit 37c3b9f

File tree

1 file changed

+20
-17
lines changed

1 file changed

+20
-17
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c

+20-17
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,19 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
492492
mlx5e_encap_dealloc(priv, e);
493493
}
494494

495+
static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
496+
{
497+
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
498+
499+
lockdep_assert_held(&esw->offloads.encap_tbl_lock);
500+
501+
if (!refcount_dec_and_test(&e->refcnt))
502+
return;
503+
list_del(&e->route_list);
504+
hash_del_rcu(&e->encap_hlist);
505+
mlx5e_encap_dealloc(priv, e);
506+
}
507+
495508
static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
496509
{
497510
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -816,6 +829,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
816829
uintptr_t hash_key;
817830
int err = 0;
818831

832+
lockdep_assert_held(&esw->offloads.encap_tbl_lock);
833+
819834
parse_attr = attr->parse_attr;
820835
tun_info = parse_attr->tun_info[out_index];
821836
mpls_info = &parse_attr->mpls_info[out_index];
@@ -829,7 +844,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
829844

830845
hash_key = hash_encap_info(&key);
831846

832-
mutex_lock(&esw->offloads.encap_tbl_lock);
833847
e = mlx5e_encap_get(priv, &key, hash_key);
834848

835849
/* must verify if encap is valid or not */
@@ -840,15 +854,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
840854
goto out_err;
841855
}
842856

843-
mutex_unlock(&esw->offloads.encap_tbl_lock);
844-
wait_for_completion(&e->res_ready);
845-
846-
/* Protect against concurrent neigh update. */
847-
mutex_lock(&esw->offloads.encap_tbl_lock);
848-
if (e->compl_result < 0) {
849-
err = -EREMOTEIO;
850-
goto out_err;
851-
}
852857
goto attach_flow;
853858
}
854859

@@ -877,15 +882,12 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
877882
INIT_LIST_HEAD(&e->flows);
878883
hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
879884
tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
880-
mutex_unlock(&esw->offloads.encap_tbl_lock);
881885

882886
if (family == AF_INET)
883887
err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
884888
else if (family == AF_INET6)
885889
err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
886890

887-
/* Protect against concurrent neigh update. */
888-
mutex_lock(&esw->offloads.encap_tbl_lock);
889891
complete_all(&e->res_ready);
890892
if (err) {
891893
e->compl_result = err;
@@ -920,18 +922,15 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
920922
} else {
921923
flow_flag_set(flow, SLOW);
922924
}
923-
mutex_unlock(&esw->offloads.encap_tbl_lock);
924925

925926
return err;
926927

927928
out_err:
928-
mutex_unlock(&esw->offloads.encap_tbl_lock);
929929
if (e)
930-
mlx5e_encap_put(priv, e);
930+
mlx5e_encap_put_locked(priv, e);
931931
return err;
932932

933933
out_err_init:
934-
mutex_unlock(&esw->offloads.encap_tbl_lock);
935934
kfree(tun_info);
936935
kfree(e);
937936
return err;
@@ -1027,6 +1026,7 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
10271026
struct net_device *encap_dev = NULL;
10281027
struct mlx5e_rep_priv *rpriv;
10291028
struct mlx5e_priv *out_priv;
1029+
struct mlx5_eswitch *esw;
10301030
int out_index;
10311031
int err = 0;
10321032

@@ -1037,6 +1037,8 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
10371037
esw_attr = attr->esw_attr;
10381038
*vf_tun = false;
10391039

1040+
esw = priv->mdev->priv.eswitch;
1041+
mutex_lock(&esw->offloads.encap_tbl_lock);
10401042
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
10411043
struct net_device *out_dev;
10421044
int mirred_ifindex;
@@ -1075,6 +1077,7 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
10751077
}
10761078

10771079
out:
1080+
mutex_unlock(&esw->offloads.encap_tbl_lock);
10781081
return err;
10791082
}
10801083

0 commit comments

Comments
 (0)