Skip to content

Commit 64ea2d0

Browse files
rleondavem330
authored andcommitted
net/mlx5: Accept devlink user input after driver initialization complete
The change of devlink_alloc() to accept device makes sure that device is fully initialized and device_register() does nothing except allowing users to use that devlink instance. Such change ensures that no user input will be usable till that point and it eliminates the need to worry about internal locking as long as devlink_register is called last since all accesses to the devlink are during initialization. This change fixes the following lockdep warning. ====================================================== WARNING: possible circular locking dependency detected 5.14.0-rc2+ Freescale#27 Not tainted ------------------------------------------------------ devlink/265 is trying to acquire lock: ffff8880133c2bc0 (&dev->intf_state_mutex){+.+.}-{3:3}, at: mlx5_unload_one+0x1e/0xa0 [mlx5_core] but task is already holding lock: ffffffff8362b468 (devlink_mutex){+.+.}-{3:3}, at: devlink_nl_pre_doit+0x2b/0x8d0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> Freescale#1 (devlink_mutex){+.+.}-{3:3}: __mutex_lock+0x149/0x1310 devlink_register+0xe7/0x280 mlx5_devlink_register+0x118/0x480 [mlx5_core] mlx5_init_one+0x34b/0x440 [mlx5_core] probe_one+0x480/0x6e0 [mlx5_core] pci_device_probe+0x2a0/0x4a0 really_probe+0x1cb/0xba0 __driver_probe_device+0x18f/0x470 driver_probe_device+0x49/0x120 __driver_attach+0x1ce/0x400 bus_for_each_dev+0x11e/0x1a0 bus_add_driver+0x309/0x570 driver_register+0x20f/0x390 0xffffffffa04a0062 do_one_initcall+0xd5/0x400 do_init_module+0x1c8/0x760 load_module+0x7d9d/0xa4b0 __do_sys_finit_module+0x118/0x1a0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #0 (&dev->intf_state_mutex){+.+.}-{3:3}: __lock_acquire+0x2999/0x5a40 lock_acquire+0x1a9/0x4a0 __mutex_lock+0x149/0x1310 mlx5_unload_one+0x1e/0xa0 [mlx5_core] mlx5_devlink_reload_down+0x185/0x2b0 [mlx5_core] devlink_reload+0x1f2/0x640 devlink_nl_cmd_reload+0x6c3/0x10d0 genl_family_rcv_msg_doit+0x1e9/0x2f0 genl_rcv_msg+0x27f/0x4a0 netlink_rcv_skb+0x11e/0x340 genl_rcv+0x24/0x40 netlink_unicast+0x433/0x700 netlink_sendmsg+0x6fb/0xbe0 sock_sendmsg+0xb0/0xe0 __sys_sendto+0x192/0x240 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(devlink_mutex); lock(&dev->intf_state_mutex); lock(devlink_mutex); lock(&dev->intf_state_mutex); *** DEADLOCK *** 3 locks held by devlink/265: #0: ffffffff836371d0 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40 Freescale#1: ffffffff83637288 (genl_mutex){+.+.}-{3:3}, at: genl_rcv_msg+0x31a/0x4a0 Freescale#2: ffffffff8362b468 (devlink_mutex){+.+.}-{3:3}, at: devlink_nl_pre_doit+0x2b/0x8d0 stack backtrace: CPU: 0 PID: 265 Comm: devlink Not tainted 5.14.0-rc2+ Freescale#27 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x45/0x59 check_noncircular+0x268/0x310 ? print_circular_bug+0x460/0x460 ? __kernel_text_address+0xe/0x30 ? alloc_chain_hlocks+0x1e6/0x5a0 __lock_acquire+0x2999/0x5a40 ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0 ? add_lock_to_list.constprop.0+0x6c/0x530 lock_acquire+0x1a9/0x4a0 ? mlx5_unload_one+0x1e/0xa0 [mlx5_core] ? lock_release+0x6c0/0x6c0 ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0 ? lock_is_held_type+0x98/0x110 __mutex_lock+0x149/0x1310 ? mlx5_unload_one+0x1e/0xa0 [mlx5_core] ? lock_is_held_type+0x98/0x110 ? mlx5_unload_one+0x1e/0xa0 [mlx5_core] ? find_held_lock+0x2d/0x110 ? mutex_lock_io_nested+0x1160/0x1160 ? mlx5_lag_is_active+0x72/0x90 [mlx5_core] ? lock_downgrade+0x6d0/0x6d0 ? do_raw_spin_lock+0x12e/0x270 ? rwlock_bug.part.0+0x90/0x90 ? mlx5_unload_one+0x1e/0xa0 [mlx5_core] mlx5_unload_one+0x1e/0xa0 [mlx5_core] mlx5_devlink_reload_down+0x185/0x2b0 [mlx5_core] ? netlink_broadcast_filtered+0x308/0xac0 ? mlx5_devlink_info_get+0x1f0/0x1f0 [mlx5_core] ? __build_skb_around+0x110/0x2b0 ? __alloc_skb+0x113/0x2b0 devlink_reload+0x1f2/0x640 ? devlink_unregister+0x1e0/0x1e0 ? security_capable+0x51/0x90 devlink_nl_cmd_reload+0x6c3/0x10d0 ? devlink_nl_cmd_get_doit+0x1e0/0x1e0 ? devlink_nl_pre_doit+0x72/0x8d0 genl_family_rcv_msg_doit+0x1e9/0x2f0 ? __lock_acquire+0x15e2/0x5a40 ? genl_family_rcv_msg_attrs_parse.constprop.0+0x240/0x240 ? mutex_lock_io_nested+0x1160/0x1160 ? security_capable+0x51/0x90 genl_rcv_msg+0x27f/0x4a0 ? genl_get_cmd+0x3c0/0x3c0 ? lock_acquire+0x1a9/0x4a0 ? devlink_nl_cmd_get_doit+0x1e0/0x1e0 ? lock_release+0x6c0/0x6c0 netlink_rcv_skb+0x11e/0x340 ? genl_get_cmd+0x3c0/0x3c0 ? netlink_ack+0x930/0x930 genl_rcv+0x24/0x40 netlink_unicast+0x433/0x700 ? netlink_attachskb+0x750/0x750 ? __alloc_skb+0x113/0x2b0 netlink_sendmsg+0x6fb/0xbe0 ? netlink_unicast+0x700/0x700 ? netlink_unicast+0x700/0x700 sock_sendmsg+0xb0/0xe0 __sys_sendto+0x192/0x240 ? __x64_sys_getpeername+0xb0/0xb0 ? do_sys_openat2+0x10a/0x370 ? down_write_nested+0x150/0x150 ? do_user_addr_fault+0x215/0xd50 ? __x64_sys_openat+0x11f/0x1d0 ? __x64_sys_open+0x1a0/0x1a0 __x64_sys_sendto+0xdc/0x1b0 ? syscall_enter_from_user_mode+0x1d/0x50 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f50b50b6b3a Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007fff6c0d3f38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007f50b50b6b3a RDX: 0000000000000038 RSI: 000055763ac08440 RDI: 0000000000000003 RBP: 000055763ac08410 R08: 00007f50b5192200 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 000055763ac08410 R15: 000055763ac08440 mlx5_core 0000:00:09.0: firmware version: 4.8.9999 mlx5_core 0000:00:09.0: 0.000 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x255 link) mlx5_core 0000:00:09.0 eth1: Link up Fixes: a6f3b62 ("net/mlx5: Move devlink registration before interfaces load") Signed-off-by: Leon Romanovsky <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 1e72685 commit 64ea2d0

File tree

3 files changed

+6
-7
lines changed

3 files changed

+6
-7
lines changed

drivers/net/ethernet/mellanox/mlx5/core/devlink.c

+2-7
Original file line numberDiff line numberDiff line change
@@ -793,11 +793,11 @@ int mlx5_devlink_register(struct devlink *devlink)
793793
{
794794
int err;
795795

796-
devlink_register(devlink);
797796
err = devlink_params_register(devlink, mlx5_devlink_params,
798797
ARRAY_SIZE(mlx5_devlink_params));
799798
if (err)
800-
goto params_reg_err;
799+
return err;
800+
801801
mlx5_devlink_set_params_init_values(devlink);
802802

803803
err = mlx5_devlink_auxdev_params_register(devlink);
@@ -808,25 +808,20 @@ int mlx5_devlink_register(struct devlink *devlink)
808808
if (err)
809809
goto traps_reg_err;
810810

811-
devlink_params_publish(devlink);
812811
return 0;
813812

814813
traps_reg_err:
815814
mlx5_devlink_auxdev_params_unregister(devlink);
816815
auxdev_reg_err:
817816
devlink_params_unregister(devlink, mlx5_devlink_params,
818817
ARRAY_SIZE(mlx5_devlink_params));
819-
params_reg_err:
820-
devlink_unregister(devlink);
821818
return err;
822819
}
823820

824821
void mlx5_devlink_unregister(struct devlink *devlink)
825822
{
826-
devlink_params_unpublish(devlink);
827823
mlx5_devlink_traps_unregister(devlink);
828824
mlx5_devlink_auxdev_params_unregister(devlink);
829825
devlink_params_unregister(devlink, mlx5_devlink_params,
830826
ARRAY_SIZE(mlx5_devlink_params));
831-
devlink_unregister(devlink);
832827
}

drivers/net/ethernet/mellanox/mlx5/core/main.c

+2
Original file line numberDiff line numberDiff line change
@@ -1537,6 +1537,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
15371537
dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
15381538

15391539
pci_save_state(pdev);
1540+
devlink_register(devlink);
15401541
if (!mlx5_core_is_mp_slave(dev))
15411542
devlink_reload_enable(devlink);
15421543
return 0;
@@ -1559,6 +1560,7 @@ static void remove_one(struct pci_dev *pdev)
15591560
struct devlink *devlink = priv_to_devlink(dev);
15601561

15611562
devlink_reload_disable(devlink);
1563+
devlink_unregister(devlink);
15621564
mlx5_crdump_disable(dev);
15631565
mlx5_drain_health_wq(dev);
15641566
mlx5_uninit_one(dev);

drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c

+2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
4646
mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err);
4747
goto init_one_err;
4848
}
49+
devlink_register(devlink);
4950
devlink_reload_enable(devlink);
5051
return 0;
5152

@@ -65,6 +66,7 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
6566

6667
devlink = priv_to_devlink(sf_dev->mdev);
6768
devlink_reload_disable(devlink);
69+
devlink_unregister(devlink);
6870
mlx5_uninit_one(sf_dev->mdev);
6971
iounmap(sf_dev->mdev->iseg);
7072
mlx5_mdev_uninit(sf_dev->mdev);

0 commit comments

Comments
 (0)