
Commit afe1585

Florian Westphal authored and gregkh committed
netlink: don't hold mutex in rcu callback when releasing mmapd ring
[ Upstream commit 0470eb9 ]

Kirill A. Shutemov says:

 This simple test-case triggers a few locking asserts in kernel:

 int main(int argc, char **argv)
 {
	unsigned int block_size = 16 * 4096;
	struct nl_mmap_req req = {
		.nm_block_size	= block_size,
		.nm_block_nr	= 64,
		.nm_frame_size	= 16384,
		.nm_frame_nr	= 64 * block_size / 16384,
	};
	unsigned int ring_size;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
	if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
		exit(1);
	if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
		exit(1);

	ring_size = req.nm_block_nr * req.nm_block_size;
	mmap(NULL, 2 * ring_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	return 0;
 }

 +++ exited with 0 +++
 BUG: sleeping function called from invalid context at /home/kas/git/public/linux-mm/kernel/locking/mutex.c:616
 in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init
 3 locks held by init/1:
  #0: (reboot_mutex){+.+...}, at: [<ffffffff81080959>] SyS_reboot+0xa9/0x220
  #1: ((reboot_notifier_list).rwsem){.+.+..}, at: [<ffffffff8107f379>] __blocking_notifier_call_chain+0x39/0x70
  #2: (rcu_callback){......}, at: [<ffffffff810d32e0>] rcu_do_batch.isra.49+0x160/0x10c0
 Preemption disabled at:[<ffffffff8145365f>] __delay+0xf/0x20
 CPU: 1 PID: 1 Comm: init Not tainted 4.1.0-00009-gbddf4c4818e0 #253
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Debian-1.8.2-1 04/01/2014
  ffff88017b3d8000 ffff88027bc03c38 ffffffff81929ceb 0000000000000102
  0000000000000000 ffff88027bc03c68 ffffffff81085a9d 0000000000000002
  ffffffff81ca2a20 0000000000000268 0000000000000000 ffff88027bc03c98
 Call Trace:
  <IRQ>  [<ffffffff81929ceb>] dump_stack+0x4f/0x7b
  [<ffffffff81085a9d>] ___might_sleep+0x16d/0x270
  [<ffffffff81085bed>] __might_sleep+0x4d/0x90
  [<ffffffff8192e96f>] mutex_lock_nested+0x2f/0x430
  [<ffffffff81932fed>] ? _raw_spin_unlock_irqrestore+0x5d/0x80
  [<ffffffff81464143>] ? __this_cpu_preempt_check+0x13/0x20
  [<ffffffff8182fc3d>] netlink_set_ring+0x1ed/0x350
  [<ffffffff8182e000>] ? netlink_undo_bind+0x70/0x70
  [<ffffffff8182fe20>] netlink_sock_destruct+0x80/0x150
  [<ffffffff817e484d>] __sk_free+0x1d/0x160
  [<ffffffff817e49a9>] sk_free+0x19/0x20
  [..]

Cong Wang says:

 We can't hold mutex lock in a rcu callback, [..]

Thomas Graf says:

 The socket should be dead at this point. It might be simpler to
 add a netlink_release_ring() function which doesn't require
 locking at all.

Reported-by: "Kirill A. Shutemov" <[email protected]>
Diagnosed-by: Cong Wang <[email protected]>
Suggested-by: Thomas Graf <[email protected]>
Signed-off-by: Florian Westphal <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent ba63f0d · commit afe1585
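Editor's note: for convenience, here is a buildable version of the test case quoted above. The includes and the SOL_NETLINK fallback define are editorial additions; the sketch assumes pre-4.6 uapi headers, where <linux/netlink.h> still exports struct nl_mmap_req and the NETLINK_RX_RING/NETLINK_TX_RING socket options (the CONFIG_NETLINK_MMAP ring ABI was removed later, so this will not compile against current headers).

/* Buildable variant of the reproducer from the commit message.
 * Requires pre-4.6 headers that still ship the CONFIG_NETLINK_MMAP
 * ring ABI (struct nl_mmap_req, NETLINK_RX_RING, NETLINK_TX_RING). */
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/netlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270		/* older libcs do not define it */
#endif

int main(void)
{
	unsigned int block_size = 16 * 4096;
	struct nl_mmap_req req = {
		.nm_block_size	= block_size,
		.nm_block_nr	= 64,
		.nm_frame_size	= 16384,
		.nm_frame_nr	= 64 * block_size / 16384,
	};
	unsigned int ring_size;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
	if (fd < 0)
		exit(1);
	if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
		exit(1);
	if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
		exit(1);

	/* Map RX and TX rings back to back. The program itself exits
	 * cleanly; the BUG fires later, when the dead socket's rings
	 * are torn down from an RCU callback (in the report above,
	 * during reboot). */
	ring_size = req.nm_block_nr * req.nm_block_size;
	mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	return 0;
}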

1 file changed: net/netlink/af_netlink.c (+47 −32)
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -350,25 +350,52 @@ static void **alloc_pg_vec(struct netlink_sock *nlk,
 	return NULL;
 }
 
+
+static void
+__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+		   unsigned int order)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct sk_buff_head *queue;
+	struct netlink_ring *ring;
+
+	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+
+	spin_lock_bh(&queue->lock);
+
+	ring->frame_max		= req->nm_frame_nr - 1;
+	ring->head		= 0;
+	ring->frame_size	= req->nm_frame_size;
+	ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
+
+	swap(ring->pg_vec_len, req->nm_block_nr);
+	swap(ring->pg_vec_order, order);
+	swap(ring->pg_vec, pg_vec);
+
+	__skb_queue_purge(queue);
+	spin_unlock_bh(&queue->lock);
+
+	WARN_ON(atomic_read(&nlk->mapped));
+
+	if (pg_vec)
+		free_pg_vec(pg_vec, order, req->nm_block_nr);
+}
+
 static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
-			    bool closing, bool tx_ring)
+			    bool tx_ring)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct netlink_ring *ring;
-	struct sk_buff_head *queue;
 	void **pg_vec = NULL;
 	unsigned int order = 0;
-	int err;
 
 	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
 
-	if (!closing) {
-		if (atomic_read(&nlk->mapped))
-			return -EBUSY;
-		if (atomic_read(&ring->pending))
-			return -EBUSY;
-	}
+	if (atomic_read(&nlk->mapped))
+		return -EBUSY;
+	if (atomic_read(&ring->pending))
+		return -EBUSY;
 
 	if (req->nm_block_nr) {
 		if (ring->pg_vec != NULL)
@@ -400,31 +427,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
 			return -EINVAL;
 	}
 
-	err = -EBUSY;
 	mutex_lock(&nlk->pg_vec_lock);
-	if (closing || atomic_read(&nlk->mapped) == 0) {
-		err = 0;
-		spin_lock_bh(&queue->lock);
-
-		ring->frame_max		= req->nm_frame_nr - 1;
-		ring->head		= 0;
-		ring->frame_size	= req->nm_frame_size;
-		ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
-
-		swap(ring->pg_vec_len, req->nm_block_nr);
-		swap(ring->pg_vec_order, order);
-		swap(ring->pg_vec, pg_vec);
-
-		__skb_queue_purge(queue);
-		spin_unlock_bh(&queue->lock);
-
-		WARN_ON(atomic_read(&nlk->mapped));
+	if (atomic_read(&nlk->mapped) == 0) {
+		__netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+		mutex_unlock(&nlk->pg_vec_lock);
+		return 0;
 	}
+
 	mutex_unlock(&nlk->pg_vec_lock);
 
 	if (pg_vec)
 		free_pg_vec(pg_vec, order, req->nm_block_nr);
-	return err;
+
+	return -EBUSY;
 }
 
 static void netlink_mm_open(struct vm_area_struct *vma)
@@ -893,10 +908,10 @@ static void netlink_sock_destruct(struct sock *sk)
 
 	memset(&req, 0, sizeof(req));
 	if (nlk->rx_ring.pg_vec)
-		netlink_set_ring(sk, &req, true, false);
+		__netlink_set_ring(sk, &req, false, NULL, 0);
 	memset(&req, 0, sizeof(req));
 	if (nlk->tx_ring.pg_vec)
-		netlink_set_ring(sk, &req, true, true);
+		__netlink_set_ring(sk, &req, true, NULL, 0);
 }
 #endif /* CONFIG_NETLINK_MMAP */
 
@@ -2190,7 +2205,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 			return -EINVAL;
 		if (copy_from_user(&req, optval, sizeof(req)))
 			return -EFAULT;
-		err = netlink_set_ring(sk, &req, false,
+		err = netlink_set_ring(sk, &req,
 				       optname == NETLINK_TX_RING);
 		break;
 	}
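Editor's note: the bug class here is generic, not netlink-specific. RCU callbacks normally run in softirq (atomic) context, so any sleeping lock taken there trips CONFIG_DEBUG_ATOMIC_SLEEP; that is why the destructor path above is rerouted to __netlink_set_ring(), which only takes the non-sleeping spin_lock_bh(). As an illustration only (not part of the commit; all demo_* names are hypothetical), a minimal out-of-tree module that provokes the same splat:

/* demo: intentionally take a sleeping lock inside an RCU callback. */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

static DEFINE_MUTEX(demo_lock);

struct demo_obj {
	struct rcu_head rcu;
};

static void demo_rcu_cb(struct rcu_head *head)
{
	struct demo_obj *obj = container_of(head, struct demo_obj, rcu);

	/* Sleeping lock in atomic (softirq) context: with
	 * CONFIG_DEBUG_ATOMIC_SLEEP this triggers the same
	 * "sleeping function called from invalid context" BUG
	 * as the netlink report above. */
	mutex_lock(&demo_lock);
	mutex_unlock(&demo_lock);
	kfree(obj);
}

static int __init demo_init(void)
{
	struct demo_obj *obj = kmalloc(sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return -ENOMEM;
	call_rcu(&obj->rcu, demo_rcu_cb);
	return 0;
}

static void __exit demo_exit(void)
{
	rcu_barrier();	/* make sure the callback has run before unload */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("demo: sleeping lock in an RCU callback (intentional bug)");

Loading this on a DEBUG_ATOMIC_SLEEP kernel should print essentially the same ___might_sleep() backtrace as in the report, with demo_rcu_cb() in place of netlink_sock_destruct().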
