Skip to content

Commit 180dccb

Browse files
Laibin Qiu authored and axboe (Jens Axboe) committed
blk-mq: fix tag_get wait task can't be awakened
In case of shared tags, there might be more than one hctx which allocates
from the same tags, and each hctx is limited to allocate at most:

        hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U);

Tag idle detection is lazy, and may be delayed for 30sec, so there could be
just one real active hctx (queue), but all others are actually idle and
still accounted as active because of the lazy idle detection. Then if
wake_batch is > hctx_max_depth, driver tag allocation may wait forever on
this real active hctx.

Fix this by recalculating wake_batch when active_queues is incremented or
decremented.

Fixes: 0d2602c ("blk-mq: improve support for shared tags maps")
Suggested-by: Ming Lei <[email protected]>
Suggested-by: John Garry <[email protected]>
Signed-off-by: Laibin Qiu <[email protected]>
Reviewed-by: Andy Shevchenko <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
1 parent fb3b067 commit 180dccb

File tree

3 files changed

+66
-10
lines changed

3 files changed

+66
-10
lines changed

block/blk-mq-tag.c

+33-7
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,21 @@
1616
#include "blk-mq-sched.h"
1717
#include "blk-mq-tag.h"
1818

19+
/*
20+
* Recalculate wakeup batch when tag is shared by hctx.
21+
*/
22+
static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
23+
unsigned int users)
24+
{
25+
if (!users)
26+
return;
27+
28+
sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
29+
users);
30+
sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
31+
users);
32+
}
33+
1934
/*
2035
* If a previously inactive queue goes active, bump the active user count.
2136
* We need to do this before try to allocate driver tag, then even if fail
@@ -24,18 +39,26 @@
2439
*/
2540
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
2641
{
42+
unsigned int users;
43+
2744
if (blk_mq_is_shared_tags(hctx->flags)) {
2845
struct request_queue *q = hctx->queue;
2946

30-
if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
31-
!test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
32-
atomic_inc(&hctx->tags->active_queues);
47+
if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
48+
test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
49+
return true;
50+
}
3351
} else {
34-
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
35-
!test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
36-
atomic_inc(&hctx->tags->active_queues);
52+
if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
53+
test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
54+
return true;
55+
}
3756
}
3857

58+
users = atomic_inc_return(&hctx->tags->active_queues);
59+
60+
blk_mq_update_wake_batch(hctx->tags, users);
61+
3962
return true;
4063
}
4164

@@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
5679
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
5780
{
5881
struct blk_mq_tags *tags = hctx->tags;
82+
unsigned int users;
5983

6084
if (blk_mq_is_shared_tags(hctx->flags)) {
6185
struct request_queue *q = hctx->queue;
@@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
6892
return;
6993
}
7094

71-
atomic_dec(&tags->active_queues);
95+
users = atomic_dec_return(&tags->active_queues);
96+
97+
blk_mq_update_wake_batch(tags, users);
7298

7399
blk_mq_tag_wakeup_all(tags, false);
74100
}

include/linux/sbitmap.h

+11
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
415415
sbitmap_free(&sbq->sb);
416416
}
417417

418+
/**
419+
* sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
420+
* @sbq: Bitmap queue to recalculate wake batch.
421+
* @users: Number of shares.
422+
*
423+
* Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
424+
* by depth. This interface is for HCTX shared tags or queue shared tags.
425+
*/
426+
void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
427+
unsigned int users);
428+
418429
/**
419430
* sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
420431
* @sbq: Bitmap queue to resize.

lib/sbitmap.c

+22-3
Original file line numberDiff line numberDiff line change
@@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
457457
}
458458
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
459459

460-
static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
461-
unsigned int depth)
460+
static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
461+
unsigned int wake_batch)
462462
{
463-
unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
464463
int i;
465464

466465
if (sbq->wake_batch != wake_batch) {
@@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
476475
}
477476
}
478477

478+
static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
479+
unsigned int depth)
480+
{
481+
unsigned int wake_batch;
482+
483+
wake_batch = sbq_calc_wake_batch(sbq, depth);
484+
__sbitmap_queue_update_wake_batch(sbq, wake_batch);
485+
}
486+
487+
void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
488+
unsigned int users)
489+
{
490+
unsigned int wake_batch;
491+
492+
wake_batch = clamp_val((sbq->sb.depth + users - 1) /
493+
users, 4, SBQ_WAKE_BATCH);
494+
__sbitmap_queue_update_wake_batch(sbq, wake_batch);
495+
}
496+
EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
497+
479498
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
480499
{
481500
sbitmap_queue_update_wake_batch(sbq, depth);

0 commit comments

Comments
 (0)