Skip to content

Commit 59458fa

Browse files
committed
kcsan: Turn report_filterlist_lock into a raw_spinlock
Ran Xiaokai reports that with a KCSAN-enabled PREEMPT_RT kernel, we can see splats like: | BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1 | preempt_count: 10002, expected: 0 | RCU nest depth: 0, expected: 0 | no locks held by swapper/1/0. | irq event stamp: 156674 | hardirqs last enabled at (156673): [<ffffffff81130bd9>] do_idle+0x1f9/0x240 | hardirqs last disabled at (156674): [<ffffffff82254f84>] sysvec_apic_timer_interrupt+0x14/0xc0 | softirqs last enabled at (0): [<ffffffff81099f47>] copy_process+0xfc7/0x4b60 | softirqs last disabled at (0): [<0000000000000000>] 0x0 | Preemption disabled at: | [<ffffffff814a3e2a>] paint_ptr+0x2a/0x90 | CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.11.0+ #3 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 | Call Trace: | <IRQ> | dump_stack_lvl+0x7e/0xc0 | dump_stack+0x1d/0x30 | __might_resched+0x1a2/0x270 | rt_spin_lock+0x68/0x170 | kcsan_skip_report_debugfs+0x43/0xe0 | print_report+0xb5/0x590 | kcsan_report_known_origin+0x1b1/0x1d0 | kcsan_setup_watchpoint+0x348/0x650 | __tsan_unaligned_write1+0x16d/0x1d0 | hrtimer_interrupt+0x3d6/0x430 | __sysvec_apic_timer_interrupt+0xe8/0x3a0 | sysvec_apic_timer_interrupt+0x97/0xc0 | </IRQ> On a detected data race, KCSAN's reporting logic checks if it should filter the report. That list is protected by the report_filterlist_lock *non-raw* spinlock which may sleep on RT kernels. Since KCSAN may report data races in any context, convert it to a raw_spinlock. This requires being careful about when to allocate memory for the filter list itself which can be done via KCSAN's debugfs interface. Concurrent modification of the filter list via debugfs should be rare: the chosen strategy is to optimistically pre-allocate memory before the critical section and discard if unused. Link: https://lore.kernel.org/all/[email protected]/ Reported-by: Ran Xiaokai <[email protected]> Tested-by: Ran Xiaokai <[email protected]> Signed-off-by: Marco Elver <[email protected]>
1 parent 8cf0b93 commit 59458fa

File tree

1 file changed

+36
-38
lines changed

1 file changed

+36
-38
lines changed

kernel/kcsan/debugfs.c

+36-38
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,8 @@ static struct {
4646
int used; /* number of elements used */
4747
bool sorted; /* if elements are sorted */
4848
bool whitelist; /* if list is a blacklist or whitelist */
49-
} report_filterlist = {
50-
.addrs = NULL,
51-
.size = 8, /* small initial size */
52-
.used = 0,
53-
.sorted = false,
54-
.whitelist = false, /* default is blacklist */
55-
};
56-
static DEFINE_SPINLOCK(report_filterlist_lock);
49+
} report_filterlist;
50+
static DEFINE_RAW_SPINLOCK(report_filterlist_lock);
5751

5852
/*
5953
* The microbenchmark allows benchmarking KCSAN core runtime only. To run
@@ -110,7 +104,7 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr)
110104
return false;
111105
func_addr -= offset; /* Get function start */
112106

113-
spin_lock_irqsave(&report_filterlist_lock, flags);
107+
raw_spin_lock_irqsave(&report_filterlist_lock, flags);
114108
if (report_filterlist.used == 0)
115109
goto out;
116110

@@ -127,67 +121,71 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr)
127121
ret = !ret;
128122

129123
out:
130-
spin_unlock_irqrestore(&report_filterlist_lock, flags);
124+
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
131125
return ret;
132126
}
133127

134128
static void set_report_filterlist_whitelist(bool whitelist)
135129
{
136130
unsigned long flags;
137131

138-
spin_lock_irqsave(&report_filterlist_lock, flags);
132+
raw_spin_lock_irqsave(&report_filterlist_lock, flags);
139133
report_filterlist.whitelist = whitelist;
140-
spin_unlock_irqrestore(&report_filterlist_lock, flags);
134+
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
141135
}
142136

143137
/* Returns 0 on success, error-code otherwise. */
144138
static ssize_t insert_report_filterlist(const char *func)
145139
{
146140
unsigned long flags;
147141
unsigned long addr = kallsyms_lookup_name(func);
142+
unsigned long *delay_free = NULL;
143+
unsigned long *new_addrs = NULL;
144+
size_t new_size = 0;
148145
ssize_t ret = 0;
149146

150147
if (!addr) {
151148
pr_err("could not find function: '%s'\n", func);
152149
return -ENOENT;
153150
}
154151

155-
spin_lock_irqsave(&report_filterlist_lock, flags);
152+
retry_alloc:
153+
/*
154+
* Check if we need an allocation, and re-validate under the lock. Since
155+
* the report_filterlist_lock is a raw, cannot allocate under the lock.
156+
*/
157+
if (data_race(report_filterlist.used == report_filterlist.size)) {
158+
new_size = (report_filterlist.size ?: 4) * 2;
159+
delay_free = new_addrs = kmalloc_array(new_size, sizeof(unsigned long), GFP_KERNEL);
160+
if (!new_addrs)
161+
return -ENOMEM;
162+
}
156163

157-
if (report_filterlist.addrs == NULL) {
158-
/* initial allocation */
159-
report_filterlist.addrs =
160-
kmalloc_array(report_filterlist.size,
161-
sizeof(unsigned long), GFP_ATOMIC);
162-
if (report_filterlist.addrs == NULL) {
163-
ret = -ENOMEM;
164-
goto out;
165-
}
166-
} else if (report_filterlist.used == report_filterlist.size) {
167-
/* resize filterlist */
168-
size_t new_size = report_filterlist.size * 2;
169-
unsigned long *new_addrs =
170-
krealloc(report_filterlist.addrs,
171-
new_size * sizeof(unsigned long), GFP_ATOMIC);
172-
173-
if (new_addrs == NULL) {
174-
/* leave filterlist itself untouched */
175-
ret = -ENOMEM;
176-
goto out;
164+
raw_spin_lock_irqsave(&report_filterlist_lock, flags);
165+
if (report_filterlist.used == report_filterlist.size) {
166+
/* Check we pre-allocated enough, and retry if not. */
167+
if (report_filterlist.used >= new_size) {
168+
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
169+
kfree(new_addrs); /* kfree(NULL) is safe */
170+
delay_free = new_addrs = NULL;
171+
goto retry_alloc;
177172
}
178173

174+
if (report_filterlist.used)
175+
memcpy(new_addrs, report_filterlist.addrs, report_filterlist.used * sizeof(unsigned long));
176+
delay_free = report_filterlist.addrs; /* free the old list */
177+
report_filterlist.addrs = new_addrs; /* switch to the new list */
179178
report_filterlist.size = new_size;
180-
report_filterlist.addrs = new_addrs;
181179
}
182180

183181
/* Note: deduplicating should be done in userspace. */
184182
report_filterlist.addrs[report_filterlist.used++] =
185183
kallsyms_lookup_name(func);
186184
report_filterlist.sorted = false;
187185

188-
out:
189-
spin_unlock_irqrestore(&report_filterlist_lock, flags);
186+
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
190187

188+
kfree(delay_free);
191189
return ret;
192190
}
193191

@@ -204,13 +202,13 @@ static int show_info(struct seq_file *file, void *v)
204202
}
205203

206204
/* show filter functions, and filter type */
207-
spin_lock_irqsave(&report_filterlist_lock, flags);
205+
raw_spin_lock_irqsave(&report_filterlist_lock, flags);
208206
seq_printf(file, "\n%s functions: %s\n",
209207
report_filterlist.whitelist ? "whitelisted" : "blacklisted",
210208
report_filterlist.used == 0 ? "none" : "");
211209
for (i = 0; i < report_filterlist.used; ++i)
212210
seq_printf(file, " %ps\n", (void *)report_filterlist.addrs[i]);
213-
spin_unlock_irqrestore(&report_filterlist_lock, flags);
211+
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
214212

215213
return 0;
216214
}

0 commit comments

Comments
 (0)