Skip to content

Commit a98172e

Browse files
rostedtgregkh
authored andcommitted
tracing: Have trace_event_file have ref counters
commit bb32500 upstream. The following can crash the kernel: # cd /sys/kernel/tracing # echo 'p:sched schedule' > kprobe_events # exec 5>>events/kprobes/sched/enable # > kprobe_events # exec 5>&- The above commands: 1. Change directory to the tracefs directory 2. Create a kprobe event (doesn't matter what one) 3. Open bash file descriptor 5 on the enable file of the kprobe event 4. Delete the kprobe event (removes the files too) 5. Close the bash file descriptor 5 The above causes a crash! BUG: kernel NULL pointer dereference, address: 0000000000000028 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 6 PID: 877 Comm: bash Not tainted 6.5.0-rc4-test-00008-g2c6b6b1029d4-dirty torvalds#186 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 RIP: 0010:tracing_release_file_tr+0xc/0x50 What happens here is that the kprobe event creates a trace_event_file "file" descriptor that represents the file in tracefs to the event. It maintains state of the event (is it enabled for the given instance?). Opening the "enable" file gets a reference to the event "file" descriptor via the open file descriptor. When the kprobe event is deleted, the file is also deleted from the tracefs system which also frees the event "file" descriptor. But as the tracefs file is still opened by user space, it will not be totally removed until the final dput() is called on it. But this is not true with the event "file" descriptor that is already freed. If the user does a write to or simply closes the file descriptor it will reference the event "file" descriptor that was just freed, causing a use-after-free bug. To solve this, add a ref count to the event "file" descriptor as well as a new flag called "FREED". The "file" will not be freed until the last reference is released. But the FREE flag will be set when the event is removed to prevent any more modifications to that event from happening, even if there's still a reference to the event "file" descriptor. Link: https://lore.kernel.org/linux-trace-kernel/[email protected]/ Link: https://lore.kernel.org/linux-trace-kernel/[email protected] Cc: [email protected] Cc: Mark Rutland <[email protected]> Fixes: f5ca233 ("tracing: Increase trace array ref count on enable and filter files") Reported-by: Beau Belgrave <[email protected]> Tested-by: Beau Belgrave <[email protected]> Reviewed-by: Masami Hiramatsu (Google) <[email protected]> Signed-off-by: Steven Rostedt (Google) <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent c6e8af2 commit a98172e

File tree

5 files changed

+53
-15
lines changed

5 files changed

+53
-15
lines changed

include/linux/trace_events.h

+4
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ enum {
369369
EVENT_FILE_FL_TRIGGER_COND_BIT,
370370
EVENT_FILE_FL_PID_FILTER_BIT,
371371
EVENT_FILE_FL_WAS_ENABLED_BIT,
372+
EVENT_FILE_FL_FREED_BIT,
372373
};
373374

374375
extern struct trace_event_file *trace_get_event_file(const char *instance,
@@ -507,6 +508,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...);
507508
* TRIGGER_COND - When set, one or more triggers has an associated filter
508509
* PID_FILTER - When set, the event is filtered based on pid
509510
* WAS_ENABLED - Set when enabled to know to clear trace on module removal
511+
* FREED - File descriptor is freed, all fields should be considered invalid
510512
*/
511513
enum {
512514
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -520,6 +522,7 @@ enum {
520522
EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
521523
EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
522524
EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT),
525+
EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT),
523526
};
524527

525528
struct trace_event_file {
@@ -548,6 +551,7 @@ struct trace_event_file {
548551
* caching and such. Which is mostly OK ;-)
549552
*/
550553
unsigned long flags;
554+
atomic_t ref; /* ref count for opened files */
551555
atomic_t sm_ref; /* soft-mode reference counter */
552556
atomic_t tm_ref; /* trigger-mode reference counter */
553557
};

kernel/trace/trace.c

+15
Original file line numberDiff line numberDiff line change
@@ -4572,6 +4572,20 @@ int tracing_open_file_tr(struct inode *inode, struct file *filp)
45724572
if (ret)
45734573
return ret;
45744574

4575+
mutex_lock(&event_mutex);
4576+
4577+
/* Fail if the file is marked for removal */
4578+
if (file->flags & EVENT_FILE_FL_FREED) {
4579+
trace_array_put(file->tr);
4580+
ret = -ENODEV;
4581+
} else {
4582+
event_file_get(file);
4583+
}
4584+
4585+
mutex_unlock(&event_mutex);
4586+
if (ret)
4587+
return ret;
4588+
45754589
filp->private_data = inode->i_private;
45764590

45774591
return 0;
@@ -4582,6 +4596,7 @@ int tracing_release_file_tr(struct inode *inode, struct file *filp)
45824596
struct trace_event_file *file = inode->i_private;
45834597

45844598
trace_array_put(file->tr);
4599+
event_file_put(file);
45854600

45864601
return 0;
45874602
}

kernel/trace/trace.h

+3
Original file line numberDiff line numberDiff line change
@@ -1784,6 +1784,9 @@ extern int register_event_command(struct event_command *cmd);
17841784
extern int unregister_event_command(struct event_command *cmd);
17851785
extern int register_trigger_hist_enable_disable_cmds(void);
17861786

1787+
extern void event_file_get(struct trace_event_file *file);
1788+
extern void event_file_put(struct trace_event_file *file);
1789+
17871790
/**
17881791
* struct event_trigger_ops - callbacks for trace event triggers
17891792
*

kernel/trace/trace_events.c

+28-15
Original file line numberDiff line numberDiff line change
@@ -746,26 +746,38 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
746746
}
747747
}
748748

749-
static void remove_event_file_dir(struct trace_event_file *file)
749+
void event_file_get(struct trace_event_file *file)
750750
{
751-
struct dentry *dir = file->dir;
752-
struct dentry *child;
751+
atomic_inc(&file->ref);
752+
}
753753

754-
if (dir) {
755-
spin_lock(&dir->d_lock); /* probably unneeded */
756-
list_for_each_entry(child, &dir->d_subdirs, d_child) {
757-
if (d_really_is_positive(child)) /* probably unneeded */
758-
d_inode(child)->i_private = NULL;
759-
}
760-
spin_unlock(&dir->d_lock);
754+
void event_file_put(struct trace_event_file *file)
755+
{
756+
if (WARN_ON_ONCE(!atomic_read(&file->ref))) {
757+
if (file->flags & EVENT_FILE_FL_FREED)
758+
kmem_cache_free(file_cachep, file);
759+
return;
760+
}
761761

762-
tracefs_remove(dir);
762+
if (atomic_dec_and_test(&file->ref)) {
763+
/* Count should only go to zero when it is freed */
764+
if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED)))
765+
return;
766+
kmem_cache_free(file_cachep, file);
763767
}
768+
}
769+
770+
static void remove_event_file_dir(struct trace_event_file *file)
771+
{
772+
struct dentry *dir = file->dir;
773+
774+
tracefs_remove(dir);
764775

765776
list_del(&file->list);
766777
remove_subsystem(file->system);
767778
free_event_filter(file->filter);
768-
kmem_cache_free(file_cachep, file);
779+
file->flags |= EVENT_FILE_FL_FREED;
780+
event_file_put(file);
769781
}
770782

771783
/*
@@ -1138,7 +1150,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
11381150
flags = file->flags;
11391151
mutex_unlock(&event_mutex);
11401152

1141-
if (!file)
1153+
if (!file || flags & EVENT_FILE_FL_FREED)
11421154
return -ENODEV;
11431155

11441156
if (flags & EVENT_FILE_FL_ENABLED &&
@@ -1176,7 +1188,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
11761188
ret = -ENODEV;
11771189
mutex_lock(&event_mutex);
11781190
file = event_file_data(filp);
1179-
if (likely(file))
1191+
if (likely(file && !(file->flags & EVENT_FILE_FL_FREED)))
11801192
ret = ftrace_event_enable_disable(file, val);
11811193
mutex_unlock(&event_mutex);
11821194
break;
@@ -1445,7 +1457,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
14451457

14461458
mutex_lock(&event_mutex);
14471459
file = event_file_data(filp);
1448-
if (file)
1460+
if (file && !(file->flags & EVENT_FILE_FL_FREED))
14491461
print_event_filter(file, s);
14501462
mutex_unlock(&event_mutex);
14511463

@@ -2482,6 +2494,7 @@ trace_create_new_event(struct trace_event_call *call,
24822494
atomic_set(&file->tm_ref, 0);
24832495
INIT_LIST_HEAD(&file->triggers);
24842496
list_add(&file->list, &tr->events);
2497+
event_file_get(file);
24852498

24862499
return file;
24872500
}

kernel/trace/trace_events_filter.c

+3
Original file line numberDiff line numberDiff line change
@@ -1893,6 +1893,9 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
18931893
struct event_filter *filter = NULL;
18941894
int err;
18951895

1896+
if (file->flags & EVENT_FILE_FL_FREED)
1897+
return -ENODEV;
1898+
18961899
if (!strcmp(strstrip(filter_string), "0")) {
18971900
filter_disable(file);
18981901
filter = event_filter(file);

0 commit comments

Comments
 (0)