Skip to content

Commit 15aa876

Browse files
rostedtgregkh
authored andcommitted
tracing: Have trace_event_file have ref counters
commit bb32500 upstream. The following can crash the kernel: # cd /sys/kernel/tracing # echo 'p:sched schedule' > kprobe_events # exec 5>>events/kprobes/sched/enable # > kprobe_events # exec 5>&- The above commands: 1. Change directory to the tracefs directory 2. Create a kprobe event (doesn't matter what one) 3. Open bash file descriptor 5 on the enable file of the kprobe event 4. Delete the kprobe event (removes the files too) 5. Close the bash file descriptor 5 The above causes a crash! BUG: kernel NULL pointer dereference, address: 0000000000000028 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 6 PID: 877 Comm: bash Not tainted 6.5.0-rc4-test-00008-g2c6b6b1029d4-dirty torvalds#186 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 RIP: 0010:tracing_release_file_tr+0xc/0x50 What happens here is that the kprobe event creates a trace_event_file "file" descriptor that represents the file in tracefs to the event. It maintains state of the event (is it enabled for the given instance?). Opening the "enable" file gets a reference to the event "file" descriptor via the open file descriptor. When the kprobe event is deleted, the file is also deleted from the tracefs system which also frees the event "file" descriptor. But as the tracefs file is still opened by user space, it will not be totally removed until the final dput() is called on it. But this is not true with the event "file" descriptor that is already freed. If the user does a write to or simply closes the file descriptor it will reference the event "file" descriptor that was just freed, causing a use-after-free bug. To solve this, add a ref count to the event "file" descriptor as well as a new flag called "FREED". The "file" will not be freed until the last reference is released. But the FREE flag will be set when the event is removed to prevent any more modifications to that event from happening, even if there's still a reference to the event "file" descriptor. Link: https://lore.kernel.org/linux-trace-kernel/[email protected]/ Link: https://lore.kernel.org/linux-trace-kernel/[email protected] Cc: [email protected] Cc: Mark Rutland <[email protected]> Fixes: f5ca233 ("tracing: Increase trace array ref count on enable and filter files") Reported-by: Beau Belgrave <[email protected]> Tested-by: Beau Belgrave <[email protected]> Reviewed-by: Masami Hiramatsu (Google) <[email protected]> Signed-off-by: Steven Rostedt (Google) <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 16f6be9 commit 15aa876

File tree

5 files changed

+53
-14
lines changed

5 files changed

+53
-14
lines changed

include/linux/trace_events.h

+4
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,7 @@ enum {
492492
EVENT_FILE_FL_TRIGGER_COND_BIT,
493493
EVENT_FILE_FL_PID_FILTER_BIT,
494494
EVENT_FILE_FL_WAS_ENABLED_BIT,
495+
EVENT_FILE_FL_FREED_BIT,
495496
};
496497

497498
extern struct trace_event_file *trace_get_event_file(const char *instance,
@@ -630,6 +631,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...);
630631
* TRIGGER_COND - When set, one or more triggers has an associated filter
631632
* PID_FILTER - When set, the event is filtered based on pid
632633
* WAS_ENABLED - Set when enabled to know to clear trace on module removal
634+
* FREED - File descriptor is freed, all fields should be considered invalid
633635
*/
634636
enum {
635637
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -643,6 +645,7 @@ enum {
643645
EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
644646
EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
645647
EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT),
648+
EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT),
646649
};
647650

648651
struct trace_event_file {
@@ -671,6 +674,7 @@ struct trace_event_file {
671674
* caching and such. Which is mostly OK ;-)
672675
*/
673676
unsigned long flags;
677+
atomic_t ref; /* ref count for opened files */
674678
atomic_t sm_ref; /* soft-mode reference counter */
675679
atomic_t tm_ref; /* trigger-mode reference counter */
676680
};

kernel/trace/trace.c

+15
Original file line numberDiff line numberDiff line change
@@ -5000,6 +5000,20 @@ int tracing_open_file_tr(struct inode *inode, struct file *filp)
50005000
if (ret)
50015001
return ret;
50025002

5003+
mutex_lock(&event_mutex);
5004+
5005+
/* Fail if the file is marked for removal */
5006+
if (file->flags & EVENT_FILE_FL_FREED) {
5007+
trace_array_put(file->tr);
5008+
ret = -ENODEV;
5009+
} else {
5010+
event_file_get(file);
5011+
}
5012+
5013+
mutex_unlock(&event_mutex);
5014+
if (ret)
5015+
return ret;
5016+
50035017
filp->private_data = inode->i_private;
50045018

50055019
return 0;
@@ -5010,6 +5024,7 @@ int tracing_release_file_tr(struct inode *inode, struct file *filp)
50105024
struct trace_event_file *file = inode->i_private;
50115025

50125026
trace_array_put(file->tr);
5027+
event_file_put(file);
50135028

50145029
return 0;
50155030
}

kernel/trace/trace.h

+3
Original file line numberDiff line numberDiff line change
@@ -1656,6 +1656,9 @@ extern void event_trigger_unregister(struct event_command *cmd_ops,
16561656
char *glob,
16571657
struct event_trigger_data *trigger_data);
16581658

1659+
extern void event_file_get(struct trace_event_file *file);
1660+
extern void event_file_put(struct trace_event_file *file);
1661+
16591662
/**
16601663
* struct event_trigger_ops - callbacks for trace event triggers
16611664
*

kernel/trace/trace_events.c

+28-14
Original file line numberDiff line numberDiff line change
@@ -990,26 +990,39 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
990990
}
991991
}
992992

993+
void event_file_get(struct trace_event_file *file)
994+
{
995+
atomic_inc(&file->ref);
996+
}
997+
998+
void event_file_put(struct trace_event_file *file)
999+
{
1000+
if (WARN_ON_ONCE(!atomic_read(&file->ref))) {
1001+
if (file->flags & EVENT_FILE_FL_FREED)
1002+
kmem_cache_free(file_cachep, file);
1003+
return;
1004+
}
1005+
1006+
if (atomic_dec_and_test(&file->ref)) {
1007+
/* Count should only go to zero when it is freed */
1008+
if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED)))
1009+
return;
1010+
kmem_cache_free(file_cachep, file);
1011+
}
1012+
}
1013+
9931014
static void remove_event_file_dir(struct trace_event_file *file)
9941015
{
9951016
struct dentry *dir = file->dir;
9961017
struct dentry *child;
9971018

998-
if (dir) {
999-
spin_lock(&dir->d_lock); /* probably unneeded */
1000-
list_for_each_entry(child, &dir->d_subdirs, d_child) {
1001-
if (d_really_is_positive(child)) /* probably unneeded */
1002-
d_inode(child)->i_private = NULL;
1003-
}
1004-
spin_unlock(&dir->d_lock);
1005-
1006-
tracefs_remove(dir);
1007-
}
1019+
tracefs_remove(dir);
10081020

10091021
list_del(&file->list);
10101022
remove_subsystem(file->system);
10111023
free_event_filter(file->filter);
1012-
kmem_cache_free(file_cachep, file);
1024+
file->flags |= EVENT_FILE_FL_FREED;
1025+
event_file_put(file);
10131026
}
10141027

10151028
/*
@@ -1382,7 +1395,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
13821395
flags = file->flags;
13831396
mutex_unlock(&event_mutex);
13841397

1385-
if (!file)
1398+
if (!file || flags & EVENT_FILE_FL_FREED)
13861399
return -ENODEV;
13871400

13881401
if (flags & EVENT_FILE_FL_ENABLED &&
@@ -1420,7 +1433,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
14201433
ret = -ENODEV;
14211434
mutex_lock(&event_mutex);
14221435
file = event_file_data(filp);
1423-
if (likely(file))
1436+
if (likely(file && !(file->flags & EVENT_FILE_FL_FREED)))
14241437
ret = ftrace_event_enable_disable(file, val);
14251438
mutex_unlock(&event_mutex);
14261439
break;
@@ -1694,7 +1707,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
16941707

16951708
mutex_lock(&event_mutex);
16961709
file = event_file_data(filp);
1697-
if (file)
1710+
if (file && !(file->flags & EVENT_FILE_FL_FREED))
16981711
print_event_filter(file, s);
16991712
mutex_unlock(&event_mutex);
17001713

@@ -2810,6 +2823,7 @@ trace_create_new_event(struct trace_event_call *call,
28102823
atomic_set(&file->tm_ref, 0);
28112824
INIT_LIST_HEAD(&file->triggers);
28122825
list_add(&file->list, &tr->events);
2826+
event_file_get(file);
28132827

28142828
return file;
28152829
}

kernel/trace/trace_events_filter.c

+3
Original file line numberDiff line numberDiff line change
@@ -2088,6 +2088,9 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
20882088
struct event_filter *filter = NULL;
20892089
int err;
20902090

2091+
if (file->flags & EVENT_FILE_FL_FREED)
2092+
return -ENODEV;
2093+
20912094
if (!strcmp(strstrip(filter_string), "0")) {
20922095
filter_disable(file);
20932096
filter = event_filter(file);

0 commit comments

Comments
 (0)