Skip to content

Commit e12f03d

Browse files
liu-song-6Ingo Molnar
authored and
Ingo Molnar
committed
perf/core: Implement the 'perf_kprobe' PMU
A new PMU type, perf_kprobe, is added. Based on the attr from perf_event_open(), perf_kprobe creates a kprobe (or kretprobe) for the perf_event. This kprobe is private to this perf_event, and thus is neither added to global lists nor available in tracefs. Two functions, create_local_trace_kprobe() and destroy_local_trace_kprobe(), are added to create and destroy these local trace_kprobes. Signed-off-by: Song Liu <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Reviewed-by: Yonghong Song <[email protected]> Reviewed-by: Josef Bacik <[email protected]> Cc: <[email protected]> Cc: <[email protected]> Cc: <[email protected]> Cc: <[email protected]> Cc: Arnaldo Carvalho de Melo <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Linus Torvalds <[email protected]> Cc: Namhyung Kim <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Thomas Gleixner <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Ingo Molnar <[email protected]>
1 parent 0d8dd67 commit e12f03d

File tree

5 files changed

+250
-43
lines changed

5 files changed

+250
-43
lines changed

include/linux/trace_events.h

+4
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,10 @@ extern int perf_trace_init(struct perf_event *event);
533533
extern void perf_trace_destroy(struct perf_event *event);
534534
extern int perf_trace_add(struct perf_event *event, int flags);
535535
extern void perf_trace_del(struct perf_event *event, int flags);
536+
#ifdef CONFIG_KPROBE_EVENTS
537+
extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
538+
extern void perf_kprobe_destroy(struct perf_event *event);
539+
#endif
536540
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
537541
char *filter_str);
538542
extern void ftrace_profile_free_filter(struct perf_event *event);

kernel/events/core.c

+107-35
Original file line numberDiff line numberDiff line change
@@ -7992,9 +7992,77 @@ static struct pmu perf_tracepoint = {
79927992
.read = perf_swevent_read,
79937993
};
79947994

7995+
#ifdef CONFIG_KPROBE_EVENTS
7996+
/*
7997+
* Flags in config, used by dynamic PMU kprobe and uprobe
7998+
* The flags should match the following PMU_FORMAT_ATTR().
7999+
*
8000+
* PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
8001+
* if not set, create kprobe/uprobe
8002+
*/
8003+
enum perf_probe_config {
8004+
PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0, /* [k,u]retprobe */
8005+
};
8006+
8007+
PMU_FORMAT_ATTR(retprobe, "config:0");
8008+
8009+
static struct attribute *probe_attrs[] = {
8010+
&format_attr_retprobe.attr,
8011+
NULL,
8012+
};
8013+
8014+
static struct attribute_group probe_format_group = {
8015+
.name = "format",
8016+
.attrs = probe_attrs,
8017+
};
8018+
8019+
static const struct attribute_group *probe_attr_groups[] = {
8020+
&probe_format_group,
8021+
NULL,
8022+
};
8023+
8024+
static int perf_kprobe_event_init(struct perf_event *event);
8025+
static struct pmu perf_kprobe = {
8026+
.task_ctx_nr = perf_sw_context,
8027+
.event_init = perf_kprobe_event_init,
8028+
.add = perf_trace_add,
8029+
.del = perf_trace_del,
8030+
.start = perf_swevent_start,
8031+
.stop = perf_swevent_stop,
8032+
.read = perf_swevent_read,
8033+
.attr_groups = probe_attr_groups,
8034+
};
8035+
8036+
static int perf_kprobe_event_init(struct perf_event *event)
8037+
{
8038+
int err;
8039+
bool is_retprobe;
8040+
8041+
if (event->attr.type != perf_kprobe.type)
8042+
return -ENOENT;
8043+
/*
8044+
* no branch sampling for probe events
8045+
*/
8046+
if (has_branch_stack(event))
8047+
return -EOPNOTSUPP;
8048+
8049+
is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
8050+
err = perf_kprobe_init(event, is_retprobe);
8051+
if (err)
8052+
return err;
8053+
8054+
event->destroy = perf_kprobe_destroy;
8055+
8056+
return 0;
8057+
}
8058+
#endif /* CONFIG_KPROBE_EVENTS */
8059+
79958060
static inline void perf_tp_register(void)
79968061
{
79978062
perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
8063+
#ifdef CONFIG_KPROBE_EVENTS
8064+
perf_pmu_register(&perf_kprobe, "kprobe", -1);
8065+
#endif
79988066
}
79998067

80008068
static void perf_event_free_filter(struct perf_event *event)
@@ -8071,13 +8139,28 @@ static void perf_event_free_bpf_handler(struct perf_event *event)
80718139
}
80728140
#endif
80738141

8142+
/*
8143+
* returns true if the event is a tracepoint, or a kprobe/upprobe created
8144+
* with perf_event_open()
8145+
*/
8146+
static inline bool perf_event_is_tracing(struct perf_event *event)
8147+
{
8148+
if (event->pmu == &perf_tracepoint)
8149+
return true;
8150+
#ifdef CONFIG_KPROBE_EVENTS
8151+
if (event->pmu == &perf_kprobe)
8152+
return true;
8153+
#endif
8154+
return false;
8155+
}
8156+
80748157
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
80758158
{
80768159
bool is_kprobe, is_tracepoint, is_syscall_tp;
80778160
struct bpf_prog *prog;
80788161
int ret;
80798162

8080-
if (event->attr.type != PERF_TYPE_TRACEPOINT)
8163+
if (!perf_event_is_tracing(event))
80818164
return perf_event_set_bpf_handler(event, prog_fd);
80828165

80838166
is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
@@ -8116,7 +8199,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
81168199

81178200
static void perf_event_free_bpf_prog(struct perf_event *event)
81188201
{
8119-
if (event->attr.type != PERF_TYPE_TRACEPOINT) {
8202+
if (!perf_event_is_tracing(event)) {
81208203
perf_event_free_bpf_handler(event);
81218204
return;
81228205
}
@@ -8535,47 +8618,36 @@ perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
85358618
return ret;
85368619
}
85378620

8538-
static int
8539-
perf_tracepoint_set_filter(struct perf_event *event, char *filter_str)
8540-
{
8541-
struct perf_event_context *ctx = event->ctx;
8542-
int ret;
8543-
8544-
/*
8545-
* Beware, here be dragons!!
8546-
*
8547-
* the tracepoint muck will deadlock against ctx->mutex, but the tracepoint
8548-
* stuff does not actually need it. So temporarily drop ctx->mutex. As per
8549-
* perf_event_ctx_lock() we already have a reference on ctx.
8550-
*
8551-
* This can result in event getting moved to a different ctx, but that
8552-
* does not affect the tracepoint state.
8553-
*/
8554-
mutex_unlock(&ctx->mutex);
8555-
ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
8556-
mutex_lock(&ctx->mutex);
8557-
8558-
return ret;
8559-
}
8560-
85618621
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
85628622
{
8563-
char *filter_str;
85648623
int ret = -EINVAL;
8565-
8566-
if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
8567-
!IS_ENABLED(CONFIG_EVENT_TRACING)) &&
8568-
!has_addr_filter(event))
8569-
return -EINVAL;
8624+
char *filter_str;
85708625

85718626
filter_str = strndup_user(arg, PAGE_SIZE);
85728627
if (IS_ERR(filter_str))
85738628
return PTR_ERR(filter_str);
85748629

8575-
if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
8576-
event->attr.type == PERF_TYPE_TRACEPOINT)
8577-
ret = perf_tracepoint_set_filter(event, filter_str);
8578-
else if (has_addr_filter(event))
8630+
#ifdef CONFIG_EVENT_TRACING
8631+
if (perf_event_is_tracing(event)) {
8632+
struct perf_event_context *ctx = event->ctx;
8633+
8634+
/*
8635+
* Beware, here be dragons!!
8636+
*
8637+
* the tracepoint muck will deadlock against ctx->mutex, but
8638+
* the tracepoint stuff does not actually need it. So
8639+
* temporarily drop ctx->mutex. As per perf_event_ctx_lock() we
8640+
* already have a reference on ctx.
8641+
*
8642+
* This can result in event getting moved to a different ctx,
8643+
* but that does not affect the tracepoint state.
8644+
*/
8645+
mutex_unlock(&ctx->mutex);
8646+
ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
8647+
mutex_lock(&ctx->mutex);
8648+
} else
8649+
#endif
8650+
if (has_addr_filter(event))
85798651
ret = perf_event_set_addr_filter(event, filter_str);
85808652

85818653
kfree(filter_str);

kernel/trace/trace_event_perf.c

+49
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/module.h>
99
#include <linux/kprobes.h>
1010
#include "trace.h"
11+
#include "trace_probe.h"
1112

1213
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
1314

@@ -237,6 +238,54 @@ void perf_trace_destroy(struct perf_event *p_event)
237238
mutex_unlock(&event_mutex);
238239
}
239240

241+
#ifdef CONFIG_KPROBE_EVENTS
242+
/*
 * perf_kprobe_init - create a local kprobe/kretprobe and bind it to a perf event.
 * @p_event:     event whose attr supplies kprobe_func (user pointer to the
 *               symbol name, may be 0), kprobe_addr and probe_offset
 * @is_retprobe: true to create a kretprobe, false to create a kprobe
 *
 * The symbol name, if any, is copied from user space into a temporary
 * KSYM_NAME_LEN buffer that is always freed before returning. An empty name
 * is treated the same as no name at all.
 *
 * Returns 0 on success or a negative errno: -ENOMEM if the name buffer cannot
 * be allocated, -E2BIG if the user string does not fit in KSYM_NAME_LEN
 * bytes including its terminating NUL, the error from strncpy_from_user(),
 * create_local_trace_kprobe() or perf_trace_event_init() otherwise.
 */
int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
{
	struct trace_event_call *tp_event;
	char *func = NULL;
	int ret;

	if (p_event->attr.kprobe_func) {
		func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL);
		if (!func)
			return -ENOMEM;
		ret = strncpy_from_user(
			func, u64_to_user_ptr(p_event->attr.kprobe_func),
			KSYM_NAME_LEN);
		/*
		 * A return value equal to the buffer size means the whole
		 * buffer was filled without reaching the terminating NUL,
		 * so func is not a valid C string. Reject it instead of
		 * letting later string handling run off the end.
		 */
		if (ret == KSYM_NAME_LEN)
			ret = -E2BIG;
		if (ret < 0)
			goto out;

		/* An empty name means "probe by address only". */
		if (func[0] == '\0') {
			kfree(func);
			func = NULL;
		}
	}

	tp_event = create_local_trace_kprobe(
		func, (void *)(unsigned long)(p_event->attr.kprobe_addr),
		p_event->attr.probe_offset, is_retprobe);
	if (IS_ERR(tp_event)) {
		ret = PTR_ERR(tp_event);
		goto out;
	}

	ret = perf_trace_event_init(tp_event, p_event);
	if (ret)
		destroy_local_trace_kprobe(tp_event);
out:
	/* func is only a scratch copy; kfree(NULL) is a no-op. */
	kfree(func);
	return ret;
}
279+
280+
void perf_kprobe_destroy(struct perf_event *p_event)
281+
{
282+
perf_trace_event_close(p_event);
283+
perf_trace_event_unreg(p_event);
284+
285+
destroy_local_trace_kprobe(p_event->tp_event);
286+
}
287+
#endif /* CONFIG_KPROBE_EVENTS */
288+
240289
int perf_trace_add(struct perf_event *p_event, int flags)
241290
{
242291
struct trace_event_call *tp_event = p_event->tp_event;

kernel/trace/trace_kprobe.c

+83-8
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,14 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
438438
disable_kprobe(&tk->rp.kp);
439439
wait = 1;
440440
}
441+
442+
/*
443+
* if tk is not added to any list, it must be a local trace_kprobe
444+
* created with perf_event_open. We don't need to wait for these
445+
* trace_kprobes
446+
*/
447+
if (list_empty(&tk->list))
448+
wait = 0;
441449
out:
442450
if (wait) {
443451
/*
@@ -1313,12 +1321,9 @@ static struct trace_event_functions kprobe_funcs = {
13131321
.trace = print_kprobe_event
13141322
};
13151323

1316-
static int register_kprobe_event(struct trace_kprobe *tk)
1324+
static inline void init_trace_event_call(struct trace_kprobe *tk,
1325+
struct trace_event_call *call)
13171326
{
1318-
struct trace_event_call *call = &tk->tp.call;
1319-
int ret;
1320-
1321-
/* Initialize trace_event_call */
13221327
INIT_LIST_HEAD(&call->class->fields);
13231328
if (trace_kprobe_is_return(tk)) {
13241329
call->event.funcs = &kretprobe_funcs;
@@ -1327,16 +1332,26 @@ static int register_kprobe_event(struct trace_kprobe *tk)
13271332
call->event.funcs = &kprobe_funcs;
13281333
call->class->define_fields = kprobe_event_define_fields;
13291334
}
1335+
1336+
call->flags = TRACE_EVENT_FL_KPROBE;
1337+
call->class->reg = kprobe_register;
1338+
call->data = tk;
1339+
}
1340+
1341+
static int register_kprobe_event(struct trace_kprobe *tk)
1342+
{
1343+
struct trace_event_call *call = &tk->tp.call;
1344+
int ret = 0;
1345+
1346+
init_trace_event_call(tk, call);
1347+
13301348
if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
13311349
return -ENOMEM;
13321350
ret = register_trace_event(&call->event);
13331351
if (!ret) {
13341352
kfree(call->print_fmt);
13351353
return -ENODEV;
13361354
}
1337-
call->flags = TRACE_EVENT_FL_KPROBE;
1338-
call->class->reg = kprobe_register;
1339-
call->data = tk;
13401355
ret = trace_add_event_call(call);
13411356
if (ret) {
13421357
pr_info("Failed to register kprobe event: %s\n",
@@ -1358,6 +1373,66 @@ static int unregister_kprobe_event(struct trace_kprobe *tk)
13581373
return ret;
13591374
}
13601375

1376+
#ifdef CONFIG_PERF_EVENTS
1377+
/* create a trace_kprobe, but don't add it to global lists */
1378+
struct trace_event_call *
1379+
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
1380+
bool is_return)
1381+
{
1382+
struct trace_kprobe *tk;
1383+
int ret;
1384+
char *event;
1385+
1386+
/*
1387+
* local trace_kprobes are not added to probe_list, so they are never
1388+
* searched in find_trace_kprobe(). Therefore, there is no concern of
1389+
* duplicated name here.
1390+
*/
1391+
event = func ? func : "DUMMY_EVENT";
1392+
1393+
tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
1394+
offs, 0 /* maxactive */, 0 /* nargs */,
1395+
is_return);
1396+
1397+
if (IS_ERR(tk)) {
1398+
pr_info("Failed to allocate trace_probe.(%d)\n",
1399+
(int)PTR_ERR(tk));
1400+
return ERR_CAST(tk);
1401+
}
1402+
1403+
init_trace_event_call(tk, &tk->tp.call);
1404+
1405+
if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
1406+
ret = -ENOMEM;
1407+
goto error;
1408+
}
1409+
1410+
ret = __register_trace_kprobe(tk);
1411+
if (ret < 0)
1412+
goto error;
1413+
1414+
return &tk->tp.call;
1415+
error:
1416+
free_trace_kprobe(tk);
1417+
return ERR_PTR(ret);
1418+
}
1419+
1420+
void destroy_local_trace_kprobe(struct trace_event_call *event_call)
1421+
{
1422+
struct trace_kprobe *tk;
1423+
1424+
tk = container_of(event_call, struct trace_kprobe, tp.call);
1425+
1426+
if (trace_probe_is_enabled(&tk->tp)) {
1427+
WARN_ON(1);
1428+
return;
1429+
}
1430+
1431+
__unregister_trace_kprobe(tk);
1432+
free_trace_kprobe(tk);
1433+
}
1434+
#endif /* CONFIG_PERF_EVENTS */
1435+
13611436
/* Make a tracefs interface for controlling probe points */
13621437
static __init int init_kprobe_trace(void)
13631438
{

kernel/trace/trace_probe.h

+7
Original file line numberDiff line numberDiff line change
@@ -404,3 +404,10 @@ store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
404404
}
405405

406406
extern int set_print_fmt(struct trace_probe *tp, bool is_return);
407+
408+
#ifdef CONFIG_PERF_EVENTS
409+
extern struct trace_event_call *
410+
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
411+
bool is_return);
412+
extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
413+
#endif

0 commit comments

Comments
 (0)