From 2f5f6ad9390c1ebbf738d130dbfe80b60eaa167e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 8 Aug 2011 16:57:47 -0400 Subject: [PATCH 001/282] ftrace: Pass ftrace_ops as third parameter to function trace callback Currently the function trace callback receives only the ip and parent_ip of the function that it traced. It would be more powerful to also return the ops that registered the function as well. This allows the same function to act differently depending on what ftrace_ops registered it. Link: http://lkml.kernel.org/r/20120612225424.267254552@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 4 ++ arch/x86/kernel/entry_64.S | 1 + include/linux/ftrace.h | 16 ++++- kernel/trace/ftrace.c | 101 +++++++++++++++++++++--------- kernel/trace/trace_event_perf.c | 3 +- kernel/trace/trace_events.c | 3 +- kernel/trace/trace_functions.c | 9 ++- kernel/trace/trace_irqsoff.c | 3 +- kernel/trace/trace_sched_wakeup.c | 2 +- kernel/trace/trace_selftest.c | 15 +++-- kernel/trace/trace_stack.c | 2 +- 11 files changed, 113 insertions(+), 46 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b0767bc0874..783b107eacb 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -32,6 +32,10 @@ #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64) +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif + #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7d65133b51b..2b4f94c5dc6 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -79,6 +79,7 @@ ENTRY(ftrace_caller) MCOUNT_SAVE_FRAME + leaq function_trace_op, %rdx movq 0x38(%rsp), %rdi movq 8(%rbp), %rsi subq $MCOUNT_INSN_SIZE, %rdi diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 55e6d63d46d..2d596411988 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -18,6 +18,15 @@ #include +/* + * If the arch supports passing the variable contents of + * function_trace_op as the third parameter back from the + * mcount call, then the arch should define this as 1. 
+ */ +#ifndef ARCH_SUPPORTS_FTRACE_OPS +#define ARCH_SUPPORTS_FTRACE_OPS 0 +#endif + struct module; struct ftrace_hash; @@ -29,7 +38,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); +struct ftrace_ops; + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are @@ -163,7 +175,7 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) return *this_cpu_ptr(ops->disabled); } -extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op); #else /* !CONFIG_FUNCTION_TRACER */ /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b4f20fba09f..4f2ab9352a6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -64,12 +64,19 @@ #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) +static struct ftrace_ops ftrace_list_end __read_mostly = { + .func = ftrace_stub, +}; + /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; /* Quick disabling of function tracer. */ -int function_trace_stop; +int function_trace_stop __read_mostly; + +/* Current function tracing op */ +struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; /* List for set_ftrace_pid's pids. */ LIST_HEAD(ftrace_pids); @@ -86,10 +93,6 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops ftrace_list_end __read_mostly = { - .func = ftrace_stub, -}; - static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; @@ -100,8 +103,14 @@ ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; -static void -ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op); +#else +/* See comment below, where ftrace_ops_list_func is defined */ +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); +#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) +#endif /* * Traverse the ftrace_global_list, invoking all entries. The reason that we @@ -112,29 +121,29 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); * * Silly Alpha and silly pointer-speculation compiler optimizations! 
*/ -static void ftrace_global_list_func(unsigned long ip, - unsigned long parent_ip) +static void +ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { - struct ftrace_ops *op; - if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) return; trace_recursion_set(TRACE_GLOBAL_BIT); op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { - op->func(ip, parent_ip); + op->func(ip, parent_ip, op); op = rcu_dereference_raw(op->next); /*see above*/ }; trace_recursion_clear(TRACE_GLOBAL_BIT); } -static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip); + ftrace_pid_function(ip, parent_ip, op); } static void set_ftrace_pid_function(ftrace_func_t func) @@ -163,12 +172,13 @@ void clear_ftrace_function(void) * For those archs that do not test ftrace_trace_stop in their * mcount call site, we need to do it from C. */ -static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { if (function_trace_stop) return; - __ftrace_trace_function(ip, parent_ip); + __ftrace_trace_function(ip, parent_ip, op); } #endif @@ -230,15 +240,24 @@ static void update_ftrace_function(void) /* * If we are at the end of the list and this ops is - * not dynamic, then have the mcount trampoline call - * the function directly + * not dynamic and the arch supports passing ops, then have the + * mcount trampoline call the function directly. */ if (ftrace_ops_list == &ftrace_list_end || (ftrace_ops_list->next == &ftrace_list_end && - !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC))) + !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && + ARCH_SUPPORTS_FTRACE_OPS)) { + /* Set the ftrace_ops that the arch callback uses */ + if (ftrace_ops_list == &global_ops) + function_trace_op = ftrace_global_list; + else + function_trace_op = ftrace_ops_list; func = ftrace_ops_list->func; - else + } else { + /* Just use the default ftrace_ops */ + function_trace_op = &ftrace_list_end; func = ftrace_ops_list_func; + } #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST ftrace_trace_function = func; @@ -773,7 +792,8 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) } static void -function_profile_call(unsigned long ip, unsigned long parent_ip) +function_profile_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; @@ -803,7 +823,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) #ifdef CONFIG_FUNCTION_GRAPH_TRACER static int profile_graph_entry(struct ftrace_graph_ent *trace) { - function_profile_call(trace->func, 0); + function_profile_call(trace->func, 0, NULL); return 1; } @@ -2790,8 +2810,8 @@ static int __init ftrace_mod_cmd_init(void) } device_initcall(ftrace_mod_cmd_init); -static void -function_trace_probe_call(unsigned long ip, unsigned long parent_ip) +static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct ftrace_func_probe *entry; struct hlist_head *hhd; @@ -3942,10 +3962,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) #endif /* CONFIG_DYNAMIC_FTRACE */ static void -ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) 
+ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { - struct ftrace_ops *op; - if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) return; @@ -3959,7 +3978,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) while (op != &ftrace_list_end) { if (!ftrace_function_local_disabled(op) && ftrace_ops_test(op, ip)) - op->func(ip, parent_ip); + op->func(ip, parent_ip, op); op = rcu_dereference_raw(op->next); }; @@ -3971,8 +3990,9 @@ static struct ftrace_ops control_ops = { .func = ftrace_ops_control_func, }; -static void -ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) +static inline void +__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ignored) { struct ftrace_ops *op; @@ -3988,13 +4008,32 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) op = rcu_dereference_raw(ftrace_ops_list); while (op != &ftrace_list_end) { if (ftrace_ops_test(op, ip)) - op->func(ip, parent_ip); + op->func(ip, parent_ip, op); op = rcu_dereference_raw(op->next); }; preempt_enable_notrace(); trace_recursion_clear(TRACE_INTERNAL_BIT); } +/* + * Some archs only support passing ip and parent_ip. Even though + * the list function ignores the op parameter, we do not want any + * C side effects, where a function is called without the caller + * sending a third parameter. + */ +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) +{ + __ftrace_ops_list_func(ip, parent_ip, NULL); +} +#else +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) +{ + __ftrace_ops_list_func(ip, parent_ip, NULL); +} +#endif + static void clear_ftrace_swapper(void) { struct task_struct *p; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index fee3752ae8f..a872a9a298a 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -258,7 +258,8 @@ EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); #ifdef CONFIG_FUNCTION_TRACER static void -perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) +perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops) { struct ftrace_entry *entry; struct hlist_head *head; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 29111da1d10..88daa5177bf 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1681,7 +1681,8 @@ static __init void event_trace_self_tests(void) static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); static void -function_test_events_call(unsigned long ip, unsigned long parent_ip) +function_test_events_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct ring_buffer_event *event; struct ring_buffer *buffer; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c7b0c6a7db0..fceb7a9aa06 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -48,7 +48,8 @@ static void function_trace_start(struct trace_array *tr) } static void -function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) +function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -75,7 +76,8 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) } static void -function_trace_call(unsigned long ip, unsigned long parent_ip) 
+function_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -106,7 +108,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } static void -function_stack_trace_call(unsigned long ip, unsigned long parent_ip) +function_stack_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 99d20e92036..2862c77f95d 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -136,7 +136,8 @@ static int func_prolog_dec(struct trace_array *tr, * irqsoff uses its own tracer function to keep the overhead down: */ static void -irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) +irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ff791ea48b5..0caf4f5da56 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -108,7 +108,7 @@ out_enable: * wakeup uses its own tracer function to keep the overhead down: */ static void -wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) +wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 288541f977f..9ae40c823af 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -103,35 +103,40 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) static int trace_selftest_test_probe1_cnt; static void trace_selftest_test_probe1_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op) { trace_selftest_test_probe1_cnt++; } static int trace_selftest_test_probe2_cnt; static void trace_selftest_test_probe2_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op) { trace_selftest_test_probe2_cnt++; } static int trace_selftest_test_probe3_cnt; static void trace_selftest_test_probe3_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op) { trace_selftest_test_probe3_cnt++; } static int trace_selftest_test_global_cnt; static void trace_selftest_test_global_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op) { trace_selftest_test_global_cnt++; } static int trace_selftest_test_dyn_cnt; static void trace_selftest_test_dyn_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op) { trace_selftest_test_dyn_cnt++; } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index d4545f49242..e20006d5fb6 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -111,7 +111,7 @@ static inline void check_stack(void) } static void -stack_trace_call(unsigned long ip, unsigned long parent_ip) +stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op) { int cpu; From ccf3672d530170c98c734dfc5db07d64bcbad2ad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 5 Jun 2012 09:44:25 -0400 Subject: [PATCH 002/282] ftrace: Consolidate arch dependent functions with 'list' function As the function tracer 
starts to get more features, the support for theses features will spread out throughout the different architectures over time. These features boil down to what each arch does in the mcount trampoline (the ftrace_caller). Currently there's two features that are not the same throughout the archs. 1) Support to stop function tracing before the callback 2) passing of the ftrace ops Both of these require placing an indirect function to support the features if the mcount trampoline does not. On a side note, for all architectures, when more than one callback is registered to the function tracer, an intermediate 'list' function is called by the mcount trampoline to iterate through the callbacks that are registered. Instead of making a separate function for each of these features, and requiring several indirect calls, just use the single 'list' function as the intermediate, to handle all cases. If an arch does not support the 'stop function tracing' or the passing of ftrace ops, just force it to use the list function that will handle the features required. This makes the code cleaner and simpler and removes a lot of #ifdefs in the code. Link: http://lkml.kernel.org/r/20120612225424.495625483@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 13 ++++++++++++ kernel/trace/ftrace.c | 45 ++++-------------------------------------- 2 files changed, 17 insertions(+), 41 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2d596411988..3651fdc3bec 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -27,6 +27,19 @@ #define ARCH_SUPPORTS_FTRACE_OPS 0 #endif +/* + * If the arch's mcount caller does not support all of ftrace's + * features, then it must call an indirect function that + * does. Or at least does enough to prevent any unwelcomed side effects. + */ +#if !defined(CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST) || \ + !ARCH_SUPPORTS_FTRACE_OPS +# define FTRACE_FORCE_LIST_FUNC 1 +#else +# define FTRACE_FORCE_LIST_FUNC 0 +#endif + + struct module; struct ftrace_hash; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4f2ab9352a6..4cbca2e6eb7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -97,8 +97,6 @@ static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; -ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; @@ -162,26 +160,9 @@ static void set_ftrace_pid_function(ftrace_func_t func) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; - __ftrace_trace_function = ftrace_stub; - __ftrace_trace_function_delay = ftrace_stub; ftrace_pid_function = ftrace_stub; } -#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST -/* - * For those archs that do not test ftrace_trace_stop in their - * mcount call site, we need to do it from C. 
- */ -static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) -{ - if (function_trace_stop) - return; - - __ftrace_trace_function(ip, parent_ip, op); -} -#endif - static void control_ops_disable_all(struct ftrace_ops *ops) { int cpu; @@ -246,7 +227,7 @@ static void update_ftrace_function(void) if (ftrace_ops_list == &ftrace_list_end || (ftrace_ops_list->next == &ftrace_list_end && !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && - ARCH_SUPPORTS_FTRACE_OPS)) { + !FTRACE_FORCE_LIST_FUNC)) { /* Set the ftrace_ops that the arch callback uses */ if (ftrace_ops_list == &global_ops) function_trace_op = ftrace_global_list; @@ -259,18 +240,7 @@ static void update_ftrace_function(void) func = ftrace_ops_list_func; } -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST ftrace_trace_function = func; -#else -#ifdef CONFIG_DYNAMIC_FTRACE - /* do not update till all functions have been modified */ - __ftrace_trace_function_delay = func; -#else - __ftrace_trace_function = func; -#endif - ftrace_trace_function = - (func == ftrace_stub) ? func : ftrace_test_stop_func; -#endif } static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) @@ -1902,16 +1872,6 @@ static void ftrace_run_update_code(int command) */ arch_ftrace_update_code(command); -#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - /* - * For archs that call ftrace_test_stop_func(), we must - * wait till after we update all the function callers - * before we update the callback. This keeps different - * ops that record different functions from corrupting - * each other. - */ - __ftrace_trace_function = __ftrace_trace_function_delay; -#endif function_trace_stop--; ret = ftrace_arch_code_modify_post_process(); @@ -3996,6 +3956,9 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, { struct ftrace_ops *op; + if (function_trace_stop) + return; + if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) return; From a1e2e31d175a1349274eba3465d17616c6725f8c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 9 Aug 2011 12:50:46 -0400 Subject: [PATCH 003/282] ftrace: Return pt_regs to function trace callback Return as the 4th paramater to the function tracer callback the pt_regs. Later patches that implement regs passing for the architectures will require having the ftrace_ops set the SAVE_REGS flag, which will tell the arch to take the time to pass a full set of pt_regs to the ftrace_ops callback function. If the arch does not support it then it should pass NULL. 
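As a concrete illustration of the calling convention described above (a minimal sketch, not part of the patch; my_callback is a hypothetical name), a callback written against the new prototype must tolerate a NULL regs pointer:

    #include <linux/kernel.h>
    #include <linux/ftrace.h>
    #include <linux/ptrace.h>

    /* Hypothetical callback using the extended prototype. */
    static void notrace my_callback(unsigned long ip, unsigned long parent_ip,
                                    struct ftrace_ops *op, struct pt_regs *regs)
    {
            if (regs)
                    /* The arch saved registers for this call site */
                    trace_printk("%pS: regs ip=%lx\n",
                                 (void *)ip, instruction_pointer(regs));
            else
                    /* Arch passed NULL; only ip and parent_ip are available */
                    trace_printk("%pS called from %pS\n",
                                 (void *)ip, (void *)parent_ip);
    }
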
If an arch can pass full regs, then it should define: ARCH_SUPPORTS_FTRACE_SAVE_REGS to 1 Link: http://lkml.kernel.org/r/20120702201821.019966811@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 +++-- kernel/trace/ftrace.c | 37 ++++++++++++++++++------------- kernel/trace/trace_event_perf.c | 2 +- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_functions.c | 7 +++--- kernel/trace/trace_irqsoff.c | 2 +- kernel/trace/trace_sched_wakeup.c | 3 ++- kernel/trace/trace_selftest.c | 15 ++++++++----- kernel/trace/trace_stack.c | 3 ++- 9 files changed, 47 insertions(+), 30 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3651fdc3bec..e4202881fb0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -54,7 +55,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, struct ftrace_ops; typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op); + struct ftrace_ops *op, struct pt_regs *regs); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are @@ -188,7 +189,8 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) return *this_cpu_ptr(ops->disabled); } -extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op); +extern void ftrace_stub(unsigned long a0, unsigned long a1, + struct ftrace_ops *op, struct pt_regs *regs); #else /* !CONFIG_FUNCTION_TRACER */ /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4cbca2e6eb7..6ff07ad0ede 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -103,7 +103,7 @@ static struct ftrace_ops control_ops; #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op); + struct ftrace_ops *op, struct pt_regs *regs); #else /* See comment below, where ftrace_ops_list_func is defined */ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); @@ -121,7 +121,7 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); */ static void ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) + struct ftrace_ops *op, struct pt_regs *regs) { if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) return; @@ -129,19 +129,19 @@ ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, trace_recursion_set(TRACE_GLOBAL_BIT); op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { - op->func(ip, parent_ip, op); + op->func(ip, parent_ip, op, regs); op = rcu_dereference_raw(op->next); /*see above*/ }; trace_recursion_clear(TRACE_GLOBAL_BIT); } static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) + struct ftrace_ops *op, struct pt_regs *regs) { if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip, op); + ftrace_pid_function(ip, parent_ip, op, regs); } static void set_ftrace_pid_function(ftrace_func_t func) @@ -763,7 +763,7 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) static void function_profile_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops) + struct ftrace_ops *ops, struct pt_regs *regs) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; @@ -793,7 +793,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip, #ifdef 
CONFIG_FUNCTION_GRAPH_TRACER static int profile_graph_entry(struct ftrace_graph_ent *trace) { - function_profile_call(trace->func, 0, NULL); + function_profile_call(trace->func, 0, NULL, NULL); return 1; } @@ -2771,7 +2771,7 @@ static int __init ftrace_mod_cmd_init(void) device_initcall(ftrace_mod_cmd_init); static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct ftrace_func_probe *entry; struct hlist_head *hhd; @@ -3923,7 +3923,7 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) static void ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) + struct ftrace_ops *op, struct pt_regs *regs) { if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) return; @@ -3938,7 +3938,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, while (op != &ftrace_list_end) { if (!ftrace_function_local_disabled(op) && ftrace_ops_test(op, ip)) - op->func(ip, parent_ip, op); + op->func(ip, parent_ip, op, regs); op = rcu_dereference_raw(op->next); }; @@ -3952,7 +3952,7 @@ static struct ftrace_ops control_ops = { static inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ignored) + struct ftrace_ops *ignored, struct pt_regs *regs) { struct ftrace_ops *op; @@ -3971,7 +3971,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, op = rcu_dereference_raw(ftrace_ops_list); while (op != &ftrace_list_end) { if (ftrace_ops_test(op, ip)) - op->func(ip, parent_ip, op); + op->func(ip, parent_ip, op, regs); op = rcu_dereference_raw(op->next); }; preempt_enable_notrace(); @@ -3983,17 +3983,24 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * the list function ignores the op parameter, we do not want any * C side effects, where a function is called without the caller * sending a third parameter. + * Archs are to support both the regs and ftrace_ops at the same time. + * If they support ftrace_ops, it is assumed they support regs. + * If call backs want to use regs, they must either check for regs + * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS. + * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved. + * An architecture can pass partial regs with ftrace_ops and still + * set the ARCH_SUPPORT_FTARCE_OPS. 
*/ #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) + struct ftrace_ops *op, struct pt_regs *regs) { - __ftrace_ops_list_func(ip, parent_ip, NULL); + __ftrace_ops_list_func(ip, parent_ip, NULL, regs); } #else static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) { - __ftrace_ops_list_func(ip, parent_ip, NULL); + __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } #endif diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a872a9a298a..9824419c840 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -259,7 +259,7 @@ EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); #ifdef CONFIG_FUNCTION_TRACER static void perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops) + struct ftrace_ops *ops, struct pt_regs *pt_regs) { struct ftrace_entry *entry; struct hlist_head *head; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 88daa5177bf..8c669683368 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1682,7 +1682,7 @@ static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); static void function_test_events_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct ring_buffer_event *event; struct ring_buffer *buffer; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index fceb7a9aa06..5675ebd541f 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -49,7 +49,7 @@ static void function_trace_start(struct trace_array *tr) static void function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -77,7 +77,8 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip, static void function_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) + struct ftrace_ops *op, struct pt_regs *pt_regs) + { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -109,7 +110,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2862c77f95d..c7a9ba936de 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -137,7 +137,7 @@ static int func_prolog_dec(struct trace_array *tr, */ static void irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op) + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 0caf4f5da56..7547e36d483 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -108,7 +108,8 @@ out_enable: * wakeup uses its own tracer function to keep the overhead down: */ static void -wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op) +wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs 
*pt_regs) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 9ae40c823af..add37e019fd 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -104,7 +104,8 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) static int trace_selftest_test_probe1_cnt; static void trace_selftest_test_probe1_func(unsigned long ip, unsigned long pip, - struct ftrace_ops *op) + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_probe1_cnt++; } @@ -112,7 +113,8 @@ static void trace_selftest_test_probe1_func(unsigned long ip, static int trace_selftest_test_probe2_cnt; static void trace_selftest_test_probe2_func(unsigned long ip, unsigned long pip, - struct ftrace_ops *op) + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_probe2_cnt++; } @@ -120,7 +122,8 @@ static void trace_selftest_test_probe2_func(unsigned long ip, static int trace_selftest_test_probe3_cnt; static void trace_selftest_test_probe3_func(unsigned long ip, unsigned long pip, - struct ftrace_ops *op) + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_probe3_cnt++; } @@ -128,7 +131,8 @@ static void trace_selftest_test_probe3_func(unsigned long ip, static int trace_selftest_test_global_cnt; static void trace_selftest_test_global_func(unsigned long ip, unsigned long pip, - struct ftrace_ops *op) + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_global_cnt++; } @@ -136,7 +140,8 @@ static void trace_selftest_test_global_func(unsigned long ip, static int trace_selftest_test_dyn_cnt; static void trace_selftest_test_dyn_func(unsigned long ip, unsigned long pip, - struct ftrace_ops *op) + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_dyn_cnt++; } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index e20006d5fb6..2fa5328e889 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -111,7 +111,8 @@ static inline void check_stack(void) } static void -stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op) +stack_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { int cpu; From 28fb5dfa783c25dbeeb25a72663f8066a3a517f5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Aug 2011 22:00:55 -0400 Subject: [PATCH 004/282] ftrace/x86_32: Push ftrace_ops in as 3rd parameter to function tracer Add support of passing the current ftrace_ops into the 3rd parameter of the callback to the function tracer. 
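For illustration only (a sketch, not part of this patch; the ops and callback names below are invented), passing the current ftrace_ops through to the callback lets a single handler serve several registrations and behave differently depending on which one invoked it:

    static void notrace shared_callback(unsigned long ip, unsigned long parent_ip,
                                        struct ftrace_ops *op, struct pt_regs *regs);

    static struct ftrace_ops count_ops  = { .func = shared_callback };
    static struct ftrace_ops report_ops = { .func = shared_callback };

    static unsigned long hit_count;     /* simplified: not SMP safe */

    static void notrace shared_callback(unsigned long ip, unsigned long parent_ip,
                                        struct ftrace_ops *op, struct pt_regs *regs)
    {
            /* The trampoline (or list function) tells us who registered us */
            if (op == &count_ops)
                    hit_count++;
            else if (op == &report_ops)
                    trace_printk("%pS called from %pS\n",
                                 (void *)ip, (void *)parent_ip);
    }
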
Link: http://lkml.kernel.org/r/20120612225424.942411318@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 2 +- arch/x86/kernel/entry_32.S | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 783b107eacb..b3bb1f3f277 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -32,7 +32,7 @@ #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ -#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64) +#ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 #endif diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f2883747..e3e17a0b7ff 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1111,6 +1111,7 @@ ENTRY(ftrace_caller) pushl %edx movl 0xc(%esp), %eax movl 0x4(%ebp), %edx + leal function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call From 08f6fba503111e0336f2b4d6915a4a18f9b60e51 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 30 Apr 2012 16:20:23 -0400 Subject: [PATCH 005/282] ftrace/x86: Add separate function to save regs Add a way to have different functions calling different trampolines. If a ftrace_ops wants regs saved on the return, then have only the functions with ops registered to save regs. Functions registered by other ops would not be affected, unless the functions overlap. If one ftrace_ops registered functions A, B and C and another ops registered fucntions to save regs on A, and D, then only functions A and D would be saving regs. Function B and C would work as normal. Although A is registered by both ops: normal and saves regs; this is fine as saving the regs is needed to satisfy one of the ops that calls it but the regs are ignored by the other ops function. x86_64 implements the full regs saving, and i386 just passes a NULL for regs to satisfy the ftrace_ops passing. Where an arch must supply both regs and ftrace_ops parameters, even if regs is just NULL. It is OK for an arch to pass NULL regs. All function trace users that require regs passing must add the flag FTRACE_OPS_FL_SAVE_REGS when registering the ftrace_ops. If the arch does not support saving regs then the ftrace_ops will fail to register. The flag FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED may be set that will prevent the ftrace_ops from failing to register. In this case, the handler may either check if regs is not NULL or check if ARCH_SUPPORTS_FTRACE_SAVE_REGS. If the arch supports passing regs it will set this macro and pass regs for ops that request them. All other archs will just pass NULL. 
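To make the registration side concrete (a hedged sketch rather than part of the patch; my_regs_ops and my_regs_callback are made-up names), a caller that wants registers but can live without them sets the IF_SUPPORTED flag and checks what it actually gets:

    #include <linux/ftrace.h>
    #include <linux/ptrace.h>

    static void notrace my_regs_callback(unsigned long ip, unsigned long parent_ip,
                                         struct ftrace_ops *op, struct pt_regs *regs)
    {
    #ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
            /* The arch promises a full pt_regs here */
            trace_printk("%pS: regs ip=%lx\n", (void *)ip, instruction_pointer(regs));
    #else
            /* Otherwise regs may be NULL and must be checked */
            if (regs)
                    trace_printk("%pS: regs ip=%lx\n", (void *)ip,
                                 instruction_pointer(regs));
    #endif
    }

    static struct ftrace_ops my_regs_ops = {
            .func  = my_regs_callback,
            /*
             * SAVE_REGS alone would make registration fail on an arch
             * without ARCH_SUPPORTS_FTRACE_SAVE_REGS; the IF_SUPPORTED
             * variant registers anyway and simply passes regs == NULL.
             */
            .flags = FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED,
    };

    /* Typically registered from module init: register_ftrace_function(&my_regs_ops); */
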
Link: Link: http://lkml.kernel.org/r/20120711195745.107705970@goodmis.org Cc: Alexander van Heukelum Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 47 +++++++++------ arch/x86/kernel/entry_32.S | 4 +- arch/x86/kernel/entry_64.S | 94 ++++++++++++++++++++++++++--- arch/x86/kernel/ftrace.c | 77 ++++++++++++++++++++++-- include/linux/ftrace.h | 107 +++++++++++++++++++++++++++++++--- kernel/trace/ftrace.c | 91 ++++++++++++++++++++++++++--- 6 files changed, 373 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b3bb1f3f277..a8475014c4a 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,27 +3,33 @@ #ifdef __ASSEMBLY__ - .macro MCOUNT_SAVE_FRAME - /* taken from glibc */ - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) + /* skip is set if the stack was already partially adjusted */ + .macro MCOUNT_SAVE_FRAME skip=0 + /* + * We add enough stack to save all regs. + */ + subq $(SS+8-\skip), %rsp + movq %rax, RAX(%rsp) + movq %rcx, RCX(%rsp) + movq %rdx, RDX(%rsp) + movq %rsi, RSI(%rsp) + movq %rdi, RDI(%rsp) + movq %r8, R8(%rsp) + movq %r9, R9(%rsp) + /* Move RIP to its proper location */ + movq SS+8(%rsp), %rdx + movq %rdx, RIP(%rsp) .endm - .macro MCOUNT_RESTORE_FRAME - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp + .macro MCOUNT_RESTORE_FRAME skip=0 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RDI(%rsp), %rdi + movq RSI(%rsp), %rsi + movq RDX(%rsp), %rdx + movq RCX(%rsp), %rcx + movq RAX(%rsp), %rax + addq $(SS+8-\skip), %rsp .endm #endif @@ -34,6 +40,9 @@ #ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 +#ifdef CONFIG_X86_64 +#define ARCH_SUPPORTS_FTRACE_SAVE_REGS +#endif #endif #ifndef __ASSEMBLY__ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index e3e17a0b7ff..5da11d1b85a 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1109,7 +1109,8 @@ ENTRY(ftrace_caller) pushl %eax pushl %ecx pushl %edx - movl 0xc(%esp), %eax + pushl $0 /* Pass NULL as regs pointer */ + movl 4*4(%esp), %eax movl 0x4(%ebp), %edx leal function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax @@ -1118,6 +1119,7 @@ ENTRY(ftrace_caller) ftrace_call: call ftrace_stub + addl $4,%esp /* skip NULL pointer */ popl %edx popl %ecx popl %eax diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 2b4f94c5dc6..52bda2e8f7a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -73,21 +73,34 @@ ENTRY(mcount) retq END(mcount) +/* skip is set if stack has been adjusted */ +.macro ftrace_caller_setup skip=0 + MCOUNT_SAVE_FRAME \skip + + /* Load the ftrace_ops into the 3rd parameter */ + leaq function_trace_op, %rdx + + /* Load ip into the first parameter */ + movq RIP(%rsp), %rdi + subq $MCOUNT_INSN_SIZE, %rdi + /* Load the parent_ip into the second parameter */ + movq 8(%rbp), %rsi +.endm + ENTRY(ftrace_caller) + /* Check if tracing was disabled (quick check) */ cmpl $0, function_trace_stop jne ftrace_stub - MCOUNT_SAVE_FRAME - - leaq function_trace_op, %rdx - movq 0x38(%rsp), %rdi - movq 8(%rbp), %rsi - subq $MCOUNT_INSN_SIZE, %rdi + ftrace_caller_setup + /* regs go into 4th parameter (but make it NULL) */ + movq $0, %rcx GLOBAL(ftrace_call) call ftrace_stub 
MCOUNT_RESTORE_FRAME +ftrace_return: #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -98,6 +111,71 @@ GLOBAL(ftrace_stub) retq END(ftrace_caller) +ENTRY(ftrace_regs_caller) + /* Save the current flags before compare (in SS location)*/ + pushfq + + /* Check if tracing was disabled (quick check) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* skip=8 to skip flags saved in SS */ + ftrace_caller_setup 8 + + /* Save the rest of pt_regs */ + movq %r15, R15(%rsp) + movq %r14, R14(%rsp) + movq %r13, R13(%rsp) + movq %r12, R12(%rsp) + movq %r11, R11(%rsp) + movq %r10, R10(%rsp) + movq %rbp, RBP(%rsp) + movq %rbx, RBX(%rsp) + /* Copy saved flags */ + movq SS(%rsp), %rcx + movq %rcx, EFLAGS(%rsp) + /* Kernel segments */ + movq $__KERNEL_DS, %rcx + movq %rcx, SS(%rsp) + movq $__KERNEL_CS, %rcx + movq %rcx, CS(%rsp) + /* Stack - skipping return address */ + leaq SS+16(%rsp), %rcx + movq %rcx, RSP(%rsp) + + /* regs go into 4th parameter */ + leaq (%rsp), %rcx + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + /* Copy flags back to SS, to restore them */ + movq EFLAGS(%rsp), %rax + movq %rax, SS(%rsp) + + /* restore the rest of pt_regs */ + movq R15(%rsp), %r15 + movq R14(%rsp), %r14 + movq R13(%rsp), %r13 + movq R12(%rsp), %r12 + movq R10(%rsp), %r10 + movq RBP(%rsp), %rbp + movq RBX(%rsp), %rbx + + /* skip=8 to skip flags saved in SS */ + MCOUNT_RESTORE_FRAME 8 + + /* Restore flags */ + popfq + + jmp ftrace_return +ftrace_restore_flags: + popfq + jmp ftrace_stub + +END(ftrace_regs_caller) + + #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) cmpl $0, function_trace_stop @@ -120,7 +198,7 @@ GLOBAL(ftrace_stub) trace: MCOUNT_SAVE_FRAME - movq 0x38(%rsp), %rdi + movq RIP(%rsp), %rdi movq 8(%rbp), %rsi subq $MCOUNT_INSN_SIZE, %rdi @@ -141,7 +219,7 @@ ENTRY(ftrace_graph_caller) MCOUNT_SAVE_FRAME leaq 8(%rbp), %rdi - movq 0x38(%rsp), %rsi + movq RIP(%rsp), %rsi movq (%rbp), %rdx subq $MCOUNT_INSN_SIZE, %rsi diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c3a7cb4bf6e..b90eb1a1307 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -206,6 +206,23 @@ static int ftrace_modify_code(unsigned long ip, unsigned const char *old_code, unsigned const char *new_code); +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +/* + * Should never be called: + * As it is only called by __ftrace_replace_code() which is called by + * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() + * which is called to turn mcount into nops or nops into function calls + * but not to convert a function from not using regs to one that uses + * regs, which ftrace_modify_call() is for. 
+ */ +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + WARN_ON(1); + return -EINVAL; +} +#endif + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -220,6 +237,16 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(ip, old, new); +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS + /* Also update the regs callback function */ + if (!ret) { + ip = (unsigned long)(&ftrace_regs_call); + memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + } +#endif + atomic_dec(&modifying_ftrace_code); return ret; @@ -299,6 +326,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) return add_break(rec->ip, old); } +/* + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + */ +static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + +/* + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + */ +static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS_EN) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; @@ -306,7 +359,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: @@ -316,6 +369,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) /* converting nop to call */ return add_brk_on_nop(rec); + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + ftrace_addr = get_ftrace_old_addr(rec); + /* fall through */ case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_brk_on_call(rec, ftrace_addr); @@ -360,13 +417,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec) * If not, don't touch the breakpoint, we make just create * a disaster. 
*/ - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); + nop = ftrace_call_replace(ip, ftrace_addr); + + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) + goto update; + + /* Check both ftrace_addr and ftrace_old_addr */ + ftrace_addr = get_ftrace_old_addr(rec); nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } + update: return probe_kernel_write((void *)ip, &nop[0], 1); } @@ -405,12 +470,14 @@ static int add_update(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_update_call(rec, ftrace_addr); @@ -455,12 +522,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return finish_update_call(rec, ftrace_addr); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e4202881fb0..ab39990cc43 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -71,12 +71,28 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * could be controled by following calls: * ftrace_function_local_enable * ftrace_function_local_disable + * SAVE_REGS - The ftrace_ops wants regs saved at each function called + * and passed to the callback. If this flag is set, but the + * architecture does not support passing regs + * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the + * ftrace_ops will fail to register, unless the next flag + * is set. + * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the + * handler can handle an arch that does not save regs + * (the handler tests if regs == NULL), then it can set + * this flag instead. It will not fail registering the ftrace_ops + * but, the regs field will be NULL if the arch does not support + * passing regs to the handler. + * Note, if this flag is set, the SAVE_REGS flag will automatically + * get set upon registering the ftrace_ops, if the arch supports it. */ enum { - FTRACE_OPS_FL_ENABLED = 1 << 0, - FTRACE_OPS_FL_GLOBAL = 1 << 1, - FTRACE_OPS_FL_DYNAMIC = 1 << 2, - FTRACE_OPS_FL_CONTROL = 1 << 3, + FTRACE_OPS_FL_ENABLED = 1 << 0, + FTRACE_OPS_FL_GLOBAL = 1 << 1, + FTRACE_OPS_FL_DYNAMIC = 1 << 2, + FTRACE_OPS_FL_CONTROL = 1 << 3, + FTRACE_OPS_FL_SAVE_REGS = 1 << 4, + FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, }; struct ftrace_ops { @@ -254,12 +270,31 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); +/* + * The dyn_ftrace record's flags field is split into two parts. + * the first part which is '0-FTRACE_REF_MAX' is a counter of + * the number of callbacks that have registered the function that + * the dyn_ftrace descriptor represents. + * + * The second part is a mask: + * ENABLED - the function is being traced + * REGS - the record wants the function to save regs + * REGS_EN - the function is set up to save regs. 
+ * + * When a new ftrace_ops is registered and wants a function to save + * pt_regs, the rec->flag REGS is set. When the function has been + * set up to save regs, the REG_EN flag is set. Once a function + * starts saving regs it will do so until all ftrace_ops are removed + * from tracing that function. + */ enum { - FTRACE_FL_ENABLED = (1 << 30), + FTRACE_FL_ENABLED = (1UL << 29), + FTRACE_FL_REGS = (1UL << 30), + FTRACE_FL_REGS_EN = (1UL << 31) }; -#define FTRACE_FL_MASK (0x3UL << 30) -#define FTRACE_REF_MAX ((1 << 30) - 1) +#define FTRACE_FL_MASK (0x7UL << 29) +#define FTRACE_REF_MAX ((1UL << 29) - 1) struct dyn_ftrace { union { @@ -290,9 +325,23 @@ enum { FTRACE_STOP_FUNC_RET = (1 << 4), }; +/* + * The FTRACE_UPDATE_* enum is used to pass information back + * from the ftrace_update_record() and ftrace_test_record() + * functions. These are called by the code update routines + * to find out what is to be done for a given function. + * + * IGNORE - The function is already what we want it to be + * MAKE_CALL - Start tracing the function + * MODIFY_CALL - Stop saving regs for the function + * MODIFY_CALL_REGS - Start saving regs for the function + * MAKE_NOP - Stop tracing the function + */ enum { FTRACE_UPDATE_IGNORE, FTRACE_UPDATE_MAKE_CALL, + FTRACE_UPDATE_MODIFY_CALL, + FTRACE_UPDATE_MODIFY_CALL_REGS, FTRACE_UPDATE_MAKE_NOP, }; @@ -344,7 +393,9 @@ extern int ftrace_dyn_arch_init(void *data); extern void ftrace_replace_code(int enable); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); +extern void ftrace_regs_caller(void); extern void ftrace_call(void); +extern void ftrace_regs_call(void); extern void mcount_call(void); void ftrace_modify_all_code(int command); @@ -352,6 +403,15 @@ void ftrace_modify_all_code(int command); #ifndef FTRACE_ADDR #define FTRACE_ADDR ((unsigned long)ftrace_caller) #endif + +#ifndef FTRACE_REGS_ADDR +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) +#else +# define FTRACE_REGS_ADDR FTRACE_ADDR +#endif +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); extern int ftrace_enable_ftrace_graph_caller(void); @@ -407,6 +467,39 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +/** + * ftrace_modify_call - convert from one addr to another (no nop) + * @rec: the mcount call site record + * @old_addr: the address expected to be currently called to + * @addr: the address to change to + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should be a caller to @old_addr + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. 
+ */ +extern int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr); +#else +/* Should never be called */ +static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return -EINVAL; +} +#endif + /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6ff07ad0ede..c55f7e27461 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -314,6 +314,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) return -EINVAL; +#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS + /* + * If the ftrace_ops specifies SAVE_REGS, then it only can be used + * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. + * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant. + */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS && + !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)) + return -EINVAL; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) + ops->flags |= FTRACE_OPS_FL_SAVE_REGS; +#endif + if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; @@ -1515,6 +1529,12 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, rec->flags++; if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) return; + /* + * If any ops wants regs saved for this function + * then all ops will get saved regs. + */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + rec->flags |= FTRACE_FL_REGS; } else { if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) return; @@ -1606,18 +1626,59 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) if (enable && (rec->flags & ~FTRACE_FL_MASK)) flag = FTRACE_FL_ENABLED; + /* + * If enabling and the REGS flag does not match the REGS_EN, then + * do not ignore this record. Set flags to fail the compare against + * ENABLED. + */ + if (flag && + (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN))) + flag |= FTRACE_FL_REGS; + /* If the state of this record hasn't changed, then do nothing */ if ((rec->flags & FTRACE_FL_ENABLED) == flag) return FTRACE_UPDATE_IGNORE; if (flag) { - if (update) + /* Save off if rec is being enabled (for return value) */ + flag ^= rec->flags & FTRACE_FL_ENABLED; + + if (update) { rec->flags |= FTRACE_FL_ENABLED; - return FTRACE_UPDATE_MAKE_CALL; + if (flag & FTRACE_FL_REGS) { + if (rec->flags & FTRACE_FL_REGS) + rec->flags |= FTRACE_FL_REGS_EN; + else + rec->flags &= ~FTRACE_FL_REGS_EN; + } + } + + /* + * If this record is being updated from a nop, then + * return UPDATE_MAKE_CALL. + * Otherwise, if the EN flag is set, then return + * UPDATE_MODIFY_CALL_REGS to tell the caller to convert + * from the non-save regs, to a save regs function. + * Otherwise, + * return UPDATE_MODIFY_CALL to tell the caller to convert + * from the save regs, to a non-save regs function. 
+ */ + if (flag & FTRACE_FL_ENABLED) + return FTRACE_UPDATE_MAKE_CALL; + else if (rec->flags & FTRACE_FL_REGS_EN) + return FTRACE_UPDATE_MODIFY_CALL_REGS; + else + return FTRACE_UPDATE_MODIFY_CALL; } - if (update) - rec->flags &= ~FTRACE_FL_ENABLED; + if (update) { + /* If there's no more users, clear all flags */ + if (!(rec->flags & ~FTRACE_FL_MASK)) + rec->flags = 0; + else + /* Just disable the record (keep REGS state) */ + rec->flags &= ~FTRACE_FL_ENABLED; + } return FTRACE_UPDATE_MAKE_NOP; } @@ -1652,13 +1713,17 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable) static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { + unsigned long ftrace_old_addr; unsigned long ftrace_addr; int ret; - ftrace_addr = (unsigned long)FTRACE_ADDR; - ret = ftrace_update_record(rec, enable); + if (rec->flags & FTRACE_FL_REGS) + ftrace_addr = (unsigned long)FTRACE_REGS_ADDR; + else + ftrace_addr = (unsigned long)FTRACE_ADDR; + switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; @@ -1668,6 +1733,15 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) case FTRACE_UPDATE_MAKE_NOP: return ftrace_make_nop(NULL, rec, ftrace_addr); + + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + if (rec->flags & FTRACE_FL_REGS) + ftrace_old_addr = (unsigned long)FTRACE_ADDR; + else + ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR; + + return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } return -1; /* unknow ftrace bug */ @@ -2421,8 +2495,9 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "%ps", (void *)rec->ip); if (iter->flags & FTRACE_ITER_ENABLED) - seq_printf(m, " (%ld)", - rec->flags & ~FTRACE_FL_MASK); + seq_printf(m, " (%ld)%s", + rec->flags & ~FTRACE_FL_MASK, + rec->flags & FTRACE_FL_REGS ? " R" : ""); seq_printf(m, "\n"); return 0; From 4de72395ff4cf48e23b61986dbc90b99a7c4ed97 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 5 Jun 2012 20:00:11 -0400 Subject: [PATCH 006/282] ftrace/x86: Add save_regs for i386 function calls Add saving full regs for function tracing on i386. The saving of regs was influenced by patches sent out by Masami Hiramatsu. Link: Link: http://lkml.kernel.org/r/20120711195745.379060003@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 2 -- arch/x86/kernel/entry_32.S | 68 +++++++++++++++++++++++++++++++++++ arch/x86/kernel/ftrace.c | 4 --- 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index a8475014c4a..a6cae0c1720 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -40,10 +40,8 @@ #ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 -#ifdef CONFIG_X86_64 #define ARCH_SUPPORTS_FTRACE_SAVE_REGS #endif -#endif #ifndef __ASSEMBLY__ extern void mcount(void); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5da11d1b85a..46caa5649a5 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1123,6 +1123,7 @@ ftrace_call: popl %edx popl %ecx popl %eax +ftrace_ret: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: @@ -1134,6 +1135,73 @@ ftrace_stub: ret END(ftrace_caller) +ENTRY(ftrace_regs_caller) + pushf /* push flags before compare (in cs location) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* + * i386 does not save SS and ESP when coming from kernel. + * Instead, to get sp, ®s->sp is used (see ptrace.h). 
+ * Unfortunately, that means eflags must be at the same location + * as the current return ip is. We move the return ip into the + * ip location, and move flags into the return ip location. + */ + pushl 4(%esp) /* save return ip into ip slot */ + subl $MCOUNT_INSN_SIZE, (%esp) /* Adjust ip */ + + pushl $0 /* Load 0 into orig_ax */ + pushl %gs + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + + movl 13*4(%esp), %eax /* Get the saved flags */ + movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ + /* clobbering return ip */ + movl $__KERNEL_CS,13*4(%esp) + + movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + movl 0x4(%ebp), %edx /* Load parent ip (2cd parameter) */ + lea (%esp), %ecx + pushl %ecx /* Save pt_regs as 4th parameter */ + leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + addl $4, %esp /* Skip pt_regs */ + movl 14*4(%esp), %eax /* Move flags back into cs */ + movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ + movl 12*4(%esp), %eax /* Get return ip from regs->ip */ + addl $MCOUNT_INSN_SIZE, %eax + movl %eax, 14*4(%esp) /* Put return ip back for ret */ + + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + popl %fs + popl %gs + addl $8, %esp /* Skip orig_ax and ip */ + popf /* Pop flags at end (no addl to corrupt flags) */ + jmp ftrace_ret + +ftrace_restore_flags: + popf + jmp ftrace_stub #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b90eb1a1307..1d414029f1d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -206,7 +206,6 @@ static int ftrace_modify_code(unsigned long ip, unsigned const char *old_code, unsigned const char *new_code); -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS /* * Should never be called: * As it is only called by __ftrace_replace_code() which is called by @@ -221,7 +220,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, WARN_ON(1); return -EINVAL; } -#endif int ftrace_update_ftrace_func(ftrace_func_t func) { @@ -237,7 +235,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(ip, old, new); -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS /* Also update the regs callback function */ if (!ret) { ip = (unsigned long)(&ftrace_regs_call); @@ -245,7 +242,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) new = ftrace_call_replace(ip, (unsigned long)func); ret = ftrace_modify_code(ip, old, new); } -#endif atomic_dec(&modifying_ftrace_code); From e4ea3f6b1bf3d489674a3660db652636e50186f9 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 19 Jul 2012 13:04:47 -0400 Subject: [PATCH 007/282] ftrace/x86_32: Simplify parameter setup for ftrace_regs_caller The final position of the stack after saving regs and setting up the parameters for ftrace_regs_call, is the position of the pt_regs needed for the 4th parameter. Instead of saving it into a temporary reg and pushing the reg, simply push the stack pointer. 
Link: http://lkml.kernel.org/r/1342702344.12353.16.camel@gandalf.stny.rr.com Reviewed-by: Masami Hiramatsu Signed-off-by: Uros Bizjak Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_32.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 46caa5649a5..4dc301709e7 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1169,10 +1169,9 @@ ENTRY(ftrace_regs_caller) movl $__KERNEL_CS,13*4(%esp) movl 12*4(%esp), %eax /* Load ip (1st parameter) */ - movl 0x4(%ebp), %edx /* Load parent ip (2cd parameter) */ - lea (%esp), %ecx - pushl %ecx /* Save pt_regs as 4th parameter */ + movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + pushl %esp /* Save pt_regs as 4th parameter */ GLOBAL(ftrace_regs_call) call ftrace_stub From 5767cfeaa9ec7b67c802143394f3ad9f8b174eb8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 3 Jul 2012 16:16:09 -0400 Subject: [PATCH 008/282] ftrace/x86: Remove function_trace_stop check from graph caller The graph caller is called by the mcount callers, which already does the check against the function_trace_stop variable. No reason to check it again. Link: http://lkml.kernel.org/r/20120711195745.588538769@goodmis.org Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_32.S | 3 --- arch/x86/kernel/entry_64.S | 3 --- 2 files changed, 6 deletions(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 4dc301709e7..061ac17ee97 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1241,9 +1241,6 @@ END(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 52bda2e8f7a..38308fa7d7e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -213,9 +213,6 @@ END(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - MCOUNT_SAVE_FRAME leaq 8(%rbp), %rdi From 4740974a6844156c14d741b0080b59d275679a23 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Jul 2012 11:04:44 -0400 Subject: [PATCH 009/282] ftrace: Add default recursion protection for function tracing As more users of the function tracer utility are being added, they do not always add the necessary recursion protection. To protect from function recursion due to tracing, if the callback ftrace_ops does not specifically specify that it protects against recursion (by setting the FTRACE_OPS_FL_RECURSION_SAFE flag), the list operation will be called by the mcount trampoline which adds recursion protection. If the flag is set, then the function will be called directly with no extra protection. Note, the list operation is called if more than one function callback is registered, or if the arch does not support all of the function tracer features. 
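For illustration only (this sketch is not part of the patch, and the ops and callback names are invented): a callback that implements its own re-entry guard can advertise that with the new flag, while a callback that omits the flag is wrapped by the list operation's protection instead. The sketch assumes the four-argument callback signature used elsewhere in this series.

#include <linux/ftrace.h>
#include <linux/percpu.h>

/* Hypothetical example ops, not from this patch series */
static DEFINE_PER_CPU(int, my_cb_busy);

static void my_trace_callback(unsigned long ip, unsigned long parent_ip,
			      struct ftrace_ops *op, struct pt_regs *regs)
{
	int *busy = this_cpu_ptr(&my_cb_busy);

	if (*busy)		/* our own guard against re-entering ourselves */
		return;
	(*busy)++;
	/* ... work that may itself call traced functions ... */
	(*busy)--;
}

static struct ftrace_ops my_trace_ops = {
	.func	= my_trace_callback,
	/* Without RECURSION_SAFE, ftrace adds its own protection */
	.flags	= FTRACE_OPS_FL_RECURSION_SAFE,
};

/* register_ftrace_function(&my_trace_ops) would then call it directly
 * when it is the only ops and the arch supports passing ops. */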
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 5 +++++ kernel/trace/ftrace.c | 10 ++++++++-- kernel/trace/trace_events.c | 1 + kernel/trace/trace_functions.c | 4 ++-- kernel/trace/trace_irqsoff.c | 2 +- kernel/trace/trace_sched_wakeup.c | 2 +- kernel/trace/trace_selftest.c | 7 +++++-- kernel/trace/trace_stack.c | 1 + 8 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ab39990cc43..65a14e47a0d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -85,6 +85,10 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * passing regs to the handler. * Note, if this flag is set, the SAVE_REGS flag will automatically * get set upon registering the ftrace_ops, if the arch supports it. + * RECURSION_SAFE - The ftrace_ops can set this to tell the ftrace infrastructure + * that the call back has its own recursion protection. If it does + * not set this, then the ftrace infrastructure will add recursion + * protection for the caller. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -93,6 +97,7 @@ enum { FTRACE_OPS_FL_CONTROL = 1 << 3, FTRACE_OPS_FL_SAVE_REGS = 1 << 4, FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, + FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6, }; struct ftrace_ops { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c55f7e27461..ad765b4ba42 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -66,6 +66,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; /* ftrace_enabled is a method to turn ftrace on or off */ @@ -221,12 +222,13 @@ static void update_ftrace_function(void) /* * If we are at the end of the list and this ops is - * not dynamic and the arch supports passing ops, then have the - * mcount trampoline call the function directly. + * recursion safe and not dynamic and the arch supports passing ops, + * then have the mcount trampoline call the function directly. 
*/ if (ftrace_ops_list == &ftrace_list_end || (ftrace_ops_list->next == &ftrace_list_end && !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && + (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) && !FTRACE_FORCE_LIST_FUNC)) { /* Set the ftrace_ops that the arch callback uses */ if (ftrace_ops_list == &global_ops) @@ -867,6 +869,7 @@ static void unregister_ftrace_profiler(void) #else static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static int register_ftrace_profiler(void) @@ -1049,6 +1052,7 @@ static struct ftrace_ops global_ops = { .func = ftrace_stub, .notrace_hash = EMPTY_HASH, .filter_hash = EMPTY_HASH, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static DEFINE_MUTEX(ftrace_regex_lock); @@ -3967,6 +3971,7 @@ void __init ftrace_init(void) static struct ftrace_ops global_ops = { .func = ftrace_stub, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static int __init ftrace_nodyn_init(void) @@ -4023,6 +4028,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops control_ops = { .func = ftrace_ops_control_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static inline void diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8c669683368..6825d833a25 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1721,6 +1721,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops trace_ops __initdata = { .func = function_test_events_call, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static __init void event_trace_self_test_with_function(void) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 5675ebd541f..fdff65dff1b 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -153,13 +153,13 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops trace_stack_ops __read_mostly = { .func = function_stack_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; /* Our two options */ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index c7a9ba936de..d98ee8283b2 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -154,7 +154,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops trace_ops __read_mostly = { .func = irqsoff_tracer_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 7547e36d483..02170c00c41 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -130,7 +130,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops trace_ops __read_mostly = { .func = wakeup_tracer_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index add37e019fd..1fb6da85ff8 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -148,19 +148,22 @@ static 
void trace_selftest_test_dyn_func(unsigned long ip, static struct ftrace_ops test_probe1 = { .func = trace_selftest_test_probe1_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_probe2 = { .func = trace_selftest_test_probe2_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_probe3 = { .func = trace_selftest_test_probe3_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_global = { - .func = trace_selftest_test_global_func, - .flags = FTRACE_OPS_FL_GLOBAL, + .func = trace_selftest_test_global_func, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; static void print_counts(void) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 2fa5328e889..0c1b165778e 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -137,6 +137,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops trace_ops __read_mostly = { .func = stack_trace_call, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static ssize_t From 47239c4d8d6a24796039cada69d477a2b8cac9d6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Jul 2012 11:13:07 -0400 Subject: [PATCH 010/282] ftrace: Only compile ftrace selftest if selftests are enabled No need to compile in the ftrace selftest helper file if selftests are not being executed. Signed-off-by: Steven Rostedt --- kernel/trace/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index b831087c820..837090808aa 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -5,10 +5,12 @@ ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +ifdef CONFIG_FTRACE_SELFTEST # selftest needs instrumentation CFLAGS_trace_selftest_dynamic.o = -pg obj-y += trace_selftest_dynamic.o endif +endif # If unlikely tracing is enabled, do not trace these files ifdef CONFIG_TRACING_BRANCHES From ea701f11da44b44907af226fe5a5f57d2f26eeb2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Jul 2012 13:08:05 -0400 Subject: [PATCH 011/282] ftrace: Add selftest to test function trace recursion protection Add selftests to test the function tracing recursion protection actually does work. It also tests if a ftrace_ops states it will perform its own protection. Although, even if the ftrace_ops states it will protect itself, the ftrace infrastructure may still provide protection if the arch does not support all features or another ftrace_ops is registered. 
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 ++ kernel/trace/ftrace.c | 21 ++++++ kernel/trace/trace_selftest.c | 136 ++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 65a14e47a0d..9962e954a63 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -220,6 +220,10 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1, */ #define register_ftrace_function(ops) ({ 0; }) #define unregister_ftrace_function(ops) ({ 0; }) +static inline int ftrace_nr_registered_ops(void) +{ + return 0; +} static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_stop(void) { } @@ -275,6 +279,8 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); +extern int ftrace_nr_registered_ops(void); + /* * The dyn_ftrace record's flags field is split into two parts. * the first part which is '0-FTRACE_REF_MAX' is a counter of diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ad765b4ba42..528d997c7f9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -111,6 +111,27 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) #endif +/** + * ftrace_nr_registered_ops - return number of ops registered + * + * Returns the number of ftrace_ops registered and tracing functions + */ +int ftrace_nr_registered_ops(void) +{ + struct ftrace_ops *ops; + int cnt = 0; + + mutex_lock(&ftrace_lock); + + for (ops = ftrace_ops_list; + ops != &ftrace_list_end; ops = ops->next) + cnt++; + + mutex_unlock(&ftrace_lock); + + return cnt; +} + /* * Traverse the ftrace_global_list, invoking all entries. The reason that we * can use rcu_dereference_raw() is that elements removed from this list diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 1fb6da85ff8..86422f91dbe 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -406,8 +406,141 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, return ret; } + +static int trace_selftest_recursion_cnt; +static void trace_selftest_test_recursion_func(unsigned long ip, + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) +{ + /* + * This function is registered without the recursion safe flag. + * The ftrace infrastructure should provide the recursion + * protection. If not, this will crash the kernel! + */ + trace_selftest_recursion_cnt++; + DYN_FTRACE_TEST_NAME(); +} + +static void trace_selftest_test_recursion_safe_func(unsigned long ip, + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) +{ + /* + * We said we would provide our own recursion. By calling + * this function again, we should recurse back into this function + * and count again. But this only happens if the arch supports + * all of ftrace features and nothing else is using the function + * tracing utility. 
+ */ + if (trace_selftest_recursion_cnt++) + return; + DYN_FTRACE_TEST_NAME(); +} + +static struct ftrace_ops test_rec_probe = { + .func = trace_selftest_test_recursion_func, +}; + +static struct ftrace_ops test_recsafe_probe = { + .func = trace_selftest_test_recursion_safe_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, +}; + +static int +trace_selftest_function_recursion(void) +{ + int save_ftrace_enabled = ftrace_enabled; + int save_tracer_enabled = tracer_enabled; + char *func_name; + int len; + int ret; + int cnt; + + /* The previous test PASSED */ + pr_cont("PASSED\n"); + pr_info("Testing ftrace recursion: "); + + + /* enable tracing, and record the filter function */ + ftrace_enabled = 1; + tracer_enabled = 1; + + /* Handle PPC64 '.' name */ + func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + len = strlen(func_name); + + ret = ftrace_set_filter(&test_rec_probe, func_name, len, 1); + if (ret) { + pr_cont("*Could not set filter* "); + goto out; + } + + ret = register_ftrace_function(&test_rec_probe); + if (ret) { + pr_cont("*could not register callback* "); + goto out; + } + + DYN_FTRACE_TEST_NAME(); + + unregister_ftrace_function(&test_rec_probe); + + ret = -1; + if (trace_selftest_recursion_cnt != 1) { + pr_cont("*callback not called once (%d)* ", + trace_selftest_recursion_cnt); + goto out; + } + + trace_selftest_recursion_cnt = 1; + + pr_cont("PASSED\n"); + pr_info("Testing ftrace recursion safe: "); + + ret = ftrace_set_filter(&test_recsafe_probe, func_name, len, 1); + if (ret) { + pr_cont("*Could not set filter* "); + goto out; + } + + ret = register_ftrace_function(&test_recsafe_probe); + if (ret) { + pr_cont("*could not register callback* "); + goto out; + } + + DYN_FTRACE_TEST_NAME(); + + unregister_ftrace_function(&test_recsafe_probe); + + /* + * If arch supports all ftrace features, and no other task + * was on the list, we should be fine. + */ + if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC) + cnt = 2; /* Should have recursed */ + else + cnt = 1; + + ret = -1; + if (trace_selftest_recursion_cnt != cnt) { + pr_cont("*callback not called expected %d times (%d)* ", + cnt, trace_selftest_recursion_cnt); + goto out; + } + + ret = 0; +out: + ftrace_enabled = save_ftrace_enabled; + tracer_enabled = save_tracer_enabled; + + return ret; +} #else # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) +# define trace_selftest_function_recursion() ({ 0; }) #endif /* CONFIG_DYNAMIC_FTRACE */ /* @@ -455,7 +588,10 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ret = trace_selftest_startup_dynamic_tracing(trace, tr, DYN_FTRACE_TEST_NAME); + if (ret) + goto out; + ret = trace_selftest_function_recursion(); out: ftrace_enabled = save_ftrace_enabled; tracer_enabled = save_tracer_enabled; From ad97772ad82f57c83968079d0880c71ab126ab04 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Jul 2012 13:45:59 -0400 Subject: [PATCH 012/282] ftrace: Add selftest to test function save-regs support Add selftests to test the save-regs functionality of ftrace. If the arch supports saving regs, then it will make sure that regs is at least not NULL in the callback. If the arch does not support saving regs, it makes sure that the registering of the ftrace_ops that requests saving regs fails. It then tests the registering of the ftrace_ops succeeds if the 'IF_SUPPORTED' flag is set. Then it makes sure that the regs passed to the function is NULL. 
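As a rough illustration (not code from this patch; the names are hypothetical), a caller that wants saved registers but must still load on arches without save-regs support would request them as below and treat a NULL regs pointer as "the arch could not provide them":

#include <linux/ftrace.h>

/* Illustrative counters only; not meant to be exact under concurrency */
static unsigned long my_hits_with_regs, my_hits_without_regs;

static void my_regs_callback(unsigned long ip, unsigned long parent_ip,
			     struct ftrace_ops *op, struct pt_regs *regs)
{
	if (regs)			/* arch saved a full pt_regs for us */
		my_hits_with_regs++;
	else				/* arch cannot pass regs: pointer is NULL */
		my_hits_without_regs++;
}

static struct ftrace_ops my_regs_ops = {
	.func	= my_regs_callback,
	/* callback never calls traced code, so it is recursion safe;
	 * IF_SUPPORTED keeps registration from failing on other arches */
	.flags	= FTRACE_OPS_FL_RECURSION_SAFE |
		  FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED,
};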
Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 2 +- kernel/trace/trace_selftest.c | 114 ++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 55e1f7f0db1..593debefc4e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -472,11 +472,11 @@ extern void trace_find_cmdline(int pid, char comm[]); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; +#endif #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func extern int DYN_FTRACE_TEST_NAME(void); #define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 extern int DYN_FTRACE_TEST_NAME2(void); -#endif extern int ring_buffer_expanded; extern bool tracing_selftest_disabled; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 86422f91dbe..1003a4d5eb2 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -543,6 +543,116 @@ out: # define trace_selftest_function_recursion() ({ 0; }) #endif /* CONFIG_DYNAMIC_FTRACE */ +static enum { + TRACE_SELFTEST_REGS_START, + TRACE_SELFTEST_REGS_FOUND, + TRACE_SELFTEST_REGS_NOT_FOUND, +} trace_selftest_regs_stat; + +static void trace_selftest_test_regs_func(unsigned long ip, + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) +{ + if (pt_regs) + trace_selftest_regs_stat = TRACE_SELFTEST_REGS_FOUND; + else + trace_selftest_regs_stat = TRACE_SELFTEST_REGS_NOT_FOUND; +} + +static struct ftrace_ops test_regs_probe = { + .func = trace_selftest_test_regs_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_SAVE_REGS, +}; + +static int +trace_selftest_function_regs(void) +{ + int save_ftrace_enabled = ftrace_enabled; + int save_tracer_enabled = tracer_enabled; + char *func_name; + int len; + int ret; + int supported = 0; + +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS + supported = 1; +#endif + + /* The previous test PASSED */ + pr_cont("PASSED\n"); + pr_info("Testing ftrace regs%s: ", + !supported ? "(no arch support)" : ""); + + /* enable tracing, and record the filter function */ + ftrace_enabled = 1; + tracer_enabled = 1; + + /* Handle PPC64 '.' name */ + func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + len = strlen(func_name); + + ret = ftrace_set_filter(&test_regs_probe, func_name, len, 1); + /* + * If DYNAMIC_FTRACE is not set, then we just trace all functions. + * This test really doesn't care. + */ + if (ret && ret != -ENODEV) { + pr_cont("*Could not set filter* "); + goto out; + } + + ret = register_ftrace_function(&test_regs_probe); + /* + * Now if the arch does not support passing regs, then this should + * have failed. 
+ */ + if (!supported) { + if (!ret) { + pr_cont("*registered save-regs without arch support* "); + goto out; + } + test_regs_probe.flags |= FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED; + ret = register_ftrace_function(&test_regs_probe); + } + if (ret) { + pr_cont("*could not register callback* "); + goto out; + } + + + DYN_FTRACE_TEST_NAME(); + + unregister_ftrace_function(&test_regs_probe); + + ret = -1; + + switch (trace_selftest_regs_stat) { + case TRACE_SELFTEST_REGS_START: + pr_cont("*callback never called* "); + goto out; + + case TRACE_SELFTEST_REGS_FOUND: + if (supported) + break; + pr_cont("*callback received regs without arch support* "); + goto out; + + case TRACE_SELFTEST_REGS_NOT_FOUND: + if (!supported) + break; + pr_cont("*callback received NULL regs* "); + goto out; + } + + ret = 0; +out: + ftrace_enabled = save_ftrace_enabled; + tracer_enabled = save_tracer_enabled; + + return ret; +} + /* * Simple verification test of ftrace function tracer. * Enable ftrace, sleep 1/10 second, and then read the trace @@ -592,6 +702,10 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) goto out; ret = trace_selftest_function_recursion(); + if (ret) + goto out; + + ret = trace_selftest_function_regs(); out: ftrace_enabled = save_ftrace_enabled; tracer_enabled = save_tracer_enabled; From 647664eaf4033501739ac1f42dd52ce8c9266ccc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 19:28:08 +0900 Subject: [PATCH 013/282] ftrace: add ftrace_set_filter_ip() for address based filter Add a new filter update interface ftrace_set_filter_ip() to set ftrace filter by ip address, not only glob pattern. Link: http://lkml.kernel.org/r/20120605102808.27845.67952.stgit@localhost.localdomain Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Ananth N Mavinakayanahalli Cc: "Frank Ch. 
Eigler" Cc: Andrew Morton Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +++ kernel/trace/ftrace.c | 59 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9962e954a63..3e711122f66 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -317,6 +317,8 @@ struct dyn_ftrace { }; int ftrace_force_update(void); +int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, + int remove, int reset); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, @@ -544,6 +546,7 @@ static inline int ftrace_text_reserved(void *start, void *end) */ #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) +#define ftrace_set_filter_ip(ops, ip, remove, reset) ({ -ENODEV; }) #define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_free_filter(ops) do { } while (0) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 528d997c7f9..9dcf15d3838 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3242,8 +3242,27 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf, } static int -ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, - int reset, int enable) +ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) +{ + struct ftrace_func_entry *entry; + + if (!ftrace_location(ip)) + return -EINVAL; + + if (remove) { + entry = ftrace_lookup_ip(hash, ip); + if (!entry) + return -ENOENT; + free_hash_entry(hash, entry); + return 0; + } + + return add_hash_entry(hash, ip); +} + +static int +ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, + unsigned long ip, int remove, int reset, int enable) { struct ftrace_hash **orig_hash; struct ftrace_hash *hash; @@ -3272,6 +3291,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, ret = -EINVAL; goto out_regex_unlock; } + if (ip) { + ret = ftrace_match_addr(hash, ip, remove); + if (ret < 0) + goto out_regex_unlock; + } mutex_lock(&ftrace_lock); ret = ftrace_hash_move(ops, enable, orig_hash, hash); @@ -3288,6 +3312,37 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, return ret; } +static int +ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, + int reset, int enable) +{ + return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable); +} + +/** + * ftrace_set_filter_ip - set a function to filter on in ftrace by address + * @ops - the ops to set the filter with + * @ip - the address to add to or remove from the filter. + * @remove - non zero to remove the ip from the filter + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled + * If @ip is NULL, it failes to update filter. 
+ */ +int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, + int remove, int reset) +{ + return ftrace_set_addr(ops, ip, remove, reset, 1); +} +EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); + +static int +ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, + int reset, int enable) +{ + return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable); +} + /** * ftrace_set_filter - set a function to filter on in ftrace * @ops - the ops to set the filter with From 72ef3794c5cd5f5f0e6355c24a529224c449cd14 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 5 Jun 2012 19:28:14 +0900 Subject: [PATCH 014/282] kprobes: Inverse taking of module_mutex with kprobe_mutex Currently module_mutex is taken before kprobe_mutex, but this can cause issues when we have kprobes register ftrace, as the ftrace mutex is taken before enabling a tracepoint, which currently takes the module mutex. If module_mutex is taken before kprobe_mutex, then we can not have kprobes use the ftrace infrastructure. There seems to be no reason that the kprobe_mutex can't be taken before the module_mutex. Running lockdep shows that it is safe among the kernels I've run. Link: http://lkml.kernel.org/r/20120605102814.27845.21047.stgit@localhost.localdomain Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Ananth N Mavinakayanahalli Cc: "Frank Ch. Eigler" Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c62b8546cc9..7a8a1222c7b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -561,9 +561,9 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) { LIST_HEAD(free_list); + mutex_lock(&kprobe_mutex); /* Lock modules while optimizing kprobes */ mutex_lock(&module_mutex); - mutex_lock(&kprobe_mutex); /* * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) @@ -586,8 +586,8 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) /* Step 4: Free cleaned kprobes after quiesence period */ do_free_cleaned_kprobes(&free_list); - mutex_unlock(&kprobe_mutex); mutex_unlock(&module_mutex); + mutex_unlock(&kprobe_mutex); /* Step 5: Kick optimizer again if needed */ if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) From f7fa6ef0ded995aad68650a877198f70e44b7621 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 19:28:20 +0900 Subject: [PATCH 015/282] kprobes: cleanup to separate probe-able check Separate probe-able address checking code from register_kprobe(). Link: http://lkml.kernel.org/r/20120605102820.27845.90133.stgit@localhost.localdomain Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Ananth N Mavinakayanahalli Cc: "Frank Ch. 
Eigler" Cc: Andrew Morton Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/kprobes.c | 100 +++++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 46 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 7a8a1222c7b..6137fe32b4b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1313,13 +1313,61 @@ static inline int check_kprobe_rereg(struct kprobe *p) return ret; } -int __kprobes register_kprobe(struct kprobe *p) +static __kprobes int check_kprobe_address_safe(struct kprobe *p, + struct module **probed_mod) { int ret = 0; + + jump_label_lock(); + preempt_disable(); + + /* Ensure it is not in reserved area nor out of text */ + if (!kernel_text_address((unsigned long) p->addr) || + in_kprobes_functions((unsigned long) p->addr) || + ftrace_text_reserved(p->addr, p->addr) || + jump_label_text_reserved(p->addr, p->addr)) { + ret = -EINVAL; + goto out; + } + + /* Check if are we probing a module */ + *probed_mod = __module_text_address((unsigned long) p->addr); + if (*probed_mod) { + /* + * We must hold a refcount of the probed module while updating + * its code to prohibit unexpected unloading. + */ + if (unlikely(!try_module_get(*probed_mod))) { + ret = -ENOENT; + goto out; + } + + /* + * If the module freed .init.text, we couldn't insert + * kprobes in there. + */ + if (within_module_init((unsigned long)p->addr, *probed_mod) && + (*probed_mod)->state != MODULE_STATE_COMING) { + module_put(*probed_mod); + *probed_mod = NULL; + ret = -ENOENT; + } + } +out: + preempt_enable(); + jump_label_unlock(); + + return ret; +} + +int __kprobes register_kprobe(struct kprobe *p) +{ + int ret; struct kprobe *old_p; struct module *probed_mod; kprobe_opcode_t *addr; + /* Adjust probe address from symbol */ addr = kprobe_addr(p); if (IS_ERR(addr)) return PTR_ERR(addr); @@ -1329,51 +1377,16 @@ int __kprobes register_kprobe(struct kprobe *p) if (ret) return ret; - jump_label_lock(); - preempt_disable(); - if (!kernel_text_address((unsigned long) p->addr) || - in_kprobes_functions((unsigned long) p->addr) || - ftrace_text_reserved(p->addr, p->addr) || - jump_label_text_reserved(p->addr, p->addr)) { - ret = -EINVAL; - goto cannot_probe; - } - /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ p->flags &= KPROBE_FLAG_DISABLED; - - /* - * Check if are we probing a module. - */ - probed_mod = __module_text_address((unsigned long) p->addr); - if (probed_mod) { - /* Return -ENOENT if fail. */ - ret = -ENOENT; - /* - * We must hold a refcount of the probed module while updating - * its code to prohibit unexpected unloading. - */ - if (unlikely(!try_module_get(probed_mod))) - goto cannot_probe; - - /* - * If the module freed .init.text, we couldn't insert - * kprobes in there. - */ - if (within_module_init((unsigned long)p->addr, probed_mod) && - probed_mod->state != MODULE_STATE_COMING) { - module_put(probed_mod); - goto cannot_probe; - } - /* ret will be updated by following code */ - } - preempt_enable(); - jump_label_unlock(); - p->nmissed = 0; INIT_LIST_HEAD(&p->list); - mutex_lock(&kprobe_mutex); + ret = check_kprobe_address_safe(p, &probed_mod); + if (ret) + return ret; + + mutex_lock(&kprobe_mutex); jump_label_lock(); /* needed to call jump_label_text_reserved() */ get_online_cpus(); /* For avoiding text_mutex deadlock. 
*/ @@ -1410,11 +1423,6 @@ out: module_put(probed_mod); return ret; - -cannot_probe: - preempt_enable(); - jump_label_unlock(); - return ret; } EXPORT_SYMBOL_GPL(register_kprobe); From 25764288d8dc4792f0f487baf043ccfee5d8c2ba Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 19:28:26 +0900 Subject: [PATCH 016/282] kprobes: Move locks into appropriate functions Break a big critical region into fine-grained pieces at registering kprobe path. This helps us to solve circular locking dependency when introducing ftrace-based kprobes. Link: http://lkml.kernel.org/r/20120605102826.27845.81689.stgit@localhost.localdomain Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Ananth N Mavinakayanahalli Cc: "Frank Ch. Eigler" Cc: Andrew Morton Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/kprobes.c | 63 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6137fe32b4b..9e47f44f353 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -759,20 +759,28 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) struct kprobe *ap; struct optimized_kprobe *op; + /* For preparing optimization, jump_label_text_reserved() is called */ + jump_label_lock(); + mutex_lock(&text_mutex); + ap = alloc_aggr_kprobe(p); if (!ap) - return; + goto out; op = container_of(ap, struct optimized_kprobe, kp); if (!arch_prepared_optinsn(&op->optinsn)) { /* If failed to setup optimizing, fallback to kprobe */ arch_remove_optimized_kprobe(op); kfree(op); - return; + goto out; } init_aggr_kprobe(ap, p); - optimize_kprobe(ap); + optimize_kprobe(ap); /* This just kicks optimizer thread */ + +out: + mutex_unlock(&text_mutex); + jump_label_unlock(); } #ifdef CONFIG_SYSCTL @@ -1144,12 +1152,6 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) if (p->post_handler && !ap->post_handler) ap->post_handler = aggr_post_handler; - if (kprobe_disabled(ap) && !kprobe_disabled(p)) { - ap->flags &= ~KPROBE_FLAG_DISABLED; - if (!kprobes_all_disarmed) - /* Arm the breakpoint again. */ - __arm_kprobe(ap); - } return 0; } @@ -1189,11 +1191,22 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, int ret = 0; struct kprobe *ap = orig_p; + /* For preparing optimization, jump_label_text_reserved() is called */ + jump_label_lock(); + /* + * Get online CPUs to avoid text_mutex deadlock.with stop machine, + * which is invoked by unoptimize_kprobe() in add_new_kprobe() + */ + get_online_cpus(); + mutex_lock(&text_mutex); + if (!kprobe_aggrprobe(orig_p)) { /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */ ap = alloc_aggr_kprobe(orig_p); - if (!ap) - return -ENOMEM; + if (!ap) { + ret = -ENOMEM; + goto out; + } init_aggr_kprobe(ap, orig_p); } else if (kprobe_unused(ap)) /* This probe is going to die. Rescue it */ @@ -1213,7 +1226,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, * free aggr_probe. It will be used next time, or * freed by unregister_kprobe. */ - return ret; + goto out; /* Prepare optimized instructions if possible. 
*/ prepare_optimized_kprobe(ap); @@ -1228,7 +1241,20 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, /* Copy ap's insn slot to p */ copy_kprobe(ap, p); - return add_new_kprobe(ap, p); + ret = add_new_kprobe(ap, p); + +out: + mutex_unlock(&text_mutex); + put_online_cpus(); + jump_label_unlock(); + + if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) { + ap->flags &= ~KPROBE_FLAG_DISABLED; + if (!kprobes_all_disarmed) + /* Arm the breakpoint again. */ + arm_kprobe(ap); + } + return ret; } static int __kprobes in_kprobes_functions(unsigned long addr) @@ -1387,10 +1413,6 @@ int __kprobes register_kprobe(struct kprobe *p) return ret; mutex_lock(&kprobe_mutex); - jump_label_lock(); /* needed to call jump_label_text_reserved() */ - - get_online_cpus(); /* For avoiding text_mutex deadlock. */ - mutex_lock(&text_mutex); old_p = get_kprobe(p->addr); if (old_p) { @@ -1399,7 +1421,9 @@ int __kprobes register_kprobe(struct kprobe *p) goto out; } + mutex_lock(&text_mutex); /* Avoiding text modification */ ret = arch_prepare_kprobe(p); + mutex_unlock(&text_mutex); if (ret) goto out; @@ -1408,15 +1432,12 @@ int __kprobes register_kprobe(struct kprobe *p) &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); if (!kprobes_all_disarmed && !kprobe_disabled(p)) - __arm_kprobe(p); + arm_kprobe(p); /* Try to optimize kprobe */ try_to_optimize_kprobe(p); out: - mutex_unlock(&text_mutex); - put_online_cpus(); - jump_label_unlock(); mutex_unlock(&kprobe_mutex); if (probed_mod) From 4dc936769e8a6382a4cc12375e8a4daa2b829fda Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 6 Jun 2012 13:45:31 -0400 Subject: [PATCH 017/282] ftrace: Make ftrace_location() a nop on !DYNAMIC_FTRACE When CONFIG_DYNAMIC_FTRACE is not set, ftrace_location() is not defined. If a user (like kprobes) references this function, it will break the compile when CONFIG_DYNAMIC_FTRACE is not set. Add ftrace_location() as a nop (return 0) when DYNAMIC_FTRACE is not defined. Link: http://lkml.kernel.org/r/20120612225426.961092717@goodmis.org Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3e711122f66..a52f2f4fe03 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -520,7 +520,7 @@ extern int skip_trace(unsigned long ip); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); -#else +#else /* CONFIG_DYNAMIC_FTRACE */ static inline int skip_trace(unsigned long ip) { return 0; } static inline int ftrace_force_update(void) { return 0; } static inline void ftrace_disable_daemon(void) { } @@ -538,6 +538,10 @@ static inline int ftrace_text_reserved(void *start, void *end) { return 0; } +static inline unsigned long ftrace_location(unsigned long ip) +{ + return 0; +} /* * Again users of functions that have ftrace_ops may not From ae6aa16fdc163afe6b04b6c073ad4ddd4663c03b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 19:28:32 +0900 Subject: [PATCH 018/282] kprobes: introduce ftrace based optimization Introduce function trace based kprobes optimization. With using ftrace optimization, kprobes on the mcount calling address, use ftrace's mcount call instead of breakpoint. Furthermore, this optimization works with preemptive kernel not like as current jump-based optimization. Of cource, this feature works only if the probe is on mcount call. 
Only if kprobe.break_handler is set, that probe is not optimized with ftrace (nor put on ftrace). The reason why this limitation comes is that this break_handler may be used only from jprobes which changes ip address (for fetching the function arguments), but function tracer ignores modified ip address. Changes in v2: - Fix ftrace_ops registering right after setting its filter. - Unregister ftrace_ops if there is no kprobe using. - Remove notrace dependency from __kprobes macro. Link: http://lkml.kernel.org/r/20120605102832.27845.63461.stgit@localhost.localdomain Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Ananth N Mavinakayanahalli Cc: "Frank Ch. Eigler" Cc: Andrew Morton Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/kprobes.h | 27 +++++++++++ kernel/kprobes.c | 105 +++++++++++++++++++++++++++++++++++----- 2 files changed, 119 insertions(+), 13 deletions(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b6e1f8c0057..aa0d05e852e 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef CONFIG_KPROBES #include @@ -48,14 +49,26 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 +/* + * If function tracer is enabled and the arch supports full + * passing of pt_regs to function tracing, then kprobes can + * optimize on top of function tracing. + */ +#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \ + && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE) +# define KPROBES_CAN_USE_FTRACE +#endif + /* Attach to insert probes on any functions which should be ignored*/ #define __kprobes __attribute__((__section__(".kprobes.text"))) + #else /* CONFIG_KPROBES */ typedef int kprobe_opcode_t; struct arch_specific_insn { int dummy; }; #define __kprobes + #endif /* CONFIG_KPROBES */ struct kprobe; @@ -128,6 +141,7 @@ struct kprobe { * NOTE: * this flag is only for optimized_kprobe. */ +#define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ /* Has this kprobe gone ? */ static inline int kprobe_gone(struct kprobe *p) @@ -146,6 +160,13 @@ static inline int kprobe_optimized(struct kprobe *p) { return p->flags & KPROBE_FLAG_OPTIMIZED; } + +/* Is this kprobe uses ftrace ? 
*/ +static inline int kprobe_ftrace(struct kprobe *p) +{ + return p->flags & KPROBE_FLAG_FTRACE; +} + /* * Special probe type that uses setjmp-longjmp type tricks to resume * execution at a specified entry with a matching prototype corresponding @@ -295,6 +316,12 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, #endif #endif /* CONFIG_OPTPROBES */ +#ifdef KPROBES_CAN_USE_FTRACE +extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct pt_regs *regs); +extern int arch_prepare_kprobe_ftrace(struct kprobe *p); +#endif + /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9e47f44f353..69c16efc315 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -759,6 +759,10 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) struct kprobe *ap; struct optimized_kprobe *op; + /* Impossible to optimize ftrace-based kprobe */ + if (kprobe_ftrace(p)) + return; + /* For preparing optimization, jump_label_text_reserved() is called */ jump_label_lock(); mutex_lock(&text_mutex); @@ -915,9 +919,64 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) } #endif /* CONFIG_OPTPROBES */ +#ifdef KPROBES_CAN_USE_FTRACE +static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { + .regs_func = kprobe_ftrace_handler, + .flags = FTRACE_OPS_FL_SAVE_REGS, +}; +static int kprobe_ftrace_enabled; + +/* Must ensure p->addr is really on ftrace */ +static int __kprobes prepare_kprobe(struct kprobe *p) +{ + if (!kprobe_ftrace(p)) + return arch_prepare_kprobe(p); + + return arch_prepare_kprobe_ftrace(p); +} + +/* Caller must lock kprobe_mutex */ +static void __kprobes arm_kprobe_ftrace(struct kprobe *p) +{ + int ret; + + ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, + (unsigned long)p->addr, 0, 0); + WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret); + kprobe_ftrace_enabled++; + if (kprobe_ftrace_enabled == 1) { + ret = register_ftrace_function(&kprobe_ftrace_ops); + WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret); + } +} + +/* Caller must lock kprobe_mutex */ +static void __kprobes disarm_kprobe_ftrace(struct kprobe *p) +{ + int ret; + + kprobe_ftrace_enabled--; + if (kprobe_ftrace_enabled == 0) { + ret = unregister_ftrace_function(&kprobe_ftrace_ops); + WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret); + } + ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, + (unsigned long)p->addr, 1, 0); + WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret); +} +#else /* !KPROBES_CAN_USE_FTRACE */ +#define prepare_kprobe(p) arch_prepare_kprobe(p) +#define arm_kprobe_ftrace(p) do {} while (0) +#define disarm_kprobe_ftrace(p) do {} while (0) +#endif + /* Arm a kprobe with text_mutex */ static void __kprobes arm_kprobe(struct kprobe *kp) { + if (unlikely(kprobe_ftrace(kp))) { + arm_kprobe_ftrace(kp); + return; + } /* * Here, since __arm_kprobe() doesn't use stop_machine(), * this doesn't cause deadlock on text_mutex. 
So, we don't @@ -929,11 +988,15 @@ static void __kprobes arm_kprobe(struct kprobe *kp) } /* Disarm a kprobe with text_mutex */ -static void __kprobes disarm_kprobe(struct kprobe *kp) +static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt) { + if (unlikely(kprobe_ftrace(kp))) { + disarm_kprobe_ftrace(kp); + return; + } /* Ditto */ mutex_lock(&text_mutex); - __disarm_kprobe(kp, true); + __disarm_kprobe(kp, reopt); mutex_unlock(&text_mutex); } @@ -1343,6 +1406,26 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p, struct module **probed_mod) { int ret = 0; + unsigned long ftrace_addr; + + /* + * If the address is located on a ftrace nop, set the + * breakpoint to the following instruction. + */ + ftrace_addr = ftrace_location((unsigned long)p->addr); + if (ftrace_addr) { +#ifdef KPROBES_CAN_USE_FTRACE + /* Given address is not on the instruction boundary */ + if ((unsigned long)p->addr != ftrace_addr) + return -EILSEQ; + /* break_handler (jprobe) can not work with ftrace */ + if (p->break_handler) + return -EINVAL; + p->flags |= KPROBE_FLAG_FTRACE; +#else /* !KPROBES_CAN_USE_FTRACE */ + return -EINVAL; +#endif + } jump_label_lock(); preempt_disable(); @@ -1350,7 +1433,6 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p, /* Ensure it is not in reserved area nor out of text */ if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr) || - ftrace_text_reserved(p->addr, p->addr) || jump_label_text_reserved(p->addr, p->addr)) { ret = -EINVAL; goto out; @@ -1422,7 +1504,7 @@ int __kprobes register_kprobe(struct kprobe *p) } mutex_lock(&text_mutex); /* Avoiding text modification */ - ret = arch_prepare_kprobe(p); + ret = prepare_kprobe(p); mutex_unlock(&text_mutex); if (ret) goto out; @@ -1480,7 +1562,7 @@ static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p) /* Try to disarm and disable this/parent probe */ if (p == orig_p || aggr_kprobe_disabled(orig_p)) { - disarm_kprobe(orig_p); + disarm_kprobe(orig_p, true); orig_p->flags |= KPROBE_FLAG_DISABLED; } } @@ -2078,10 +2160,11 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, if (!pp) pp = p; - seq_printf(pi, "%s%s%s\n", + seq_printf(pi, "%s%s%s%s\n", (kprobe_gone(p) ? "[GONE]" : ""), ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""), - (kprobe_optimized(pp) ? "[OPTIMIZED]" : "")); + (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""), + (kprobe_ftrace(pp) ? 
"[FTRACE]" : "")); } static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) @@ -2160,14 +2243,12 @@ static void __kprobes arm_all_kprobes(void) goto already_enabled; /* Arming kprobes doesn't optimize kprobe itself */ - mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) if (!kprobe_disabled(p)) - __arm_kprobe(p); + arm_kprobe(p); } - mutex_unlock(&text_mutex); kprobes_all_disarmed = false; printk(KERN_INFO "Kprobes globally enabled\n"); @@ -2195,15 +2276,13 @@ static void __kprobes disarm_all_kprobes(void) kprobes_all_disarmed = true; printk(KERN_INFO "Kprobes globally disabled\n"); - mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) { if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) - __disarm_kprobe(p, false); + disarm_kprobe(p, false); } } - mutex_unlock(&text_mutex); mutex_unlock(&kprobe_mutex); /* Wait for disarming all kprobes by optimizer */ From e52538965119319447c0800c534da73142c27be2 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 19:28:38 +0900 Subject: [PATCH 019/282] kprobes/x86: ftrace based optimization for x86 Add function tracer based kprobe optimization support handlers on x86. This allows kprobes to use function tracer for probing on mcount call. Link: http://lkml.kernel.org/r/20120605102838.27845.26317.stgit@localhost.localdomain Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Ananth N Mavinakayanahalli Cc: "Frank Ch. Eigler" Cc: Andrew Morton Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu [ Updated to new port of ftrace save regs functions ] Signed-off-by: Steven Rostedt --- arch/x86/include/asm/kprobes.h | 1 + arch/x86/kernel/kprobes.c | 48 ++++++++++++++++++++++++++++++++++ include/linux/kprobes.h | 2 +- kernel/kprobes.c | 2 +- 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 54788253915..d3ddd17405d 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -27,6 +27,7 @@ #include #define __ARCH_WANT_KPROBES_INSN_SLOT +#define ARCH_SUPPORTS_KPROBES_ON_FTRACE struct pt_regs; struct kprobe; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e2f751efb7b..47ae1023a93 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1052,6 +1052,54 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +#ifdef KPROBES_CAN_USE_FTRACE +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + regs->ip += sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (p->pre_handler) + p->pre_handler(p, regs); + + if (unlikely(p->post_handler)) { + /* Emulate singlestep as if there is a 5byte nop */ + regs->ip = ip + MCOUNT_INSN_SIZE; + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + 
__this_cpu_write(current_kprobe, NULL); + regs->ip = ip; /* Recover for next callback */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} +#endif + int __init arch_init_kprobes(void) { return arch_init_optprobes(); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index aa0d05e852e..23755ba42ab 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -318,7 +318,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, #endif /* CONFIG_OPTPROBES */ #ifdef KPROBES_CAN_USE_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct pt_regs *regs); + struct ftrace_ops *ops, struct pt_regs *regs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); #endif diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 69c16efc315..35b4315d84f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -921,7 +921,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) #ifdef KPROBES_CAN_USE_FTRACE static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { - .regs_func = kprobe_ftrace_handler, + .func = kprobe_ftrace_handler, .flags = FTRACE_OPS_FL_SAVE_REGS, }; static int kprobe_ftrace_enabled; From a7cb8863dd352f052e7b2b86a17410070d1b69af Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Aug 2012 13:53:40 -0300 Subject: [PATCH 020/282] perf hists browser: Add verbose mode hotkey Right now just shows the DSO name in callchain entries, to help debug the DWARF CFI post unwind code. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-54gouunatugtfw92j6gddk45@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 39 +++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 413bd62eedb..b8094692c15 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -24,6 +24,7 @@ struct hist_browser { struct hist_entry *he_selection; struct map_symbol *selection; int print_seq; + bool show_dso; bool has_symbols; }; @@ -376,12 +377,19 @@ out: } static char *callchain_list__sym_name(struct callchain_list *cl, - char *bf, size_t bfsize) + char *bf, size_t bfsize, bool show_dso) { - if (cl->ms.sym) - return cl->ms.sym->name; + int printed; + + if (cl->ms.sym) + printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); + else + printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); + + if (show_dso) + scnprintf(bf + printed, bfsize - printed, " %s", + cl->ms.map ? 
cl->ms.map->dso->short_name : "unknown"); - snprintf(bf, bfsize, "%#" PRIx64, cl->ip); return bf; } @@ -417,7 +425,7 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *browse remaining -= cumul; list_for_each_entry(chain, &child->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; + char bf[1024], *alloc_str; const char *str; int color; bool was_first = first; @@ -434,7 +442,8 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *browse } alloc_str = NULL; - str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + str = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); if (was_first) { double percent = cumul * 100.0 / new_total; @@ -493,7 +502,7 @@ static int hist_browser__show_callchain_node(struct hist_browser *browser, char folded_sign = ' '; list_for_each_entry(chain, &node->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *s; + char bf[1024], *s; int color; folded_sign = callchain_list__folded(chain); @@ -510,7 +519,8 @@ static int hist_browser__show_callchain_node(struct hist_browser *browser, *is_current_entry = true; } - s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + s = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); ui_browser__gotorc(&browser->b, row, 0); ui_browser__set_color(&browser->b, color); slsmg_write_nstring(" ", offset); @@ -830,7 +840,7 @@ static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *bro remaining -= cumul; list_for_each_entry(chain, &child->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; + char bf[1024], *alloc_str; const char *str; bool was_first = first; @@ -842,7 +852,8 @@ static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *bro folded_sign = callchain_list__folded(chain); alloc_str = NULL; - str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + str = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); if (was_first) { double percent = cumul * 100.0 / new_total; @@ -880,10 +891,10 @@ static int hist_browser__fprintf_callchain_node(struct hist_browser *browser, int printed = 0; list_for_each_entry(chain, &node->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *s; + char bf[1024], *s; folded_sign = callchain_list__folded(chain); - s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + s = callchain_list__sym_name(chain, bf, sizeof(bf), browser->show_dso); printed += fprintf(fp, "%*s%c %s\n", offset, " ", folded_sign, s); } @@ -1133,6 +1144,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; case 'd': goto zoom_dso; + case 'V': + browser->show_dso = !browser->show_dso; + continue; case 't': goto zoom_thread; case '/': @@ -1164,6 +1178,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "d Zoom into current DSO\n" "t Zoom into current Thread\n" "P Print histograms to perf.hist.N\n" + "V Verbose (DSO names in callchains, etc)\n" "/ Filter symbol by name"); continue; case K_ENTER: From 3c18c10bde65b6dcaffab7a4d040285e4defa49b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 31 Jul 2012 10:23:37 -0400 Subject: [PATCH 021/282] tracing: Fix wakeup_rt self test on virtual machines The warkeup_rt self test used msleep() calls to wait for real time tasks to wake up and run. On bare-metal hardware, this was enough as the scheduler should let the RT task run way before the non-RT task wakes up from the msleep(). If it did not, then that would mean the scheduler was broken. 
But when dealing with virtual machines, this is a different story. If the RT task wakes up on a VCPU, it's up to the host to decide when that task gets to schedule, which can be far behind the time that the non-RT task wakes up. In this case, the test would fail incorrectly. As we are not testing the scheduler, but instead the wake up tracing, we can use completions to wait and not depend on scheduler timings to see if events happen on time. Link: http://lkml.kernel.org/r/1343663105.3847.7.camel@fedora Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Steven Rostedt --- kernel/trace/trace_selftest.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 1003a4d5eb2..2c00a691a54 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1041,6 +1041,8 @@ static int trace_wakeup_test_thread(void *data) set_current_state(TASK_INTERRUPTIBLE); schedule(); + complete(x); + /* we are awake, now wait to disappear */ while (!kthread_should_stop()) { /* @@ -1084,24 +1086,21 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* reset the max latency */ tracing_max_latency = 0; - /* sleep to let the RT thread sleep too */ - msleep(100); + while (p->on_rq) { + /* + * Sleep to make sure the RT thread is asleep too. + * On virtual machines we can't rely on timings, + * but we want to make sure this test still works. + */ + msleep(100); + } - /* - * Yes this is slightly racy. It is possible that for some - * strange reason that the RT thread we created, did not - * call schedule for 100ms after doing the completion, - * and we do a wakeup on a task that already is awake. - * But that is extremely unlikely, and the worst thing that - * happens in such a case, is that we disable tracing. - * Honestly, if this race does happen something is horrible - * wrong with the system. - */ + init_completion(&isrt); wake_up_process(p); - /* give a little time to let the thread wake up */ - msleep(100); + /* Wait for the task to wake up */ + wait_for_completion(&isrt); /* stop the tracing. */ tracing_stop(); From 92d8d4a8b0f4c6eba70f6e62b48e38bd005a56e6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 19 Jun 2012 17:47:52 +0200 Subject: [PATCH 022/282] tracing/filter: Add missing initialization Add missing initialization for the ret variable. Its initialization is based on the re_cnt variable, which is being set deep down in the ftrace_function_filter_re function. I'm not sure compilers would be smart enough to see this in the near future, so killing the warning this way. 
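The warning being silenced is the usual "may be used uninitialized" case: the return value is only assigned inside a loop whose trip count the compiler cannot see. A compile-only sketch of the pattern, with a hypothetical parse_count() standing in for ftrace_function_filter_re():

	#include <errno.h>

	int parse_count(const char *buf);	/* opaque to the caller, like ftrace_function_filter_re() */

	int set_filter(const char *buf)
	{
		int i, cnt = parse_count(buf);
		int ret = -EINVAL;	/* without this, gcc may warn that ret is used uninitialized */

		for (i = 0; i < cnt; i++)
			ret = 0;	/* the per-regex work goes here */

		return ret;
	}

Pre-setting ret also means the "nothing parsed" case reports -EINVAL rather than whatever happened to be on the stack.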
Link: http://lkml.kernel.org/r/1340120894-9465-2-git-send-email-jolsa@redhat.com Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 431dba8b754..c154797a7ff 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -2002,7 +2002,7 @@ static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter, static int __ftrace_function_set_filter(int filter, char *buf, int len, struct function_filter_data *data) { - int i, re_cnt, ret; + int i, re_cnt, ret = -EINVAL; int *reset; char **re; From 87abb3b15c62033409f5bf2ffb5620c94f91cf2c Mon Sep 17 00:00:00 2001 From: Wang Tianhong Date: Thu, 2 Aug 2012 14:02:00 +0800 Subject: [PATCH 023/282] tracing/trivial: Fix some typos in kernel/trace Fix some typos in kernel/trace. Link: http://lkml.kernel.org/r/1343887320.2228.9.camel@louis-ThinkPad-T410 Signed-off-by: Wang Tianhong Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 4 ++-- kernel/trace/trace.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 49491fa7daa..b32ed0e385a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2816,7 +2816,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable); * to the buffer after this will fail and return NULL. * * This is different than ring_buffer_record_disable() as - * it works like an on/off switch, where as the disable() verison + * it works like an on/off switch, where as the disable() version * must be paired with a enable(). */ void ring_buffer_record_off(struct ring_buffer *buffer) @@ -2839,7 +2839,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_off); * ring_buffer_record_off(). * * This is different than ring_buffer_record_enable() as - * it works like an on/off switch, where as the enable() verison + * it works like an on/off switch, where as the enable() version * must be paired with a disable(). */ void ring_buffer_record_on(struct ring_buffer *buffer) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a120f98c411..d1a8d07ec86 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -426,15 +426,15 @@ __setup("trace_buf_size=", set_buf_size); static int __init set_tracing_thresh(char *str) { - unsigned long threshhold; + unsigned long threshold; int ret; if (!str) return 0; - ret = strict_strtoul(str, 0, &threshhold); + ret = strict_strtoul(str, 0, &threshold); if (ret < 0) return 0; - tracing_thresh = threshhold * 1000; + tracing_thresh = threshold * 1000; return 1; } __setup("tracing_thresh=", set_tracing_thresh); From dc4552bf7176573ccf79af04ab8648b015738f4a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Aug 2012 23:32:05 -0300 Subject: [PATCH 024/282] perf tools: Add dump_stack function To help in debugging the tools, provides functionality roughly similar to the function with the same name in the kernel. Copied from glibc backtrace function man page. 
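A sketch of the intended use while chasing a bug (the call site below is made up; any perf source that includes util.h can do the same):

	#include "util.h"

	void process_suspect_event(void)
	{
		/* temporary instrumentation: print how we got here */
		dump_stack();
	}

Note that, per backtrace_symbols(3), symbol names are only resolved when the binary is linked with -rdynamic; otherwise the frames come out as raw addresses.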
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6nw2sak21bqy8h1m2syyo816@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 19 +++++++++++++++++++ tools/perf/util/util.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d03599fbe78..1b8775c3707 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -1,6 +1,9 @@ #include "../perf.h" #include "util.h" #include +#include +#include +#include /* * XXX We need to find a better place for these things... @@ -158,3 +161,19 @@ size_t hex_width(u64 v) return n; } + +/* Obtain a backtrace and print it to stdout. */ +void dump_stack(void) +{ + void *array[16]; + size_t size = backtrace(array, ARRAY_SIZE(array)); + char **strings = backtrace_symbols(array, size); + size_t i; + + printf("Obtained %zd stack frames.\n", size); + + for (i = 0; i < size; i++) + printf("%s\n", strings[i]); + + free(strings); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b13c7331eaf..00a93a91a23 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -266,4 +266,6 @@ size_t hex_width(u64 v); char *rtrim(char *s); +void dump_stack(void); + #endif From 8b6ee4c5d48d93527dcf6e36c51cbb7703d7fffb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Aug 2012 23:36:16 -0300 Subject: [PATCH 025/282] perf header: Set the tracepoint names on PERF_RECORD_HEADER_TRACING_DATA We only have access to pevent after processing that event, so set the tracepoint names there. Right now this isn't a problem as we're deferring resolving the tracepoint names to when we process samples, but in the next patches we will be doing it in advance, to avoid relookups, so do it earlier, as soon as we process the tracing data event. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-tzb7srmsl7a6o3icw592iv2o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 74ea3c2f813..e2b4864bd11 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2452,6 +2452,8 @@ int perf_event__process_tracing_data(union perf_event *event, if (size_read + padding != size) die("tracing data size mismatch"); + perf_evlist__set_tracepoint_names(session->evlist, session->pevent); + return size_read + padding; } From fcf65bf149afa91b875ffde4455967cb63ee0be9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Aug 2012 09:58:03 -0300 Subject: [PATCH 026/282] perf evsel: Cache associated event_format We already lookup the associated event_format when reading the perf.data header, so that we can cache the tracepoint name in evsel->name, so do it a little further and save the event_format itself, so that we can avoid relookups in tools that need to access it. Change the tools to take the most obvious advantage, when they were using pevent_find_event directly. More work is needed for further removing the need of a pointer to pevent, such as when asking for event field values ("common_pid" and the other common fields and per event_format fields). 
This is something that was planned but only got actually done when Andrey Wagin needed to do this lookup at perf_tool->sample() time, when we don't have access to pevent (session->pevent) to use with pevent_find_event(). Cc: Andrey Wagin Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/n/tip-txkvew2ckko0b594ae8fbnyk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 39 ++++++------------- tools/perf/builtin-lock.c | 15 +++---- tools/perf/builtin-sched.c | 37 +++++------------- tools/perf/builtin-script.c | 29 +++----------- tools/perf/util/evsel.h | 1 + tools/perf/util/header.c | 1 + .../util/scripting-engines/trace-event-perl.c | 13 +++---- .../scripting-engines/trace-event-python.c | 21 +++++----- tools/perf/util/trace-event-parse.c | 26 +++++++------ tools/perf/util/trace-event.h | 2 + 10 files changed, 70 insertions(+), 114 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ce35015f2dc..ffb93f42495 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "perf.h" +#include "util/evsel.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" @@ -57,11 +58,6 @@ static unsigned long nr_allocs, nr_cross_allocs; #define PATH_SYS_NODE "/sys/devices/system/node" -struct perf_kmem { - struct perf_tool tool; - struct perf_session *session; -}; - static void init_cpunode_map(void) { FILE *fp; @@ -283,16 +279,10 @@ static void process_free_event(void *data, s_alloc->alloc_cpu = -1; } -static void process_raw_event(struct perf_tool *tool, - union perf_event *raw_event __used, void *data, +static void process_raw_event(struct perf_evsel *evsel, void *data, int cpu, u64 timestamp, struct thread *thread) { - struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool); - struct event_format *event; - int type; - - type = trace_parse_common_type(kmem->session->pevent, data); - event = pevent_find_event(kmem->session->pevent, type); + struct event_format *event = evsel->tp_format; if (!strcmp(event->name, "kmalloc") || !strcmp(event->name, "kmem_cache_alloc")) { @@ -313,10 +303,10 @@ static void process_raw_event(struct perf_tool *tool, } } -static int process_sample_event(struct perf_tool *tool, +static int process_sample_event(struct perf_tool *tool __used, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __used, + struct perf_evsel *evsel, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, event->ip.pid); @@ -329,18 +319,16 @@ static int process_sample_event(struct perf_tool *tool, dump_printf(" ... 
thread: %s:%d\n", thread->comm, thread->pid); - process_raw_event(tool, event, sample->raw_data, sample->cpu, + process_raw_event(evsel, sample->raw_data, sample->cpu, sample->time, thread); return 0; } -static struct perf_kmem perf_kmem = { - .tool = { - .sample = process_sample_event, - .comm = perf_event__process_comm, - .ordered_samples = true, - }, +static struct perf_tool perf_kmem = { + .sample = process_sample_event, + .comm = perf_event__process_comm, + .ordered_samples = true, }; static double fragmentation(unsigned long n_req, unsigned long n_alloc) @@ -497,13 +485,10 @@ static int __cmd_kmem(void) int err = -EINVAL; struct perf_session *session; - session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_kmem.tool); + session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem); if (session == NULL) return -ENOMEM; - perf_kmem.session = session; - if (perf_session__create_kernel_maps(session) < 0) goto out_delete; @@ -511,7 +496,7 @@ static int __cmd_kmem(void) goto out_delete; setup_pager(); - err = perf_session__process_events(session, &perf_kmem.tool); + err = perf_session__process_events(session, &perf_kmem); if (err != 0) goto out_delete; sort_result(); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index b3c42854886..142b3033e4b 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "perf.h" +#include "util/evsel.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" @@ -718,14 +719,10 @@ process_lock_release_event(void *data, trace_handler->release_event(&release_event, event, cpu, timestamp, thread); } -static void -process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread) +static void process_raw_event(struct perf_evsel *evsel, void *data, int cpu, + u64 timestamp, struct thread *thread) { - struct event_format *event; - int type; - - type = trace_parse_common_type(session->pevent, data); - event = pevent_find_event(session->pevent, type); + struct event_format *event = evsel->tp_format; if (!strcmp(event->name, "lock_acquire")) process_lock_acquire_event(data, event, cpu, timestamp, thread); @@ -849,7 +846,7 @@ static void dump_info(void) static int process_sample_event(struct perf_tool *tool __used, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __used, + struct perf_evsel *evsel, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, sample->tid); @@ -860,7 +857,7 @@ static int process_sample_event(struct perf_tool *tool __used, return -1; } - process_raw_event(sample->raw_data, sample->cpu, sample->time, thread); + process_raw_event(evsel, sample->raw_data, sample->cpu, sample->time, thread); return 0; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 7a9ad2b1ee7..30ef82aca88 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -43,11 +43,6 @@ static u64 sleep_measurement_overhead; static unsigned long nr_tasks; -struct perf_sched { - struct perf_tool tool; - struct perf_session *session; -}; - struct sched_atom; struct task_desc { @@ -1596,14 +1591,12 @@ typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event_format * struct machine *machine, struct thread *thread); -static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, +static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __used, union perf_event *event __used, struct perf_sample *sample, struct perf_evsel 
*evsel, struct machine *machine) { - struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - struct pevent *pevent = sched->session->pevent; struct thread *thread = machine__findnew_thread(machine, sample->pid); if (thread == NULL) { @@ -1617,25 +1610,18 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, if (evsel->handler.func != NULL) { tracepoint_handler f = evsel->handler.func; - - if (evsel->handler.data == NULL) - evsel->handler.data = pevent_find_event(pevent, - evsel->attr.config); - - f(tool, evsel->handler.data, sample, machine, thread); + f(tool, evsel->tp_format, sample, machine, thread); } return 0; } -static struct perf_sched perf_sched = { - .tool = { - .sample = perf_sched__process_tracepoint_sample, - .comm = perf_event__process_comm, - .lost = perf_event__process_lost, - .fork = perf_event__process_task, - .ordered_samples = true, - }, +static struct perf_tool perf_sched = { + .sample = perf_sched__process_tracepoint_sample, + .comm = perf_event__process_comm, + .lost = perf_event__process_lost, + .fork = perf_event__process_task, + .ordered_samples = true, }; static void read_events(bool destroy, struct perf_session **psession) @@ -1652,18 +1638,15 @@ static void read_events(bool destroy, struct perf_session **psession) }; struct perf_session *session; - session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_sched.tool); + session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_sched); if (session == NULL) die("No Memory"); - perf_sched.session = session; - err = perf_session__set_tracepoints_handlers(session, handlers); assert(err == 0); if (perf_session__has_traces(session, "record -R")) { - err = perf_session__process_events(session, &perf_sched.tool); + err = perf_session__process_events(session, &perf_sched); if (err) die("Failed to process events, error %d", err); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 1e60ab70b2b..8dba4707b03 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -262,14 +262,11 @@ static int perf_session__check_output_opt(struct perf_session *session) return 0; } -static void print_sample_start(struct pevent *pevent, - struct perf_sample *sample, +static void print_sample_start(struct perf_sample *sample, struct thread *thread, struct perf_evsel *evsel) { - int type; struct perf_event_attr *attr = &evsel->attr; - struct event_format *event; const char *evname = NULL; unsigned long secs; unsigned long usecs; @@ -307,20 +304,7 @@ static void print_sample_start(struct pevent *pevent, } if (PRINT_FIELD(EVNAME)) { - if (attr->type == PERF_TYPE_TRACEPOINT) { - /* - * XXX Do we really need this here? - * perf_evlist__set_tracepoint_names should have done - * this already - */ - type = trace_parse_common_type(pevent, - sample->raw_data); - event = pevent_find_event(pevent, type); - if (event) - evname = event->name; - } else - evname = perf_evsel__name(evsel); - + evname = perf_evsel__name(evsel); printf("%s: ", evname ? 
evname : "[unknown]"); } } @@ -416,7 +400,7 @@ static void print_sample_bts(union perf_event *event, } static void process_event(union perf_event *event __unused, - struct pevent *pevent, + struct pevent *pevent __unused, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, @@ -427,7 +411,7 @@ static void process_event(union perf_event *event __unused, if (output[attr->type].fields == 0) return; - print_sample_start(pevent, sample, thread, evsel); + print_sample_start(sample, thread, evsel); if (is_bts_event(attr)) { print_sample_bts(event, sample, evsel, machine, thread); @@ -435,9 +419,8 @@ static void process_event(union perf_event *event __unused, } if (PRINT_FIELD(TRACE)) - print_trace_event(pevent, sample->cpu, sample->raw_data, - sample->raw_size); - + event_format__print(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size); if (PRINT_FIELD(ADDR)) print_sample_addr(event, sample, machine, thread, attr); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b559929983b..a56c4574b3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -56,6 +56,7 @@ struct perf_evsel { int ids; struct hists hists; char *name; + struct event_format *tp_format; union { void *priv; off_t id_offset; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e2b4864bd11..b2da439bce7 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2123,6 +2123,7 @@ static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel, if (event->name == NULL) return -1; + evsel->tp_format = event; return 0; } diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 02dfa19a467..c2662811659 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -237,16 +237,16 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static inline -struct event_format *find_cache_event(struct pevent *pevent, int type) +static inline struct event_format *find_cache_event(struct perf_evsel *evsel) { static char ev_name[256]; struct event_format *event; + int type = evsel->attr.config; if (events[type]) return events[type]; - events[type] = event = pevent_find_event(pevent, type); + events[type] = event = evsel->tp_format; if (!event) return NULL; @@ -269,7 +269,6 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, unsigned long long val; unsigned long s, ns; struct event_format *event; - int type; int pid; int cpu = sample->cpu; void *data = sample->raw_data; @@ -281,11 +280,9 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, if (evsel->attr.type != PERF_TYPE_TRACEPOINT) return; - type = trace_parse_common_type(pevent, data); - - event = find_cache_event(pevent, type); + event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %d", type); + die("ug! 
no event found for type %d", evsel->attr.config); pid = trace_parse_common_pid(pevent, data); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index ce4d1b0c386..8006978d839 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -27,6 +27,7 @@ #include #include "../../perf.h" +#include "../evsel.h" #include "../util.h" #include "../event.h" #include "../thread.h" @@ -194,16 +195,21 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static inline -struct event_format *find_cache_event(struct pevent *pevent, int type) +static inline struct event_format *find_cache_event(struct perf_evsel *evsel) { static char ev_name[256]; struct event_format *event; + int type = evsel->attr.config; + /* + * XXX: Do we really need to cache this since now we have evsel->tp_format + * cached already? Need to re-read this "cache" routine that as well calls + * define_event_symbols() :-\ + */ if (events[type]) return events[type]; - events[type] = event = pevent_find_event(pevent, type); + events[type] = event = evsel->tp_format; if (!event) return NULL; @@ -217,7 +223,7 @@ struct event_format *find_cache_event(struct pevent *pevent, int type) static void python_process_event(union perf_event *perf_event __unused, struct pevent *pevent, struct perf_sample *sample, - struct perf_evsel *evsel __unused, + struct perf_evsel *evsel, struct machine *machine __unused, struct thread *thread) { @@ -228,7 +234,6 @@ static void python_process_event(union perf_event *perf_event __unused, unsigned long s, ns; struct event_format *event; unsigned n = 0; - int type; int pid; int cpu = sample->cpu; void *data = sample->raw_data; @@ -239,11 +244,9 @@ static void python_process_event(union perf_event *perf_event __unused, if (!t) Py_FatalError("couldn't create Python tuple"); - type = trace_parse_common_type(pevent, data); - - event = find_cache_event(pevent, type); + event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %d", type); + die("ug! no event found for type %d", (int)evsel->attr.config); pid = trace_parse_common_pid(pevent, data); diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 0715c843c2e..12088348ac0 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -167,20 +167,11 @@ unsigned long long read_size(struct pevent *pevent, void *ptr, int size) return pevent_read_number(pevent, ptr, size); } -void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) +void event_format__print(struct event_format *event, + int cpu, void *data, int size) { - struct event_format *event; struct pevent_record record; struct trace_seq s; - int type; - - type = trace_parse_common_type(pevent, data); - - event = pevent_find_event(pevent, type); - if (!event) { - warning("ug! no event found for type %d", type); - return; - } memset(&record, 0, sizeof(record)); record.cpu = cpu; @@ -192,6 +183,19 @@ void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) trace_seq_do_printf(&s); } +void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) +{ + int type = trace_parse_common_type(pevent, data); + struct event_format *event = pevent_find_event(pevent, type); + + if (!event) { + warning("ug! 
no event found for type %d", type); + return; + } + + event_format__print(event, cpu, data, size); +} + void print_event(struct pevent *pevent, int cpu, void *data, int size, unsigned long long nsecs, char *comm) { diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 8fef1d6687b..069d105e0fc 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -32,6 +32,8 @@ int bigendian(void); struct pevent *read_trace_init(int file_bigendian, int host_bigendian); void print_trace_event(struct pevent *pevent, int cpu, void *data, int size); +void event_format__print(struct event_format *event, + int cpu, void *data, int size); void print_event(struct pevent *pevent, int cpu, void *data, int size, unsigned long long nsecs, char *comm); From 22ad798c37cb554afae79a72c1d420ecb4d27b86 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Aug 2012 10:56:43 -0300 Subject: [PATCH 027/282] perf kmem: Use evsel->tp_format and perf_sample To reduce the number of parameters passed to the various event handling functions. Cc: Andrey Wagin Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-p936ngz06yo5h797ggsm7xru@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 41 ++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ffb93f42495..fc6607b383f 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -192,16 +192,15 @@ static void insert_caller_stat(unsigned long call_site, } } -static void process_alloc_event(void *data, - struct event_format *event, - int cpu, - u64 timestamp __used, - struct thread *thread __used, - int node) +static void perf_evsel__process_alloc_event(struct perf_evsel *evsel, + struct perf_sample *sample, + int node) { + struct event_format *event = evsel->tp_format; + void *data = sample->raw_data; unsigned long call_site; unsigned long ptr; - int bytes_req; + int bytes_req, cpu = sample->cpu; int bytes_alloc; int node1, node2; @@ -253,22 +252,18 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr, return NULL; } -static void process_free_event(void *data, - struct event_format *event, - int cpu, - u64 timestamp __used, - struct thread *thread __used) +static void perf_evsel__process_free_event(struct perf_evsel *evsel, + struct perf_sample *sample) { - unsigned long ptr; + unsigned long ptr = raw_field_value(evsel->tp_format, "ptr", + sample->raw_data); struct alloc_stat *s_alloc, *s_caller; - ptr = raw_field_value(event, "ptr", data); - s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); if (!s_alloc) return; - if (cpu != s_alloc->alloc_cpu) { + if ((short)sample->cpu != s_alloc->alloc_cpu) { s_alloc->pingpong++; s_caller = search_alloc_stat(0, s_alloc->call_site, @@ -279,26 +274,26 @@ static void process_free_event(void *data, s_alloc->alloc_cpu = -1; } -static void process_raw_event(struct perf_evsel *evsel, void *data, - int cpu, u64 timestamp, struct thread *thread) +static void perf_evsel__process_kmem_event(struct perf_evsel *evsel, + struct perf_sample *sample) { struct event_format *event = evsel->tp_format; if (!strcmp(event->name, "kmalloc") || !strcmp(event->name, "kmem_cache_alloc")) { - process_alloc_event(data, event, cpu, timestamp, thread, 0); + perf_evsel__process_alloc_event(evsel, sample, 
0); return; } if (!strcmp(event->name, "kmalloc_node") || !strcmp(event->name, "kmem_cache_alloc_node")) { - process_alloc_event(data, event, cpu, timestamp, thread, 1); + perf_evsel__process_alloc_event(evsel, sample, 1); return; } if (!strcmp(event->name, "kfree") || !strcmp(event->name, "kmem_cache_free")) { - process_free_event(data, event, cpu, timestamp, thread); + perf_evsel__process_free_event(evsel, sample); return; } } @@ -319,9 +314,7 @@ static int process_sample_event(struct perf_tool *tool __used, dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - process_raw_event(evsel, sample->raw_data, sample->cpu, - sample->time, thread); - + perf_evsel__process_kmem_event(evsel, sample); return 0; } From 01d955244b99827814570ed4b675271ca7b8af02 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Aug 2012 10:59:44 -0300 Subject: [PATCH 028/282] perf lock: Use evsel->tp_format and perf_sample To reduce the number of parameters passed to the various event handling functions. Cc: Andrey Wagin Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-bipk647rzq357yot9ao6ih73@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 116 ++++++++++++++------------------------ 1 file changed, 42 insertions(+), 74 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 142b3033e4b..3f8b9550a6e 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -357,28 +357,16 @@ struct trace_release_event { struct trace_lock_handler { void (*acquire_event)(struct trace_acquire_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + const struct perf_sample *sample); void (*acquired_event)(struct trace_acquired_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + const struct perf_sample *sample); void (*contended_event)(struct trace_contended_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + const struct perf_sample *sample); void (*release_event)(struct trace_release_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + const struct perf_sample *sample); }; static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr) @@ -417,10 +405,7 @@ enum acquire_flags { static void report_lock_acquire_event(struct trace_acquire_event *acquire_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + const struct perf_sample *sample) { struct lock_stat *ls; struct thread_stat *ts; @@ -430,7 +415,7 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event, if (ls->discard) return; - ts = thread_stat_findnew(thread->pid); + ts = thread_stat_findnew(sample->tid); seq = get_seq(ts, acquire_event->addr); switch (seq->state) { @@ -474,18 +459,16 @@ broken: } ls->nr_acquire++; - seq->prev_event_time = timestamp; + seq->prev_event_time = sample->time; end: return; } static void report_lock_acquired_event(struct trace_acquired_event *acquired_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + const struct perf_sample *sample) { + u64 timestamp = sample->time; struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; @@ -495,7 +478,7 @@ report_lock_acquired_event(struct 
trace_acquired_event *acquired_event, if (ls->discard) return; - ts = thread_stat_findnew(thread->pid); + ts = thread_stat_findnew(sample->tid); seq = get_seq(ts, acquired_event->addr); switch (seq->state) { @@ -537,10 +520,7 @@ end: static void report_lock_contended_event(struct trace_contended_event *contended_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + const struct perf_sample *sample) { struct lock_stat *ls; struct thread_stat *ts; @@ -550,7 +530,7 @@ report_lock_contended_event(struct trace_contended_event *contended_event, if (ls->discard) return; - ts = thread_stat_findnew(thread->pid); + ts = thread_stat_findnew(sample->tid); seq = get_seq(ts, contended_event->addr); switch (seq->state) { @@ -577,17 +557,14 @@ report_lock_contended_event(struct trace_contended_event *contended_event, seq->state = SEQ_STATE_CONTENDED; ls->nr_contended++; - seq->prev_event_time = timestamp; + seq->prev_event_time = sample->time; end: return; } static void report_lock_release_event(struct trace_release_event *release_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + const struct perf_sample *sample) { struct lock_stat *ls; struct thread_stat *ts; @@ -597,7 +574,7 @@ report_lock_release_event(struct trace_release_event *release_event, if (ls->discard) return; - ts = thread_stat_findnew(thread->pid); + ts = thread_stat_findnew(sample->tid); seq = get_seq(ts, release_event->addr); switch (seq->state) { @@ -646,14 +623,12 @@ static struct trace_lock_handler report_lock_ops = { static struct trace_lock_handler *trace_handler; -static void -process_lock_acquire_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static void perf_evsel__process_lock_acquire(struct perf_evsel *evsel, + struct perf_sample *sample) { struct trace_acquire_event acquire_event; + struct event_format *event = evsel->tp_format; + void *data = sample->raw_data; u64 tmp; /* this is required for casting... */ tmp = raw_field_value(event, "lockdep_addr", data); @@ -662,17 +637,15 @@ process_lock_acquire_event(void *data, acquire_event.flag = (int)raw_field_value(event, "flag", data); if (trace_handler->acquire_event) - trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread); + trace_handler->acquire_event(&acquire_event, sample); } -static void -process_lock_acquired_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static void perf_evsel__process_lock_acquired(struct perf_evsel *evsel, + struct perf_sample *sample) { struct trace_acquired_event acquired_event; + struct event_format *event = evsel->tp_format; + void *data = sample->raw_data; u64 tmp; /* this is required for casting... 
*/ tmp = raw_field_value(event, "lockdep_addr", data); @@ -680,17 +653,15 @@ process_lock_acquired_event(void *data, acquired_event.name = (char *)raw_field_ptr(event, "name", data); if (trace_handler->acquire_event) - trace_handler->acquired_event(&acquired_event, event, cpu, timestamp, thread); + trace_handler->acquired_event(&acquired_event, sample); } -static void -process_lock_contended_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static void perf_evsel__process_lock_contended(struct perf_evsel *evsel, + struct perf_sample *sample) { struct trace_contended_event contended_event; + struct event_format *event = evsel->tp_format; + void *data = sample->raw_data; u64 tmp; /* this is required for casting... */ tmp = raw_field_value(event, "lockdep_addr", data); @@ -698,17 +669,15 @@ process_lock_contended_event(void *data, contended_event.name = (char *)raw_field_ptr(event, "name", data); if (trace_handler->acquire_event) - trace_handler->contended_event(&contended_event, event, cpu, timestamp, thread); + trace_handler->contended_event(&contended_event, sample); } -static void -process_lock_release_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static void perf_evsel__process_lock_release(struct perf_evsel *evsel, + struct perf_sample *sample) { struct trace_release_event release_event; + struct event_format *event = evsel->tp_format; + void *data = sample->raw_data; u64 tmp; /* this is required for casting... */ tmp = raw_field_value(event, "lockdep_addr", data); @@ -716,22 +685,22 @@ process_lock_release_event(void *data, release_event.name = (char *)raw_field_ptr(event, "name", data); if (trace_handler->acquire_event) - trace_handler->release_event(&release_event, event, cpu, timestamp, thread); + trace_handler->release_event(&release_event, sample); } -static void process_raw_event(struct perf_evsel *evsel, void *data, int cpu, - u64 timestamp, struct thread *thread) +static void perf_evsel__process_lock_event(struct perf_evsel *evsel, + struct perf_sample *sample) { struct event_format *event = evsel->tp_format; if (!strcmp(event->name, "lock_acquire")) - process_lock_acquire_event(data, event, cpu, timestamp, thread); + perf_evsel__process_lock_acquire(evsel, sample); if (!strcmp(event->name, "lock_acquired")) - process_lock_acquired_event(data, event, cpu, timestamp, thread); + perf_evsel__process_lock_acquired(evsel, sample); if (!strcmp(event->name, "lock_contended")) - process_lock_contended_event(data, event, cpu, timestamp, thread); + perf_evsel__process_lock_contended(evsel, sample); if (!strcmp(event->name, "lock_release")) - process_lock_release_event(data, event, cpu, timestamp, thread); + perf_evsel__process_lock_release(evsel, sample); } static void print_bad_events(int bad, int total) @@ -857,8 +826,7 @@ static int process_sample_event(struct perf_tool *tool __used, return -1; } - process_raw_event(evsel, sample->raw_data, sample->cpu, sample->time, thread); - + perf_evsel__process_lock_event(evsel, sample); return 0; } From 7f7f8d0bea5d6bb985f4ae84ca3daff34802fd32 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Aug 2012 11:33:42 -0300 Subject: [PATCH 029/282] perf sched: Use perf_sample To reduce the number of parameters passed to the various event handling functions. 
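The mechanical shape of the change, reduced to a single handler (the name is illustrative): instead of receiving the already-unpacked pieces,

	void runtime_handler(struct trace_runtime_event *ev, struct machine *m,
			     struct event_format *fmt, int cpu, u64 timestamp,
			     struct thread *thread);

a handler now takes the sample and pulls out only what it needs:

	void runtime_handler(struct trace_runtime_event *ev, struct machine *m,
			     struct perf_sample *sample)
	{
		u64 timestamp = sample->time;
		int cpu = sample->cpu;
		/* ... */
	}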
Cc: Andrey Wagin Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-fc537qykjjqzvyol5fecx6ug@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 113 ++++++++++++------------------------- 1 file changed, 37 insertions(+), 76 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 30ef82aca88..a25a023965b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -729,46 +729,30 @@ struct trace_sched_handler { void (*switch_event)(struct trace_switch_event *, struct machine *, struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + struct perf_sample *sample); void (*runtime_event)(struct trace_runtime_event *, struct machine *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + struct perf_sample *sample); void (*wakeup_event)(struct trace_wakeup_event *, struct machine *, struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + struct perf_sample *sample); void (*fork_event)(struct trace_fork_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + struct event_format *event); void (*migrate_task_event)(struct trace_migrate_task_event *, - struct machine *machine, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + struct machine *machine, + struct perf_sample *sample); }; static void replay_wakeup_event(struct trace_wakeup_event *wakeup_event, struct machine *machine __used, - struct event_format *event, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + struct event_format *event, struct perf_sample *sample) { struct task_desc *waker, *wakee; @@ -784,7 +768,7 @@ replay_wakeup_event(struct trace_wakeup_event *wakeup_event, waker = register_pid(wakeup_event->common_pid, ""); wakee = register_pid(wakeup_event->pid, wakeup_event->comm); - add_sched_event_wakeup(waker, timestamp, wakee); + add_sched_event_wakeup(waker, sample->time, wakee); } static u64 cpu_last_switched[MAX_CPUS]; @@ -793,12 +777,11 @@ static void replay_switch_event(struct trace_switch_event *switch_event, struct machine *machine __used, struct event_format *event, - int cpu, - u64 timestamp, - struct thread *thread __used) + struct perf_sample *sample) { struct task_desc *prev, __used *next; - u64 timestamp0; + u64 timestamp0, timestamp = sample->time; + int cpu = sample->cpu; s64 delta; if (verbose) @@ -835,10 +818,7 @@ replay_switch_event(struct trace_switch_event *switch_event, static void replay_fork_event(struct trace_fork_event *fork_event, - struct event_format *event, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + struct event_format *event) { if (verbose) { printf("sched_fork event %p\n", event); @@ -944,10 +924,7 @@ static void thread_atoms_insert(struct thread *thread) static void latency_fork_event(struct trace_fork_event *fork_event __used, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + struct event_format *event __used) { /* should insert the newcomer */ } @@ -1027,13 +1004,12 @@ static void latency_switch_event(struct trace_switch_event *switch_event, struct machine *machine, struct event_format *event __used, - int cpu, - u64 timestamp, - struct thread *thread __used) + struct perf_sample *sample) { struct work_atoms *out_events, *in_events; 
struct thread *sched_out, *sched_in; - u64 timestamp0; + u64 timestamp0, timestamp = sample->time; + int cpu = sample->cpu; s64 delta; BUG_ON(cpu >= MAX_CPUS || cpu < 0); @@ -1078,14 +1054,12 @@ latency_switch_event(struct trace_switch_event *switch_event, static void latency_runtime_event(struct trace_runtime_event *runtime_event, - struct machine *machine, - struct event_format *event __used, - int cpu, - u64 timestamp, - struct thread *this_thread __used) + struct machine *machine, struct perf_sample *sample) { struct thread *thread = machine__findnew_thread(machine, runtime_event->pid); struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); + u64 timestamp = sample->time; + int cpu = sample->cpu; BUG_ON(cpu >= MAX_CPUS || cpu < 0); if (!atoms) { @@ -1101,15 +1075,13 @@ latency_runtime_event(struct trace_runtime_event *runtime_event, static void latency_wakeup_event(struct trace_wakeup_event *wakeup_event, - struct machine *machine, - struct event_format *__event __used, - int cpu __used, - u64 timestamp, - struct thread *thread __used) + struct machine *machine, struct event_format *event __used, + struct perf_sample *sample) { struct work_atoms *atoms; struct work_atom *atom; struct thread *wakee; + u64 timestamp = sample->time; /* Note for later, it may be interesting to observe the failing cases */ if (!wakeup_event->success) @@ -1149,12 +1121,9 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, static void latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, - struct machine *machine, - struct event_format *__event __used, - int cpu __used, - u64 timestamp, - struct thread *thread __used) + struct machine *machine, struct perf_sample *sample) { + u64 timestamp = sample->time; struct work_atoms *atoms; struct work_atom *atom; struct thread *migrant; @@ -1364,7 +1333,7 @@ process_sched_wakeup_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine, - struct thread *thread) + struct thread *thread __used) { void *data = sample->raw_data; struct trace_wakeup_event wakeup_event; @@ -1378,8 +1347,7 @@ process_sched_wakeup_event(struct perf_tool *tool __used, FILL_FIELD(wakeup_event, cpu, event, data); if (trace_handler->wakeup_event) - trace_handler->wakeup_event(&wakeup_event, machine, event, - sample->cpu, sample->time, thread); + trace_handler->wakeup_event(&wakeup_event, machine, event, sample); } /* @@ -1399,15 +1367,13 @@ static void map_switch_event(struct trace_switch_event *switch_event, struct machine *machine, struct event_format *event __used, - int this_cpu, - u64 timestamp, - struct thread *thread __used) + struct perf_sample *sample) { struct thread *sched_out __used, *sched_in; int new_shortname; - u64 timestamp0; + u64 timestamp0, timestamp = sample->time; s64 delta; - int cpu; + int cpu, this_cpu = sample->cpu; BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); @@ -1479,7 +1445,7 @@ process_sched_switch_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine, - struct thread *thread) + struct thread *thread __used) { int this_cpu = sample->cpu; void *data = sample->raw_data; @@ -1504,8 +1470,7 @@ process_sched_switch_event(struct perf_tool *tool __used, nr_context_switch_bugs++; } if (trace_handler->switch_event) - trace_handler->switch_event(&switch_event, machine, event, - this_cpu, sample->time, thread); + trace_handler->switch_event(&switch_event, machine, event, sample); 
curr_pid[this_cpu] = switch_event.next_pid; } @@ -1515,7 +1480,7 @@ process_sched_runtime_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine, - struct thread *thread) + struct thread *thread __used) { void *data = sample->raw_data; struct trace_runtime_event runtime_event; @@ -1526,8 +1491,7 @@ process_sched_runtime_event(struct perf_tool *tool __used, FILL_FIELD(runtime_event, vruntime, event, data); if (trace_handler->runtime_event) - trace_handler->runtime_event(&runtime_event, machine, event, - sample->cpu, sample->time, thread); + trace_handler->runtime_event(&runtime_event, machine, sample); } static void @@ -1535,7 +1499,7 @@ process_sched_fork_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine __used, - struct thread *thread) + struct thread *thread __used) { void *data = sample->raw_data; struct trace_fork_event fork_event; @@ -1548,8 +1512,7 @@ process_sched_fork_event(struct perf_tool *tool __used, FILL_FIELD(fork_event, child_pid, event, data); if (trace_handler->fork_event) - trace_handler->fork_event(&fork_event, event, - sample->cpu, sample->time, thread); + trace_handler->fork_event(&fork_event, event); } static void @@ -1568,7 +1531,7 @@ process_sched_migrate_task_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine, - struct thread *thread) + struct thread *thread __used) { void *data = sample->raw_data; struct trace_migrate_task_event migrate_task_event; @@ -1581,9 +1544,7 @@ process_sched_migrate_task_event(struct perf_tool *tool __used, FILL_FIELD(migrate_task_event, cpu, event, data); if (trace_handler->migrate_task_event) - trace_handler->migrate_task_event(&migrate_task_event, machine, - event, sample->cpu, - sample->time, thread); + trace_handler->migrate_task_event(&migrate_task_event, machine, sample); } typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event_format *event, From 9782243353ec135327a80c76c63464e592949cd1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Aug 2012 23:50:21 -0300 Subject: [PATCH 030/282] perf script: Stop using pevent directly We can get all that is needed using just event_format, that is available via evsel->tp_format now. 
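In code terms, the per-sample lookup that used to be done by hand, roughly:

	int type = trace_parse_common_type(pevent, sample->raw_data);
	struct event_format *event = pevent_find_event(pevent, type);

becomes a single pointer chase on the evsel that delivered the sample (assuming tp_format was filled in when the perf.data header was read, as the earlier patches in this series arrange):

	struct event_format *event = evsel->tp_format;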
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-2hsr1686epa9f0vx4yg7z2zj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 48 +++++++------------ .../util/scripting-engines/trace-event-perl.c | 14 +++--- .../scripting-engines/trace-event-python.c | 5 +- tools/perf/util/trace-event-parse.c | 4 +- tools/perf/util/trace-event-scripting.c | 1 - tools/perf/util/trace-event.h | 3 +- 6 files changed, 28 insertions(+), 47 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8dba4707b03..6425612b4d9 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -28,11 +28,6 @@ static bool system_wide; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); -struct perf_script { - struct perf_tool tool; - struct perf_session *session; -}; - enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, PERF_OUTPUT_TID = 1U << 1, @@ -399,11 +394,8 @@ static void print_sample_bts(union perf_event *event, printf("\n"); } -static void process_event(union perf_event *event __unused, - struct pevent *pevent __unused, - struct perf_sample *sample, - struct perf_evsel *evsel, - struct machine *machine, +static void process_event(union perf_event *event, struct perf_sample *sample, + struct perf_evsel *evsel, struct machine *machine, struct thread *thread) { struct perf_event_attr *attr = &evsel->attr; @@ -488,7 +480,6 @@ static int process_sample_event(struct perf_tool *tool __used, struct machine *machine) { struct addr_location al; - struct perf_script *scr = container_of(tool, struct perf_script, tool); struct thread *thread = machine__findnew_thread(machine, event->ip.tid); if (thread == NULL) { @@ -520,27 +511,24 @@ static int process_sample_event(struct perf_tool *tool __used, if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) return 0; - scripting_ops->process_event(event, scr->session->pevent, - sample, evsel, machine, thread); + scripting_ops->process_event(event, sample, evsel, machine, thread); evsel->hists.stats.total_period += sample->period; return 0; } -static struct perf_script perf_script = { - .tool = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .comm = perf_event__process_comm, - .exit = perf_event__process_task, - .fork = perf_event__process_task, - .attr = perf_event__process_attr, - .event_type = perf_event__process_event_type, - .tracing_data = perf_event__process_tracing_data, - .build_id = perf_event__process_build_id, - .ordered_samples = true, - .ordering_requires_timestamps = true, - }, +static struct perf_tool perf_script = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .attr = perf_event__process_attr, + .event_type = perf_event__process_event_type, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; extern volatile int session_done; @@ -556,7 +544,7 @@ static int __cmd_script(struct perf_session *session) signal(SIGINT, sig_handler); - ret = perf_session__process_events(session, &perf_script.tool); + ret = perf_session__process_events(session, &perf_script); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); @@ -1339,12 +1327,10 @@ int 
cmd_script(int argc, const char **argv, const char *prefix __used) setup_pager(); session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_script.tool); + &perf_script); if (session == NULL) return -ENOMEM; - perf_script.session = session; - if (cpu_list) { if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap)) return -1; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index c2662811659..52580d09d75 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -258,7 +258,6 @@ static inline struct event_format *find_cache_event(struct perf_evsel *evsel) } static void perl_process_tracepoint(union perf_event *perf_event __unused, - struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __unused, @@ -284,7 +283,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, if (!event) die("ug! no event found for type %d", evsel->attr.config); - pid = trace_parse_common_pid(pevent, data); + pid = raw_field_value(event, "common_pid", data); sprintf(handler, "%s::%s", event->system, event->name); @@ -317,7 +316,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); } else { /* FIELD_IS_NUMERIC */ - val = read_size(pevent, data + field->offset, + val = read_size(event, data + field->offset, field->size); if (field->flags & FIELD_IS_SIGNED) { XPUSHs(sv_2mortal(newSViv(val))); @@ -346,9 +345,9 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, LEAVE; } -static void perl_process_event_generic(union perf_event *pevent __unused, +static void perl_process_event_generic(union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __unused, + struct perf_evsel *evsel, struct machine *machine __unused, struct thread *thread __unused) { @@ -360,7 +359,7 @@ static void perl_process_event_generic(union perf_event *pevent __unused, ENTER; SAVETMPS; PUSHMARK(SP); - XPUSHs(sv_2mortal(newSVpvn((const char *)pevent, pevent->header.size))); + XPUSHs(sv_2mortal(newSVpvn((const char *)event, event->header.size))); XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->attr, sizeof(evsel->attr)))); XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample)))); XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size))); @@ -373,13 +372,12 @@ static void perl_process_event_generic(union perf_event *pevent __unused, } static void perl_process_event(union perf_event *event, - struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, struct thread *thread) { - perl_process_tracepoint(event, pevent, sample, evsel, machine, thread); + perl_process_tracepoint(event, sample, evsel, machine, thread); perl_process_event_generic(event, sample, evsel, machine, thread); } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 8006978d839..df7d33d1de0 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -221,7 +221,6 @@ static inline struct event_format *find_cache_event(struct perf_evsel *evsel) } static void python_process_event(union perf_event *perf_event __unused, - struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct 
machine *machine __unused, @@ -248,7 +247,7 @@ static void python_process_event(union perf_event *perf_event __unused, if (!event) die("ug! no event found for type %d", (int)evsel->attr.config); - pid = trace_parse_common_pid(pevent, data); + pid = raw_field_value(event, "common_pid", data); sprintf(handler_name, "%s__%s", event->system, event->name); @@ -293,7 +292,7 @@ static void python_process_event(union perf_event *perf_event __unused, offset = field->offset; obj = PyString_FromString((char *)data + offset); } else { /* FIELD_IS_NUMERIC */ - val = read_size(pevent, data + field->offset, + val = read_size(event, data + field->offset, field->size); if (field->flags & FIELD_IS_SIGNED) { if ((long long)val >= LONG_MIN && diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 12088348ac0..4cb7f3831f7 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -162,9 +162,9 @@ int trace_parse_common_pid(struct pevent *pevent, void *data) return pevent_data_pid(pevent, &record); } -unsigned long long read_size(struct pevent *pevent, void *ptr, int size) +unsigned long long read_size(struct event_format *event, void *ptr, int size) { - return pevent_read_number(pevent, ptr, size); + return pevent_read_number(event->pevent, ptr, size); } void event_format__print(struct event_format *event, diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 474aa7a7df4..aceb8eea15f 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -36,7 +36,6 @@ static int stop_script_unsupported(void) } static void process_event_unsupported(union perf_event *event __unused, - struct pevent *pevent __unused, struct perf_sample *sample __unused, struct perf_evsel *evsel __unused, struct machine *machine __unused, diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 069d105e0fc..cee16350cd8 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -58,7 +58,7 @@ int trace_parse_common_pid(struct pevent *pevent, void *data); struct event_format *trace_find_next_event(struct pevent *pevent, struct event_format *event); -unsigned long long read_size(struct pevent *pevent, void *ptr, int size); +unsigned long long read_size(struct event_format *event, void *ptr, int size); unsigned long long eval_flag(const char *flag); struct pevent_record *trace_read_data(struct pevent *pevent, int cpu); @@ -81,7 +81,6 @@ struct scripting_ops { int (*start_script) (const char *script, int argc, const char **argv); int (*stop_script) (void); void (*process_event) (union perf_event *event, - struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, From e6b978335424029d05edc01374744516b21b1ede Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 7 Aug 2012 19:43:11 +0200 Subject: [PATCH 031/282] perf tools: Fix version file for perf documentation with OUTPUT variable set Fixes the following: + make OUTPUT=/.../.build/perf-user/ DESTDIR=/.../.install/perf-user/ man install-man make -C Documentation man make[1]: Entering directory `/.../.source/linux.perf/tools/perf/Documentation' make[2]: Entering directory `/.../.source/linux.perf/tools/perf' make[2]: *** No rule to make target `PERF-VERSION-FILE'. Stop. 
Signed-off-by: Robert Richter Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1344361396-7237-2-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index ca600e09c8d..9f2e44f2b17 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -195,10 +195,10 @@ install-pdf: pdf #install-html: html # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) -../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE - $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE +$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE + $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE --include ../PERF-VERSION-FILE +-include $(OUTPUT)PERF-VERSION-FILE # # Determine "include::" file references in asciidoc files. From 2ede8303db75ead3250f95c3390e6ba200cbe7d4 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 7 Aug 2012 19:43:12 +0200 Subject: [PATCH 032/282] perf tools: Fix lib/traceevent build dir with OUTPUT variable set With the OUTPUT variable set the libtraceevent.a file is wrongly built in the source directory: + make -d OUTPUT=/.../.build/perf-user/ DESTDIR=/.../.install/perf-user/ ... Considering target file `../lib/traceevent//libtraceevent.a'. File `../lib/traceevent//libtraceevent.a' does not exist. Finished prerequisites of target file `../lib/traceevent//libtraceevent.a'. Must remake target `../lib/traceevent//libtraceevent.a'. Invoking recipe from Makefile:837 to update target `../lib/traceevent//libtraceevent.a'. Putting child 0x703850 (../lib/traceevent//libtraceevent.a) PID 8365 on the chain. Live child 0x703850 (../lib/traceevent//libtraceevent.a) PID 8365 SUBDIR ../lib/traceevent/ $ git clean -nxd Would remove tools/lib/traceevent/.event-parse.d Would remove tools/lib/traceevent/.parse-filter.d Would remove tools/lib/traceevent/.parse-utils.d Would remove tools/lib/traceevent/.trace-seq.d Would remove tools/lib/traceevent/event-parse.o Would remove tools/lib/traceevent/libtraceevent.a Would remove tools/lib/traceevent/parse-filter.o Would remove tools/lib/traceevent/parse-utils.o Would remove tools/lib/traceevent/trace-seq.o This patch fixes this. Note: Though this should already work with O=$outputdir we better use the OUTPUT variable directly. 
Signed-off-by: Robert Richter Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1344361396-7237-3-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 35655c3a7b7..2d4bf6ef1c5 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -186,10 +186,10 @@ SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) TRACE_EVENT_DIR = ../lib/traceevent/ -ifeq ("$(origin O)", "command line") - TE_PATH=$(OUTPUT)/ +ifneq ($(OUTPUT),) + TE_PATH=$(OUTPUT) else - TE_PATH=$(TRACE_EVENT_DIR)/ + TE_PATH=$(TRACE_EVENT_DIR) endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a @@ -842,7 +842,7 @@ $(LIB_FILE): $(LIB_OBJS) # libtraceevent.a $(LIBTRACEEVENT): - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) $(COMMAND_O) libtraceevent.a + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a help: @echo 'Perf make targets:' From b527bab59be7ca2154f644fcc4b9e3c267a6d855 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 7 Aug 2012 19:43:13 +0200 Subject: [PATCH 033/282] perf tools: Fix parsing of 64 bit raw config value for 32 bit perf record fails on 32 bit with: invalid or unsupported event: 'r40000F7E0' Fixing this by parsing 64 bit num values. Signed-off-by: Robert Richter Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1344361396-7237-4-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 6 +++--- tools/perf/util/parse-events.h | 6 +++--- tools/perf/util/parse-events.l | 4 ++-- tools/perf/util/parse-events.y | 10 +++++----- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 74a5af4d33e..8bdfa3e5c8c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -551,7 +551,7 @@ static int config_attr(struct perf_event_attr *attr, } int parse_events_add_numeric(struct list_head **list, int *idx, - unsigned long type, unsigned long config, + u32 type, u64 config, struct list_head *head_config) { struct perf_event_attr attr; @@ -1005,7 +1005,7 @@ int parse_events__is_hardcoded_term(struct parse_events__term *term) static int new_term(struct parse_events__term **_term, int type_val, int type_term, char *config, - char *str, long num) + char *str, u64 num) { struct parse_events__term *term; @@ -1034,7 +1034,7 @@ static int new_term(struct parse_events__term **_term, int type_val, } int parse_events__term_num(struct parse_events__term **term, - int type_term, char *config, long num) + int type_term, char *config, u64 num) { return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term, config, NULL, num); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ee9c218a193..163aad40461 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -55,7 +55,7 @@ struct parse_events__term { char *config; union { char *str; - long num; + u64 num; } val; int type_val; int type_term; @@ -73,7 +73,7 @@ struct parse_events_data__terms { int parse_events__is_hardcoded_term(struct parse_events__term *term); int parse_events__term_num(struct parse_events__term **_term, - int type_term, char *config, long num); + int type_term, char *config, u64 num); int parse_events__term_str(struct parse_events__term **_term, int type_term, char *config, char *str); int parse_events__term_clone(struct parse_events__term **new, @@ -83,7 +83,7 @@ int 
parse_events_modifier(struct list_head *list, char *str); int parse_events_add_tracepoint(struct list_head **list, int *idx, char *sys, char *event); int parse_events_add_numeric(struct list_head **list, int *idx, - unsigned long type, unsigned long config, + u32 type, u64 config, struct list_head *head_config); int parse_events_add_cache(struct list_head **list, int *idx, char *type, char *op_result1, char *op_result2); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 384ca74c6b2..e4abdf25d49 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -15,10 +15,10 @@ YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); static int __value(YYSTYPE *yylval, char *str, int base, int token) { - long num; + u64 num; errno = 0; - num = strtoul(str, NULL, base); + num = strtoull(str, NULL, base); if (errno) return PE_ERROR; diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 2bc5fbff2b5..423d3315146 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -57,7 +57,7 @@ do { \ %union { char *str; - unsigned long num; + u64 num; struct list_head *head; struct parse_events__term *term; } @@ -207,7 +207,7 @@ PE_VALUE ':' PE_VALUE struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_numeric(&list, &data->idx, $1, $3, NULL)); + ABORT_ON(parse_events_add_numeric(&list, &data->idx, (u32)$1, $3, NULL)); $$ = list; } @@ -282,7 +282,7 @@ PE_TERM '=' PE_NAME { struct parse_events__term *term; - ABORT_ON(parse_events__term_str(&term, $1, NULL, $3)); + ABORT_ON(parse_events__term_str(&term, (int)$1, NULL, $3)); $$ = term; } | @@ -290,7 +290,7 @@ PE_TERM '=' PE_VALUE { struct parse_events__term *term; - ABORT_ON(parse_events__term_num(&term, $1, NULL, $3)); + ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, $3)); $$ = term; } | @@ -298,7 +298,7 @@ PE_TERM { struct parse_events__term *term; - ABORT_ON(parse_events__term_num(&term, $1, NULL, 1)); + ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, 1)); $$ = term; } From 0cf260131c52f681533d17db6fd07545a3dc184e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 7 Aug 2012 19:43:14 +0200 Subject: [PATCH 034/282] tools lib traceevent: Fix cast from pointer to integer for 32 bit Fixing the integer cast reported by the following warning: tools/lib/traceevent/event-parse.c:3488:14: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] Signed-off-by: Robert Richter Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1344361396-7237-5-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 5f34aa371b5..b7c2c491f61 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "event-parse.h" #include "event-utils.h" @@ -3485,7 +3486,7 @@ process_defined_func(struct trace_seq *s, void *data, int size, if (!string->str) die("malloc str"); - args[i] = (unsigned long long)string->str; + args[i] = (uintptr_t)string->str; strings = string; trace_seq_destroy(&str); break; From 75bc5ca89827fe3f2399321b2920a30bcf658049 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 7 Aug 2012 19:43:15 +0200 Subject: [PATCH 035/282] perf list: Update documentation about raw event setup It was 
missing that only certain bit fields are passed to the config value which confused users. Updating it. Signed-off-by: Robert Richter Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1344361396-7237-6-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index ddc22525228..232be519580 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -15,6 +15,7 @@ DESCRIPTION This command displays the symbolic event types which can be selected in the various perf commands with the -e option. +[[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- @@ -44,6 +45,11 @@ layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Softwar of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344, Figure 13-7 Performance Event-Select Register (PerfEvtSeln)). +Note: Only the following bit fields can be set in x86 counter +registers: event, umask, edge, inv, cmask. Esp. guest/host only and +OS/user mode flags must be setup using <>. + Example: If the Intel docs for a QM720 Core i7 describe an event as: From 2055fdaf8703d3101b12e0d9b7cbceaeabe35c17 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 7 Aug 2012 19:43:16 +0200 Subject: [PATCH 036/282] perf list: Document precise event sampling for AMD IBS Updating man perf-list. Signed-off-by: Robert Richter Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1344361396-7237-7-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 42 ++++++++++++++++++-------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 232be519580..d1e39dc8c81 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -20,20 +20,38 @@ EVENT MODIFIERS --------------- Events can optionally have a modifer by appending a colon and one or -more modifiers. Modifiers allow the user to restrict when events are -counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor. -Additional modifiers are 'G' for guest counting (in KVM guests) and 'H' -for host counting (not in KVM guests). +more modifiers. Modifiers allow the user to restrict the events to be +counted. The following modifiers exist: + + u - user-space counting + k - kernel counting + h - hypervisor counting + G - guest counting (in KVM guests) + H - host counting (not in KVM guests) + p - precise level The 'p' modifier can be used for specifying how precise the instruction -address should be. The 'p' modifier is currently only implemented for -Intel PEBS and can be specified multiple times: - 0 - SAMPLE_IP can have arbitrary skid - 1 - SAMPLE_IP must have constant skid - 2 - SAMPLE_IP requested to have 0 skid - 3 - SAMPLE_IP must have 0 skid +address should be. The 'p' modifier can be specified multiple times: -The PEBS implementation now supports up to 2. + 0 - SAMPLE_IP can have arbitrary skid + 1 - SAMPLE_IP must have constant skid + 2 - SAMPLE_IP requested to have 0 skid + 3 - SAMPLE_IP must have 0 skid + +For Intel systems precise event sampling is implemented with PEBS +which supports up to precise-level 2. + +On AMD systems it is implemented using IBS (up to precise-level 2). 
+The precise modifier works with event types 0x76 (cpu-cycles, CPU +clocks not halted) and 0xC1 (micro-ops retired). Both events map to +IBS execution sampling (IBS op) with the IBS Op Counter Control bit +(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s +Manual Volume 2: System Programming, 13.3 Instruction-Based +Sampling). Examples to use IBS: + + perf record -a -e cpu-cycles:p ... # use ibs op counting cycles + perf record -a -e r076:p ... # same as -e cpu-cycles:p + perf record -a -e r0C1:p ... # use ibs op counting micro-ops RAW HARDWARE EVENT DESCRIPTOR ----------------------------- @@ -97,4 +115,4 @@ SEE ALSO linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], -http://support.amd.com/us/Processor_TechDocs/24593.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] +http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] From 6a6daec2ae9f097703c1da4925367f1c198c9492 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 8 Aug 2012 17:57:51 +0800 Subject: [PATCH 037/282] perf script: Add general python handler to process non-tracepoint events This patch just follows Robert Richter's idea and the commit 37a058ea0 "perf script: Add generic perl handler to process events" to similarly add a python handler for general events other than tracepoints. For non-tracepoint events, this patch will try to find a function named "process_event" in the python script, and pass the event attribute, perf_sample, raw_data in format of raw string. And the python script can use "struct" module's unpack function to disasemble the needed info and process. Signed-off-by: Feng Tang Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1344419875-21665-2-git-send-email-feng.tang@intel.com [ committer note: Fixed up wrt da37896, i.e. 
pevent parm in script event handlers ] Signed-off-by: Arnaldo Carvalho de Melo --- .../scripting-engines/trace-event-python.c | 59 ++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index df7d33d1de0..b9010d878b4 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -32,6 +32,7 @@ #include "../event.h" #include "../thread.h" #include "../trace-event.h" +#include "../evsel.h" PyMODINIT_FUNC initperf_trace_context(void); @@ -220,7 +221,7 @@ static inline struct event_format *find_cache_event(struct perf_evsel *evsel) return event; } -static void python_process_event(union perf_event *perf_event __unused, +static void python_process_tracepoint(union perf_event *perf_event __unused, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __unused, @@ -337,6 +338,62 @@ static void python_process_event(union perf_event *perf_event __unused, Py_DECREF(t); } +static void python_process_general_event(union perf_event *perf_event __unused, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine __unused, + struct thread *thread __unused) +{ + PyObject *handler, *retval, *t; + static char handler_name[64]; + unsigned n = 0; + void *data = sample->raw_data; + + t = PyTuple_New(MAX_FIELDS); + if (!t) + Py_FatalError("couldn't create Python tuple"); + + snprintf(handler_name, sizeof(handler_name), "%s", "process_event"); + + handler = PyDict_GetItemString(main_dict, handler_name); + if (handler && !PyCallable_Check(handler)) { + handler = NULL; + goto exit; + } + + /* Pass 3 parameters: event_attr, perf_sample, raw data */ + PyTuple_SetItem(t, n++, PyString_FromStringAndSize((void *)&evsel->attr, sizeof(evsel->attr))); + PyTuple_SetItem(t, n++, PyString_FromStringAndSize((void *)sample, sizeof(*sample))); + PyTuple_SetItem(t, n++, PyString_FromStringAndSize(data, sample->raw_size)); + + if (_PyTuple_Resize(&t, n) == -1) + Py_FatalError("error resizing Python tuple"); + + retval = PyObject_CallObject(handler, t); + if (retval == NULL) + handler_call_die(handler_name); +exit: + Py_DECREF(t); +} + +static void python_process_event(union perf_event *perf_event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine, + struct thread *thread) +{ + switch (evsel->attr.type) { + case PERF_TYPE_TRACEPOINT: + python_process_tracepoint(perf_event, sample, evsel, + machine, thread); + break; + /* Reserve for future process_hw/sw/raw APIs */ + default: + python_process_general_event(perf_event, sample, evsel, + machine, thread); + } +} + static int run_start_sub(void) { PyObject *handler, *retval; From 73994dc158a24df4af77d0a76c9702f120f7a6ad Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 8 Aug 2012 17:57:52 +0800 Subject: [PATCH 038/282] perf script: Replace "struct thread" with "struct addr_location" as a parameter for "process_event()" Both perl and python script start processing events other than trace points, and it's useful to pass the resolved symbol and the dso info to the event handler in script for better analysis and statistics. Struct thread is already a member of struct addr_location, using addr_location will keep the thread info, while providing additional symbol and dso info if exist, so that the script itself doesn't need to bother to do the symbol resolving and dso searching work. 
Tested-by: David Ahern Signed-off-by: Feng Tang Acked-by: David Ahern Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1344419875-21665-3-git-send-email-feng.tang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 +++-- .../perf/util/scripting-engines/trace-event-perl.c | 11 ++++++----- .../util/scripting-engines/trace-event-python.c | 13 +++++++------ tools/perf/util/trace-event-scripting.c | 2 +- tools/perf/util/trace-event.h | 5 +++-- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6425612b4d9..30a9cb8c992 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -396,9 +396,10 @@ static void print_sample_bts(union perf_event *event, static void process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct thread *thread) + struct addr_location *al) { struct perf_event_attr *attr = &evsel->attr; + struct thread *thread = al->thread; if (output[attr->type].fields == 0) return; @@ -511,7 +512,7 @@ static int process_sample_event(struct perf_tool *tool __used, if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) return 0; - scripting_ops->process_event(event, sample, evsel, machine, thread); + scripting_ops->process_event(event, sample, evsel, machine, &al); evsel->hists.stats.total_period += sample->period; return 0; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 52580d09d75..d28001016fb 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -261,7 +261,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __unused, - struct thread *thread) + struct addr_location *al) { struct format_field *field; static char handler[256]; @@ -272,6 +272,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; + struct thread *thread = al->thread; char *comm = thread->comm; dSP; @@ -349,7 +350,7 @@ static void perl_process_event_generic(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __unused, - struct thread *thread __unused) + struct addr_location *al __unused) { dSP; @@ -375,10 +376,10 @@ static void perl_process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct thread *thread) + struct addr_location *al) { - perl_process_tracepoint(event, sample, evsel, machine, thread); - perl_process_event_generic(event, sample, evsel, machine, thread); + perl_process_tracepoint(event, sample, evsel, machine, al); + perl_process_event_generic(event, sample, evsel, machine, al); } static void run_start_sub(void) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index b9010d878b4..24711b3536d 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -225,7 +225,7 @@ static void python_process_tracepoint(union perf_event *perf_event __unused, struct perf_sample *sample, struct perf_evsel *evsel, struct 
machine *machine __unused, - struct thread *thread) + struct addr_location *al) { PyObject *handler, *retval, *context, *t, *obj, *dict = NULL; static char handler_name[256]; @@ -238,6 +238,7 @@ static void python_process_tracepoint(union perf_event *perf_event __unused, int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; + struct thread *thread = al->thread; char *comm = thread->comm; t = PyTuple_New(MAX_FIELDS); @@ -342,7 +343,7 @@ static void python_process_general_event(union perf_event *perf_event __unused, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __unused, - struct thread *thread __unused) + struct addr_location *al __unused) { PyObject *handler, *retval, *t; static char handler_name[64]; @@ -361,7 +362,7 @@ static void python_process_general_event(union perf_event *perf_event __unused, goto exit; } - /* Pass 3 parameters: event_attr, perf_sample, raw data */ + /* Pass 4 parameters: event_attr, perf_sample, raw data, thread name */ PyTuple_SetItem(t, n++, PyString_FromStringAndSize((void *)&evsel->attr, sizeof(evsel->attr))); PyTuple_SetItem(t, n++, PyString_FromStringAndSize((void *)sample, sizeof(*sample))); PyTuple_SetItem(t, n++, PyString_FromStringAndSize(data, sample->raw_size)); @@ -380,17 +381,17 @@ static void python_process_event(union perf_event *perf_event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct thread *thread) + struct addr_location *al) { switch (evsel->attr.type) { case PERF_TYPE_TRACEPOINT: python_process_tracepoint(perf_event, sample, evsel, - machine, thread); + machine, al); break; /* Reserve for future process_hw/sw/raw APIs */ default: python_process_general_event(perf_event, sample, evsel, - machine, thread); + machine, al); } } diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index aceb8eea15f..302ff262494 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -39,7 +39,7 @@ static void process_event_unsupported(union perf_event *event __unused, struct perf_sample *sample __unused, struct perf_evsel *evsel __unused, struct machine *machine __unused, - struct thread *thread __unused) + struct addr_location *al __unused) { } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index cee16350cd8..7575dfd26e5 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -9,7 +9,6 @@ struct machine; struct perf_sample; union perf_event; struct perf_tool; -struct thread; extern int header_page_size_size; extern int header_page_ts_size; @@ -76,6 +75,8 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs, void tracing_data_put(struct tracing_data *tdata); +struct addr_location; + struct scripting_ops { const char *name; int (*start_script) (const char *script, int argc, const char **argv); @@ -84,7 +85,7 @@ struct scripting_ops { struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct thread *thread); + struct addr_location *al); int (*generate_script) (struct pevent *pevent, const char *outfile); }; From fd6b858a1e110c76e701cd9972a284ed2a7bcc33 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 8 Aug 2012 17:57:53 +0800 Subject: [PATCH 039/282] perf scripts python: Pass event/thread/dso name and symbol info to event handler in python Also as suggested by Arnaldo, pack all these parameters to a dictionary, which is more expandable for adding new parameters while 
keeping the compatibility for old scripts. Signed-off-by: Feng Tang Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1344419875-21665-4-git-send-email-feng.tang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripting-engines/trace-event-python.c | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 24711b3536d..7e3f57656c5 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -345,15 +345,23 @@ static void python_process_general_event(union perf_event *perf_event __unused, struct machine *machine __unused, struct addr_location *al __unused) { - PyObject *handler, *retval, *t; + PyObject *handler, *retval, *t, *dict; static char handler_name[64]; unsigned n = 0; - void *data = sample->raw_data; + struct thread *thread = al->thread; + /* + * Use the MAX_FIELDS to make the function expandable, though + * currently there is only one itme for the tuple. + */ t = PyTuple_New(MAX_FIELDS); if (!t) Py_FatalError("couldn't create Python tuple"); + dict = PyDict_New(); + if (!dict) + Py_FatalError("couldn't create Python dictionary"); + snprintf(handler_name, sizeof(handler_name), "%s", "process_event"); handler = PyDict_GetItemString(main_dict, handler_name); @@ -362,11 +370,25 @@ static void python_process_general_event(union perf_event *perf_event __unused, goto exit; } - /* Pass 4 parameters: event_attr, perf_sample, raw data, thread name */ - PyTuple_SetItem(t, n++, PyString_FromStringAndSize((void *)&evsel->attr, sizeof(evsel->attr))); - PyTuple_SetItem(t, n++, PyString_FromStringAndSize((void *)sample, sizeof(*sample))); - PyTuple_SetItem(t, n++, PyString_FromStringAndSize(data, sample->raw_size)); + PyDict_SetItemString(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); + PyDict_SetItemString(dict, "attr", PyString_FromStringAndSize( + (const char *)&evsel->attr, sizeof(evsel->attr))); + PyDict_SetItemString(dict, "sample", PyString_FromStringAndSize( + (const char *)sample, sizeof(*sample))); + PyDict_SetItemString(dict, "raw_buf", PyString_FromStringAndSize( + (const char *)sample->raw_data, sample->raw_size)); + PyDict_SetItemString(dict, "comm", + PyString_FromString(thread->comm)); + if (al->map) { + PyDict_SetItemString(dict, "dso", + PyString_FromString(al->map->dso->name)); + } + if (al->sym) { + PyDict_SetItemString(dict, "symbol", + PyString_FromString(al->sym->name)); + } + PyTuple_SetItem(t, n++, dict); if (_PyTuple_Resize(&t, n) == -1) Py_FatalError("error resizing Python tuple"); @@ -374,6 +396,7 @@ static void python_process_general_event(union perf_event *perf_event __unused, if (retval == NULL) handler_call_die(handler_name); exit: + Py_DECREF(dict); Py_DECREF(t); } From 02f1c33f7d630183518ea42d45a6acf275541b08 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 8 Aug 2012 17:57:54 +0800 Subject: [PATCH 040/282] perf scripts python: Add a python library EventClass.py This library defines several class types for perf events which could help to better analyze the event samples. Currently there are just a few classes, PerfEvent is the base class for all perf events, PebsEvent is a HW base Intel x86 PEBS event, and user could add more SW/HW event classes based on requriements. 
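As an illustration of how these pieces fit together, a user script built on the dictionary interface from the previous patch and on the EventClass helpers added here could look like the minimal sketch below. The file name my_handler.py is hypothetical; the param_dict keys ("ev_name", "comm", "dso", "symbol", "raw_buf") and create_event()/EVTYPE_PEBS_LL are the ones introduced in this series, and the script would be run with "perf script -s my_handler.py":

    # my_handler.py - hypothetical user script (Python 2 style, like the in-tree scripts)
    import os
    import sys

    # PERF_EXEC_PATH is exported by perf script when invoking the handler
    sys.path.append(os.environ['PERF_EXEC_PATH'] +
                    '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')

    from EventClass import create_event, EVTYPE_PEBS_LL

    def process_event(param_dict):
        name = param_dict["ev_name"]
        comm = param_dict["comm"]
        # dso/symbol are only present in the dict when resolution succeeded
        dso = param_dict.get("dso", "Unknown_dso")
        symbol = param_dict.get("symbol", "Unknown_symbol")

        # Classify the sample by raw buffer size and print PEBS-LL latency
        event = create_event(name, comm, dso, symbol, param_dict["raw_buf"])
        if event.ev_type == EVTYPE_PEBS_LL:
            print("%s in %s: load latency %u cycles" % (name, comm, event.lat))
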
Signed-off-by: Feng Tang Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1344419875-21665-5-git-send-email-feng.tang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../lib/Perf/Trace/EventClass.py | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100755 tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py new file mode 100755 index 00000000000..6372431188d --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py @@ -0,0 +1,94 @@ +# EventClass.py +# +# This is a libray defining some events typs classes, which could +# be used by other scripts to analyzing the perf samples. +# +# Currently there are just a few classes defined for examples, +# PerfEvent is the base class for all perf event sample, PebsEvent +# is a HW base Intel x86 PEBS event, and user could add more SW/HW +# event classes based on requriements. + +import struct + +# Event types, user could add more here +EVTYPE_GENERIC = 0 +EVTYPE_PEBS = 1 # Basic PEBS event +EVTYPE_PEBS_LL = 2 # PEBS event with load latency info +EVTYPE_IBS = 3 + +# +# Currently we don't have good way to tell the event type, but by +# the size of raw buffer, raw PEBS event with load latency data's +# size is 176 bytes, while the pure PEBS event's size is 144 bytes. +# +def create_event(name, comm, dso, symbol, raw_buf): + if (len(raw_buf) == 144): + event = PebsEvent(name, comm, dso, symbol, raw_buf) + elif (len(raw_buf) == 176): + event = PebsNHM(name, comm, dso, symbol, raw_buf) + else: + event = PerfEvent(name, comm, dso, symbol, raw_buf) + + return event + +class PerfEvent(object): + event_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_GENERIC): + self.name = name + self.comm = comm + self.dso = dso + self.symbol = symbol + self.raw_buf = raw_buf + self.ev_type = ev_type + PerfEvent.event_num += 1 + + def show(self): + print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso) + +# +# Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer +# contains the context info when that event happened: the EFLAGS and +# linear IP info, as well as all the registers. 
+# +class PebsEvent(PerfEvent): + pebs_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS): + tmp_buf=raw_buf[0:80] + flags, ip, ax, bx, cx, dx, si, di, bp, sp = struct.unpack('QQQQQQQQQQ', tmp_buf) + self.flags = flags + self.ip = ip + self.ax = ax + self.bx = bx + self.cx = cx + self.dx = dx + self.si = si + self.di = di + self.bp = bp + self.sp = sp + + PerfEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type) + PebsEvent.pebs_num += 1 + del tmp_buf + +# +# Intel Nehalem and Westmere support PEBS plus Load Latency info which lie +# in the four 64 bit words write after the PEBS data: +# Status: records the IA32_PERF_GLOBAL_STATUS register value +# DLA: Data Linear Address (EIP) +# DSE: Data Source Encoding, where the latency happens, hit or miss +# in L1/L2/L3 or IO operations +# LAT: the actual latency in cycles +# +class PebsNHM(PebsEvent): + pebs_nhm_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS_LL): + tmp_buf=raw_buf[144:176] + status, dla, dse, lat = struct.unpack('QQQQ', tmp_buf) + self.status = status + self.dla = dla + self.dse = dse + self.lat = lat + + PebsEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type) + PebsNHM.pebs_nhm_num += 1 + del tmp_buf From 0076d546b4f9b5c15121c6959d108a83fe43fa9a Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 8 Aug 2012 17:57:55 +0800 Subject: [PATCH 041/282] perf scripts python: Add event_analyzing_sample.py as a sample for general event handling Currently only trace point events are supported in perf/python script, the first 3 patches of this serie add the support for all types of events. This script is just a simple sample to show how to gather the basic information of the events and analyze them. This script will create one object for each event sample and insert them into a table in a database, then leverage the simple SQL commands to sort/group them. User can modify or write their brand new functions according to their specific requirment. 
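As a concrete, hypothetical sketch of such a user-written addition, one more grouping query in the style of show_general_events() could be dropped into the script; it assumes the gen_events table layout and the /dev/shm/perf.db database used by the script below:

    # Hypothetical extra helper: group samples by (comm, dso) pairs
    import sqlite3

    def show_comm_dso_pairs():
        con = sqlite3.connect("/dev/shm/perf.db")
        q = con.execute("select comm, dso, count(*) from gen_events"
                        " group by comm, dso order by -count(*)")
        for comm, dso, num in q:
            print("%16s %40s %8d" % (comm, dso, num))
        con.close()
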
Here is the sample of how to use the script: $ perf record -a tree $ perf script -s process_event.py There is 100 records in gen_events table Statistics about the general events grouped by thread/symbol/dso: comm number histgram ========================================== swapper 56 ###### tree 20 ##### perf 10 #### sshd 8 #### kworker/7:2 4 ### ksoftirqd/7 1 # plugin-containe 1 # symbol number histgram ========================================================== native_write_msr_safe 40 ###### __lock_acquire 8 #### ftrace_graph_caller 4 ### prepare_ftrace_return 4 ### intel_idle 3 ## native_sched_clock 3 ## Unknown_symbol 2 ## do_softirq 2 ## lock_release 2 ## lock_release_holdtime 2 ## trace_graph_entry 2 ## _IO_putc 1 # __d_lookup_rcu 1 # __do_fault 1 # __schedule 1 # _raw_spin_lock 1 # delay_tsc 1 # generic_exec_single 1 # generic_fillattr 1 # dso number histgram ================================================================== [kernel.kallsyms] 95 ####### /lib/libc-2.12.1.so 5 ### Signed-off-by: Feng Tang Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1344419875-21665-6-git-send-email-feng.tang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripts/python/event_analyzing_sample.py | 193 ++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 tools/perf/scripts/python/event_analyzing_sample.py diff --git a/tools/perf/scripts/python/event_analyzing_sample.py b/tools/perf/scripts/python/event_analyzing_sample.py new file mode 100644 index 00000000000..46f05aad6d0 --- /dev/null +++ b/tools/perf/scripts/python/event_analyzing_sample.py @@ -0,0 +1,193 @@ +# process_event.py: general event handler in python +# +# Current perf report is alreay very powerful with the anotation integrated, +# and this script is not trying to be as powerful as perf report, but +# providing end user/developer a flexible way to analyze the events other +# than trace points. +# +# The 2 database related functions in this script just show how to gather +# the basic information, and users can modify and write their own functions +# according to their specific requirment. +# +# The first sample "show_general_events" just does a baisc grouping for all +# generic events with the help of sqlite, and the 2nd one "show_pebs_ll" is +# for a x86 HW PMU event: PEBS with load latency data. +# + +import os +import sys +import math +import struct +import sqlite3 + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from EventClass import * + +# +# If the perf.data has a big number of samples, then the insert operation +# will be very time consuming (about 10+ minutes for 10000 samples) if the +# .db database is on disk. Move the .db file to RAM based FS to speedup +# the handling, which will cut the time down to several seconds. +# +con = sqlite3.connect("/dev/shm/perf.db") +con.isolation_level = None + +def trace_begin(): + print "In trace_begin:\n" + + # + # Will create several tables at the start, pebs_ll is for PEBS data with + # load latency info, while gen_events is for general event. 
+ # + con.execute(""" + create table if not exists gen_events ( + name text, + symbol text, + comm text, + dso text + );""") + con.execute(""" + create table if not exists pebs_ll ( + name text, + symbol text, + comm text, + dso text, + flags integer, + ip integer, + status integer, + dse integer, + dla integer, + lat integer + );""") + +# +# Create and insert event object to a database so that user could +# do more analysis with simple database commands. +# +def process_event(param_dict): + event_attr = param_dict["attr"] + sample = param_dict["sample"] + raw_buf = param_dict["raw_buf"] + comm = param_dict["comm"] + name = param_dict["ev_name"] + + # Symbol and dso info are not always resolved + if (param_dict.has_key("dso")): + dso = param_dict["dso"] + else: + dso = "Unknown_dso" + + if (param_dict.has_key("symbol")): + symbol = param_dict["symbol"] + else: + symbol = "Unknown_symbol" + + # Creat the event object and insert it to the right table in database + event = create_event(name, comm, dso, symbol, raw_buf) + insert_db(event) + +def insert_db(event): + if event.ev_type == EVTYPE_GENERIC: + con.execute("insert into gen_events values(?, ?, ?, ?)", + (event.name, event.symbol, event.comm, event.dso)) + elif event.ev_type == EVTYPE_PEBS_LL: + event.ip &= 0x7fffffffffffffff + event.dla &= 0x7fffffffffffffff + con.execute("insert into pebs_ll values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + (event.name, event.symbol, event.comm, event.dso, event.flags, + event.ip, event.status, event.dse, event.dla, event.lat)) + +def trace_end(): + print "In trace_end:\n" + # We show the basic info for the 2 type of event classes + show_general_events() + show_pebs_ll() + con.close() + +# +# As the event number may be very big, so we can't use linear way +# to show the histgram in real number, but use a log2 algorithm. +# + +def num2sym(num): + # Each number will have at least one '#' + snum = '#' * (int)(math.log(num, 2) + 1) + return snum + +def show_general_events(): + + # Check the total record number in the table + count = con.execute("select count(*) from gen_events") + for t in count: + print "There is %d records in gen_events table" % t[0] + if t[0] == 0: + return + + print "Statistics about the general events grouped by thread/symbol/dso: \n" + + # Group by thread + commq = con.execute("select comm, count(comm) from gen_events group by comm order by -count(comm)") + print "\n%16s %8s %16s\n%s" % ("comm", "number", "histgram", "="*42) + for row in commq: + print "%16s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by symbol + print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histgram", "="*58) + symbolq = con.execute("select symbol, count(symbol) from gen_events group by symbol order by -count(symbol)") + for row in symbolq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by dso + print "\n%40s %8s %16s\n%s" % ("dso", "number", "histgram", "="*74) + dsoq = con.execute("select dso, count(dso) from gen_events group by dso order by -count(dso)") + for row in dsoq: + print "%40s %8d %s" % (row[0], row[1], num2sym(row[1])) + +# +# This function just shows the basic info, and we could do more with the +# data in the tables, like checking the function parameters when some +# big latency events happen. 
+# +def show_pebs_ll(): + + count = con.execute("select count(*) from pebs_ll") + for t in count: + print "There is %d records in pebs_ll table" % t[0] + if t[0] == 0: + return + + print "Statistics about the PEBS Load Latency events grouped by thread/symbol/dse/latency: \n" + + # Group by thread + commq = con.execute("select comm, count(comm) from pebs_ll group by comm order by -count(comm)") + print "\n%16s %8s %16s\n%s" % ("comm", "number", "histgram", "="*42) + for row in commq: + print "%16s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by symbol + print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histgram", "="*58) + symbolq = con.execute("select symbol, count(symbol) from pebs_ll group by symbol order by -count(symbol)") + for row in symbolq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by dse + dseq = con.execute("select dse, count(dse) from pebs_ll group by dse order by -count(dse)") + print "\n%32s %8s %16s\n%s" % ("dse", "number", "histgram", "="*58) + for row in dseq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by latency + latq = con.execute("select lat, count(lat) from pebs_ll group by lat order by lat") + print "\n%32s %8s %16s\n%s" % ("latency", "number", "histgram", "="*58) + for row in latq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) + +def print_header(event_name, cpu, secs, nsecs, pid, comm): + print "%-20s %5u %05u.%09u %8u %-20s " % \ + (event_name, cpu, secs, nsecs, pid, comm), From 87b6a3ad40ba304ec468b972e979e7e410852476 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 9 Aug 2012 13:46:13 +0800 Subject: [PATCH 042/282] perf script python: Correct handler check and spelling errors Correct the checking for handler returned by PyDict_GetItemString(), also fix some spelling error and remove some data code in event_analyzing_sample.py, as suggested by Namhyung Kim. v2: restore back the wrongly removed trace_unhandled() func Signed-off-by: Feng Tang Acked-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20120809134613.067104c4@feng-i7 Signed-off-by: Arnaldo Carvalho de Melo --- .../lib/Perf/Trace/EventClass.py | 4 +-- .../scripts/python/event_analyzing_sample.py | 30 ++++++++----------- .../scripting-engines/trace-event-python.c | 8 ++--- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py index 6372431188d..9e0985794e2 100755 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py @@ -1,12 +1,12 @@ # EventClass.py # -# This is a libray defining some events typs classes, which could +# This is a library defining some events types classes, which could # be used by other scripts to analyzing the perf samples. # # Currently there are just a few classes defined for examples, # PerfEvent is the base class for all perf event sample, PebsEvent # is a HW base Intel x86 PEBS event, and user could add more SW/HW -# event classes based on requriements. +# event classes based on requirements. 
import struct diff --git a/tools/perf/scripts/python/event_analyzing_sample.py b/tools/perf/scripts/python/event_analyzing_sample.py index 46f05aad6d0..163c39fa12d 100644 --- a/tools/perf/scripts/python/event_analyzing_sample.py +++ b/tools/perf/scripts/python/event_analyzing_sample.py @@ -1,15 +1,15 @@ -# process_event.py: general event handler in python +# event_analyzing_sample.py: general event handler in python # -# Current perf report is alreay very powerful with the anotation integrated, +# Current perf report is already very powerful with the annotation integrated, # and this script is not trying to be as powerful as perf report, but # providing end user/developer a flexible way to analyze the events other # than trace points. # # The 2 database related functions in this script just show how to gather # the basic information, and users can modify and write their own functions -# according to their specific requirment. +# according to their specific requirement. # -# The first sample "show_general_events" just does a baisc grouping for all +# The first function "show_general_events" just does a basic grouping for all # generic events with the help of sqlite, and the 2nd one "show_pebs_ll" is # for a x86 HW PMU event: PEBS with load latency data. # @@ -85,7 +85,7 @@ def process_event(param_dict): else: symbol = "Unknown_symbol" - # Creat the event object and insert it to the right table in database + # Create the event object and insert it to the right table in database event = create_event(name, comm, dso, symbol, raw_buf) insert_db(event) @@ -109,7 +109,7 @@ def trace_end(): # # As the event number may be very big, so we can't use linear way -# to show the histgram in real number, but use a log2 algorithm. +# to show the histogram in real number, but use a log2 algorithm. 
# def num2sym(num): @@ -130,18 +130,18 @@ def show_general_events(): # Group by thread commq = con.execute("select comm, count(comm) from gen_events group by comm order by -count(comm)") - print "\n%16s %8s %16s\n%s" % ("comm", "number", "histgram", "="*42) + print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42) for row in commq: print "%16s %8d %s" % (row[0], row[1], num2sym(row[1])) # Group by symbol - print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histgram", "="*58) + print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58) symbolq = con.execute("select symbol, count(symbol) from gen_events group by symbol order by -count(symbol)") for row in symbolq: print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) # Group by dso - print "\n%40s %8s %16s\n%s" % ("dso", "number", "histgram", "="*74) + print "\n%40s %8s %16s\n%s" % ("dso", "number", "histogram", "="*74) dsoq = con.execute("select dso, count(dso) from gen_events group by dso order by -count(dso)") for row in dsoq: print "%40s %8d %s" % (row[0], row[1], num2sym(row[1])) @@ -163,31 +163,27 @@ def show_pebs_ll(): # Group by thread commq = con.execute("select comm, count(comm) from pebs_ll group by comm order by -count(comm)") - print "\n%16s %8s %16s\n%s" % ("comm", "number", "histgram", "="*42) + print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42) for row in commq: print "%16s %8d %s" % (row[0], row[1], num2sym(row[1])) # Group by symbol - print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histgram", "="*58) + print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58) symbolq = con.execute("select symbol, count(symbol) from pebs_ll group by symbol order by -count(symbol)") for row in symbolq: print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) # Group by dse dseq = con.execute("select dse, count(dse) from pebs_ll group by dse order by -count(dse)") - print "\n%32s %8s %16s\n%s" % ("dse", "number", "histgram", "="*58) + print "\n%32s %8s %16s\n%s" % ("dse", "number", "histogram", "="*58) for row in dseq: print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) # Group by latency latq = con.execute("select lat, count(lat) from pebs_ll group by lat order by lat") - print "\n%32s %8s %16s\n%s" % ("latency", "number", "histgram", "="*58) + print "\n%32s %8s %16s\n%s" % ("latency", "number", "histogram", "="*58) for row in latq: print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) def trace_unhandled(event_name, context, event_fields_dict): print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) - -def print_header(event_name, cpu, secs, nsecs, pid, comm): - print "%-20s %5u %05u.%09u %8u %-20s " % \ - (event_name, cpu, secs, nsecs, pid, comm), diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 7e3f57656c5..afba0972918 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -343,7 +343,7 @@ static void python_process_general_event(union perf_event *perf_event __unused, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __unused, - struct addr_location *al __unused) + struct addr_location *al) { PyObject *handler, *retval, *t, *dict; static char handler_name[64]; @@ -352,7 +352,7 @@ static void python_process_general_event(union perf_event *perf_event __unused, /* * Use the MAX_FIELDS to make the function expandable, though - * currently there is only one itme for 
the tuple. + * currently there is only one item for the tuple. */ t = PyTuple_New(MAX_FIELDS); if (!t) @@ -365,10 +365,8 @@ static void python_process_general_event(union perf_event *perf_event __unused, snprintf(handler_name, sizeof(handler_name), "%s", "process_event"); handler = PyDict_GetItemString(main_dict, handler_name); - if (handler && !PyCallable_Check(handler)) { - handler = NULL; + if (!handler || !PyCallable_Check(handler)) goto exit; - } PyDict_SetItemString(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); PyDict_SetItemString(dict, "attr", PyString_FromStringAndSize( From d25dcba8541c1cc31621d5cefce0304dafb9ae4f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 9 Aug 2012 10:35:37 -0600 Subject: [PATCH 043/282] perf lock record: improve message when tracepoints are not enabled If CONFIG options required for perf-lock are not enabled then the corresponding tracepoints will not be enabled. Currently, the message to the user is: $ perf lock record -a -- sleep 1 invalid or unsupported event: 'lock:lock_acquire' Run 'perf list' for a list of valid events Improve the message with a suggestion on which CONFIG options are needed: $ perf lock record -a -- sleep 1 tracepoint lock:lock_acquire is not enabled. Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled? Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1344530137-25521-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 3f8b9550a6e..585aae2858b 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -903,16 +903,19 @@ static const struct option lock_options[] = { OPT_END() }; +static const char * const lock_tracepoints[] = { + "lock:lock_acquire", /* CONFIG_LOCKDEP */ + "lock:lock_acquired", /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + "lock:lock_contended", /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + "lock:lock_release", /* CONFIG_LOCKDEP */ +}; + static const char *record_args[] = { "record", "-R", "-f", "-m", "1024", "-c", "1", - "-e", "lock:lock_acquire", - "-e", "lock:lock_acquired", - "-e", "lock:lock_contended", - "-e", "lock:lock_release", }; static int __cmd_record(int argc, const char **argv) @@ -920,15 +923,31 @@ static int __cmd_record(int argc, const char **argv) unsigned int rec_argc, i, j; const char **rec_argv; - rec_argc = ARRAY_SIZE(record_args) + argc - 1; - rec_argv = calloc(rec_argc + 1, sizeof(char *)); + for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) { + if (!is_valid_tracepoint(lock_tracepoints[i])) { + pr_err("tracepoint %s is not enabled. 
" + "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n", + lock_tracepoints[i]); + return 1; + } + } + rec_argc = ARRAY_SIZE(record_args) + argc - 1; + /* factor of 2 is for -e in front of each tracepoint */ + rec_argc += 2 * ARRAY_SIZE(lock_tracepoints); + + rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); + for (j = 0; j < ARRAY_SIZE(lock_tracepoints); j++) { + rec_argv[i++] = "-e"; + rec_argv[i++] = strdup(lock_tracepoints[j]); + } + for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; From 98a4179c9aa1e99adf5103e6e0d05f563d902de1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 9 Aug 2012 16:31:51 +0200 Subject: [PATCH 044/282] perf tools: Initial bash completion support This implements bash completion for perf subcommands such as record, report, script, probe, etc... Signed-off-by: Frederic Weisbecker Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1344522713-27951-2-git-send-email-fweisbec@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 1 + tools/perf/bash_completion | 22 ++++++++++++ tools/perf/perf.c | 69 +++++++++++++++++++++----------------- 3 files changed, 62 insertions(+), 30 deletions(-) create mode 100644 tools/perf/bash_completion diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 2d4bf6ef1c5..84b42278694 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -951,6 +951,7 @@ install: all $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' + $(INSTALL) -m 755 bash_completion $(DESTDIR_SQ)/etc/bash_completion.d/perf install-python_ext: $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion new file mode 100644 index 00000000000..9a31fa5ed2a --- /dev/null +++ b/tools/perf/bash_completion @@ -0,0 +1,22 @@ +# perf completion + +have perf && +_perf() +{ + local cur cmd + + COMPREPLY=() + _get_comp_words_by_ref cur + + cmd=${COMP_WORDS[0]} + + # List perf subcommands + if [ $COMP_CWORD -eq 1 ]; then + cmds=$($cmd --list-cmds) + COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) ) + # Fall down to list regular files + else + _filedir + fi +} && +complete -F _perf perf diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 2b2e225a4d4..db37ee3f29b 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -24,6 +24,37 @@ const char perf_more_info_string[] = int use_browser = -1; static int use_pager = -1; +struct cmd_struct { + const char *cmd; + int (*fn)(int, const char **, const char *); + int option; +}; + +static struct cmd_struct commands[] = { + { "buildid-cache", cmd_buildid_cache, 0 }, + { "buildid-list", cmd_buildid_list, 0 }, + { "diff", cmd_diff, 0 }, + { "evlist", cmd_evlist, 0 }, + { "help", cmd_help, 0 }, + { "list", cmd_list, 0 }, + { "record", cmd_record, 0 }, + { "report", cmd_report, 0 }, + { "bench", cmd_bench, 0 }, + { "stat", cmd_stat, 0 }, + { "timechart", cmd_timechart, 0 }, + { "top", cmd_top, 0 }, + { "annotate", cmd_annotate, 0 }, + { "version", cmd_version, 0 }, + { "script", cmd_script, 0 }, + { "sched", cmd_sched, 0 }, + { 
"probe", cmd_probe, 0 }, + { "kmem", cmd_kmem, 0 }, + { "lock", cmd_lock, 0 }, + { "kvm", cmd_kvm, 0 }, + { "test", cmd_test, 0 }, + { "inject", cmd_inject, 0 }, +}; + struct pager_config { const char *cmd; int val; @@ -160,6 +191,14 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) fprintf(stderr, "dir: %s\n", debugfs_mountpoint); if (envchanged) *envchanged = 1; + } else if (!strcmp(cmd, "--list-cmds")) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + struct cmd_struct *p = commands+i; + printf("%s ", p->cmd); + } + exit(0); } else { fprintf(stderr, "Unknown option: %s\n", cmd); usage(perf_usage_string); @@ -245,12 +284,6 @@ const char perf_version_string[] = PERF_VERSION; */ #define NEED_WORK_TREE (1<<2) -struct cmd_struct { - const char *cmd; - int (*fn)(int, const char **, const char *); - int option; -}; - static int run_builtin(struct cmd_struct *p, int argc, const char **argv) { int status; @@ -296,30 +329,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; - static struct cmd_struct commands[] = { - { "buildid-cache", cmd_buildid_cache, 0 }, - { "buildid-list", cmd_buildid_list, 0 }, - { "diff", cmd_diff, 0 }, - { "evlist", cmd_evlist, 0 }, - { "help", cmd_help, 0 }, - { "list", cmd_list, 0 }, - { "record", cmd_record, 0 }, - { "report", cmd_report, 0 }, - { "bench", cmd_bench, 0 }, - { "stat", cmd_stat, 0 }, - { "timechart", cmd_timechart, 0 }, - { "top", cmd_top, 0 }, - { "annotate", cmd_annotate, 0 }, - { "version", cmd_version, 0 }, - { "script", cmd_script, 0 }, - { "sched", cmd_sched, 0 }, - { "probe", cmd_probe, 0 }, - { "kmem", cmd_kmem, 0 }, - { "lock", cmd_lock, 0 }, - { "kvm", cmd_kvm, 0 }, - { "test", cmd_test, 0 }, - { "inject", cmd_inject, 0 }, - }; unsigned int i; static const char ext[] = STRIP_EXTENSION; From a3277d2d5a0d5d9492993ab68af8a8dc96760dd1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 9 Aug 2012 16:31:52 +0200 Subject: [PATCH 045/282] perf tools: Support for events bash completion Add basic bash completion for the -e option in record, top and stat subcommands. Only hardware, software and tracepoint events are supported. Breakpoints, raw events and events grouping completion need more thinking. 
Signed-off-by: Frederic Weisbecker Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1344522713-27951-3-git-send-email-fweisbec@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bash_completion | 6 ++- tools/perf/builtin-list.c | 14 ++++--- tools/perf/util/parse-events.c | 68 +++++++++++++++++++++------------- tools/perf/util/parse-events.h | 7 ++-- 4 files changed, 60 insertions(+), 35 deletions(-) diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index 9a31fa5ed2a..1958fa539d0 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -6,7 +6,7 @@ _perf() local cur cmd COMPREPLY=() - _get_comp_words_by_ref cur + _get_comp_words_by_ref cur prev cmd=${COMP_WORDS[0]} @@ -14,6 +14,10 @@ _perf() if [ $COMP_CWORD -eq 1 ]; then cmds=$($cmd --list-cmds) COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) ) + # List possible events for -e option + elif [[ $prev == "-e" && "${COMP_WORDS[1]}" == @(record|stat|top) ]]; then + cmds=$($cmd list --raw-dump) + COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) ) # Fall down to list regular files else _filedir diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 6313b6eb3eb..bdcff81b532 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,15 +19,15 @@ int cmd_list(int argc, const char **argv, const char *prefix __used) setup_pager(); if (argc == 1) - print_events(NULL); + print_events(NULL, false); else { int i; for (i = 1; i < argc; ++i) { - if (i > 1) + if (i > 2) putchar('\n'); if (strncmp(argv[i], "tracepoint", 10) == 0) - print_tracepoint_events(NULL, NULL); + print_tracepoint_events(NULL, NULL, false); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) print_events_type(PERF_TYPE_HARDWARE); @@ -36,13 +36,15 @@ int cmd_list(int argc, const char **argv, const char *prefix __used) print_events_type(PERF_TYPE_SOFTWARE); else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL); + print_hwcache_events(NULL, false); + else if (strcmp(argv[i], "--raw-dump") == 0) + print_events(NULL, true); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i]); + print_events(argv[i], false); continue; } sep_idx = sep - argv[i]; @@ -51,7 +53,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __used) return -1; s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1); + print_tracepoint_events(s, s + sep_idx + 1, false); free(s); } } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8bdfa3e5c8c..3ec4bfcd3f9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -799,7 +799,8 @@ static const char * const event_type_descriptors[] = { * Print the events from /tracing/events */ -void print_tracepoint_events(const char *subsys_glob, const char *event_glob) +void print_tracepoint_events(const char *subsys_glob, const char *event_glob, + bool name_only) { DIR *sys_dir, *evt_dir; struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; @@ -829,6 +830,11 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob) !strglobmatch(evt_dirent.d_name, event_glob)) continue; + if (name_only) { + printf("%s:%s ", sys_dirent.d_name, evt_dirent.d_name); + continue; + } + snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); printf(" %-50s [%s]\n", evt_path, @@ -906,7 +912,7 @@ void 
print_events_type(u8 type) __print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); } -int print_hwcache_events(const char *event_glob) +int print_hwcache_events(const char *event_glob, bool name_only) { unsigned int type, op, i, printed = 0; char name[64]; @@ -923,8 +929,11 @@ int print_hwcache_events(const char *event_glob) if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; - printf(" %-50s [%s]\n", name, - event_type_descriptors[PERF_TYPE_HW_CACHE]); + if (name_only) + printf("%s ", name); + else + printf(" %-50s [%s]\n", name, + event_type_descriptors[PERF_TYPE_HW_CACHE]); ++printed; } } @@ -934,7 +943,8 @@ int print_hwcache_events(const char *event_glob) } static void print_symbol_events(const char *event_glob, unsigned type, - struct event_symbol *syms, unsigned max) + struct event_symbol *syms, unsigned max, + bool name_only) { unsigned i, printed = 0; char name[MAX_NAME_LEN]; @@ -946,6 +956,11 @@ static void print_symbol_events(const char *event_glob, unsigned type, (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; + if (name_only) { + printf("%s ", syms->symbol); + continue; + } + if (strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else @@ -963,39 +978,42 @@ static void print_symbol_events(const char *event_glob, unsigned type, /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob) +void print_events(const char *event_glob, bool name_only) { - - printf("\n"); - printf("List of pre-defined events (to be used in -e):\n"); + if (!name_only) { + printf("\n"); + printf("List of pre-defined events (to be used in -e):\n"); + } print_symbol_events(event_glob, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX); + event_symbols_hw, PERF_COUNT_HW_MAX, name_only); print_symbol_events(event_glob, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX); + event_symbols_sw, PERF_COUNT_SW_MAX, name_only); - print_hwcache_events(event_glob); + print_hwcache_events(event_glob, name_only); if (event_glob != NULL) return; - printf("\n"); - printf(" %-50s [%s]\n", - "rNNN", - event_type_descriptors[PERF_TYPE_RAW]); - printf(" %-50s [%s]\n", - "cpu/t1=v1[,t2=v2,t3 ...]/modifier", - event_type_descriptors[PERF_TYPE_RAW]); - printf(" (see 'perf list --help' on how to encode it)\n"); - printf("\n"); + if (!name_only) { + printf("\n"); + printf(" %-50s [%s]\n", + "rNNN", + event_type_descriptors[PERF_TYPE_RAW]); + printf(" %-50s [%s]\n", + "cpu/t1=v1[,t2=v2,t3 ...]/modifier", + event_type_descriptors[PERF_TYPE_RAW]); + printf(" (see 'perf list --help' on how to encode it)\n"); + printf("\n"); - printf(" %-50s [%s]\n", - "mem:[:access]", + printf(" %-50s [%s]\n", + "mem:[:access]", event_type_descriptors[PERF_TYPE_BREAKPOINT]); - printf("\n"); + printf("\n"); + } - print_tracepoint_events(NULL, NULL); + print_tracepoint_events(NULL, NULL, name_only); } int parse_events__is_hardcoded_term(struct parse_events__term *term) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 163aad40461..00416d7fd01 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -96,10 +96,11 @@ void parse_events_update_lists(struct list_head *list_event, void parse_events_error(void *data, void *scanner, char const *msg); int parse_events__test(void); -void print_events(const char *event_glob); +void print_events(const char *event_glob, bool name_only); void print_events_type(u8 type); -void print_tracepoint_events(const char *subsys_glob, const 
char *event_glob); -int print_hwcache_events(const char *event_glob); +void print_tracepoint_events(const char *subsys_glob, const char *event_glob, + bool name_only); +int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); extern int valid_debugfs_mount(const char *debugfs); From b25085be457b4292a563c0bf2fab2ef5b7bb3c45 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Aug 2012 16:31:53 +0200 Subject: [PATCH 046/282] perf tools: Fix /etc config related installation Fix missing /etc/bash_completion.d directory creation, otherwise the installation fails miserably on systems that don't have bash completion installed yet or on a specific target: $ make DESTDIR=/tmp/junk-perf O=/tmp/pbuild -C tools/perf/ install ... install -m 755 bash_completion /tmp/junk-perf/etc/bash_completion.d/perf install: cannot create regular file `/tmp/junk-perf/etc/bash_completion.d/perf': No such file or directory make: *** [install] Error 1 make: Leaving directory `/opt/sw/ahern/perf.git/tools/perf' Also use the sysconfdir variable instead of the hardcoded /etc to handle an overridden conf directory. Reported-by: David Ahern Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1344522713-27951-4-git-send-email-fweisbec@gmail.com Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 84b42278694..a9458b9dd7b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -700,6 +700,7 @@ perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) prefix_SQ = $(subst ','\'',$(prefix)) +sysconfdir_SQ = $(subst ','\'',$(sysconfdir)) SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) @@ -951,7 +952,8 @@ install: all $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' - $(INSTALL) -m 755 bash_completion $(DESTDIR_SQ)/etc/bash_completion.d/perf + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d' + $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' install-python_ext: $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' From 166ccc9c244828da9214a0e7ba4d5dde6a26dcc1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 6 Aug 2012 13:41:19 +0900 Subject: [PATCH 047/282] perf symbols: Introduce symbol__elf_init() The symbol__elf_init() function initializes the internal libelf data structures, so that code outside of the ELF/symbol handling code no longer has to depend on libelf directly.
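For readers unfamiliar with libelf, the call being wrapped is the mandatory library-version handshake: elf_begin() refuses to operate until elf_version(EV_CURRENT) has been called once in the process. A minimal standalone sketch of that rule (illustrative only, not code from this patch; build with 'cc -o elfcheck elfcheck.c -lelf'):

#include <fcntl.h>
#include <gelf.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	Elf *elf;
	int fd;

	if (argc < 2)
		return 1;

	/* This is the call that symbol__elf_init() now centralizes. */
	if (elf_version(EV_CURRENT) == EV_NONE)
		return 1;

	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	/* Without the elf_version() call above, elf_begin() would fail. */
	elf = elf_begin(fd, ELF_C_READ, NULL);
	if (elf)
		elf_end(elf);
	close(fd);
	return 0;
}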
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1344228082-15569-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-list.c | 4 +--- tools/perf/util/symbol.c | 8 +++++++- tools/perf/util/symbol.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 6b2bcfbde15..7d6842826a0 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -16,8 +16,6 @@ #include "util/session.h" #include "util/symbol.h" -#include - static const char *input_name; static bool force; static bool show_kernel; @@ -71,7 +69,7 @@ static int perf_session__list_build_ids(void) { struct perf_session *session; - elf_version(EV_CURRENT); + symbol__elf_init(); session = perf_session__new(input_name, O_RDONLY, force, false, &build_id__mark_dso_hit_ops); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8b63b678e12..a61fec4518d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1453,6 +1453,11 @@ out_close: return err; } +void symbol__elf_init(void) +{ + elf_version(EV_CURRENT); +} + static bool dso__build_id_equal(const struct dso *dso, u8 *build_id) { return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0; @@ -2754,7 +2759,8 @@ int symbol__init(void) symbol_conf.priv_size = ALIGN(symbol_conf.priv_size, sizeof(u64)); - elf_version(EV_CURRENT); + symbol__elf_init(); + if (symbol_conf.sort_by_name) symbol_conf.priv_size += (sizeof(struct symbol_name_rb_node) - sizeof(struct symbol)); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 1fe733a1e21..355993d3abb 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -309,6 +309,7 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines); int symbol__init(void); void symbol__exit(void); +void symbol__elf_init(void); size_t symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, FILE *fp); size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); From e5a1845fc0aeca85c98115980c3531129f87e18d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 6 Aug 2012 13:41:20 +0900 Subject: [PATCH 048/282] perf symbols: Split out util/symbol-elf.c Factor out the dependency of ELF handling into separate symbol-elf.c file. It is a preparation of building a minimalistic version perf tools which doesn't depend on the elfutils. 
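To make the new boundary easier to see, here is a condensed view of the entry points through which the generic symbol code reaches ELF handling once this series is applied (assembled from the symbol.h declarations in this and the neighbouring patches; the struct and typedef names are perf's own, so this is a summary rather than a standalone header):

/* Provided by util/symbol-elf.c (libelf), or by stubs when libelf is unavailable. */
void symbol__elf_init(void);
int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd,
		  symbol_filter_t filter, int kmodule, int want_symtab);
int dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map,
				symbol_filter_t filter);
int filename__read_build_id(const char *filename, void *bf, size_t size);
int sysfs__read_build_id(const char *filename, void *build_id, size_t size);
int filename__read_debuglink(const char *filename, char *debuglink, size_t size);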
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1344228082-15569-3-git-send-email-namhyung@kernel.org [ committer note: removed blank line at symbol-elf.c EOF ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 1 + tools/perf/util/map.c | 19 + tools/perf/util/map.h | 1 + tools/perf/util/symbol-elf.c | 774 +++++++++++++++++++++++++++++++++ tools/perf/util/symbol.c | 804 +---------------------------------- tools/perf/util/symbol.h | 15 + 6 files changed, 817 insertions(+), 797 deletions(-) create mode 100644 tools/perf/util/symbol-elf.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index a9458b9dd7b..f790e3bf14c 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -356,6 +356,7 @@ LIB_OBJS += $(OUTPUT)util/usage.o LIB_OBJS += $(OUTPUT)util/wrapper.o LIB_OBJS += $(OUTPUT)util/sigchain.o LIB_OBJS += $(OUTPUT)util/symbol.o +LIB_OBJS += $(OUTPUT)util/symbol-elf.o LIB_OBJS += $(OUTPUT)util/dso-test-data.o LIB_OBJS += $(OUTPUT)util/color.o LIB_OBJS += $(OUTPUT)util/pager.o diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index cc33486ad9e..115654c469c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -86,6 +86,25 @@ out_delete: return NULL; } +/* + * Constructor variant for modules (where we know from /proc/modules where + * they are loaded) and for vmlinux, where only after we load all the + * symbols we'll know where it starts and ends. + */ +struct map *map__new2(u64 start, struct dso *dso, enum map_type type) +{ + struct map *map = calloc(1, (sizeof(*map) + + (dso->kernel ? sizeof(struct kmap) : 0))); + if (map != NULL) { + /* + * ->end will be filled after we load all the symbols + */ + map__init(map, type, start, 0, 0, dso); + } + + return map; +} + void map__delete(struct map *self) { free(self); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 03a1e9b08b2..1e183d1ae58 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -115,6 +115,7 @@ void map__init(struct map *self, enum map_type type, struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, u64 pgoff, u32 pid, char *filename, enum map_type type); +struct map *map__new2(u64 start, struct dso *dso, enum map_type type); void map__delete(struct map *self); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c new file mode 100644 index 00000000000..9ca89f8f6c9 --- /dev/null +++ b/tools/perf/util/symbol-elf.c @@ -0,0 +1,774 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "symbol.h" +#include "debug.h" + +#ifndef NT_GNU_BUILD_ID +#define NT_GNU_BUILD_ID 3 +#endif + +/** + * elf_symtab__for_each_symbol - iterate thru all the symbols + * + * @syms: struct elf_symtab instance to iterate + * @idx: uint32_t idx + * @sym: GElf_Sym iterator + */ +#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \ + for (idx = 0, gelf_getsym(syms, idx, &sym);\ + idx < nr_syms; \ + idx++, gelf_getsym(syms, idx, &sym)) + +static inline uint8_t elf_sym__type(const GElf_Sym *sym) +{ + return GELF_ST_TYPE(sym->st_info); +} + +static inline int elf_sym__is_function(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_FUNC && + sym->st_name != 0 && + sym->st_shndx != SHN_UNDEF; +} + +static inline bool elf_sym__is_object(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_OBJECT && + sym->st_name != 0 && + 
sym->st_shndx != SHN_UNDEF; +} + +static inline int elf_sym__is_label(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_NOTYPE && + sym->st_name != 0 && + sym->st_shndx != SHN_UNDEF && + sym->st_shndx != SHN_ABS; +} + +static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) +{ + switch (type) { + case MAP__FUNCTION: + return elf_sym__is_function(sym); + case MAP__VARIABLE: + return elf_sym__is_object(sym); + default: + return false; + } +} + +static inline const char *elf_sym__name(const GElf_Sym *sym, + const Elf_Data *symstrs) +{ + return symstrs->d_buf + sym->st_name; +} + +static inline const char *elf_sec__name(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return secstrs->d_buf + shdr->sh_name; +} + +static inline int elf_sec__is_text(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return strstr(elf_sec__name(shdr, secstrs), "text") != NULL; +} + +static inline bool elf_sec__is_data(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return strstr(elf_sec__name(shdr, secstrs), "data") != NULL; +} + +static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs, + enum map_type type) +{ + switch (type) { + case MAP__FUNCTION: + return elf_sec__is_text(shdr, secstrs); + case MAP__VARIABLE: + return elf_sec__is_data(shdr, secstrs); + default: + return false; + } +} + +static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) +{ + Elf_Scn *sec = NULL; + GElf_Shdr shdr; + size_t cnt = 1; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + gelf_getshdr(sec, &shdr); + + if ((addr >= shdr.sh_addr) && + (addr < (shdr.sh_addr + shdr.sh_size))) + return cnt; + + ++cnt; + } + + return -1; +} + +static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + GElf_Shdr *shp, const char *name, + size_t *idx) +{ + Elf_Scn *sec = NULL; + size_t cnt = 1; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *str; + + gelf_getshdr(sec, shp); + str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); + if (!strcmp(name, str)) { + if (idx) + *idx = cnt; + break; + } + ++cnt; + } + + return sec; +} + +#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ + for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ + idx < nr_entries; \ + ++idx, pos = gelf_getrel(reldata, idx, &pos_mem)) + +#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \ + for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \ + idx < nr_entries; \ + ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) + +/* + * We need to check if we have a .dynsym, so that we can handle the + * .plt, synthesizing its symbols, that aren't on the symtabs (be it + * .dynsym or .symtab). + * And always look at the original dso, not at debuginfo packages, that + * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). 
+ */ +int dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, + symbol_filter_t filter) +{ + uint32_t nr_rel_entries, idx; + GElf_Sym sym; + u64 plt_offset; + GElf_Shdr shdr_plt; + struct symbol *f; + GElf_Shdr shdr_rel_plt, shdr_dynsym; + Elf_Data *reldata, *syms, *symstrs; + Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; + size_t dynsym_idx; + GElf_Ehdr ehdr; + char sympltname[1024]; + Elf *elf; + int nr = 0, symidx, fd, err = 0; + + fd = open(name, O_RDONLY); + if (fd < 0) + goto out; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + goto out_close; + + if (gelf_getehdr(elf, &ehdr) == NULL) + goto out_elf_end; + + scn_dynsym = elf_section_by_name(elf, &ehdr, &shdr_dynsym, + ".dynsym", &dynsym_idx); + if (scn_dynsym == NULL) + goto out_elf_end; + + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, + ".rela.plt", NULL); + if (scn_plt_rel == NULL) { + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, + ".rel.plt", NULL); + if (scn_plt_rel == NULL) + goto out_elf_end; + } + + err = -1; + + if (shdr_rel_plt.sh_link != dynsym_idx) + goto out_elf_end; + + if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL) + goto out_elf_end; + + /* + * Fetch the relocation section to find the idxes to the GOT + * and the symbols in the .dynsym they refer to. + */ + reldata = elf_getdata(scn_plt_rel, NULL); + if (reldata == NULL) + goto out_elf_end; + + syms = elf_getdata(scn_dynsym, NULL); + if (syms == NULL) + goto out_elf_end; + + scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link); + if (scn_symstrs == NULL) + goto out_elf_end; + + symstrs = elf_getdata(scn_symstrs, NULL); + if (symstrs == NULL) + goto out_elf_end; + + nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; + plt_offset = shdr_plt.sh_offset; + + if (shdr_rel_plt.sh_type == SHT_RELA) { + GElf_Rela pos_mem, *pos; + + elf_section__for_each_rela(reldata, pos, pos_mem, idx, + nr_rel_entries) { + symidx = GELF_R_SYM(pos->r_info); + plt_offset += shdr_plt.sh_entsize; + gelf_getsym(syms, symidx, &sym); + snprintf(sympltname, sizeof(sympltname), + "%s@plt", elf_sym__name(&sym, symstrs)); + + f = symbol__new(plt_offset, shdr_plt.sh_entsize, + STB_GLOBAL, sympltname); + if (!f) + goto out_elf_end; + + if (filter && filter(map, f)) + symbol__delete(f); + else { + symbols__insert(&dso->symbols[map->type], f); + ++nr; + } + } + } else if (shdr_rel_plt.sh_type == SHT_REL) { + GElf_Rel pos_mem, *pos; + elf_section__for_each_rel(reldata, pos, pos_mem, idx, + nr_rel_entries) { + symidx = GELF_R_SYM(pos->r_info); + plt_offset += shdr_plt.sh_entsize; + gelf_getsym(syms, symidx, &sym); + snprintf(sympltname, sizeof(sympltname), + "%s@plt", elf_sym__name(&sym, symstrs)); + + f = symbol__new(plt_offset, shdr_plt.sh_entsize, + STB_GLOBAL, sympltname); + if (!f) + goto out_elf_end; + + if (filter && filter(map, f)) + symbol__delete(f); + else { + symbols__insert(&dso->symbols[map->type], f); + ++nr; + } + } + } + + err = 0; +out_elf_end: + elf_end(elf); +out_close: + close(fd); + + if (err == 0) + return nr; +out: + pr_debug("%s: problems reading %s PLT info.\n", + __func__, dso->long_name); + return 0; +} + +/* + * Align offset to 4 bytes as needed for note name and descriptor data. 
+ */ +#define NOTE_ALIGN(n) (((n) + 3) & -4U) + +static int elf_read_build_id(Elf *elf, void *bf, size_t size) +{ + int err = -1; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *data; + Elf_Scn *sec; + Elf_Kind ek; + void *ptr; + + if (size < BUILD_ID_SIZE) + goto out; + + ek = elf_kind(elf); + if (ek != ELF_K_ELF) + goto out; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_err("%s: cannot get elf header.\n", __func__); + goto out; + } + + /* + * Check following sections for notes: + * '.note.gnu.build-id' + * '.notes' + * '.note' (VDSO specific) + */ + do { + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note.gnu.build-id", NULL); + if (sec) + break; + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".notes", NULL); + if (sec) + break; + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note", NULL); + if (sec) + break; + + return err; + + } while (0); + + data = elf_getdata(sec, NULL); + if (data == NULL) + goto out; + + ptr = data->d_buf; + while (ptr < (data->d_buf + data->d_size)) { + GElf_Nhdr *nhdr = ptr; + size_t namesz = NOTE_ALIGN(nhdr->n_namesz), + descsz = NOTE_ALIGN(nhdr->n_descsz); + const char *name; + + ptr += sizeof(*nhdr); + name = ptr; + ptr += namesz; + if (nhdr->n_type == NT_GNU_BUILD_ID && + nhdr->n_namesz == sizeof("GNU")) { + if (memcmp(name, "GNU", sizeof("GNU")) == 0) { + size_t sz = min(size, descsz); + memcpy(bf, ptr, sz); + memset(bf + sz, 0, size - sz); + err = descsz; + break; + } + } + ptr += descsz; + } + +out: + return err; +} + +int filename__read_build_id(const char *filename, void *bf, size_t size) +{ + int fd, err = -1; + Elf *elf; + + if (size < BUILD_ID_SIZE) + goto out; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); + goto out_close; + } + + err = elf_read_build_id(elf, bf, size); + + elf_end(elf); +out_close: + close(fd); +out: + return err; +} + +int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +{ + int fd, err = -1; + + if (size < BUILD_ID_SIZE) + goto out; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + while (1) { + char bf[BUFSIZ]; + GElf_Nhdr nhdr; + size_t namesz, descsz; + + if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) + break; + + namesz = NOTE_ALIGN(nhdr.n_namesz); + descsz = NOTE_ALIGN(nhdr.n_descsz); + if (nhdr.n_type == NT_GNU_BUILD_ID && + nhdr.n_namesz == sizeof("GNU")) { + if (read(fd, bf, namesz) != (ssize_t)namesz) + break; + if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { + size_t sz = min(descsz, size); + if (read(fd, build_id, sz) == (ssize_t)sz) { + memset(build_id + sz, 0, size - sz); + err = 0; + break; + } + } else if (read(fd, bf, descsz) != (ssize_t)descsz) + break; + } else { + int n = namesz + descsz; + if (read(fd, bf, n) != n) + break; + } + } + close(fd); +out: + return err; +} + +int filename__read_debuglink(const char *filename, char *debuglink, + size_t size) +{ + int fd, err = -1; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *data; + Elf_Scn *sec; + Elf_Kind ek; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); + goto out_close; + } + + ek = elf_kind(elf); + if (ek != ELF_K_ELF) + goto out_close; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_err("%s: cannot get elf header.\n", __func__); + goto out_close; + } + + sec = elf_section_by_name(elf, 
&ehdr, &shdr, + ".gnu_debuglink", NULL); + if (sec == NULL) + goto out_close; + + data = elf_getdata(sec, NULL); + if (data == NULL) + goto out_close; + + /* the start of this section is a zero-terminated string */ + strncpy(debuglink, data->d_buf, size); + + elf_end(elf); + +out_close: + close(fd); +out: + return err; +} + +static int dso__swap_init(struct dso *dso, unsigned char eidata) +{ + static unsigned int const endian = 1; + + dso->needs_swap = DSO_SWAP__NO; + + switch (eidata) { + case ELFDATA2LSB: + /* We are big endian, DSO is little endian. */ + if (*(unsigned char const *)&endian != 1) + dso->needs_swap = DSO_SWAP__YES; + break; + + case ELFDATA2MSB: + /* We are little endian, DSO is big endian. */ + if (*(unsigned char const *)&endian != 0) + dso->needs_swap = DSO_SWAP__YES; + break; + + default: + pr_err("unrecognized DSO data encoding %d\n", eidata); + return -EINVAL; + } + + return 0; +} + +int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, + symbol_filter_t filter, int kmodule, int want_symtab) +{ + struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; + struct map *curr_map = map; + struct dso *curr_dso = dso; + Elf_Data *symstrs, *secstrs; + uint32_t nr_syms; + int err = -1; + uint32_t idx; + GElf_Ehdr ehdr; + GElf_Shdr shdr, opdshdr; + Elf_Data *syms, *opddata = NULL; + GElf_Sym sym; + Elf_Scn *sec, *sec_strndx, *opdsec; + Elf *elf; + int nr = 0; + size_t opdidx = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug("%s: cannot read %s ELF file.\n", __func__, name); + goto out_close; + } + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_debug("%s: cannot get elf header.\n", __func__); + goto out_elf_end; + } + + if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) + goto out_elf_end; + + /* Always reject images with a mismatched build-id: */ + if (dso->has_build_id) { + u8 build_id[BUILD_ID_SIZE]; + + if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) + goto out_elf_end; + + if (!dso__build_id_equal(dso, build_id)) + goto out_elf_end; + } + + sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL); + if (sec == NULL) { + if (want_symtab) + goto out_elf_end; + + sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL); + if (sec == NULL) + goto out_elf_end; + } + + opdsec = elf_section_by_name(elf, &ehdr, &opdshdr, ".opd", &opdidx); + if (opdshdr.sh_type != SHT_PROGBITS) + opdsec = NULL; + if (opdsec) + opddata = elf_rawdata(opdsec, NULL); + + syms = elf_getdata(sec, NULL); + if (syms == NULL) + goto out_elf_end; + + sec = elf_getscn(elf, shdr.sh_link); + if (sec == NULL) + goto out_elf_end; + + symstrs = elf_getdata(sec, NULL); + if (symstrs == NULL) + goto out_elf_end; + + sec_strndx = elf_getscn(elf, ehdr.e_shstrndx); + if (sec_strndx == NULL) + goto out_elf_end; + + secstrs = elf_getdata(sec_strndx, NULL); + if (secstrs == NULL) + goto out_elf_end; + + nr_syms = shdr.sh_size / shdr.sh_entsize; + + memset(&sym, 0, sizeof(sym)); + if (dso->kernel == DSO_TYPE_USER) { + dso->adjust_symbols = (ehdr.e_type == ET_EXEC || + elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.prelink_undo", + NULL) != NULL); + } else { + dso->adjust_symbols = 0; + } + elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { + struct symbol *f; + const char *elf_name = elf_sym__name(&sym, symstrs); + char *demangled = NULL; + int is_label = elf_sym__is_label(&sym); + const char *section_name; + + if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name && + strcmp(elf_name, kmap->ref_reloc_sym->name) == 0) + 
kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; + + if (!is_label && !elf_sym__is_a(&sym, map->type)) + continue; + + /* Reject ARM ELF "mapping symbols": these aren't unique and + * don't identify functions, so will confuse the profile + * output: */ + if (ehdr.e_machine == EM_ARM) { + if (!strcmp(elf_name, "$a") || + !strcmp(elf_name, "$d") || + !strcmp(elf_name, "$t")) + continue; + } + + if (opdsec && sym.st_shndx == opdidx) { + u32 offset = sym.st_value - opdshdr.sh_addr; + u64 *opd = opddata->d_buf + offset; + sym.st_value = DSO__SWAP(dso, u64, *opd); + sym.st_shndx = elf_addr_to_index(elf, sym.st_value); + } + + sec = elf_getscn(elf, sym.st_shndx); + if (!sec) + goto out_elf_end; + + gelf_getshdr(sec, &shdr); + + if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) + continue; + + section_name = elf_sec__name(&shdr, secstrs); + + /* On ARM, symbols for thumb functions have 1 added to + * the symbol address as a flag - remove it */ + if ((ehdr.e_machine == EM_ARM) && + (map->type == MAP__FUNCTION) && + (sym.st_value & 1)) + --sym.st_value; + + if (dso->kernel != DSO_TYPE_USER || kmodule) { + char dso_name[PATH_MAX]; + + if (strcmp(section_name, + (curr_dso->short_name + + dso->short_name_len)) == 0) + goto new_symbol; + + if (strcmp(section_name, ".text") == 0) { + curr_map = map; + curr_dso = dso; + goto new_symbol; + } + + snprintf(dso_name, sizeof(dso_name), + "%s%s", dso->short_name, section_name); + + curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name); + if (curr_map == NULL) { + u64 start = sym.st_value; + + if (kmodule) + start += map->start + shdr.sh_offset; + + curr_dso = dso__new(dso_name); + if (curr_dso == NULL) + goto out_elf_end; + curr_dso->kernel = dso->kernel; + curr_dso->long_name = dso->long_name; + curr_dso->long_name_len = dso->long_name_len; + curr_map = map__new2(start, curr_dso, + map->type); + if (curr_map == NULL) { + dso__delete(curr_dso); + goto out_elf_end; + } + curr_map->map_ip = identity__map_ip; + curr_map->unmap_ip = identity__map_ip; + curr_dso->symtab_type = dso->symtab_type; + map_groups__insert(kmap->kmaps, curr_map); + dsos__add(&dso->node, curr_dso); + dso__set_loaded(curr_dso, map->type); + } else + curr_dso = curr_map->dso; + + goto new_symbol; + } + + if (curr_dso->adjust_symbols) { + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " + "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, + (u64)sym.st_value, (u64)shdr.sh_addr, + (u64)shdr.sh_offset); + sym.st_value -= shdr.sh_addr - shdr.sh_offset; + } + /* + * We need to figure out if the object was created from C++ sources + * DWARF DW_compile_unit has this, but we don't always have access + * to it... + */ + demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); + if (demangled != NULL) + elf_name = demangled; +new_symbol: + f = symbol__new(sym.st_value, sym.st_size, + GELF_ST_BIND(sym.st_info), elf_name); + free(demangled); + if (!f) + goto out_elf_end; + + if (filter && filter(curr_map, f)) + symbol__delete(f); + else { + symbols__insert(&curr_dso->symbols[curr_map->type], f); + nr++; + } + } + + /* + * For misannotated, zeroed, ASM function sizes. + */ + if (nr > 0) { + symbols__fixup_duplicate(&dso->symbols[map->type]); + symbols__fixup_end(&dso->symbols[map->type]); + if (kmap) { + /* + * We need to fixup this here too because we create new + * maps here, for things like vsyscall sections. 
+ */ + __map_groups__fixup_end(kmap->kmaps, map->type); + } + } + err = nr; +out_elf_end: + elf_end(elf); +out_close: + return err; +} + +void symbol__elf_init(void) +{ + elf_version(EV_CURRENT); +} diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a61fec4518d..f02de8ac842 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -15,8 +15,6 @@ #include "symbol.h" #include "strlist.h" -#include -#include #include #include #include @@ -25,15 +23,7 @@ #define KSYM_NAME_LEN 256 #endif -#ifndef NT_GNU_BUILD_ID -#define NT_GNU_BUILD_ID 3 -#endif - static void dso_cache__free(struct rb_root *root); -static bool dso__build_id_equal(const struct dso *dso, u8 *build_id); -static int elf_read_build_id(Elf *elf, void *bf, size_t size); -static void dsos__add(struct list_head *head, struct dso *dso); -static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); static int dso__load_kernel_sym(struct dso *dso, struct map *map, symbol_filter_t filter); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, @@ -170,7 +160,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) return SYMBOL_B; } -static void symbols__fixup_duplicate(struct rb_root *symbols) +void symbols__fixup_duplicate(struct rb_root *symbols) { struct rb_node *nd; struct symbol *curr, *next; @@ -199,7 +189,7 @@ again: } } -static void symbols__fixup_end(struct rb_root *symbols) +void symbols__fixup_end(struct rb_root *symbols) { struct rb_node *nd, *prevnd = rb_first(symbols); struct symbol *curr, *prev; @@ -222,7 +212,7 @@ static void symbols__fixup_end(struct rb_root *symbols) curr->end = roundup(curr->start, 4096); } -static void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) +void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) { struct map *prev, *curr; struct rb_node *nd, *prevnd = rb_first(&mg->maps[type]); @@ -252,8 +242,7 @@ static void map_groups__fixup_end(struct map_groups *mg) __map_groups__fixup_end(mg, i); } -static struct symbol *symbol__new(u64 start, u64 len, u8 binding, - const char *name) +struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) { size_t namelen = strlen(name) + 1; struct symbol *sym = calloc(1, (symbol_conf.priv_size + @@ -390,7 +379,7 @@ void dso__set_build_id(struct dso *dso, void *build_id) dso->has_build_id = 1; } -static void symbols__insert(struct rb_root *symbols, struct symbol *sym) +void symbols__insert(struct rb_root *symbols, struct symbol *sym) { struct rb_node **p = &symbols->rb_node; struct rb_node *parent = NULL; @@ -904,561 +893,7 @@ out_failure: return -1; } -/** - * elf_symtab__for_each_symbol - iterate thru all the symbols - * - * @syms: struct elf_symtab instance to iterate - * @idx: uint32_t idx - * @sym: GElf_Sym iterator - */ -#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \ - for (idx = 0, gelf_getsym(syms, idx, &sym);\ - idx < nr_syms; \ - idx++, gelf_getsym(syms, idx, &sym)) - -static inline uint8_t elf_sym__type(const GElf_Sym *sym) -{ - return GELF_ST_TYPE(sym->st_info); -} - -static inline int elf_sym__is_function(const GElf_Sym *sym) -{ - return elf_sym__type(sym) == STT_FUNC && - sym->st_name != 0 && - sym->st_shndx != SHN_UNDEF; -} - -static inline bool elf_sym__is_object(const GElf_Sym *sym) -{ - return elf_sym__type(sym) == STT_OBJECT && - sym->st_name != 0 && - sym->st_shndx != SHN_UNDEF; -} - -static inline int elf_sym__is_label(const GElf_Sym *sym) -{ - return elf_sym__type(sym) == STT_NOTYPE && - 
sym->st_name != 0 && - sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; -} - -static inline const char *elf_sec__name(const GElf_Shdr *shdr, - const Elf_Data *secstrs) -{ - return secstrs->d_buf + shdr->sh_name; -} - -static inline int elf_sec__is_text(const GElf_Shdr *shdr, - const Elf_Data *secstrs) -{ - return strstr(elf_sec__name(shdr, secstrs), "text") != NULL; -} - -static inline bool elf_sec__is_data(const GElf_Shdr *shdr, - const Elf_Data *secstrs) -{ - return strstr(elf_sec__name(shdr, secstrs), "data") != NULL; -} - -static inline const char *elf_sym__name(const GElf_Sym *sym, - const Elf_Data *symstrs) -{ - return symstrs->d_buf + sym->st_name; -} - -static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, - GElf_Shdr *shp, const char *name, - size_t *idx) -{ - Elf_Scn *sec = NULL; - size_t cnt = 1; - - while ((sec = elf_nextscn(elf, sec)) != NULL) { - char *str; - - gelf_getshdr(sec, shp); - str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); - if (!strcmp(name, str)) { - if (idx) - *idx = cnt; - break; - } - ++cnt; - } - - return sec; -} - -#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrel(reldata, idx, &pos_mem)) - -#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) - -/* - * We need to check if we have a .dynsym, so that we can handle the - * .plt, synthesizing its symbols, that aren't on the symtabs (be it - * .dynsym or .symtab). - * And always look at the original dso, not at debuginfo packages, that - * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). - */ -static int -dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, - symbol_filter_t filter) -{ - uint32_t nr_rel_entries, idx; - GElf_Sym sym; - u64 plt_offset; - GElf_Shdr shdr_plt; - struct symbol *f; - GElf_Shdr shdr_rel_plt, shdr_dynsym; - Elf_Data *reldata, *syms, *symstrs; - Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; - size_t dynsym_idx; - GElf_Ehdr ehdr; - char sympltname[1024]; - Elf *elf; - int nr = 0, symidx, fd, err = 0; - - fd = open(name, O_RDONLY); - if (fd < 0) - goto out; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) - goto out_close; - - if (gelf_getehdr(elf, &ehdr) == NULL) - goto out_elf_end; - - scn_dynsym = elf_section_by_name(elf, &ehdr, &shdr_dynsym, - ".dynsym", &dynsym_idx); - if (scn_dynsym == NULL) - goto out_elf_end; - - scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, - ".rela.plt", NULL); - if (scn_plt_rel == NULL) { - scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, - ".rel.plt", NULL); - if (scn_plt_rel == NULL) - goto out_elf_end; - } - - err = -1; - - if (shdr_rel_plt.sh_link != dynsym_idx) - goto out_elf_end; - - if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL) - goto out_elf_end; - - /* - * Fetch the relocation section to find the idxes to the GOT - * and the symbols in the .dynsym they refer to. 
- */ - reldata = elf_getdata(scn_plt_rel, NULL); - if (reldata == NULL) - goto out_elf_end; - - syms = elf_getdata(scn_dynsym, NULL); - if (syms == NULL) - goto out_elf_end; - - scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link); - if (scn_symstrs == NULL) - goto out_elf_end; - - symstrs = elf_getdata(scn_symstrs, NULL); - if (symstrs == NULL) - goto out_elf_end; - - nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; - plt_offset = shdr_plt.sh_offset; - - if (shdr_rel_plt.sh_type == SHT_RELA) { - GElf_Rela pos_mem, *pos; - - elf_section__for_each_rela(reldata, pos, pos_mem, idx, - nr_rel_entries) { - symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; - gelf_getsym(syms, symidx, &sym); - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_sym__name(&sym, symstrs)); - - f = symbol__new(plt_offset, shdr_plt.sh_entsize, - STB_GLOBAL, sympltname); - if (!f) - goto out_elf_end; - - if (filter && filter(map, f)) - symbol__delete(f); - else { - symbols__insert(&dso->symbols[map->type], f); - ++nr; - } - } - } else if (shdr_rel_plt.sh_type == SHT_REL) { - GElf_Rel pos_mem, *pos; - elf_section__for_each_rel(reldata, pos, pos_mem, idx, - nr_rel_entries) { - symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; - gelf_getsym(syms, symidx, &sym); - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_sym__name(&sym, symstrs)); - - f = symbol__new(plt_offset, shdr_plt.sh_entsize, - STB_GLOBAL, sympltname); - if (!f) - goto out_elf_end; - - if (filter && filter(map, f)) - symbol__delete(f); - else { - symbols__insert(&dso->symbols[map->type], f); - ++nr; - } - } - } - - err = 0; -out_elf_end: - elf_end(elf); -out_close: - close(fd); - - if (err == 0) - return nr; -out: - pr_debug("%s: problems reading %s PLT info.\n", - __func__, dso->long_name); - return 0; -} - -static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) -{ - switch (type) { - case MAP__FUNCTION: - return elf_sym__is_function(sym); - case MAP__VARIABLE: - return elf_sym__is_object(sym); - default: - return false; - } -} - -static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs, - enum map_type type) -{ - switch (type) { - case MAP__FUNCTION: - return elf_sec__is_text(shdr, secstrs); - case MAP__VARIABLE: - return elf_sec__is_data(shdr, secstrs); - default: - return false; - } -} - -static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) -{ - Elf_Scn *sec = NULL; - GElf_Shdr shdr; - size_t cnt = 1; - - while ((sec = elf_nextscn(elf, sec)) != NULL) { - gelf_getshdr(sec, &shdr); - - if ((addr >= shdr.sh_addr) && - (addr < (shdr.sh_addr + shdr.sh_size))) - return cnt; - - ++cnt; - } - - return -1; -} - -static int dso__swap_init(struct dso *dso, unsigned char eidata) -{ - static unsigned int const endian = 1; - - dso->needs_swap = DSO_SWAP__NO; - - switch (eidata) { - case ELFDATA2LSB: - /* We are big endian, DSO is little endian. */ - if (*(unsigned char const *)&endian != 1) - dso->needs_swap = DSO_SWAP__YES; - break; - - case ELFDATA2MSB: - /* We are little endian, DSO is big endian. */ - if (*(unsigned char const *)&endian != 0) - dso->needs_swap = DSO_SWAP__YES; - break; - - default: - pr_err("unrecognized DSO data encoding %d\n", eidata); - return -EINVAL; - } - - return 0; -} - -static int dso__load_sym(struct dso *dso, struct map *map, const char *name, - int fd, symbol_filter_t filter, int kmodule, - int want_symtab) -{ - struct kmap *kmap = dso->kernel ? 
map__kmap(map) : NULL; - struct map *curr_map = map; - struct dso *curr_dso = dso; - Elf_Data *symstrs, *secstrs; - uint32_t nr_syms; - int err = -1; - uint32_t idx; - GElf_Ehdr ehdr; - GElf_Shdr shdr, opdshdr; - Elf_Data *syms, *opddata = NULL; - GElf_Sym sym; - Elf_Scn *sec, *sec_strndx, *opdsec; - Elf *elf; - int nr = 0; - size_t opdidx = 0; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) { - pr_debug("%s: cannot read %s ELF file.\n", __func__, name); - goto out_close; - } - - if (gelf_getehdr(elf, &ehdr) == NULL) { - pr_debug("%s: cannot get elf header.\n", __func__); - goto out_elf_end; - } - - if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) - goto out_elf_end; - - /* Always reject images with a mismatched build-id: */ - if (dso->has_build_id) { - u8 build_id[BUILD_ID_SIZE]; - - if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) - goto out_elf_end; - - if (!dso__build_id_equal(dso, build_id)) - goto out_elf_end; - } - - sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL); - if (sec == NULL) { - if (want_symtab) - goto out_elf_end; - - sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL); - if (sec == NULL) - goto out_elf_end; - } - - opdsec = elf_section_by_name(elf, &ehdr, &opdshdr, ".opd", &opdidx); - if (opdshdr.sh_type != SHT_PROGBITS) - opdsec = NULL; - if (opdsec) - opddata = elf_rawdata(opdsec, NULL); - - syms = elf_getdata(sec, NULL); - if (syms == NULL) - goto out_elf_end; - - sec = elf_getscn(elf, shdr.sh_link); - if (sec == NULL) - goto out_elf_end; - - symstrs = elf_getdata(sec, NULL); - if (symstrs == NULL) - goto out_elf_end; - - sec_strndx = elf_getscn(elf, ehdr.e_shstrndx); - if (sec_strndx == NULL) - goto out_elf_end; - - secstrs = elf_getdata(sec_strndx, NULL); - if (secstrs == NULL) - goto out_elf_end; - - nr_syms = shdr.sh_size / shdr.sh_entsize; - - memset(&sym, 0, sizeof(sym)); - if (dso->kernel == DSO_TYPE_USER) { - dso->adjust_symbols = (ehdr.e_type == ET_EXEC || - elf_section_by_name(elf, &ehdr, &shdr, - ".gnu.prelink_undo", - NULL) != NULL); - } else { - dso->adjust_symbols = 0; - } - elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { - struct symbol *f; - const char *elf_name = elf_sym__name(&sym, symstrs); - char *demangled = NULL; - int is_label = elf_sym__is_label(&sym); - const char *section_name; - - if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name && - strcmp(elf_name, kmap->ref_reloc_sym->name) == 0) - kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; - - if (!is_label && !elf_sym__is_a(&sym, map->type)) - continue; - - /* Reject ARM ELF "mapping symbols": these aren't unique and - * don't identify functions, so will confuse the profile - * output: */ - if (ehdr.e_machine == EM_ARM) { - if (!strcmp(elf_name, "$a") || - !strcmp(elf_name, "$d") || - !strcmp(elf_name, "$t")) - continue; - } - - if (opdsec && sym.st_shndx == opdidx) { - u32 offset = sym.st_value - opdshdr.sh_addr; - u64 *opd = opddata->d_buf + offset; - sym.st_value = DSO__SWAP(dso, u64, *opd); - sym.st_shndx = elf_addr_to_index(elf, sym.st_value); - } - - sec = elf_getscn(elf, sym.st_shndx); - if (!sec) - goto out_elf_end; - - gelf_getshdr(sec, &shdr); - - if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) - continue; - - section_name = elf_sec__name(&shdr, secstrs); - - /* On ARM, symbols for thumb functions have 1 added to - * the symbol address as a flag - remove it */ - if ((ehdr.e_machine == EM_ARM) && - (map->type == MAP__FUNCTION) && - (sym.st_value & 1)) - --sym.st_value; - - if (dso->kernel != 
DSO_TYPE_USER || kmodule) { - char dso_name[PATH_MAX]; - - if (strcmp(section_name, - (curr_dso->short_name + - dso->short_name_len)) == 0) - goto new_symbol; - - if (strcmp(section_name, ".text") == 0) { - curr_map = map; - curr_dso = dso; - goto new_symbol; - } - - snprintf(dso_name, sizeof(dso_name), - "%s%s", dso->short_name, section_name); - - curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name); - if (curr_map == NULL) { - u64 start = sym.st_value; - - if (kmodule) - start += map->start + shdr.sh_offset; - - curr_dso = dso__new(dso_name); - if (curr_dso == NULL) - goto out_elf_end; - curr_dso->kernel = dso->kernel; - curr_dso->long_name = dso->long_name; - curr_dso->long_name_len = dso->long_name_len; - curr_map = map__new2(start, curr_dso, - map->type); - if (curr_map == NULL) { - dso__delete(curr_dso); - goto out_elf_end; - } - curr_map->map_ip = identity__map_ip; - curr_map->unmap_ip = identity__map_ip; - curr_dso->symtab_type = dso->symtab_type; - map_groups__insert(kmap->kmaps, curr_map); - dsos__add(&dso->node, curr_dso); - dso__set_loaded(curr_dso, map->type); - } else - curr_dso = curr_map->dso; - - goto new_symbol; - } - - if (curr_dso->adjust_symbols) { - pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " - "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, - (u64)sym.st_value, (u64)shdr.sh_addr, - (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; - } - /* - * We need to figure out if the object was created from C++ sources - * DWARF DW_compile_unit has this, but we don't always have access - * to it... - */ - demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); - if (demangled != NULL) - elf_name = demangled; -new_symbol: - f = symbol__new(sym.st_value, sym.st_size, - GELF_ST_BIND(sym.st_info), elf_name); - free(demangled); - if (!f) - goto out_elf_end; - - if (filter && filter(curr_map, f)) - symbol__delete(f); - else { - symbols__insert(&curr_dso->symbols[curr_map->type], f); - nr++; - } - } - - /* - * For misannotated, zeroed, ASM function sizes. - */ - if (nr > 0) { - symbols__fixup_duplicate(&dso->symbols[map->type]); - symbols__fixup_end(&dso->symbols[map->type]); - if (kmap) { - /* - * We need to fixup this here too because we create new - * maps here, for things like vsyscall sections. - */ - __map_groups__fixup_end(kmap->kmaps, map->type); - } - } - err = nr; -out_elf_end: - elf_end(elf); -out_close: - return err; -} - -void symbol__elf_init(void) -{ - elf_version(EV_CURRENT); -} - -static bool dso__build_id_equal(const struct dso *dso, u8 *build_id) +bool dso__build_id_equal(const struct dso *dso, u8 *build_id) { return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0; } @@ -1485,212 +920,6 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) return have_build_id; } -/* - * Align offset to 4 bytes as needed for note name and descriptor data. 
- */ -#define NOTE_ALIGN(n) (((n) + 3) & -4U) - -static int elf_read_build_id(Elf *elf, void *bf, size_t size) -{ - int err = -1; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - Elf_Data *data; - Elf_Scn *sec; - Elf_Kind ek; - void *ptr; - - if (size < BUILD_ID_SIZE) - goto out; - - ek = elf_kind(elf); - if (ek != ELF_K_ELF) - goto out; - - if (gelf_getehdr(elf, &ehdr) == NULL) { - pr_err("%s: cannot get elf header.\n", __func__); - goto out; - } - - /* - * Check following sections for notes: - * '.note.gnu.build-id' - * '.notes' - * '.note' (VDSO specific) - */ - do { - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".note.gnu.build-id", NULL); - if (sec) - break; - - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".notes", NULL); - if (sec) - break; - - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".note", NULL); - if (sec) - break; - - return err; - - } while (0); - - data = elf_getdata(sec, NULL); - if (data == NULL) - goto out; - - ptr = data->d_buf; - while (ptr < (data->d_buf + data->d_size)) { - GElf_Nhdr *nhdr = ptr; - size_t namesz = NOTE_ALIGN(nhdr->n_namesz), - descsz = NOTE_ALIGN(nhdr->n_descsz); - const char *name; - - ptr += sizeof(*nhdr); - name = ptr; - ptr += namesz; - if (nhdr->n_type == NT_GNU_BUILD_ID && - nhdr->n_namesz == sizeof("GNU")) { - if (memcmp(name, "GNU", sizeof("GNU")) == 0) { - size_t sz = min(size, descsz); - memcpy(bf, ptr, sz); - memset(bf + sz, 0, size - sz); - err = descsz; - break; - } - } - ptr += descsz; - } - -out: - return err; -} - -int filename__read_build_id(const char *filename, void *bf, size_t size) -{ - int fd, err = -1; - Elf *elf; - - if (size < BUILD_ID_SIZE) - goto out; - - fd = open(filename, O_RDONLY); - if (fd < 0) - goto out; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) { - pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); - goto out_close; - } - - err = elf_read_build_id(elf, bf, size); - - elf_end(elf); -out_close: - close(fd); -out: - return err; -} - -int sysfs__read_build_id(const char *filename, void *build_id, size_t size) -{ - int fd, err = -1; - - if (size < BUILD_ID_SIZE) - goto out; - - fd = open(filename, O_RDONLY); - if (fd < 0) - goto out; - - while (1) { - char bf[BUFSIZ]; - GElf_Nhdr nhdr; - size_t namesz, descsz; - - if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) - break; - - namesz = NOTE_ALIGN(nhdr.n_namesz); - descsz = NOTE_ALIGN(nhdr.n_descsz); - if (nhdr.n_type == NT_GNU_BUILD_ID && - nhdr.n_namesz == sizeof("GNU")) { - if (read(fd, bf, namesz) != (ssize_t)namesz) - break; - if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { - size_t sz = min(descsz, size); - if (read(fd, build_id, sz) == (ssize_t)sz) { - memset(build_id + sz, 0, size - sz); - err = 0; - break; - } - } else if (read(fd, bf, descsz) != (ssize_t)descsz) - break; - } else { - int n = namesz + descsz; - if (read(fd, bf, n) != n) - break; - } - } - close(fd); -out: - return err; -} - -static int filename__read_debuglink(const char *filename, - char *debuglink, size_t size) -{ - int fd, err = -1; - Elf *elf; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - Elf_Data *data; - Elf_Scn *sec; - Elf_Kind ek; - - fd = open(filename, O_RDONLY); - if (fd < 0) - goto out; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) { - pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); - goto out_close; - } - - ek = elf_kind(elf); - if (ek != ELF_K_ELF) - goto out_close; - - if (gelf_getehdr(elf, &ehdr) == NULL) { - pr_err("%s: cannot get elf header.\n", __func__); - goto out_close; - } - - sec = 
elf_section_by_name(elf, &ehdr, &shdr, - ".gnu_debuglink", NULL); - if (sec == NULL) - goto out_close; - - data = elf_getdata(sec, NULL); - if (data == NULL) - goto out_close; - - /* the start of this section is a zero-terminated string */ - strncpy(debuglink, data->d_buf, size); - - elf_end(elf); - -out_close: - close(fd); -out: - return err; -} - char dso__symtab_origin(const struct dso *dso) { static const char origin[] = { @@ -2035,25 +1264,6 @@ static int machine__set_modules_path(struct machine *machine) return map_groups__set_modules_path_dir(&machine->kmaps, modules_path); } -/* - * Constructor variant for modules (where we know from /proc/modules where - * they are loaded) and for vmlinux, where only after we load all the - * symbols we'll know where it starts and ends. - */ -static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) -{ - struct map *map = calloc(1, (sizeof(*map) + - (dso->kernel ? sizeof(struct kmap) : 0))); - if (map != NULL) { - /* - * ->end will be filled after we load all the symbols - */ - map__init(map, type, start, 0, 0, dso); - } - - return map; -} - struct map *machine__new_module(struct machine *machine, u64 start, const char *filename) { @@ -2357,7 +1567,7 @@ out_try_fixup: return err; } -static void dsos__add(struct list_head *head, struct dso *dso) +void dsos__add(struct list_head *head, struct dso *dso) { list_add_tail(&dso->node, head); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 355993d3abb..38ccbbb39fa 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -254,6 +254,7 @@ static inline void dso__set_loaded(struct dso *dso, enum map_type type) void dso__sort_by_name(struct dso *dso, enum map_type type); +void dsos__add(struct list_head *head, struct dso *dso); struct dso *__dsos__findnew(struct list_head *head, const char *name); int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter); @@ -283,6 +284,7 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp); char dso__symtab_origin(const struct dso *dso); void dso__set_long_name(struct dso *dso, char *name); void dso__set_build_id(struct dso *dso, void *build_id); +bool dso__build_id_equal(const struct dso *dso, u8 *build_id); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); struct map *dso__new_map(const char *name); @@ -298,6 +300,8 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf); int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, char type, u64 start, u64 end)); +int filename__read_debuglink(const char *filename, char *debuglink, + size_t size); void machine__destroy_kernel_maps(struct machine *machine); int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel); @@ -310,6 +314,7 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines); int symbol__init(void); void symbol__exit(void); void symbol__elf_init(void); +struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); size_t symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, FILE *fp); size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); @@ -327,4 +332,14 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size); int dso__test_data(void); +int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, + symbol_filter_t filter, int kmodule, int want_symtab); +int 
dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, + symbol_filter_t filter); + +void symbols__insert(struct rb_root *symbols, struct symbol *sym); +void symbols__fixup_duplicate(struct rb_root *symbols); +void symbols__fixup_end(struct rb_root *symbols); +void __map_groups__fixup_end(struct map_groups *mg, enum map_type type); + #endif /* __PERF_SYMBOL */ From 393be2e3747ea3ef0d2e724115a5f42b2fa50dbd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 6 Aug 2012 13:41:21 +0900 Subject: [PATCH 049/282] perf tools: Support minimal build without libelf Now that we have isolated all the ELF-specific stuff, it's possible to build without libelf. The output binary can do most jobs but lacks (user level) symbol information - kernel symbols are still accessible thanks to kallsyms. To build perf without libelf (elfutils), give NO_LIBELF=1 to make. For now, only the 'perf probe' command is removed since it depends on libelf/libdw heavily. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1344228082-15569-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 59 +++++++++++++++++++++-------- tools/perf/builtin-inject.c | 5 ++- tools/perf/command-list.txt | 2 +- tools/perf/perf.c | 2 + tools/perf/util/generate-cmdlist.sh | 15 ++++++++ tools/perf/util/map.c | 3 +- tools/perf/util/symbol-minimal.c | 39 +++++++++++++++++++ 7 files changed, 107 insertions(+), 18 deletions(-) create mode 100644 tools/perf/util/symbol-minimal.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index f790e3bf14c..de6aa8c706c 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -37,7 +37,12 @@ include config/utilities.mak # # Define NO_NEWT if you do not want TUI support. # +# Define NO_GTK2 if you do not want GTK+ GUI support. +# # Define NO_DEMANGLE if you do not want C++ symbol demangling. +# +# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds) +# $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) @@ -450,13 +455,22 @@ PYRF_OBJS += $(OUTPUT)util/xyarray.o -include config.mak.autogen -include config.mak -ifndef NO_DWARF -FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) -ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) - msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support.
Please install new elfutils-devel/libdw-dev); +ifdef NO_LIBELF NO_DWARF := 1 -endif # Dwarf support -endif # NO_DWARF + NO_DEMANGLE := 1 +else +FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) +ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y) + FLAGS_GLIBC=$(ALL_CFLAGS) $(ALL_LDFLAGS) + ifneq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC)),y) + msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); + else + NO_LIBELF := 1 + NO_DWARF := 1 + NO_DEMANGLE := 1 + endif +endif +endif # NO_LIBELF -include arch/$(ARCH)/Makefile @@ -464,20 +478,34 @@ ifneq ($(OUTPUT),) BASIC_CFLAGS += -I$(OUTPUT) endif -FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y) - FLAGS_GLIBC=$(ALL_CFLAGS) $(ALL_LDFLAGS) - ifneq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC)),y) - msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); - else - msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel); - endif -endif +ifdef NO_LIBELF +BASIC_CFLAGS += -DNO_LIBELF_SUPPORT + +EXTLIBS := $(filter-out -lelf,$(EXTLIBS)) + +# Remove ELF/DWARF dependent codes +LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS)) + +BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS)) + +# Use minimal symbol handling +LIB_OBJS += $(OUTPUT)util/symbol-minimal.o + +else # NO_LIBELF ifneq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_COMMON)),y) BASIC_CFLAGS += -DLIBELF_NO_MMAP endif +FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) +ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) + msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); + NO_DWARF := 1 +endif # Dwarf support + ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); @@ -488,6 +516,7 @@ else LIB_OBJS += $(OUTPUT)util/dwarf-aux.o endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF +endif # NO_LIBELF ifdef NO_NEWT BASIC_CFLAGS += -DNO_NEWT_SUPPORT diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 3beab489afc..64d8ba2fb7b 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -191,10 +191,13 @@ static int perf_event__inject_buildid(struct perf_tool *tool, * If this fails, too bad, let the other side * account this as unresolved. 
*/ - } else + } else { +#ifndef NO_LIBELF_SUPPORT pr_warning("no symbols found in %s, maybe " "install a debug package?\n", al.map->dso->long_name); +#endif + } } } diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index d695fe40fbf..0303ec69227 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -18,7 +18,7 @@ perf-stat mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common perf-script mainporcelain common -perf-probe mainporcelain common +perf-probe mainporcelain full perf-kmem mainporcelain common perf-lock mainporcelain common perf-kvm mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index db37ee3f29b..e7840e50071 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -47,7 +47,9 @@ static struct cmd_struct commands[] = { { "version", cmd_version, 0 }, { "script", cmd_script, 0 }, { "sched", cmd_sched, 0 }, +#ifndef NO_LIBELF_SUPPORT { "probe", cmd_probe, 0 }, +#endif { "kmem", cmd_kmem, 0 }, { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index f06f6fd148f..389590c1ad2 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -21,4 +21,19 @@ do p }' "Documentation/perf-$cmd.txt" done + +echo "#ifndef NO_LIBELF_SUPPORT" +sed -n -e 's/^perf-\([^ ]*\)[ ].* full.*/\1/p' command-list.txt | +sort | +while read cmd +do + sed -n ' + /^NAME/,/perf-'"$cmd"'/H + ${ + x + s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ + p + }' "Documentation/perf-$cmd.txt" +done +echo "#endif /* NO_LIBELF_SUPPORT */" echo "};" diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 115654c469c..287cb3452b4 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -156,6 +156,7 @@ int map__load(struct map *self, symbol_filter_t filter) pr_warning(", continuing without symbols\n"); return -1; } else if (nr == 0) { +#ifndef NO_LIBELF_SUPPORT const size_t len = strlen(name); const size_t real_len = len - sizeof(DSO__DELETED); @@ -168,7 +169,7 @@ int map__load(struct map *self, symbol_filter_t filter) pr_warning("no symbols found in %s, maybe install " "a debug package?\n", name); } - +#endif return -1; } /* diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c new file mode 100644 index 00000000000..416ecf3bccf --- /dev/null +++ b/tools/perf/util/symbol-minimal.c @@ -0,0 +1,39 @@ +#include "symbol.h" + + +int filename__read_build_id(const char *filename __used, void *bf __used, + size_t size __used) +{ + return -1; +} + +int sysfs__read_build_id(const char *filename __used, void *build_id __used, + size_t size __used) +{ + return -1; +} + +int filename__read_debuglink(const char *filename __used, + char *debuglink __used, size_t size __used) +{ + return -1; +} + +int dso__synthesize_plt_symbols(struct dso *dso __used, char *name __used, + struct map *map __used, + symbol_filter_t filter __used) +{ + return 0; +} + +int dso__load_sym(struct dso *dso __used, struct map *map __used, + const char *name __used, int fd __used, + symbol_filter_t filter __used, int kmodule __used, + int want_symtab __used) +{ + return 0; +} + +void symbol__elf_init(void) +{ +} From b691f64360ecec49c44d534cf4c1798a438bfccb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 6 Aug 2012 13:41:22 +0900 Subject: [PATCH 050/282] perf symbols: Implement poor man's ELF parser Implement a minimal elf parser for getting build-id. 
It assumes that required elf.h header is provided by libc header on the system and the parser only looks for PT_NOTE program header to check build-id. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1344228082-15569-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-minimal.c | 242 ++++++++++++++++++++++++++++++- 1 file changed, 235 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 416ecf3bccf..bd8720b6780 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -1,15 +1,71 @@ #include "symbol.h" +#include +#include +#include +#include +#include +#include -int filename__read_build_id(const char *filename __used, void *bf __used, - size_t size __used) + +static bool check_need_swap(int file_endian) { - return -1; + const int data = 1; + u8 *check = (u8 *)&data; + int host_endian; + + if (check[0] == 1) + host_endian = ELFDATA2LSB; + else + host_endian = ELFDATA2MSB; + + return host_endian != file_endian; } -int sysfs__read_build_id(const char *filename __used, void *build_id __used, - size_t size __used) +#define NOTE_ALIGN(sz) (((sz) + 3) & ~3) + +#define NT_GNU_BUILD_ID 3 + +static int read_build_id(void *note_data, size_t note_len, void *bf, + size_t size, bool need_swap) { + struct { + u32 n_namesz; + u32 n_descsz; + u32 n_type; + } *nhdr; + void *ptr; + + ptr = note_data; + while (ptr < (note_data + note_len)) { + const char *name; + size_t namesz, descsz; + + nhdr = ptr; + if (need_swap) { + nhdr->n_namesz = bswap_32(nhdr->n_namesz); + nhdr->n_descsz = bswap_32(nhdr->n_descsz); + nhdr->n_type = bswap_32(nhdr->n_type); + } + + namesz = NOTE_ALIGN(nhdr->n_namesz); + descsz = NOTE_ALIGN(nhdr->n_descsz); + + ptr += sizeof(*nhdr); + name = ptr; + ptr += namesz; + if (nhdr->n_type == NT_GNU_BUILD_ID && + nhdr->n_namesz == sizeof("GNU")) { + if (memcmp(name, "GNU", sizeof("GNU")) == 0) { + size_t sz = min(size, descsz); + memcpy(bf, ptr, sz); + memset(bf + sz, 0, size - sz); + return 0; + } + } + ptr += descsz; + } + return -1; } @@ -19,6 +75,172 @@ int filename__read_debuglink(const char *filename __used, return -1; } +/* + * Just try PT_NOTE header otherwise fails + */ +int filename__read_build_id(const char *filename, void *bf, size_t size) +{ + FILE *fp; + int ret = -1; + bool need_swap = false; + u8 e_ident[EI_NIDENT]; + size_t buf_size; + void *buf; + int i; + + fp = fopen(filename, "r"); + if (fp == NULL) + return -1; + + if (fread(e_ident, sizeof(e_ident), 1, fp) != 1) + goto out; + + if (memcmp(e_ident, ELFMAG, SELFMAG) || + e_ident[EI_VERSION] != EV_CURRENT) + goto out; + + need_swap = check_need_swap(e_ident[EI_DATA]); + + /* for simplicity */ + fseek(fp, 0, SEEK_SET); + + if (e_ident[EI_CLASS] == ELFCLASS32) { + Elf32_Ehdr ehdr; + Elf32_Phdr *phdr; + + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + goto out; + + if (need_swap) { + ehdr.e_phoff = bswap_32(ehdr.e_phoff); + ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); + ehdr.e_phnum = bswap_16(ehdr.e_phnum); + } + + buf_size = ehdr.e_phentsize * ehdr.e_phnum; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + fseek(fp, ehdr.e_phoff, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + void *tmp; + + if (need_swap) { + phdr->p_type = bswap_32(phdr->p_type); + phdr->p_offset = bswap_32(phdr->p_offset); + phdr->p_filesz = 
bswap_32(phdr->p_filesz); + } + + if (phdr->p_type != PT_NOTE) + continue; + + buf_size = phdr->p_filesz; + tmp = realloc(buf, buf_size); + if (tmp == NULL) + goto out_free; + + buf = tmp; + fseek(fp, phdr->p_offset, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + ret = read_build_id(buf, buf_size, bf, size, need_swap); + if (ret == 0) + ret = size; + break; + } + } else { + Elf64_Ehdr ehdr; + Elf64_Phdr *phdr; + + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + goto out; + + if (need_swap) { + ehdr.e_phoff = bswap_64(ehdr.e_phoff); + ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); + ehdr.e_phnum = bswap_16(ehdr.e_phnum); + } + + buf_size = ehdr.e_phentsize * ehdr.e_phnum; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + fseek(fp, ehdr.e_phoff, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + void *tmp; + + if (need_swap) { + phdr->p_type = bswap_32(phdr->p_type); + phdr->p_offset = bswap_64(phdr->p_offset); + phdr->p_filesz = bswap_64(phdr->p_filesz); + } + + if (phdr->p_type != PT_NOTE) + continue; + + buf_size = phdr->p_filesz; + tmp = realloc(buf, buf_size); + if (tmp == NULL) + goto out_free; + + buf = tmp; + fseek(fp, phdr->p_offset, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + ret = read_build_id(buf, buf_size, bf, size, need_swap); + if (ret == 0) + ret = size; + break; + } + } +out_free: + free(buf); +out: + fclose(fp); + return ret; +} + +int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +{ + int fd; + int ret = -1; + struct stat stbuf; + size_t buf_size; + void *buf; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -1; + + if (fstat(fd, &stbuf) < 0) + goto out; + + buf_size = stbuf.st_size; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + if (read(fd, buf, buf_size) != (ssize_t) buf_size) + goto out_free; + + ret = read_build_id(buf, buf_size, build_id, size, false); +out_free: + free(buf); +out: + close(fd); + return ret; +} + int dso__synthesize_plt_symbols(struct dso *dso __used, char *name __used, struct map *map __used, symbol_filter_t filter __used) @@ -26,11 +248,17 @@ int dso__synthesize_plt_symbols(struct dso *dso __used, char *name __used, return 0; } -int dso__load_sym(struct dso *dso __used, struct map *map __used, - const char *name __used, int fd __used, +int dso__load_sym(struct dso *dso, struct map *map __used, + const char *name, int fd __used, symbol_filter_t filter __used, int kmodule __used, int want_symtab __used) { + unsigned char *build_id[BUILD_ID_SIZE]; + + if (filename__read_build_id(name, build_id, BUILD_ID_SIZE) > 0) { + dso__set_build_id(dso, build_id); + return 1; + } return 0; } From c5e63197db519bae1c33e41ea0342a50f39e7a93 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:36 +0200 Subject: [PATCH 051/282] perf: Unified API to record selective sets of arch registers This brings a new API to help the selective dump of registers on event sampling, and its implementation for x86 arch. Added HAVE_PERF_REGS config option to determine if the architecture provides perf registers ABI. The information about desired registers will be passed in u64 mask. It's up to the architecture to map the registers into the mask bits. For the x86 arch implementation, both 32 and 64 bit registers bits are defined within single enum to ensure 64 bit system can provide register dump for compat task if needed in the future. 
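A minimal sketch of how a consumer is expected to use this API (illustration only, not part of the patch; dump_some_regs() is a made-up helper, while perf_reg_validate(), perf_reg_value() and the PERF_REG_X86_* bits are the ones introduced below):

#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/ptrace.h>
#include <linux/perf_regs.h>

static void dump_some_regs(struct pt_regs *regs)
{
	/* one bit per register, as mapped by the architecture */
	u64 mask = (1ULL << PERF_REG_X86_IP) |
		   (1ULL << PERF_REG_X86_SP) |
		   (1ULL << PERF_REG_X86_BP);
	int bit;

	if (perf_reg_validate(mask))
		return;	/* mask contains reserved or unsupported bits */

	for_each_set_bit(bit, (unsigned long *)&mask, sizeof(mask) * 8)
		pr_info("reg %d: 0x%llx\n", bit,
			(unsigned long long)perf_reg_value(regs, bit));
}

The same walk over the set bits of the mask is what the sampling code added later in this series does when it emits the register values into the ring buffer.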
Original-patch-by: Frederic Weisbecker [ Added missing linux/errno.h include ] Signed-off-by: Jiri Olsa Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/Kconfig | 6 +++ arch/x86/Kconfig | 1 + arch/x86/include/asm/perf_regs.h | 33 ++++++++++++ arch/x86/kernel/Makefile | 2 + arch/x86/kernel/perf_regs.c | 90 ++++++++++++++++++++++++++++++++ include/linux/perf_regs.h | 19 +++++++ 6 files changed, 151 insertions(+) create mode 100644 arch/x86/include/asm/perf_regs.h create mode 100644 arch/x86/kernel/perf_regs.c create mode 100644 include/linux/perf_regs.h diff --git a/arch/Kconfig b/arch/Kconfig index 72f2fa189cc..68d827b7ae8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -222,6 +222,12 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_PERF_REGS + bool + help + Support selective register dumps for perf events. This includes + bit-mapping of each registers and a unique architecture id. + config HAVE_ARCH_JUMP_LABEL bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4ab..3fab6ec9edc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS select HAVE_PERF_EVENTS_NMI + select HAVE_PERF_REGS select ANON_INODES select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 select HAVE_CMPXCHG_LOCAL if !M386 diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h new file mode 100644 index 00000000000..3f2207bfd17 --- /dev/null +++ b/arch/x86/include/asm/perf_regs.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_PERF_REGS_H +#define _ASM_X86_PERF_REGS_H + +enum perf_event_x86_regs { + PERF_REG_X86_AX, + PERF_REG_X86_BX, + PERF_REG_X86_CX, + PERF_REG_X86_DX, + PERF_REG_X86_SI, + PERF_REG_X86_DI, + PERF_REG_X86_BP, + PERF_REG_X86_SP, + PERF_REG_X86_IP, + PERF_REG_X86_FLAGS, + PERF_REG_X86_CS, + PERF_REG_X86_SS, + PERF_REG_X86_DS, + PERF_REG_X86_ES, + PERF_REG_X86_FS, + PERF_REG_X86_GS, + PERF_REG_X86_R8, + PERF_REG_X86_R9, + PERF_REG_X86_R10, + PERF_REG_X86_R11, + PERF_REG_X86_R12, + PERF_REG_X86_R13, + PERF_REG_X86_R14, + PERF_REG_X86_R15, + + PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, + PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, +}; +#endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8215e5652d9..8d7a619718b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 00000000000..3d6923528b1 --- /dev/null +++ b/arch/x86/kernel/perf_regs.c @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_32 +#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX +#else +#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX +#endif + +#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) + +static unsigned int 
pt_regs_offset[PERF_REG_X86_MAX] = { + PT_REGS_OFFSET(PERF_REG_X86_AX, ax), + PT_REGS_OFFSET(PERF_REG_X86_BX, bx), + PT_REGS_OFFSET(PERF_REG_X86_CX, cx), + PT_REGS_OFFSET(PERF_REG_X86_DX, dx), + PT_REGS_OFFSET(PERF_REG_X86_SI, si), + PT_REGS_OFFSET(PERF_REG_X86_DI, di), + PT_REGS_OFFSET(PERF_REG_X86_BP, bp), + PT_REGS_OFFSET(PERF_REG_X86_SP, sp), + PT_REGS_OFFSET(PERF_REG_X86_IP, ip), + PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), + PT_REGS_OFFSET(PERF_REG_X86_CS, cs), + PT_REGS_OFFSET(PERF_REG_X86_SS, ss), +#ifdef CONFIG_X86_32 + PT_REGS_OFFSET(PERF_REG_X86_DS, ds), + PT_REGS_OFFSET(PERF_REG_X86_ES, es), + PT_REGS_OFFSET(PERF_REG_X86_FS, fs), + PT_REGS_OFFSET(PERF_REG_X86_GS, gs), +#else + /* + * The pt_regs struct does not store + * ds, es, fs, gs in 64 bit mode. + */ + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, +#endif +#ifdef CONFIG_X86_64 + PT_REGS_OFFSET(PERF_REG_X86_R8, r8), + PT_REGS_OFFSET(PERF_REG_X86_R9, r9), + PT_REGS_OFFSET(PERF_REG_X86_R10, r10), + PT_REGS_OFFSET(PERF_REG_X86_R11, r11), + PT_REGS_OFFSET(PERF_REG_X86_R12, r12), + PT_REGS_OFFSET(PERF_REG_X86_R13, r13), + PT_REGS_OFFSET(PERF_REG_X86_R14, r14), + PT_REGS_OFFSET(PERF_REG_X86_R15, r15), +#endif +}; + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset))) + return 0; + + return regs_get_register(regs, pt_regs_offset[idx]); +} + +#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) + +#ifdef CONFIG_X86_32 +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} +#else /* CONFIG_X86_64 */ +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + if (mask & REG_NOSUPPORT) + return -EINVAL; + + return 0; +} +#endif /* CONFIG_X86_32 */ diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h new file mode 100644 index 00000000000..a2f1a98f783 --- /dev/null +++ b/include/linux/perf_regs.h @@ -0,0 +1,19 @@ +#ifndef _LINUX_PERF_REGS_H +#define _LINUX_PERF_REGS_H + +#ifdef CONFIG_HAVE_PERF_REGS +#include +u64 perf_reg_value(struct pt_regs *regs, int idx); +int perf_reg_validate(u64 mask); +#else +static inline u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + return 0; +} + +static inline int perf_reg_validate(u64 mask) +{ + return mask ? -ENOSYS : 0; +} +#endif /* CONFIG_HAVE_PERF_REGS */ +#endif /* _LINUX_PERF_REGS_H */ From 4018994f3d8785275ef0e7391b75c3462c029e56 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:37 +0200 Subject: [PATCH 052/282] perf: Add ability to attach user level registers dump to sample Introducing PERF_SAMPLE_REGS_USER sample type bit to trigger the dump of user level registers on sample. Registers we want to dump are specified by sample_regs_user bitmask. Only user level registers are dumped at the moment. Meaning the register values of the user space context as it was before the user entered the kernel for whatever reason (syscall, irq, exception, or a PMI happening in userspace). The layout of the sample_regs_user bitmap is described in asm/perf_regs.h for archs that support register dump. This is going to be useful to bring Dwarf CFI based stack unwinding on top of samples. Original-patch-by: Frederic Weisbecker [ Dump registers ABI specification. ] Signed-off-by: Jiri Olsa Suggested-by: Stephane Eranian Cc: "Frank Ch. 
Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/perf_regs.c | 15 +++++++++ include/linux/perf_event.h | 35 ++++++++++++++++++-- include/linux/perf_regs.h | 6 ++++ kernel/events/core.c | 66 +++++++++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 3d6923528b1..c5a3e5cfe07 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include #include @@ -71,6 +73,11 @@ int perf_reg_validate(u64 mask) return 0; } + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} #else /* CONFIG_X86_64 */ #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ (1ULL << PERF_REG_X86_ES) | \ @@ -87,4 +94,12 @@ int perf_reg_validate(u64 mask) return 0; } + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_IA32)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} #endif /* CONFIG_X86_32 */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7602ccb3f40..3d4d84745f0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -130,8 +130,9 @@ enum perf_event_sample_format { PERF_SAMPLE_STREAM_ID = 1U << 9, PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_BRANCH_STACK = 1U << 11, + PERF_SAMPLE_REGS_USER = 1U << 12, - PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 13, /* non-ABI */ }; /* @@ -162,6 +163,15 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_KERNEL|\ PERF_SAMPLE_BRANCH_HV) +/* + * Values to determine ABI of the registers dump. + */ +enum perf_sample_regs_abi { + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, +}; + /* * The format of the data returned by read() on a perf event fd, * as specified by attr.read_format: @@ -194,6 +204,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -271,7 +282,13 @@ struct perf_event_attr { __u64 bp_len; __u64 config2; /* extension of config1 */ }; - __u64 branch_sample_type; /* enum branch_sample_type */ + __u64 branch_sample_type; /* enum perf_branch_sample_type */ + + /* + * Defines set of user regs to dump on samples. + * See asm/perf_regs.h for details. 
+ */ + __u64 sample_regs_user; }; /* @@ -548,6 +565,9 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * }; */ PERF_RECORD_SAMPLE = 9, @@ -609,6 +629,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -654,6 +675,11 @@ struct perf_branch_stack { struct perf_branch_entry entries[0]; }; +struct perf_regs_user { + __u64 abi; + struct pt_regs *regs; +}; + struct task_struct; /* @@ -1133,6 +1159,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; + struct perf_regs_user regs_user; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -1142,7 +1169,9 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->addr = addr; data->raw = NULL; data->br_stack = NULL; - data->period = period; + data->period = period; + data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; + data->regs_user.regs = NULL; } extern void perf_output_sample(struct perf_output_handle *handle, diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index a2f1a98f783..3c73d5fe18b 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -5,6 +5,7 @@ #include u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); +u64 perf_reg_abi(struct task_struct *task); #else static inline u64 perf_reg_value(struct pt_regs *regs, int idx) { @@ -15,5 +16,10 @@ static inline int perf_reg_validate(u64 mask) { return mask ? -ENOSYS : 0; } + +static inline u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_NONE; +} #endif /* CONFIG_HAVE_PERF_REGS */ #endif /* _LINUX_PERF_REGS_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index b7935fcec7d..d3ce97525b9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3756,6 +3756,37 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) } EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); +static void +perf_output_sample_regs(struct perf_output_handle *handle, + struct pt_regs *regs, u64 mask) +{ + int bit; + + for_each_set_bit(bit, (const unsigned long *) &mask, + sizeof(mask) * BITS_PER_BYTE) { + u64 val; + + val = perf_reg_value(regs, bit); + perf_output_put(handle, val); + } +} + +static void perf_sample_regs_user(struct perf_regs_user *regs_user, + struct pt_regs *regs) +{ + if (!user_mode(regs)) { + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + regs_user->regs = regs; + regs_user->abi = perf_reg_abi(current); + } +} + static void __perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event) @@ -4016,6 +4047,23 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_put(handle, nr); } } + + if (sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi = data->regs_user.abi; + + /* + * If there are no regs to dump, notice it through + * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE). 
+ */ + perf_output_put(handle, abi); + + if (abi) { + u64 mask = event->attr.sample_regs_user; + perf_output_sample_regs(handle, + data->regs_user.regs, + mask); + } + } } void perf_prepare_sample(struct perf_event_header *header, @@ -4067,6 +4115,20 @@ void perf_prepare_sample(struct perf_event_header *header, } header->size += size; } + + if (sample_type & PERF_SAMPLE_REGS_USER) { + /* regs dump ABI info */ + int size = sizeof(u64); + + perf_sample_regs_user(&data->regs_user, regs); + + if (data->regs_user.regs) { + u64 mask = event->attr.sample_regs_user; + size += hweight64(mask) * sizeof(u64); + } + + header->size += size; + } } static void perf_event_output(struct perf_event *event, @@ -6142,6 +6204,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->branch_sample_type = mask; } } + + if (attr->sample_type & PERF_SAMPLE_REGS_USER) + ret = perf_reg_validate(attr->sample_regs_user); + out: return ret; From 91d7753a45f8525dc75b6be01e427dc1c378dc16 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 7 Aug 2012 15:20:38 +0200 Subject: [PATCH 053/282] perf: Factor __output_copy to be usable with specific copy function Adding a generic way to use __output_copy function with specific copy function via DEFINE_PERF_OUTPUT_COPY macro. Using this to add new __output_copy_user function, that provides output copy from user pointers. For x86 the copy_from_user_nmi function is used and __copy_from_user_inatomic for the rest of the architectures. This new function will be used in user stack dump on sample, coming in next patches. Signed-off-by: Jiri Olsa Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-4-git-send-email-jolsa@redhat.com Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/asm/perf_event.h | 2 + include/linux/perf_event.h | 2 +- kernel/events/internal.h | 64 ++++++++++++++++++++----------- kernel/events/ring_buffer.c | 4 +- 4 files changed, 47 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index cb4e43bce98..4fabcdf1cfa 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { } static inline void amd_pmu_disable_virt(void) { } #endif +#define arch_perf_out_copy_user copy_from_user_nmi + #endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3d4d84745f0..d41394a1af3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1319,7 +1319,7 @@ static inline bool has_branch_stack(struct perf_event *event) extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); -extern void perf_output_copy(struct perf_output_handle *handle, +extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index a096c19f2c2..7fd5408493d 100644 --- a/kernel/events/internal.h +++ 
b/kernel/events/internal.h @@ -2,6 +2,7 @@ #define _KERNEL_EVENTS_INTERNAL_H #include +#include /* Buffer handling */ @@ -76,30 +77,49 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb) return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); } -static inline void -__output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len) -{ - do { - unsigned long size = min_t(unsigned long, handle->size, len); - - memcpy(handle->addr, buf, size); - - len -= size; - handle->addr += size; - buf += size; - handle->size -= size; - if (!handle->size) { - struct ring_buffer *rb = handle->rb; - - handle->page++; - handle->page &= rb->nr_pages - 1; - handle->addr = rb->data_pages[handle->page]; - handle->size = PAGE_SIZE << page_order(rb); - } - } while (len); +#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ +static inline unsigned int \ +func_name(struct perf_output_handle *handle, \ + const void *buf, unsigned int len) \ +{ \ + unsigned long size, written; \ + \ + do { \ + size = min_t(unsigned long, handle->size, len); \ + \ + written = memcpy_func(handle->addr, buf, size); \ + \ + len -= written; \ + handle->addr += written; \ + buf += written; \ + handle->size -= written; \ + if (!handle->size) { \ + struct ring_buffer *rb = handle->rb; \ + \ + handle->page++; \ + handle->page &= rb->nr_pages - 1; \ + handle->addr = rb->data_pages[handle->page]; \ + handle->size = PAGE_SIZE << page_order(rb); \ + } \ + } while (len && written == size); \ + \ + return len; \ } +static inline int memcpy_common(void *dst, const void *src, size_t n) +{ + memcpy(dst, src, n); + return n; +} + +DEFINE_OUTPUT_COPY(__output_copy, memcpy_common) + +#ifndef arch_perf_out_copy_user +#define arch_perf_out_copy_user __copy_from_user_inatomic +#endif + +DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) + /* Callchain handling */ extern struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 6ddaba43fb7..b4c2ad3dee7 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -182,10 +182,10 @@ out: return -ENOSPC; } -void perf_output_copy(struct perf_output_handle *handle, +unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len) { - __output_copy(handle, buf, len); + return __output_copy(handle, buf, len); } void perf_output_end(struct perf_output_handle *handle) From 5685e0ff45f5df67e79e9b052b6ffd501ff38c11 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:39 +0200 Subject: [PATCH 054/282] perf: Add perf_output_skip function to skip bytes in sample Introducing perf_output_skip function to be able to skip data within the perf ring buffer. When writing data into perf ring buffer we first reserve needed place in ring buffer and then copy the actual data. There's a possibility we won't be able to fill all the reserved size with data, so we need a way to skip the remaining bytes. This is going to be useful when storing the user stack dump, where we might end up with less data than we originally requested. Signed-off-by: Jiri Olsa Acked-by: Frederic Weisbecker Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. 
Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-5-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 ++ kernel/events/internal.h | 4 ++++ kernel/events/ring_buffer.c | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d41394a1af3..8a73f75beb1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1321,6 +1321,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern unsigned int perf_output_skip(struct perf_output_handle *handle, + unsigned int len); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern void perf_event_enable(struct perf_event *event); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 7fd5408493d..ce7bdfc1d04 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -114,6 +114,10 @@ static inline int memcpy_common(void *dst, const void *src, size_t n) DEFINE_OUTPUT_COPY(__output_copy, memcpy_common) +#define MEMCPY_SKIP(dst, src, n) (n) + +DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP) + #ifndef arch_perf_out_copy_user #define arch_perf_out_copy_user __copy_from_user_inatomic #endif diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index b4c2ad3dee7..23cb34ff397 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -188,6 +188,12 @@ unsigned int perf_output_copy(struct perf_output_handle *handle, return __output_copy(handle, buf, len); } +unsigned int perf_output_skip(struct perf_output_handle *handle, + unsigned int len) +{ + return __output_skip(handle, NULL, len); +} + void perf_output_end(struct perf_output_handle *handle) { perf_output_put_handle(handle); From c5ebcedb566ef17bda7b02686e0d658a7bb42ee7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:40 +0200 Subject: [PATCH 055/282] perf: Add ability to attach user stack dump to sample Introducing PERF_SAMPLE_STACK_USER sample type bit to trigger the dump of the user level stack on sample. The size of the dump is specified by sample_stack_user value. Being able to dump parts of the user stack, starting from the stack pointer, will be useful to make a post mortem dwarf CFI based stack unwinding. Added HAVE_PERF_USER_STACK_DUMP config option to determine if the architecture provides user stack dump on perf event samples. This needs access to the user stack pointer which is not unified across architectures. Enabling this for x86 architecture. Signed-off-by: Jiri Olsa Original-patch-by: Frederic Weisbecker Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. 
Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-6-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/Kconfig | 7 ++ arch/x86/Kconfig | 1 + include/linux/perf_event.h | 18 ++++- kernel/events/core.c | 150 ++++++++++++++++++++++++++++++++++++- kernel/events/internal.h | 16 ++++ 5 files changed, 190 insertions(+), 2 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 68d827b7ae8..2a83a3f6a61 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -228,6 +228,13 @@ config HAVE_PERF_REGS Support selective register dumps for perf events. This includes bit-mapping of each registers and a unique architecture id. +config HAVE_PERF_USER_STACK_DUMP + bool + help + Support user stack dumps for perf event samples. This needs + access to the user stack pointer which is not unified across + architectures. + config HAVE_ARCH_JUMP_LABEL bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3fab6ec9edc..a2d19ee750c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -61,6 +61,7 @@ config X86 select PERF_EVENTS select HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select ANON_INODES select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 select HAVE_CMPXCHG_LOCAL if !M386 diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8a73f75beb1..d1d25f6a5e2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -131,8 +131,9 @@ enum perf_event_sample_format { PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_BRANCH_STACK = 1U << 11, PERF_SAMPLE_REGS_USER = 1U << 12, + PERF_SAMPLE_STACK_USER = 1U << 13, - PERF_SAMPLE_MAX = 1U << 13, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */ }; /* @@ -205,6 +206,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ #define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */ +#define PERF_ATTR_SIZE_VER4 96 /* add: sample_stack_user */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -289,6 +291,14 @@ struct perf_event_attr { * See asm/perf_regs.h for details. */ __u64 sample_regs_user; + + /* + * Defines size of the user stack to dump on samples. + */ + __u32 sample_stack_user; + + /* Align to u64. 
*/ + __u32 __reserved_2; }; /* @@ -568,6 +578,10 @@ enum perf_event_type { * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * }; */ PERF_RECORD_SAMPLE = 9, @@ -1160,6 +1174,7 @@ struct perf_sample_data { struct perf_raw_record *raw; struct perf_branch_stack *br_stack; struct perf_regs_user regs_user; + u64 stack_user_size; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -1172,6 +1187,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->period = period; data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; data->regs_user.regs = NULL; + data->stack_user_size = 0; } extern void perf_output_sample(struct perf_output_handle *handle, diff --git a/kernel/events/core.c b/kernel/events/core.c index d3ce97525b9..2ba890450d1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "internal.h" @@ -3787,6 +3788,101 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user, } } +/* + * Get remaining task size from user stack pointer. + * + * It'd be better to take stack vma map and limit this more + * precisly, but there's no way to get it safely under interrupt, + * so using TASK_SIZE as limit. + */ +static u64 perf_ustack_task_size(struct pt_regs *regs) +{ + unsigned long addr = perf_user_stack_pointer(regs); + + if (!addr || addr >= TASK_SIZE) + return 0; + + return TASK_SIZE - addr; +} + +static u16 +perf_sample_ustack_size(u16 stack_size, u16 header_size, + struct pt_regs *regs) +{ + u64 task_size; + + /* No regs, no stack pointer, no dump. */ + if (!regs) + return 0; + + /* + * Check if we fit in with the requested stack size into the: + * - TASK_SIZE + * If we don't, we limit the size to the TASK_SIZE. + * + * - remaining sample size + * If we don't, we customize the stack size to + * fit in to the remaining sample size. + */ + + task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs)); + stack_size = min(stack_size, (u16) task_size); + + /* Current header size plus static size and dynamic size. */ + header_size += 2 * sizeof(u64); + + /* Do we fit in with the current stack dump size? */ + if ((u16) (header_size + stack_size) < header_size) { + /* + * If we overflow the maximum size for the sample, + * we customize the stack dump size to fit in. + */ + stack_size = USHRT_MAX - header_size - sizeof(u64); + stack_size = round_up(stack_size, sizeof(u64)); + } + + return stack_size; +} + +static void +perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, + struct pt_regs *regs) +{ + /* Case of a kernel thread, nothing to dump */ + if (!regs) { + u64 size = 0; + perf_output_put(handle, size); + } else { + unsigned long sp; + unsigned int rem; + u64 dyn_size; + + /* + * We dump: + * static size + * - the size requested by user or the best one we can fit + * in to the sample max size + * data + * - user stack dump data + * dynamic size + * - the actual dumped size + */ + + /* Static size. */ + perf_output_put(handle, dump_size); + + /* Data. */ + sp = perf_user_stack_pointer(regs); + rem = __output_copy_user(handle, (void *) sp, dump_size); + dyn_size = dump_size - rem; + + perf_output_skip(handle, rem); + + /* Dynamic size. 
*/ + perf_output_put(handle, dyn_size); + } +} + static void __perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event) @@ -4064,6 +4160,11 @@ void perf_output_sample(struct perf_output_handle *handle, mask); } } + + if (sample_type & PERF_SAMPLE_STACK_USER) + perf_output_sample_ustack(handle, + data->stack_user_size, + data->regs_user.regs); } void perf_prepare_sample(struct perf_event_header *header, @@ -4129,6 +4230,35 @@ void perf_prepare_sample(struct perf_event_header *header, header->size += size; } + + if (sample_type & PERF_SAMPLE_STACK_USER) { + /* + * Either we need PERF_SAMPLE_STACK_USER bit to be allways + * processed as the last one or have additional check added + * in case new sample type is added, because we could eat + * up the rest of the sample size. + */ + struct perf_regs_user *uregs = &data->regs_user; + u16 stack_size = event->attr.sample_stack_user; + u16 size = sizeof(u64); + + if (!uregs->abi) + perf_sample_regs_user(uregs, regs); + + stack_size = perf_sample_ustack_size(stack_size, header->size, + uregs->regs); + + /* + * If there is something to dump, add space for the dump + * itself and for the field that tells the dynamic size, + * which is how many have been actually dumped. + */ + if (stack_size) + size += sizeof(u64) + stack_size; + + data->stack_user_size = stack_size; + header->size += size; + } } static void perf_event_output(struct perf_event *event, @@ -6205,8 +6335,26 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, } } - if (attr->sample_type & PERF_SAMPLE_REGS_USER) + if (attr->sample_type & PERF_SAMPLE_REGS_USER) { ret = perf_reg_validate(attr->sample_regs_user); + if (ret) + return ret; + } + + if (attr->sample_type & PERF_SAMPLE_STACK_USER) { + if (!arch_perf_have_user_stack_dump()) + return -ENOSYS; + + /* + * We have __u32 type for the size, but so far + * we can only use __u16 as maximum due to the + * __u16 sample size limit. + */ + if (attr->sample_stack_user >= USHRT_MAX) + ret = -EINVAL; + else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) + ret = -EINVAL; + } out: return ret; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index ce7bdfc1d04..d56a64c99a8 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -158,4 +158,20 @@ static inline void put_recursion_context(int *recursion, int rctx) recursion[rctx]--; } +#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP +static inline bool arch_perf_have_user_stack_dump(void) +{ + return true; +} + +#define perf_user_stack_pointer(regs) user_stack_pointer(regs) +#else +static inline bool arch_perf_have_user_stack_dump(void) +{ + return false; +} + +#define perf_user_stack_pointer(regs) 0 +#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */ + #endif /* _KERNEL_EVENTS_INTERNAL_H */ From d077526485d5c9b12fe85d0b2b3b7041e6bc5f91 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 7 Aug 2012 15:20:41 +0200 Subject: [PATCH 056/282] perf: Add attribute to filter out callchains Introducing following bits to the the perf_event_attr struct: - exclude_callchain_kernel to filter out kernel callchain from the sample dump - exclude_callchain_user to filter out user callchain from the sample dump We need to be able to disable standard user callchain dump when we use the dwarf cfi callchain mode, because frame pointer based user callchains are useless in this mode. Implementing also exclude_callchain_kernel to have complete set of options. 
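As a usage sketch (illustration only, not taken from this patch), a userspace profiler that wants DWARF-based unwinding would combine the attribute bits added in this series roughly as below. The event choice, sample period, stack size and register mask are arbitrary examples, and exporting asm/perf_regs.h to userspace is assumed here:

#include <linux/perf_event.h>
#include <asm/perf_regs.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int open_dwarf_sample_event(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size          = sizeof(attr);
	attr.type          = PERF_TYPE_HARDWARE;
	attr.config        = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN |
			     PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER;
	/* registers needed to start the unwind: example mask only */
	attr.sample_regs_user  = (1ULL << PERF_REG_X86_IP) |
				 (1ULL << PERF_REG_X86_SP) |
				 (1ULL << PERF_REG_X86_BP);
	attr.sample_stack_user = 8192;	/* u64 aligned, below USHRT_MAX */
	/* dwarf unwind makes the frame-pointer user callchain redundant */
	attr.exclude_callchain_user = 1;

	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}

This is the combination that the user-space unwind support later in the series builds on.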
Signed-off-by: Jiri Olsa [ Added kernel callchains filtering ] Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-7-git-send-email-jolsa@redhat.com Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 5 ++++- kernel/events/callchain.c | 36 +++++++++++++++++++++++------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d1d25f6a5e2..297ca3db6b4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -268,7 +268,10 @@ struct perf_event_attr { exclude_host : 1, /* don't count in host */ exclude_guest : 1, /* don't count in guest */ - __reserved_1 : 43; + exclude_callchain_kernel : 1, /* exclude kernel callchains */ + exclude_callchain_user : 1, /* exclude user callchains */ + + __reserved_1 : 41; union { __u32 wakeup_events; /* wakeup every n events */ diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 98d4597f43d..c77206184b8 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -159,6 +159,11 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) int rctx; struct perf_callchain_entry *entry; + int kernel = !event->attr.exclude_callchain_kernel; + int user = !event->attr.exclude_callchain_user; + + if (!kernel && !user) + return NULL; entry = get_callchain_entry(&rctx); if (rctx == -1) @@ -169,24 +174,29 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) entry->nr = 0; - if (!user_mode(regs)) { + if (kernel && !user_mode(regs)) { perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_kernel(entry, regs); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; } - if (regs) { - /* - * Disallow cross-task user callchains. - */ - if (event->ctx->task && event->ctx->task != current) - goto exit_put; + if (user) { + if (!user_mode(regs)) { + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } - perf_callchain_store(entry, PERF_CONTEXT_USER); - perf_callchain_user(entry, regs); + if (regs) { + /* + * Disallow cross-task user callchains. + */ + if (event->ctx->task && event->ctx->task != current) + goto exit_put; + + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_user(entry, regs); + } } exit_put: From 239cc47819ec5e1acde42d416755c332ed5cf2c8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:42 +0200 Subject: [PATCH 057/282] perf tools: Adding PERF_ATTR_SIZE_VER2 to the header swap check Updating attr_file_abi_sizes array with PERF_ATTR_SIZE_VER2 version, so we have the swap check complete. Signed-off-by: Jiri Olsa Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. 
Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-8-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b2da439bce7..dd5a53c2944 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1829,6 +1829,7 @@ out_free: static const int attr_file_abi_sizes[] = { [0] = PERF_ATTR_SIZE_VER0, [1] = PERF_ATTR_SIZE_VER1, + [2] = PERF_ATTR_SIZE_VER2, 0, }; From 2bcd355b71dafa5793a680c5db043abe9f708418 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:43 +0200 Subject: [PATCH 058/282] perf tools: Add interface to arch registers sets Adding header files to access unified API for arch registers. util/perf_regs.h - global perf_reg declarations arch/x86/include/perf_regs.h - x86 arch specific Adding perf_reg_name function to obtain register name based on the reg ID value, and PERF_REGS_MASK macro with mask definition of all current arch registers (will be used in unwind patches). Signed-off-by: Jiri Olsa Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-9-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 13 +++- tools/perf/arch/x86/include/perf_regs.h | 80 +++++++++++++++++++++++++ tools/perf/util/perf_regs.h | 14 +++++ 3 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/x86/include/perf_regs.h create mode 100644 tools/perf/util/perf_regs.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index de6aa8c706c..27ae84bb5e4 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -55,13 +55,15 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ ) +NO_PERF_REGS := 1 CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar # Additional ARCH settings for x86 ifeq ($(ARCH),i386) - ARCH := x86 + ARCH := x86 + NO_PERF_REGS := 0 endif ifeq ($(ARCH),x86_64) ARCH := x86 @@ -74,6 +76,7 @@ ifeq ($(ARCH),x86_64) ARCH_CFLAGS := -DARCH_X86_64 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S endif + NO_PERF_REGS := 0 endif # Treat warnings as errors unless directed not to @@ -326,6 +329,7 @@ LIB_H += $(TRACE_EVENT_DIR)event-parse.h LIB_H += util/target.h LIB_H += util/rblist.h LIB_H += util/intlist.h +LIB_H += util/perf_regs.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -704,6 +708,13 @@ else endif endif +ifeq ($(NO_PERF_REGS),0) + ifeq ($(ARCH),x86) + LIB_H += arch/x86/include/perf_regs.h + endif +else + BASIC_CFLAGS += -DNO_PERF_REGS +endif ifdef NO_STRLCPY BASIC_CFLAGS += -DNO_STRLCPY diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h new file mode 100644 index 00000000000..46fc9f15c6b --- /dev/null +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -0,0 +1,80 @@ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include +#include "../../util/types.h" +#include 
"../../../../../arch/x86/include/asm/perf_regs.h" + +#ifndef ARCH_X86_64 +#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) +#else +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) +#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT) +#endif +#define PERF_REG_IP PERF_REG_X86_IP +#define PERF_REG_SP PERF_REG_X86_SP + +static inline const char *perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_X86_AX: + return "AX"; + case PERF_REG_X86_BX: + return "BX"; + case PERF_REG_X86_CX: + return "CX"; + case PERF_REG_X86_DX: + return "DX"; + case PERF_REG_X86_SI: + return "SI"; + case PERF_REG_X86_DI: + return "DI"; + case PERF_REG_X86_BP: + return "BP"; + case PERF_REG_X86_SP: + return "SP"; + case PERF_REG_X86_IP: + return "IP"; + case PERF_REG_X86_FLAGS: + return "FLAGS"; + case PERF_REG_X86_CS: + return "CS"; + case PERF_REG_X86_SS: + return "SS"; + case PERF_REG_X86_DS: + return "DS"; + case PERF_REG_X86_ES: + return "ES"; + case PERF_REG_X86_FS: + return "FS"; + case PERF_REG_X86_GS: + return "GS"; +#ifdef ARCH_X86_64 + case PERF_REG_X86_R8: + return "R8"; + case PERF_REG_X86_R9: + return "R9"; + case PERF_REG_X86_R10: + return "R10"; + case PERF_REG_X86_R11: + return "R11"; + case PERF_REG_X86_R12: + return "R12"; + case PERF_REG_X86_R13: + return "R13"; + case PERF_REG_X86_R14: + return "R14"; + case PERF_REG_X86_R15: + return "R15"; +#endif /* ARCH_X86_64 */ + default: + return NULL; + } + + return NULL; +} + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h new file mode 100644 index 00000000000..9bd6c4e069c --- /dev/null +++ b/tools/perf/util/perf_regs.h @@ -0,0 +1,14 @@ +#ifndef __PERF_REGS_H +#define __PERF_REGS_H + +#ifndef NO_PERF_REGS +#include +#else +#define PERF_REGS_MASK 0 + +static inline const char *perf_reg_name(int id __used) +{ + return NULL; +} +#endif /* NO_PERF_REGS */ +#endif /* __PERF_REGS_H */ From 6a40cd90f5deb6dec322eeb54587ae55a934db2c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:44 +0200 Subject: [PATCH 059/282] perf tools: Add libunwind dependency for DWARF CFI unwinding Adding libunwind to be linked with perf if available. It's required for the to get dwarf cfi unwinding support. Also building perf with the dwarf call frame informations by default, so that we can unwind callchains in perf itself. Adding LIBUNWIND_DIR Makefile variable allowing user to specify the directory with libunwind to be linked. This is used for debug purposes. Signed-off-by: Jiri Olsa Original-patch-by: Frederic Weisbecker Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-10-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 29 ++++++++++++++++++++++++++++- tools/perf/config/feature-tests.mak | 25 +++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 27ae84bb5e4..0aee6a91649 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -43,6 +43,8 @@ include config/utilities.mak # # Define NO_LIBELF if you do not want libelf dependency (e.g. 
cross-builds) # +# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf +# backtrace post unwind. $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) @@ -64,6 +66,7 @@ AR = $(CROSS_COMPILE)ar ifeq ($(ARCH),i386) ARCH := x86 NO_PERF_REGS := 0 + LIBUNWIND_LIBS = -lunwind -lunwind-x86 endif ifeq ($(ARCH),x86_64) ARCH := x86 @@ -77,6 +80,7 @@ ifeq ($(ARCH),x86_64) ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S endif NO_PERF_REGS := 0 + LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 endif # Treat warnings as errors unless directed not to @@ -97,7 +101,7 @@ ifdef PARSER_DEBUG PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG endif -CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) +CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_LDFLAGS = $(LDFLAGS) @@ -476,6 +480,21 @@ ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y) endif endif # NO_LIBELF +ifndef NO_LIBUNWIND +# for linking with debug library, run like: +# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ +ifdef LIBUNWIND_DIR + LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include + LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib +endif + +FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(ALL_CFLAGS) $(LIBUNWIND_LDFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS) +ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND)),y) + msg := $(warning No libunwind found. Please install libunwind >= 0.99); + NO_LIBUNWIND := 1 +endif # Libunwind support +endif # NO_LIBUNWIND + -include arch/$(ARCH)/Makefile ifneq ($(OUTPUT),) @@ -522,6 +541,14 @@ endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF endif # NO_LIBELF +ifdef NO_LIBUNWIND + BASIC_CFLAGS += -DNO_LIBUNWIND_SUPPORT +else + EXTLIBS += $(LIBUNWIND_LIBS) + BASIC_CFLAGS := $(LIBUNWIND_CFLAGS) $(BASIC_CFLAGS) + BASIC_LDFLAGS := $(LIBUNWIND_LDFLAGS) $(BASIC_LDFLAGS) +endif + ifdef NO_NEWT BASIC_CFLAGS += -DNO_NEWT_SUPPORT else diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index 6c18785a641..2f1156a62ab 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -154,3 +154,28 @@ int main(void) return 0; } endef + +ifndef NO_LIBUNWIND +define SOURCE_LIBUNWIND +#include +#include + +extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +int main(void) +{ + unw_addr_space_t addr_space; + addr_space = unw_create_addr_space(NULL, 0); + unw_init_remote(NULL, addr_space, NULL); + dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); + return 0; +} +endef +endif From 0f6a30150ca2e0cf4f893e7173d61434a3c02e0e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:45 +0200 Subject: [PATCH 060/282] perf tools: Support user regs and stack in sample parsing Adding following info to be parsed out of the event sample: - user register set - user stack dump Both are global and specific to all events within the session. This info will be used in the unwind patches coming in shortly. 
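For reference, the slice of PERF_RECORD_SAMPLE that this parser has to walk is the one documented on the kernel side earlier in this series (layout sketch only, no new ABI):

	{ u64 abi;			/* PERF_SAMPLE_REGS_USER, enum perf_sample_regs_abi */
	  u64 regs[weight(mask)]; }	/* regs present only if abi != PERF_SAMPLE_REGS_ABI_NONE */

	{ u64  size;			/* PERF_SAMPLE_STACK_USER, requested dump size */
	  char data[size];		/* the stack dump itself */
	  u64  dyn_size; }		/* bytes actually dumped; data/dyn_size omitted if size == 0 */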
Adding simple output printout (report -D) for both register and stack dumps. Signed-off-by: Jiri Olsa Original-patch-by: Frederic Weisbecker Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-11-git-send-email-jolsa@redhat.com [ Use evsel->attr.sample_regs_user ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 12 ++++++++++++ tools/perf/util/evsel.c | 28 ++++++++++++++++++++++++++ tools/perf/util/header.c | 2 ++ tools/perf/util/session.c | 41 ++++++++++++++++++++++++++++++++++++--- 4 files changed, 80 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index d84870b0642..0e088d046e5 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -69,6 +69,16 @@ struct sample_event { u64 array[]; }; +struct regs_dump { + u64 *regs; +}; + +struct stack_dump { + u16 offset; + u64 size; + char *data; +}; + struct perf_sample { u64 ip; u32 pid, tid; @@ -82,6 +92,8 @@ struct perf_sample { void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; + struct regs_dump user_regs; + struct stack_dump user_stack; }; #define BUILD_ID_SIZE 20 diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2eaae140def..a2da682db81 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -8,6 +8,7 @@ */ #include +#include #include "asm/bug.h" #include "evsel.h" #include "evlist.h" @@ -733,6 +734,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *data, bool swapped) { u64 type = evsel->attr.sample_type; + u64 regs_user = evsel->attr.sample_regs_user; const u64 *array; /* @@ -869,6 +871,32 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, sz /= sizeof(u64); array += sz; } + + if (type & PERF_SAMPLE_REGS_USER) { + /* First u64 tells us if we have any regs in sample. 
*/ + u64 avail = *array++; + + if (avail) { + data->user_regs.regs = (u64 *)array; + array += hweight_long(regs_user); + } + } + + if (type & PERF_SAMPLE_STACK_USER) { + u64 size = *array++; + + data->user_stack.offset = ((char *)(array - 1) + - (char *) event); + + if (!size) { + data->user_stack.size = 0; + } else { + data->user_stack.data = (char *)array; + array += size / sizeof(*array); + data->user_stack.size = *array; + } + } + return 0; } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index dd5a53c2944..7e7d34fc557 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1830,6 +1830,8 @@ static const int attr_file_abi_sizes[] = { [0] = PERF_ATTR_SIZE_VER0, [1] = PERF_ATTR_SIZE_VER1, [2] = PERF_ATTR_SIZE_VER2, + [3] = PERF_ATTR_SIZE_VER3, + [4] = PERF_ATTR_SIZE_VER4, 0, }; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2437fb0b463..c9ed7e3cf23 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -15,6 +15,7 @@ #include "util.h" #include "cpumap.h" #include "event-parse.h" +#include "perf_regs.h" static int perf_session__open(struct perf_session *self, bool force) { @@ -860,6 +861,34 @@ static void branch_stack__printf(struct perf_sample *sample) sample->branch_stack->entries[i].to); } +static void regs_dump__printf(u64 mask, u64 *regs) +{ + unsigned rid, i = 0; + + for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) { + u64 val = regs[i++]; + + printf(".... %-5s 0x%" PRIx64 "\n", + perf_reg_name(rid), val); + } +} + +static void regs_user__printf(struct perf_sample *sample, u64 mask) +{ + struct regs_dump *user_regs = &sample->user_regs; + + if (user_regs->regs) { + printf("... user regs: mask 0x%" PRIx64 "\n", mask); + regs_dump__printf(mask, user_regs->regs); + } +} + +static void stack_user__printf(struct stack_dump *dump) +{ + printf("... 
ustack: size %" PRIu64 ", offset 0x%x\n", + dump->size, dump->offset); +} + static void perf_session__print_tstamp(struct perf_session *session, union perf_event *event, struct perf_sample *sample) @@ -897,7 +926,7 @@ static void dump_event(struct perf_session *session, union perf_event *event, event->header.size, perf_event__name(event->header.type)); } -static void dump_sample(struct perf_session *session, union perf_event *event, +static void dump_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample) { u64 sample_type; @@ -909,13 +938,19 @@ static void dump_sample(struct perf_session *session, union perf_event *event, event->header.misc, sample->pid, sample->tid, sample->ip, sample->period, sample->addr); - sample_type = perf_evlist__sample_type(session->evlist); + sample_type = evsel->attr.sample_type; if (sample_type & PERF_SAMPLE_CALLCHAIN) callchain__printf(sample); if (sample_type & PERF_SAMPLE_BRANCH_STACK) branch_stack__printf(sample); + + if (sample_type & PERF_SAMPLE_REGS_USER) + regs_user__printf(sample, evsel->attr.sample_regs_user); + + if (sample_type & PERF_SAMPLE_STACK_USER) + stack_user__printf(&sample->user_stack); } static struct machine * @@ -973,7 +1008,7 @@ static int perf_session_deliver_event(struct perf_session *session, switch (event->header.type) { case PERF_RECORD_SAMPLE: - dump_sample(session, event, sample); + dump_sample(evsel, event, sample); if (evsel == NULL) { ++session->hists.stats.nr_unknown_id; return 0; From 71ad0f5e4e361c8bca864c7d09d14b64af6bc2fc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:46 +0200 Subject: [PATCH 061/282] perf tools: Support for DWARF CFI unwinding on post processing This brings the support for DWARF cfi unwinding on perf post processing. Call frame informations are retrieved and then passed to libunwind that requests memory and register content from the applications. Adding unwind object to handle the user stack backtrace based on the user register values and user stack dump. The unwind object access the libunwind via remote interface and provides to it all the necessary data to unwind the stack. The unwind interface provides following function: unwind__get_entries And callback (specified in above function) to retrieve the backtrace entries: typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); Signed-off-by: Jiri Olsa Original-patch-by: Frederic Weisbecker Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. 
Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-12-git-send-email-jolsa@redhat.com [ Replaced use of perf_session by usage of perf_evsel ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 + tools/perf/arch/x86/Makefile | 3 + tools/perf/arch/x86/util/unwind.c | 111 +++++ tools/perf/builtin-report.c | 18 +- tools/perf/builtin-script.c | 4 +- tools/perf/builtin-top.c | 6 +- tools/perf/util/include/linux/compiler.h | 1 + tools/perf/util/map.h | 5 +- tools/perf/util/session.c | 66 ++- tools/perf/util/session.h | 6 +- tools/perf/util/trace-event.h | 2 + tools/perf/util/unwind.c | 567 +++++++++++++++++++++++ tools/perf/util/unwind.h | 34 ++ 13 files changed, 795 insertions(+), 30 deletions(-) create mode 100644 tools/perf/arch/x86/util/unwind.c create mode 100644 tools/perf/util/unwind.c create mode 100644 tools/perf/util/unwind.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0aee6a91649..e457afa04b5 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -334,6 +334,7 @@ LIB_H += util/target.h LIB_H += util/rblist.h LIB_H += util/intlist.h LIB_H += util/perf_regs.h +LIB_H += util/unwind.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -547,6 +548,7 @@ else EXTLIBS += $(LIBUNWIND_LIBS) BASIC_CFLAGS := $(LIBUNWIND_CFLAGS) $(BASIC_CFLAGS) BASIC_LDFLAGS := $(LIBUNWIND_LDFLAGS) $(BASIC_LDFLAGS) + LIB_OBJS += $(OUTPUT)util/unwind.o endif ifdef NO_NEWT diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 744e629797b..815841c04eb 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -2,4 +2,7 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif +ifndef NO_LIBUNWIND +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o +endif LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/x86/util/unwind.c b/tools/perf/arch/x86/util/unwind.c new file mode 100644 index 00000000000..78d956eff96 --- /dev/null +++ b/tools/perf/arch/x86/util/unwind.c @@ -0,0 +1,111 @@ + +#include +#include +#include "perf_regs.h" +#include "../../util/unwind.h" + +#ifdef ARCH_X86_64 +int unwind__arch_reg_id(int regnum) +{ + int id; + + switch (regnum) { + case UNW_X86_64_RAX: + id = PERF_REG_X86_AX; + break; + case UNW_X86_64_RDX: + id = PERF_REG_X86_DX; + break; + case UNW_X86_64_RCX: + id = PERF_REG_X86_CX; + break; + case UNW_X86_64_RBX: + id = PERF_REG_X86_BX; + break; + case UNW_X86_64_RSI: + id = PERF_REG_X86_SI; + break; + case UNW_X86_64_RDI: + id = PERF_REG_X86_DI; + break; + case UNW_X86_64_RBP: + id = PERF_REG_X86_BP; + break; + case UNW_X86_64_RSP: + id = PERF_REG_X86_SP; + break; + case UNW_X86_64_R8: + id = PERF_REG_X86_R8; + break; + case UNW_X86_64_R9: + id = PERF_REG_X86_R9; + break; + case UNW_X86_64_R10: + id = PERF_REG_X86_R10; + break; + case UNW_X86_64_R11: + id = PERF_REG_X86_R11; + break; + case UNW_X86_64_R12: + id = PERF_REG_X86_R12; + break; + case UNW_X86_64_R13: + id = PERF_REG_X86_R13; + break; + case UNW_X86_64_R14: + id = PERF_REG_X86_R14; + break; + case UNW_X86_64_R15: + id = PERF_REG_X86_R15; + break; + case UNW_X86_64_RIP: + id = PERF_REG_X86_IP; + break; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return id; +} +#else +int unwind__arch_reg_id(int regnum) +{ + int id; + + switch (regnum) { + case 
UNW_X86_EAX: + id = PERF_REG_X86_AX; + break; + case UNW_X86_EDX: + id = PERF_REG_X86_DX; + break; + case UNW_X86_ECX: + id = PERF_REG_X86_CX; + break; + case UNW_X86_EBX: + id = PERF_REG_X86_BX; + break; + case UNW_X86_ESI: + id = PERF_REG_X86_SI; + break; + case UNW_X86_EDI: + id = PERF_REG_X86_DI; + break; + case UNW_X86_EBP: + id = PERF_REG_X86_BP; + break; + case UNW_X86_ESP: + id = PERF_REG_X86_SP; + break; + case UNW_X86_EIP: + id = PERF_REG_X86_IP; + break; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return id; +} +#endif /* ARCH_X86_64 */ diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7c88a243b5d..d61825371ad 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -69,8 +69,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, al->thread, - sample->callchain, &parent); + err = machine__resolve_callchain(machine, evsel, al->thread, + sample, &parent); if (err) return err; } @@ -140,8 +140,8 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, struct hist_entry *he; if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, al->thread, - sample->callchain, &parent); + err = machine__resolve_callchain(machine, evsel, al->thread, + sample, &parent); if (err) return err; } @@ -397,17 +397,17 @@ static int __cmd_report(struct perf_report *rep) desc); } - if (dump_trace) { - perf_session__fprintf_nr_events(session, stdout); - goto out_delete; - } - if (verbose > 3) perf_session__fprintf(session, stdout); if (verbose > 2) perf_session__fprintf_dsos(session, stdout); + if (dump_trace) { + perf_session__fprintf_nr_events(session, stdout); + goto out_delete; + } + nr_samples = 0; list_for_each_entry(pos, &session->evlist->entries, node) { struct hists *hists = &pos->hists; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 30a9cb8c992..2d6e3b226aa 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -380,7 +380,7 @@ static void print_sample_bts(union perf_event *event, printf(" "); else printf("\n"); - perf_event__print_ip(event, sample, machine, + perf_evsel__print_ip(evsel, event, sample, machine, PRINT_FIELD(SYM), PRINT_FIELD(DSO), PRINT_FIELD(SYMOFFSET)); } @@ -422,7 +422,7 @@ static void process_event(union perf_event *event, struct perf_sample *sample, printf(" "); else printf("\n"); - perf_event__print_ip(event, sample, machine, + perf_evsel__print_ip(evsel, event, sample, machine, PRINT_FIELD(SYM), PRINT_FIELD(DSO), PRINT_FIELD(SYMOFFSET)); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 68cd61ef6ac..e45a1ba6172 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -783,8 +783,10 @@ static void perf_event__process_sample(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, al.thread, - sample->callchain, &parent); + err = machine__resolve_callchain(machine, evsel, + al.thread, sample, + &parent); + if (err) return; } diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h index 547628e97f3..2dc867128e4 100644 --- a/tools/perf/util/include/linux/compiler.h +++ b/tools/perf/util/include/linux/compiler.h @@ -10,5 +10,6 @@ #endif #define __used 
__attribute__((__unused__)) +#define __packed __attribute__((__packed__)) #endif diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 1e183d1ae58..c98ab190060 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -158,9 +158,12 @@ int machine__init(struct machine *self, const char *root_dir, pid_t pid); void machine__exit(struct machine *self); void machine__delete(struct machine *self); +struct perf_evsel; +struct perf_sample; int machine__resolve_callchain(struct machine *machine, + struct perf_evsel *evsel, struct thread *thread, - struct ip_callchain *chain, + struct perf_sample *sample, struct symbol **parent); int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name, u64 addr); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index c9ed7e3cf23..f7bb7ae328d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -16,6 +16,7 @@ #include "cpumap.h" #include "event-parse.h" #include "perf_regs.h" +#include "unwind.h" static int perf_session__open(struct perf_session *self, bool force) { @@ -289,10 +290,11 @@ struct branch_info *machine__resolve_bstack(struct machine *self, return bi; } -int machine__resolve_callchain(struct machine *self, - struct thread *thread, - struct ip_callchain *chain, - struct symbol **parent) +static int machine__resolve_callchain_sample(struct machine *machine, + struct thread *thread, + struct ip_callchain *chain, + struct symbol **parent) + { u8 cpumode = PERF_RECORD_MISC_USER; unsigned int i; @@ -317,11 +319,14 @@ int machine__resolve_callchain(struct machine *self, if (ip >= PERF_CONTEXT_MAX) { switch (ip) { case PERF_CONTEXT_HV: - cpumode = PERF_RECORD_MISC_HYPERVISOR; break; + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; case PERF_CONTEXT_KERNEL: - cpumode = PERF_RECORD_MISC_KERNEL; break; + cpumode = PERF_RECORD_MISC_KERNEL; + break; case PERF_CONTEXT_USER: - cpumode = PERF_RECORD_MISC_USER; break; + cpumode = PERF_RECORD_MISC_USER; + break; default: pr_debug("invalid callchain context: " "%"PRId64"\n", (s64) ip); @@ -336,7 +341,7 @@ int machine__resolve_callchain(struct machine *self, } al.filtered = false; - thread__find_addr_location(thread, self, cpumode, + thread__find_addr_location(thread, machine, cpumode, MAP__FUNCTION, ip, &al, NULL); if (al.sym != NULL) { if (sort__has_parent && !*parent && @@ -355,6 +360,40 @@ int machine__resolve_callchain(struct machine *self, return 0; } +static int unwind_entry(struct unwind_entry *entry, void *arg) +{ + struct callchain_cursor *cursor = arg; + return callchain_cursor_append(cursor, entry->ip, + entry->map, entry->sym); +} + +int machine__resolve_callchain(struct machine *machine, + struct perf_evsel *evsel, + struct thread *thread, + struct perf_sample *sample, + struct symbol **parent) + +{ + int ret; + + callchain_cursor_reset(&callchain_cursor); + + ret = machine__resolve_callchain_sample(machine, thread, + sample->callchain, parent); + if (ret) + return ret; + + /* Can we do dwarf post unwind? 
*/ + if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) && + (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER))) + return 0; + + return unwind__get_entries(unwind_entry, &callchain_cursor, machine, + thread, evsel->attr.sample_regs_user, + sample); + +} + static int process_event_synth_tracing_data_stub(union perf_event *event __used, struct perf_session *session __used) { @@ -1533,9 +1572,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, return NULL; } -void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, - struct machine *machine, int print_sym, - int print_dso, int print_symoffset) +void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, + struct perf_sample *sample, struct machine *machine, + int print_sym, int print_dso, int print_symoffset) { struct addr_location al; struct callchain_cursor_node *node; @@ -1549,8 +1588,9 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, if (symbol_conf.use_callchain && sample->callchain) { - if (machine__resolve_callchain(machine, al.thread, - sample->callchain, NULL) != 0) { + + if (machine__resolve_callchain(machine, evsel, al.thread, + sample, NULL) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 1f7ec87db7d..176a60902f5 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -129,9 +129,9 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); -void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, - struct machine *machine, int print_sym, - int print_dso, int print_symoffset); +void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, + struct perf_sample *sample, struct machine *machine, + int print_sym, int print_dso, int print_symoffset); int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 7575dfd26e5..a55fd37ffea 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -77,6 +77,8 @@ void tracing_data_put(struct tracing_data *tdata); struct addr_location; +struct perf_session; + struct scripting_ops { const char *name; int (*start_script) (const char *script, int argc, const char **argv); diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c new file mode 100644 index 00000000000..00a42aa8d5c --- /dev/null +++ b/tools/perf/util/unwind.c @@ -0,0 +1,567 @@ +/* + * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps. 
+ * + * Lots of this code have been borrowed or heavily inspired from parts of + * the libunwind 0.99 code which are (amongst other contributors I may have + * forgotten): + * + * Copyright (C) 2002-2007 Hewlett-Packard Co + * Contributed by David Mosberger-Tang + * + * And the bugs have been added by: + * + * Copyright (C) 2010, Frederic Weisbecker + * Copyright (C) 2012, Jiri Olsa + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "thread.h" +#include "session.h" +#include "perf_regs.h" +#include "unwind.h" +#include "util.h" + +extern int +UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ +#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ + +/* Pointer-encoding formats: */ +#define DW_EH_PE_omit 0xff +#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */ +#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */ +#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */ +#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */ +#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */ + +/* Pointer-encoding application: */ +#define DW_EH_PE_absptr 0x00 /* absolute value */ +#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */ + +/* + * The following are not documented by LSB v1.3, yet they are used by + * GCC, presumably they aren't documented by LSB since they aren't + * used on Linux: + */ +#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ +#define DW_EH_PE_aligned 0x50 /* aligned pointer */ + +/* Flags intentionaly not handled, since they're not needed: + * #define DW_EH_PE_indirect 0x80 + * #define DW_EH_PE_uleb128 0x01 + * #define DW_EH_PE_udata2 0x02 + * #define DW_EH_PE_sleb128 0x09 + * #define DW_EH_PE_sdata2 0x0a + * #define DW_EH_PE_textrel 0x20 + * #define DW_EH_PE_datarel 0x30 + */ + +struct unwind_info { + struct perf_sample *sample; + struct machine *machine; + struct thread *thread; + u64 sample_uregs; +}; + +#define dw_read(ptr, type, end) ({ \ + type *__p = (type *) ptr; \ + type __v; \ + if ((__p + 1) > (type *) end) \ + return -EINVAL; \ + __v = *__p++; \ + ptr = (typeof(ptr)) __p; \ + __v; \ + }) + +static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val, + u8 encoding) +{ + u8 *cur = *p; + *val = 0; + + switch (encoding) { + case DW_EH_PE_omit: + *val = 0; + goto out; + case DW_EH_PE_ptr: + *val = dw_read(cur, unsigned long, end); + goto out; + default: + break; + } + + switch (encoding & DW_EH_PE_APPL_MASK) { + case DW_EH_PE_absptr: + break; + case DW_EH_PE_pcrel: + *val = (unsigned long) cur; + break; + default: + return -EINVAL; + } + + if ((encoding & 0x07) == 0x00) + encoding |= DW_EH_PE_udata4; + + switch (encoding & DW_EH_PE_FORMAT_MASK) { + case DW_EH_PE_sdata4: + *val += dw_read(cur, s32, end); + break; + case DW_EH_PE_udata4: + *val += dw_read(cur, u32, end); + break; + case DW_EH_PE_sdata8: + *val += dw_read(cur, s64, end); + break; + case DW_EH_PE_udata8: + *val += dw_read(cur, u64, end); + break; + default: + return -EINVAL; + } + + out: + *p = cur; + return 0; +} + +#define dw_read_encoded_value(ptr, end, enc) ({ \ + u64 __v; \ + if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \ + return -EINVAL; \ + } \ + __v; \ + }) + +static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + GElf_Shdr 
*shp, const char *name) +{ + Elf_Scn *sec = NULL; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *str; + + gelf_getshdr(sec, shp); + str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); + if (!strcmp(name, str)) + break; + } + + return sec; +} + +static u64 elf_section_offset(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + u64 offset = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + + do { + if (gelf_getehdr(elf, &ehdr) == NULL) + break; + + if (!elf_section_by_name(elf, &ehdr, &shdr, name)) + break; + + offset = shdr.sh_offset; + } while (0); + + elf_end(elf); + return offset; +} + +struct table_entry { + u32 start_ip_offset; + u32 fde_offset; +}; + +struct eh_frame_hdr { + unsigned char version; + unsigned char eh_frame_ptr_enc; + unsigned char fde_count_enc; + unsigned char table_enc; + + /* + * The rest of the header is variable-length and consists of the + * following members: + * + * encoded_t eh_frame_ptr; + * encoded_t fde_count; + */ + + /* A single encoded pointer should not be more than 8 bytes. */ + u64 enc[2]; + + /* + * struct { + * encoded_t start_ip; + * encoded_t fde_addr; + * } binary_search_table[fde_count]; + */ + char data[0]; +} __packed; + +static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, + u64 offset, u64 *table_data, u64 *segbase, + u64 *fde_count) +{ + struct eh_frame_hdr hdr; + u8 *enc = (u8 *) &hdr.enc; + u8 *end = (u8 *) &hdr.data; + ssize_t r; + + r = dso__data_read_offset(dso, machine, offset, + (u8 *) &hdr, sizeof(hdr)); + if (r != sizeof(hdr)) + return -EINVAL; + + /* We dont need eh_frame_ptr, just skip it. */ + dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc); + + *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc); + *segbase = offset; + *table_data = (enc - (u8 *) &hdr) + offset; + return 0; +} + +static int read_unwind_spec(struct dso *dso, struct machine *machine, + u64 *table_data, u64 *segbase, u64 *fde_count) +{ + int ret = -EINVAL, fd; + u64 offset; + + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -EINVAL; + + offset = elf_section_offset(fd, ".eh_frame_hdr"); + close(fd); + + if (offset) + ret = unwind_spec_ehframe(dso, machine, offset, + table_data, segbase, + fde_count); + + /* TODO .debug_frame check if eh_frame_hdr fails */ + return ret; +} + +static struct map *find_map(unw_word_t ip, struct unwind_info *ui) +{ + struct addr_location al; + + thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al); + return al.map; +} + +static int +find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, + int need_unwind_info, void *arg) +{ + struct unwind_info *ui = arg; + struct map *map; + unw_dyn_info_t di; + u64 table_data, segbase, fde_count; + + map = find_map(ip, ui); + if (!map || !map->dso) + return -EINVAL; + + pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); + + if (read_unwind_spec(map->dso, ui->machine, + &table_data, &segbase, &fde_count)) + return -EINVAL; + + memset(&di, 0, sizeof(di)); + di.format = UNW_INFO_FORMAT_REMOTE_TABLE; + di.start_ip = map->start; + di.end_ip = map->end; + di.u.rti.segbase = map->start + segbase; + di.u.rti.table_data = map->start + table_data; + di.u.rti.table_len = fde_count * sizeof(struct table_entry) + / sizeof(unw_word_t); + return dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); +} + +static int access_fpreg(unw_addr_space_t __used as, unw_regnum_t __used num, + unw_fpreg_t __used *val, 
int __used __write, + void __used *arg) +{ + pr_err("unwind: access_fpreg unsupported\n"); + return -UNW_EINVAL; +} + +static int get_dyn_info_list_addr(unw_addr_space_t __used as, + unw_word_t __used *dil_addr, + void __used *arg) +{ + return -UNW_ENOINFO; +} + +static int resume(unw_addr_space_t __used as, unw_cursor_t __used *cu, + void __used *arg) +{ + pr_err("unwind: resume unsupported\n"); + return -UNW_EINVAL; +} + +static int +get_proc_name(unw_addr_space_t __used as, unw_word_t __used addr, + char __used *bufp, size_t __used buf_len, + unw_word_t __used *offp, void __used *arg) +{ + pr_err("unwind: get_proc_name unsupported\n"); + return -UNW_EINVAL; +} + +static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, + unw_word_t *data) +{ + struct addr_location al; + ssize_t size; + + thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + MAP__FUNCTION, addr, &al); + if (!al.map) { + pr_debug("unwind: no map for %lx\n", (unsigned long)addr); + return -1; + } + + if (!al.map->dso) + return -1; + + size = dso__data_read_addr(al.map->dso, al.map, ui->machine, + addr, (u8 *) data, sizeof(*data)); + + return !(size == sizeof(*data)); +} + +static int reg_value(unw_word_t *valp, struct regs_dump *regs, int id, + u64 sample_regs) +{ + int i, idx = 0; + + if (!(sample_regs & (1 << id))) + return -EINVAL; + + for (i = 0; i < id; i++) { + if (sample_regs & (1 << i)) + idx++; + } + + *valp = regs->regs[idx]; + return 0; +} + +static int access_mem(unw_addr_space_t __used as, + unw_word_t addr, unw_word_t *valp, + int __write, void *arg) +{ + struct unwind_info *ui = arg; + struct stack_dump *stack = &ui->sample->user_stack; + unw_word_t start, end; + int offset; + int ret; + + /* Don't support write, probably not needed. */ + if (__write || !stack || !ui->sample->user_regs.regs) { + *valp = 0; + return 0; + } + + ret = reg_value(&start, &ui->sample->user_regs, PERF_REG_SP, + ui->sample_uregs); + if (ret) + return ret; + + end = start + stack->size; + + /* Check overflow. */ + if (addr + sizeof(unw_word_t) < addr) + return -EINVAL; + + if (addr < start || addr + sizeof(unw_word_t) >= end) { + ret = access_dso_mem(ui, addr, valp); + if (ret) { + pr_debug("unwind: access_mem %p not inside range %p-%p\n", + (void *)addr, (void *)start, (void *)end); + *valp = 0; + return ret; + } + return 0; + } + + offset = addr - start; + *valp = *(unw_word_t *)&stack->data[offset]; + pr_debug("unwind: access_mem addr %p, val %lx, offset %d\n", + (void *)addr, (unsigned long)*valp, offset); + return 0; +} + +static int access_reg(unw_addr_space_t __used as, + unw_regnum_t regnum, unw_word_t *valp, + int __write, void *arg) +{ + struct unwind_info *ui = arg; + int id, ret; + + /* Don't support write, I suspect we don't need it. 
*/ + if (__write) { + pr_err("unwind: access_reg w %d\n", regnum); + return 0; + } + + if (!ui->sample->user_regs.regs) { + *valp = 0; + return 0; + } + + id = unwind__arch_reg_id(regnum); + if (id < 0) + return -EINVAL; + + ret = reg_value(valp, &ui->sample->user_regs, id, ui->sample_uregs); + if (ret) { + pr_err("unwind: can't read reg %d\n", regnum); + return ret; + } + + pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp); + return 0; +} + +static void put_unwind_info(unw_addr_space_t __used as, + unw_proc_info_t *pi __used, + void *arg __used) +{ + pr_debug("unwind: put_unwind_info called\n"); +} + +static int entry(u64 ip, struct thread *thread, struct machine *machine, + unwind_entry_cb_t cb, void *arg) +{ + struct unwind_entry e; + struct addr_location al; + + thread__find_addr_location(thread, machine, + PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al, NULL); + + e.ip = ip; + e.map = al.map; + e.sym = al.sym; + + pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", + al.sym ? al.sym->name : "''", + ip, + al.map ? al.map->map_ip(al.map, ip) : (u64) 0); + + return cb(&e, arg); +} + +static void display_error(int err) +{ + switch (err) { + case UNW_EINVAL: + pr_err("unwind: Only supports local.\n"); + break; + case UNW_EUNSPEC: + pr_err("unwind: Unspecified error.\n"); + break; + case UNW_EBADREG: + pr_err("unwind: Register unavailable.\n"); + break; + default: + break; + } +} + +static unw_accessors_t accessors = { + .find_proc_info = find_proc_info, + .put_unwind_info = put_unwind_info, + .get_dyn_info_list_addr = get_dyn_info_list_addr, + .access_mem = access_mem, + .access_reg = access_reg, + .access_fpreg = access_fpreg, + .resume = resume, + .get_proc_name = get_proc_name, +}; + +static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, + void *arg) +{ + unw_addr_space_t addr_space; + unw_cursor_t c; + int ret; + + addr_space = unw_create_addr_space(&accessors, 0); + if (!addr_space) { + pr_err("unwind: Can't create unwind address space.\n"); + return -ENOMEM; + } + + ret = unw_init_remote(&c, addr_space, ui); + if (ret) + display_error(ret); + + while (!ret && (unw_step(&c) > 0)) { + unw_word_t ip; + + unw_get_reg(&c, UNW_REG_IP, &ip); + ret = entry(ip, ui->thread, ui->machine, cb, arg); + } + + unw_destroy_addr_space(addr_space); + return ret; +} + +int unwind__get_entries(unwind_entry_cb_t cb, void *arg, + struct machine *machine, struct thread *thread, + u64 sample_uregs, struct perf_sample *data) +{ + unw_word_t ip; + struct unwind_info ui = { + .sample = data, + .sample_uregs = sample_uregs, + .thread = thread, + .machine = machine, + }; + int ret; + + if (!data->user_regs.regs) + return -EINVAL; + + ret = reg_value(&ip, &data->user_regs, PERF_REG_IP, sample_uregs); + if (ret) + return ret; + + ret = entry(ip, thread, machine, cb, arg); + if (ret) + return -ENOMEM; + + return get_entries(&ui, cb, arg); +} diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h new file mode 100644 index 00000000000..919bd6ad850 --- /dev/null +++ b/tools/perf/util/unwind.h @@ -0,0 +1,34 @@ +#ifndef __UNWIND_H +#define __UNWIND_H + +#include "types.h" +#include "event.h" +#include "symbol.h" + +struct unwind_entry { + struct map *map; + struct symbol *sym; + u64 ip; +}; + +typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); + +#ifndef NO_LIBUNWIND_SUPPORT +int unwind__get_entries(unwind_entry_cb_t cb, void *arg, + struct machine *machine, + struct thread *thread, + u64 sample_uregs, + struct perf_sample *data); +int 
unwind__arch_reg_id(int regnum); +#else +static inline int +unwind__get_entries(unwind_entry_cb_t cb __used, void *arg __used, + struct machine *machine __used, + struct thread *thread __used, + u64 sample_uregs __used, + struct perf_sample *data __used) +{ + return 0; +} +#endif /* NO_LIBUNWIND_SUPPORT */ +#endif /* __UNWIND_H */ From 26d330226b9cf6208daae9b0b3697980c8fb51d8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:47 +0200 Subject: [PATCH 062/282] perf tools: Support for DWARF mode callchain This patch enables perf to use the DWARF unwind code. It extends the perf record '-g' option with following arguments: 'fp' - provides framepointer based user stack backtrace 'dwarf[,size]' - provides DWARF (libunwind) based user stack backtrace. The size specifies the size of the user stack dump. If omitted it is 8192 by default. If libunwind is found during the perf build, then the 'dwarf' argument becomes available for record command. The 'fp' stays as default option in any case. Examples: (perf compiled with libunwind) perf record -g dwarf ls - provides dwarf unwind with 8192 as stack dump size perf record -g dwarf,4096 ls - provides dwarf unwind with 4096 as stack dump size perf record -g -- ls perf record -g fp ls - provides frame pointer unwind Signed-off-by: Jiri Olsa Original-patch-by: Frederic Weisbecker Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-13-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 114 +++++++++++++++++++++++++++++++++++- tools/perf/perf.h | 9 ++- tools/perf/util/evsel.c | 13 +++- 3 files changed, 132 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4db6e1ba54e..22dd05d3680 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -31,6 +31,15 @@ #include #include +#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: " + +#ifdef NO_LIBUNWIND_SUPPORT +static char callchain_help[] = CALLCHAIN_HELP "[fp]"; +#else +static unsigned long default_stack_dump_size = 8192; +static char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf"; +#endif + enum write_mode_t { WRITE_FORCE, WRITE_APPEND @@ -732,6 +741,106 @@ error: return ret; } +#ifndef NO_LIBUNWIND_SUPPORT +static int get_stack_size(char *str, unsigned long *_size) +{ + char *endptr; + unsigned long size; + unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); + + size = strtoul(str, &endptr, 0); + + do { + if (*endptr) + break; + + size = round_up(size, sizeof(u64)); + if (!size || size > max_size) + break; + + *_size = size; + return 0; + + } while (0); + + pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", + max_size, str); + return -1; +} +#endif /* !NO_LIBUNWIND_SUPPORT */ + +static int +parse_callchain_opt(const struct option *opt __used, const char *arg, + int unset) +{ + struct perf_record *rec = (struct perf_record *)opt->value; + char *tok, *name, *saveptr = NULL; + char *buf; + int ret = -1; + + /* --no-call-graph */ + if (unset) + return 0; + + /* We specified default option if none is provided. */ + BUG_ON(!arg); + + /* We need buffer that we know we can write to. 
*/ + buf = malloc(strlen(arg) + 1); + if (!buf) + return -ENOMEM; + + strcpy(buf, arg); + + tok = strtok_r((char *)buf, ",", &saveptr); + name = tok ? : (char *)buf; + + do { + /* Framepointer style */ + if (!strncmp(name, "fp", sizeof("fp"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + rec->opts.call_graph = CALLCHAIN_FP; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for -g fp\n"); + break; + +#ifndef NO_LIBUNWIND_SUPPORT + /* Dwarf style */ + } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { + ret = 0; + rec->opts.call_graph = CALLCHAIN_DWARF; + rec->opts.stack_dump_size = default_stack_dump_size; + + tok = strtok_r(NULL, ",", &saveptr); + if (tok) { + unsigned long size = 0; + + ret = get_stack_size(tok, &size); + rec->opts.stack_dump_size = size; + } + + if (!ret) + pr_debug("callchain: stack dump size %d\n", + rec->opts.stack_dump_size); +#endif /* !NO_LIBUNWIND_SUPPORT */ + } else { + pr_err("callchain: Unknown -g option " + "value: %s\n", arg); + break; + } + + } while (0); + + free(buf); + + if (!ret) + pr_debug("callchain: type %d\n", rec->opts.call_graph); + + return ret; +} + static const char * const record_usage[] = { "perf record [] []", "perf record [] -- []", @@ -803,8 +912,9 @@ const struct option record_options[] = { "number of mmap data pages"), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), - OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph, - "do call-graph (stack chain/backtrace) recording"), + OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]", + callchain_help, &parse_callchain_opt, + "fp"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), diff --git a/tools/perf/perf.h b/tools/perf/perf.h index f960ccb2edc..87f4ec6d1f3 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -209,9 +209,15 @@ void pthread__unblock_sigwinch(void); #include "util/target.h" +enum perf_call_graph_mode { + CALLCHAIN_NONE, + CALLCHAIN_FP, + CALLCHAIN_DWARF +}; + struct perf_record_opts { struct perf_target target; - bool call_graph; + int call_graph; bool group; bool inherit_stat; bool no_delay; @@ -230,6 +236,7 @@ struct perf_record_opts { u64 branch_stack; u64 default_interval; u64 user_interval; + u16 stack_dump_size; }; #endif diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a2da682db81..9c54e8fc2df 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -17,6 +17,8 @@ #include "thread_map.h" #include "target.h" #include "../../../include/linux/hw_breakpoint.h" +#include "../../include/linux/perf_event.h" +#include "perf_regs.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0)) @@ -368,9 +370,18 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts, attr->mmap_data = track; } - if (opts->call_graph) + if (opts->call_graph) { attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + if (opts->call_graph == CALLCHAIN_DWARF) { + attr->sample_type |= PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_STACK_USER; + attr->sample_regs_user = PERF_REGS_MASK; + attr->sample_stack_user = opts->stack_dump_size; + attr->exclude_callchain_user = 1; + } + } + if (perf_target__has_cpu(&opts->target)) attr->sample_type |= PERF_SAMPLE_CPU; From 8db24c70ab43a4dd38c39b0b0cb4d4874257de55 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:49 -0700 Subject: [PATCH 063/282] 
perf symbols: Only un-prelink non-zero symbols Prelink only adjusts the addresses of non-zero symbols. Do the same when we reverse the adjustments. Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-4-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9ca89f8f6c9..e0376094fa8 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -717,7 +717,7 @@ int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, goto new_symbol; } - if (curr_dso->adjust_symbols) { + if (curr_dso->adjust_symbols && sym.st_value) { pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, (u64)sym.st_value, (u64)shdr.sh_addr, From 0f75a710dfd8fd5cd9e558e3f26c474c8fa63e3c Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:50 -0700 Subject: [PATCH 064/282] perf symbols: Remove unused function map__objdump_2ip map__objdump_2ip was introduced in: ee11b90b12 perf top: Fix annotate for userspace And it's last user removed in: 36532461a0 perf top: Ditch private annotation code, share perf annotate's Remove it. Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-5-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 8 -------- tools/perf/util/map.h | 1 - 2 files changed, 9 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 287cb3452b4..7d37159c1e9 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -262,14 +262,6 @@ u64 map__rip_2objdump(struct map *map, u64 rip) return addr; } -u64 map__objdump_2ip(struct map *map, u64 addr) -{ - u64 ip = map->dso->adjust_symbols ? - addr : - map->unmap_ip(map, addr); /* RIP -> IP */ - return ip; -} - void map_groups__init(struct map_groups *mg) { int i; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index c98ab190060..25ab4cdbc44 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -104,7 +104,6 @@ static inline u64 identity__map_ip(struct map *map __used, u64 ip) /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ u64 map__rip_2objdump(struct map *map, u64 rip); -u64 map__objdump_2ip(struct map *map, u64 addr); struct symbol; From 52f9ddba516214a59fdd8eb155b2583ab314dc31 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:51 -0700 Subject: [PATCH 065/282] perf symbols: Don't try to synthesize plt without dynstr If .dynsym exists but .dynstr is empty (NO_BITS or size==0), a segfault occurs. Avoid this by checking that .dynstr is not empty. 
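The guard amounts to the usual libelf defensive pattern, sketched standalone below (illustrative code, not the perf function being patched): refuse to index a string table whose data is missing or empty before handing any symbol name offset to it.

#include <libelf.h>
#include <stddef.h>

/*
 * Return a name from a string-table section, or NULL if the section has
 * no usable data (e.g. SHT_NOBITS or sh_size == 0).  Indexing an empty
 * .dynstr is what triggered the segfault described above.
 */
static const char *strtab_name(Elf_Scn *strtab_scn, size_t off)
{
	Elf_Data *strs = elf_getdata(strtab_scn, NULL);

	if (strs == NULL || strs->d_buf == NULL || strs->d_size == 0 ||
	    off >= strs->d_size)
		return NULL;

	return (const char *)strs->d_buf + off;
}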
Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-6-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index e0376094fa8..a2e994e2605 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -232,6 +232,9 @@ int dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, if (symstrs == NULL) goto out_elf_end; + if (symstrs->d_size == 0) + goto out_elf_end; + nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; plt_offset = shdr_plt.sh_offset; From 261ee821c256dffa02e5608c7df51744a03992a2 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:52 -0700 Subject: [PATCH 066/282] perf symbols: Remove unneeded call to dso__set_long_name() dso__set_long_name() is already called by dso__load_vmlinux(), avoid calling it a second time unnecessarily. Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-7-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f02de8ac842..42c0d943f94 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1388,10 +1388,8 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, filename = dso__build_id_filename(dso, NULL, 0); if (filename != NULL) { err = dso__load_vmlinux(dso, map, filename, filter); - if (err > 0) { - dso__set_long_name(dso, filename); + if (err > 0) goto out; - } free(filename); } From 72f86204419e1b83f18b9bc2c97141a52dc534d2 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:47 -0700 Subject: [PATCH 067/282] perf symbols: Correct comment wrt kallsyms loading In kallsyms_parse() when calling process_symbol() (a callback argument to kallsyms_parse()), we pass start as both start & end (ie: start=start, end=start). In map__process_kallsym_symbol(), the length is calculated as 'end - start + 1', making the length 1, not 0. Essentially, start & end define an inclusive range. Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-2-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 42c0d943f94..9f181a86f3b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -605,7 +605,7 @@ int kallsyms__parse(const char *filename, void *arg, /* * module symbols are not sorted so we add all - * symbols with zero length and rely on + * symbols, setting length to 1, and rely on * symbols__fixup_end() to fix it up. 
*/ err = process_symbol(arg, symbol_name, From 82151520938ec79e5e3adb7e61977f002031c38c Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:48 -0700 Subject: [PATCH 068/282] perf symbols: Remove unused 'end' arg in kallsyms parse cb kallsyms__parse() takes a callback that is called on every discovered symbol. As /proc/kallsyms does not supply symbol sizes, the callback was simply called with end=start, faking the symbol size to 1. All of the callbacks (there are 2) used in calls to kallsyms__parse() are _only_ used as callbacks for kallsyms__parse(). Given that kallsyms__parse() lacks real information about what end/length should be, don't make up a length in kallsyms__parse(). Instead have the callbacks handle guessing the length. Also relocate a comment regarding symbol creation to the callback which does symbol creation (kallsyms__parse() is not in general used to create symbols). Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-3-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 +- tools/perf/util/symbol.c | 21 ++++++++++----------- tools/perf/util/symbol.h | 2 +- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 2a6f33cd888..3a0f1a5da91 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -412,7 +412,7 @@ struct process_symbol_args { }; static int find_symbol_cb(void *arg, const char *name, char type, - u64 start, u64 end __used) + u64 start) { struct process_symbol_args *args = arg; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9f181a86f3b..21270029f07 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -563,7 +563,7 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp) int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, - char type, u64 start, u64 end)) + char type, u64 start)) { char *line = NULL; size_t n; @@ -603,13 +603,8 @@ int kallsyms__parse(const char *filename, void *arg, break; } - /* - * module symbols are not sorted so we add all - * symbols, setting length to 1, and rely on - * symbols__fixup_end() to fix it up. - */ err = process_symbol(arg, symbol_name, - symbol_type, start, start); + symbol_type, start); if (err) break; } @@ -636,7 +631,7 @@ static u8 kallsyms2elf_type(char type) } static int map__process_kallsym_symbol(void *arg, const char *name, - char type, u64 start, u64 end) + char type, u64 start) { struct symbol *sym; struct process_kallsyms_args *a = arg; @@ -645,8 +640,12 @@ static int map__process_kallsym_symbol(void *arg, const char *name, if (!symbol_type__is_a(type, a->map->type)) return 0; - sym = symbol__new(start, end - start + 1, - kallsyms2elf_type(type), name); + /* + * module symbols are not sorted so we add all + * symbols, setting length to 0, and rely on + * symbols__fixup_end() to fix it up. 
+ */ + sym = symbol__new(start, 0, kallsyms2elf_type(type), name); if (sym == NULL) return -ENOMEM; /* @@ -1729,7 +1728,7 @@ struct process_args { }; static int symbol__in_kernel(void *arg, const char *name, - char type __used, u64 start, u64 end __used) + char type __used, u64 start) { struct process_args *args = arg; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 38ccbbb39fa..c9534fe0720 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -299,7 +299,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits); int build_id__sprintf(const u8 *build_id, int len, char *bf); int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, - char type, u64 start, u64 end)); + char type, u64 start)); int filename__read_debuglink(const char *filename, char *debuglink, size_t size); From 0a0317b41e20770f81bc61d7b208957385466c3f Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:53 -0700 Subject: [PATCH 069/282] perf symbols: Simplify out_fixup in kernel syms loading The only site that jumps to out_fixup has (kallsyms_filename == NULL). And all paths that reach 'if (err > 0)' without 'goto out_fixup' have kallsyms_filename != NULL. So skip over both the check & dso__set_long_name(), and remove the check. Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-8-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 21270029f07..e5c38179f72 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1503,9 +1503,8 @@ do_kallsyms: free(kallsyms_allocated_filename); if (err > 0) { + dso__set_long_name(dso, strdup("[kernel.kallsyms]")); out_fixup: - if (kallsyms_filename != NULL) - dso__set_long_name(dso, strdup("[kernel.kallsyms]")); map__fixup_start(map); map__fixup_end(map); } From 515850e4fbd87c8f249446faa2e5ad98e672711d Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:54 -0700 Subject: [PATCH 070/282] perf symbols: only set vmlinux longname & mark loaded if really loaded dso__load_vmlinux() uses the filename passed to it to directly set the dso long_name, which resulted in a use after free due to dso__load_vmlinux_path() treating 0 symbols as a load failure and subsequently freeing the contents of dso->long_name. Change dso__load_vmlinux() so that finding 0 symbols does not cause it to consider itself loaded, and do not set long_name in such a case. 
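In miniature, the ordering problem and the fix look like the sketch below (the struct and do_load() are hypothetical stand-ins, not perf code): the name is published only after the load has actually produced symbols, so a caller that frees the buffer on the zero-symbol path cannot be left holding a dangling long_name.

struct obj {
	char *long_name;
	int   loaded;
};

/* Hypothetical stand-in for the real symbol loader; returns nr of symbols. */
static int do_load(const char *filename)
{
	return (filename && filename[0]) ? 1 : 0;
}

static int load_names(struct obj *o, char *filename)
{
	int nr = do_load(filename);

	/*
	 * Publish state only on success.  Storing 'filename' before the
	 * result is known would let a caller that frees it on failure
	 * leave o->long_name pointing at freed memory.
	 */
	if (nr > 0) {
		o->long_name = filename;
		o->loaded = 1;
	}
	return nr;
}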
Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-9-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e5c38179f72..96dbf28fc94 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1364,13 +1364,14 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, if (fd < 0) return -1; - dso__set_long_name(dso, (char *)vmlinux); - dso__set_loaded(dso, map->type); err = dso__load_sym(dso, map, symfs_vmlinux, fd, filter, 0, 0); close(fd); - if (err > 0) + if (err > 0) { + dso__set_long_name(dso, (char *)vmlinux); + dso__set_loaded(dso, map->type); pr_debug("Using %s for symbols\n", symfs_vmlinux); + } return err; } From 492746546fe380da768c8496213e26aa91b9b3aa Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:55 -0700 Subject: [PATCH 071/282] perf symbols: Avoid segfault in elf_strptr If we call elf_section_by_name() with a truncated elf image (ie: the file header indicates that the section headers are placed past the end of the file), elf_strptr() causes a segfault within libelf. Avoid this by checking that we can access the section string table properly. Should really be fixed in libelf/elfutils. Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-10-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a2e994e2605..a9a194d49c1 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -129,6 +129,10 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, Elf_Scn *sec = NULL; size_t cnt = 1; + /* Elf is corrupted/truncated, avoid calling elf_strptr. */ + if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) + return NULL; + while ((sec = elf_nextscn(elf, sec)) != NULL) { char *str; From 21ea4539b4d1b26de7f2eb227b5d1a092b32cc19 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:56 -0700 Subject: [PATCH 072/282] perf symbols: Track symtab_type of vmlinux Previously, symtab_type would have been left at 0, or KALLSYMS, which is not quite accurate. Introduce DSO_BINARY_TYPE__VMLINUX[_GUEST]. 
Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-11-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 9 +++++++++ tools/perf/util/symbol.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 96dbf28fc94..8f5cabbfc8b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -923,6 +923,7 @@ char dso__symtab_origin(const struct dso *dso) { static const char origin[] = { [DSO_BINARY_TYPE__KALLSYMS] = 'k', + [DSO_BINARY_TYPE__VMLINUX] = 'v', [DSO_BINARY_TYPE__JAVA_JIT] = 'j', [DSO_BINARY_TYPE__DEBUGLINK] = 'l', [DSO_BINARY_TYPE__BUILD_ID_CACHE] = 'B', @@ -933,6 +934,7 @@ char dso__symtab_origin(const struct dso *dso) [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE] = 'K', [DSO_BINARY_TYPE__GUEST_KALLSYMS] = 'g', [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', + [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V', }; if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND) @@ -1008,7 +1010,9 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, default: case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__JAVA_JIT: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; @@ -1364,6 +1368,11 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, if (fd < 0) return -1; + if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + dso->symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX; + else + dso->symtab_type = DSO_BINARY_TYPE__VMLINUX; + err = dso__load_sym(dso, map, symfs_vmlinux, fd, filter, 0, 0); close(fd); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index c9534fe0720..37f1ea146c1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -158,6 +158,8 @@ struct addr_location { enum dso_binary_type { DSO_BINARY_TYPE__KALLSYMS = 0, DSO_BINARY_TYPE__GUEST_KALLSYMS, + DSO_BINARY_TYPE__VMLINUX, + DSO_BINARY_TYPE__GUEST_VMLINUX, DSO_BINARY_TYPE__JAVA_JIT, DSO_BINARY_TYPE__DEBUGLINK, DSO_BINARY_TYPE__BUILD_ID_CACHE, From b68e2f919c6d3a0422239c98673c35ff503e52fb Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:57 -0700 Subject: [PATCH 073/282] perf symbols: Introduce symsrc structure. Factors opening of certain sections & tracking certain elf info into an external structure. The goal here is to keep multiple elfs (and their looked up sections/indexes) around during the symbol generation process (in dso__load()). We need this to properly resolve symbols on PPC due to the use of function descriptors & the .opd section (ie: symbols which are functions don't point to their actual location, they point to their function descriptor in .opd which contains their actual location. It would be possible to just keep the (Elf *) around, but then we'd end up with duplicate code for looking up the same sections and checking for the existence of an important section wouldn't be as clean (and we need to keep the Elf stuff confined to symtab-elf.c). 
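For context, the .opd indirection mentioned above comes from the 64-bit PowerPC ELF ABI, where a function symbol's st_value points at a three-doubleword function descriptor rather than at the code itself; roughly (illustrative definitions, not perf's):

#include <stdint.h>

/* One .opd entry as laid out by the 64-bit PowerPC ELF ABI. */
struct ppc64_func_desc {
	uint64_t entry;		/* address of the function's first instruction */
	uint64_t toc;		/* TOC base loaded into r2 on entry */
	uint64_t env;		/* environment pointer, normally unused */
};

/*
 * Resolving such a symbol is one extra indirection: index .opd with the
 * symbol value and take the entry address from the descriptor.
 */
static uint64_t resolve_func_entry(const struct ppc64_func_desc *opd,
				   uint64_t opd_addr, uint64_t st_value)
{
	return opd[(st_value - opd_addr) / sizeof(*opd)].entry;
}

This is why the symsrc introduced here remembers the .opd section, its header and its index alongside the symbol tables.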
Utilized by the later patch "perf symbols: Use both runtime and debug images" Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-12-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 119 +++++++++++++++++++++++-------- tools/perf/util/symbol-minimal.c | 30 +++++++- tools/perf/util/symbol.c | 22 +++--- tools/perf/util/symbol.h | 36 +++++++++- 4 files changed, 163 insertions(+), 44 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a9a194d49c1..6974b2a44ee 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -536,24 +536,25 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata) return 0; } -int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, - symbol_filter_t filter, int kmodule, int want_symtab) + +void symsrc__destroy(struct symsrc *ss) +{ + free(ss->name); + elf_end(ss->elf); + close(ss->fd); +} + +int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, + enum dso_binary_type type) { - struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; - struct map *curr_map = map; - struct dso *curr_dso = dso; - Elf_Data *symstrs, *secstrs; - uint32_t nr_syms; int err = -1; - uint32_t idx; GElf_Ehdr ehdr; - GElf_Shdr shdr, opdshdr; - Elf_Data *syms, *opddata = NULL; - GElf_Sym sym; - Elf_Scn *sec, *sec_strndx, *opdsec; Elf *elf; - int nr = 0; - size_t opdidx = 0; + int fd; + + fd = open(name, O_RDONLY); + if (fd < 0) + return -1; elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) { @@ -580,19 +581,88 @@ int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, goto out_elf_end; } - sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL); + ss->symtab = elf_section_by_name(elf, &ehdr, &ss->symshdr, ".symtab", + NULL); + if (ss->symshdr.sh_type != SHT_SYMTAB) + ss->symtab = NULL; + + ss->dynsym_idx = 0; + ss->dynsym = elf_section_by_name(elf, &ehdr, &ss->dynshdr, ".dynsym", + &ss->dynsym_idx); + if (ss->dynshdr.sh_type != SHT_DYNSYM) + ss->dynsym = NULL; + + ss->opdidx = 0; + ss->opdsec = elf_section_by_name(elf, &ehdr, &ss->opdshdr, ".opd", + &ss->opdidx); + if (ss->opdshdr.sh_type != SHT_PROGBITS) + ss->opdsec = NULL; + + if (dso->kernel == DSO_TYPE_USER) { + GElf_Shdr shdr; + ss->adjust_symbols = (ehdr.e_type == ET_EXEC || + elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.prelink_undo", + NULL) != NULL); + } else { + ss->adjust_symbols = 0; + } + + ss->name = strdup(name); + if (!ss->name) + goto out_elf_end; + + ss->elf = elf; + ss->fd = fd; + ss->ehdr = ehdr; + ss->type = type; + + return 0; + +out_elf_end: + elf_end(elf); +out_close: + close(fd); + return err; +} + +int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, + symbol_filter_t filter, int kmodule, int want_symtab) +{ + struct kmap *kmap = dso->kernel ? 
map__kmap(map) : NULL; + struct map *curr_map = map; + struct dso *curr_dso = dso; + Elf_Data *symstrs, *secstrs; + uint32_t nr_syms; + int err = -1; + uint32_t idx; + GElf_Ehdr ehdr; + GElf_Shdr shdr, opdshdr; + Elf_Data *syms, *opddata = NULL; + GElf_Sym sym; + Elf_Scn *sec, *sec_strndx, *opdsec; + Elf *elf; + int nr = 0; + size_t opdidx = 0; + + elf = ss->elf; + ehdr = ss->ehdr; + sec = ss->symtab; + shdr = ss->symshdr; + if (sec == NULL) { if (want_symtab) goto out_elf_end; - sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL); + sec = ss->dynsym; + shdr = ss->dynshdr; if (sec == NULL) goto out_elf_end; } - opdsec = elf_section_by_name(elf, &ehdr, &opdshdr, ".opd", &opdidx); - if (opdshdr.sh_type != SHT_PROGBITS) - opdsec = NULL; + opdsec = ss->opdsec; + opdshdr = ss->opdshdr; + opdidx = ss->opdidx; if (opdsec) opddata = elf_rawdata(opdsec, NULL); @@ -619,14 +689,7 @@ int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - if (dso->kernel == DSO_TYPE_USER) { - dso->adjust_symbols = (ehdr.e_type == ET_EXEC || - elf_section_by_name(elf, &ehdr, &shdr, - ".gnu.prelink_undo", - NULL) != NULL); - } else { - dso->adjust_symbols = 0; - } + dso->adjust_symbols = ss->adjust_symbols; elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { struct symbol *f; const char *elf_name = elf_sym__name(&sym, symstrs); @@ -770,8 +833,6 @@ new_symbol: } err = nr; out_elf_end: - elf_end(elf); -out_close: return err; } diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index bd8720b6780..1b16c2729a3 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -241,6 +241,31 @@ out: return ret; } +int symsrc__init(struct symsrc *ss, struct dso *dso __used, const char *name, + enum dso_binary_type type) +{ + int fd = open(name, O_RDONLY); + if (fd < 0) + return -1; + + ss->name = strdup(name); + if (!ss->name) + goto out_close; + + ss->type = type; + + return 0; +out_close: + close(fd); + return -1; +} + +void symsrc__destroy(struct symsrc *ss) +{ + free(ss->name); + close(ss->fd); +} + int dso__synthesize_plt_symbols(struct dso *dso __used, char *name __used, struct map *map __used, symbol_filter_t filter __used) @@ -248,14 +273,13 @@ int dso__synthesize_plt_symbols(struct dso *dso __used, char *name __used, return 0; } -int dso__load_sym(struct dso *dso, struct map *map __used, - const char *name, int fd __used, +int dso__load_sym(struct dso *dso, struct map *map __used, struct symsrc *ss, symbol_filter_t filter __used, int kmodule __used, int want_symtab __used) { unsigned char *build_id[BUILD_ID_SIZE]; - if (filename__read_build_id(name, build_id, BUILD_ID_SIZE) > 0) { + if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) { dso__set_build_id(dso, build_id); return 1; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8f5cabbfc8b..afec3f048a9 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1026,7 +1026,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) { char *name; int ret = -1; - int fd; + struct symsrc ss; u_int i; struct machine *machine; char *root_dir = (char *) ""; @@ -1086,13 +1086,12 @@ restart: continue; /* Name is now the name of the next image to try */ - fd = open(name, O_RDONLY); - if (fd < 0) + if (symsrc__init(&ss, dso, name, dso->symtab_type) < 0) continue; - ret = dso__load_sym(dso, map, name, fd, filter, 0, + ret = dso__load_sym(dso, map, &ss, 
filter, 0, want_symtab); - close(fd); + symsrc__destroy(&ss); /* * Some people seem to have debuginfo files _WITHOUT_ debug @@ -1359,22 +1358,23 @@ out_failure: int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, symbol_filter_t filter) { - int err = -1, fd; + int err = -1; + struct symsrc ss; char symfs_vmlinux[PATH_MAX]; snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s%s", symbol_conf.symfs, vmlinux); - fd = open(symfs_vmlinux, O_RDONLY); - if (fd < 0) - return -1; if (dso->kernel == DSO_TYPE_GUEST_KERNEL) dso->symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX; else dso->symtab_type = DSO_BINARY_TYPE__VMLINUX; - err = dso__load_sym(dso, map, symfs_vmlinux, fd, filter, 0, 0); - close(fd); + if (symsrc__init(&ss, dso, symfs_vmlinux, dso->symtab_type)) + return -1; + + err = dso__load_sym(dso, map, &ss, filter, 0, 0); + symsrc__destroy(&ss); if (err > 0) { dso__set_long_name(dso, (char *)vmlinux); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 37f1ea146c1..dd9e8678b35 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -11,6 +11,12 @@ #include #include +#ifndef NO_LIBELF_SUPPORT +#include +#include +#include +#endif + #ifdef HAVE_CPLUS_DEMANGLE extern char *cplus_demangle(const char *, int); @@ -219,6 +225,34 @@ struct dso { char name[0]; }; +struct symsrc { + char *name; + int fd; + enum dso_binary_type type; + +#ifndef NO_LIBELF_SUPPORT + Elf *elf; + GElf_Ehdr ehdr; + + Elf_Scn *opdsec; + size_t opdidx; + GElf_Shdr opdshdr; + + Elf_Scn *symtab; + GElf_Shdr symshdr; + + Elf_Scn *dynsym; + size_t dynsym_idx; + GElf_Shdr dynshdr; + + bool adjust_symbols; +#endif +}; + +void symsrc__destroy(struct symsrc *ss); +int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, + enum dso_binary_type type); + #define DSO__SWAP(dso, type, val) \ ({ \ type ____r = val; \ @@ -334,7 +368,7 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size); int dso__test_data(void); -int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, +int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, symbol_filter_t filter, int kmodule, int want_symtab); int dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, symbol_filter_t filter); From 005f92947a0da7eb47b0f1ff611f8fc3d7ab4751 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:58 -0700 Subject: [PATCH 074/282] perf symbols: Set symtab_type in dso__load_sym In certain cases, dso__load requires dso->symbol_type to be set prior to calling it. With the introduction of symsrc*, the symtab_type is now stored in a symsrc which is then passed to dso__load_sym(). Change dso__load_sym() to use the symtab_type from them symsrc (setting dso->symtab_type as well). 
Setup for later patch "perf symbols: Use both runtime and debug images" Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-13-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 ++ tools/perf/util/symbol.c | 13 +++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6974b2a44ee..3a9c38a39bc 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -645,6 +645,8 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, int nr = 0; size_t opdidx = 0; + dso->symtab_type = ss->type; + elf = ss->elf; ehdr = ss->ehdr; sec = ss->symtab; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index afec3f048a9..2b3495a93fb 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1079,14 +1079,14 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) restart: for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) { - dso->symtab_type = binary_type_symtab[i]; + enum dso_binary_type symtab_type = binary_type_symtab[i]; - if (dso__binary_type_file(dso, dso->symtab_type, + if (dso__binary_type_file(dso, symtab_type, root_dir, name, PATH_MAX)) continue; /* Name is now the name of the next image to try */ - if (symsrc__init(&ss, dso, name, dso->symtab_type) < 0) + if (symsrc__init(&ss, dso, name, symtab_type) < 0) continue; ret = dso__load_sym(dso, map, &ss, filter, 0, @@ -1361,16 +1361,17 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, int err = -1; struct symsrc ss; char symfs_vmlinux[PATH_MAX]; + enum dso_binary_type symtab_type; snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s%s", symbol_conf.symfs, vmlinux); if (dso->kernel == DSO_TYPE_GUEST_KERNEL) - dso->symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX; + symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX; else - dso->symtab_type = DSO_BINARY_TYPE__VMLINUX; + symtab_type = DSO_BINARY_TYPE__VMLINUX; - if (symsrc__init(&ss, dso, symfs_vmlinux, dso->symtab_type)) + if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) return -1; err = dso__load_sym(dso, map, &ss, filter, 0, 0); From a44f605b2f6eadb771a052aa3a5eefb342b38a39 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:22:59 -0700 Subject: [PATCH 075/282] perf symbols: Switch dso__synthesize_plt_symbols() to use symsrc Previously dso__synthesize_plt_symbols() was reopening the elf file to obtain dynsyms from it. Rather than reopen the file, use the already opened reference within the symsrc to access it. 
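Taken together with the preceding symsrc patches, the per-image handling in dso__load() now composes roughly as below. This is a hypothetical helper (load_one_image() does not exist in the tree), written only to show how the calls fit together; the signatures are the ones introduced in this series, and the error/retry details of the real loop are omitted:

    static int load_one_image(struct dso *dso, struct map *map, const char *name,
                              enum dso_binary_type symtab_type,
                              symbol_filter_t filter, int want_symtab)
    {
            struct symsrc ss;
            int ret;

            if (symsrc__init(&ss, dso, name, symtab_type) < 0)
                    return -1;      /* caller moves on to the next candidate image */

            ret = dso__load_sym(dso, map, &ss, filter, 0, want_symtab);
            if (ret > 0)
                    ret += dso__synthesize_plt_symbols(dso, &ss, map, filter);

            symsrc__destroy(&ss);
            return ret;
    }

The point is that a single open Elf handle now backs both the symbol table walk and the PLT synthesis, so each candidate image is opened and parsed only once.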
Setup for the later patch "perf symbols: Use both runtime and debug images" Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-14-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 25 +++++++------------------ tools/perf/util/symbol-minimal.c | 3 ++- tools/perf/util/symbol.c | 8 +++++--- tools/perf/util/symbol.h | 4 ++-- 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 3a9c38a39bc..59159477590 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -166,7 +166,7 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, * And always look at the original dso, not at debuginfo packages, that * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). */ -int dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map, symbol_filter_t filter) { uint32_t nr_rel_entries, idx; @@ -181,21 +181,15 @@ int dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, GElf_Ehdr ehdr; char sympltname[1024]; Elf *elf; - int nr = 0, symidx, fd, err = 0; + int nr = 0, symidx, err = 0; - fd = open(name, O_RDONLY); - if (fd < 0) - goto out; + elf = ss->elf; + ehdr = ss->ehdr; - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) - goto out_close; + scn_dynsym = ss->dynsym; + shdr_dynsym = ss->dynshdr; + dynsym_idx = ss->dynsym_idx; - if (gelf_getehdr(elf, &ehdr) == NULL) - goto out_elf_end; - - scn_dynsym = elf_section_by_name(elf, &ehdr, &shdr_dynsym, - ".dynsym", &dynsym_idx); if (scn_dynsym == NULL) goto out_elf_end; @@ -291,13 +285,8 @@ int dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, err = 0; out_elf_end: - elf_end(elf); -out_close: - close(fd); - if (err == 0) return nr; -out: pr_debug("%s: problems reading %s PLT info.\n", __func__, dso->long_name); return 0; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 1b16c2729a3..cc580c046c0 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -266,7 +266,8 @@ void symsrc__destroy(struct symsrc *ss) close(ss->fd); } -int dso__synthesize_plt_symbols(struct dso *dso __used, char *name __used, +int dso__synthesize_plt_symbols(struct dso *dso __used, + struct symsrc *ss __used, struct map *map __used, symbol_filter_t filter __used) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 2b3495a93fb..f8a30686520 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1091,21 +1091,23 @@ restart: ret = dso__load_sym(dso, map, &ss, filter, 0, want_symtab); - symsrc__destroy(&ss); /* * Some people seem to have debuginfo files _WITHOUT_ debug * info!?!? 
*/ - if (!ret) + if (!ret) { + symsrc__destroy(&ss); continue; + } if (ret > 0) { int nr_plt; - nr_plt = dso__synthesize_plt_symbols(dso, name, map, filter); + nr_plt = dso__synthesize_plt_symbols(dso, &ss, map, filter); if (nr_plt > 0) ret += nr_plt; + symsrc__destroy(&ss); break; } } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index dd9e8678b35..2981513ce1d 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -370,8 +370,8 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map, int dso__test_data(void); int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, symbol_filter_t filter, int kmodule, int want_symtab); -int dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, - symbol_filter_t filter); +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, + struct map *map, symbol_filter_t filter); void symbols__insert(struct rb_root *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root *symbols); From d26cd12b46cb6b5595143804b43ba5aa7968551e Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:23:00 -0700 Subject: [PATCH 076/282] perf symbols: Factor want_symtab out of dso__load_sym() Only one callsite of dso__load_sym() uses the want_symtab functionality, so place the logic at the callsite instead of within dso__load_sym(). This sets us up for removal of want_symtab completely once we keep multiple elf handles (within symsrc's) around. Setup for the later patch "perf symbols: Use both runtime and debug images" Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-15-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 21 ++++++++++----------- tools/perf/util/symbol-minimal.c | 8 ++++++-- tools/perf/util/symbol.c | 10 +++++++--- tools/perf/util/symbol.h | 3 ++- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 59159477590..492ebecfbfb 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -525,6 +525,10 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata) return 0; } +bool symsrc__has_symtab(struct symsrc *ss) +{ + return ss->symtab != NULL; +} void symsrc__destroy(struct symsrc *ss) { @@ -616,7 +620,7 @@ out_close: } int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, - symbol_filter_t filter, int kmodule, int want_symtab) + symbol_filter_t filter, int kmodule) { struct kmap *kmap = dso->kernel ? 
map__kmap(map) : NULL; struct map *curr_map = map; @@ -636,21 +640,16 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, dso->symtab_type = ss->type; + if (!ss->symtab) { + ss->symtab = ss->dynsym; + ss->symshdr = ss->dynshdr; + } + elf = ss->elf; ehdr = ss->ehdr; sec = ss->symtab; shdr = ss->symshdr; - if (sec == NULL) { - if (want_symtab) - goto out_elf_end; - - sec = ss->dynsym; - shdr = ss->dynshdr; - if (sec == NULL) - goto out_elf_end; - } - opdsec = ss->opdsec; opdshdr = ss->opdshdr; opdidx = ss->opdidx; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index cc580c046c0..11c2c606ce7 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -260,6 +260,11 @@ out_close: return -1; } +bool symsrc__has_symtab(struct symsrc *ss __used) +{ + return false; +} + void symsrc__destroy(struct symsrc *ss) { free(ss->name); @@ -275,8 +280,7 @@ int dso__synthesize_plt_symbols(struct dso *dso __used, } int dso__load_sym(struct dso *dso, struct map *map __used, struct symsrc *ss, - symbol_filter_t filter __used, int kmodule __used, - int want_symtab __used) + symbol_filter_t filter __used, int kmodule __used) { unsigned char *build_id[BUILD_ID_SIZE]; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f8a30686520..8e7d74ff6ba 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1089,8 +1089,12 @@ restart: if (symsrc__init(&ss, dso, name, symtab_type) < 0) continue; - ret = dso__load_sym(dso, map, &ss, filter, 0, - want_symtab); + if (want_symtab && !symsrc__has_symtab(&ss)) { + symsrc__destroy(&ss); + continue; + } + + ret = dso__load_sym(dso, map, &ss, filter, 0); /* * Some people seem to have debuginfo files _WITHOUT_ debug @@ -1376,7 +1380,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) return -1; - err = dso__load_sym(dso, map, &ss, filter, 0, 0); + err = dso__load_sym(dso, map, &ss, filter, 0); symsrc__destroy(&ss); if (err > 0) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2981513ce1d..fa9f6b1cdc4 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -252,6 +252,7 @@ struct symsrc { void symsrc__destroy(struct symsrc *ss); int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type); +bool symsrc__has_symtab(struct symsrc *ss); #define DSO__SWAP(dso, type, val) \ ({ \ @@ -369,7 +370,7 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map, u8 *data, ssize_t size); int dso__test_data(void); int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, - symbol_filter_t filter, int kmodule, int want_symtab); + symbol_filter_t filter, int kmodule); int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map, symbol_filter_t filter); From 261360b6e90a782f0a63d8f61a67683c376c88cf Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:23:01 -0700 Subject: [PATCH 077/282] perf symbols: Convert dso__load_syms to take 2 symsrc's To properly handle platforms with an opd section, both a runtime image (which contains the opd section but possibly lacks symbols) and a symbol image (which probably lacks an opd section but has symbols). The next patch ("perf symbol: use both runtime and debug images") adjusts the callsite in dso__load() to take advantage of being able to pass both runtime & debug images. 
Assumptions made here: - The opd section, if it exists in the runtime image, has headers in both the runtime image and the debug/syms image. - The index of the opd section (again, only if it exists in the runtime image) is the same in both the runtime and debug/symbols image. Both of these are true on RHEL, but it is unclear how accurate they are in general (on platforms with function descriptors in opd sections). Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-16-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 47 ++++++++++++++++---------------- tools/perf/util/symbol-minimal.c | 1 + tools/perf/util/symbol.c | 4 +-- tools/perf/util/symbol.h | 5 ++-- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 492ebecfbfb..36e4a458e7a 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -619,7 +619,8 @@ out_close: return err; } -int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, +int dso__load_sym(struct dso *dso, struct map *map, + struct symsrc *syms_ss, struct symsrc *runtime_ss, symbol_filter_t filter, int kmodule) { struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; @@ -630,31 +631,27 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, int err = -1; uint32_t idx; GElf_Ehdr ehdr; - GElf_Shdr shdr, opdshdr; + GElf_Shdr shdr; Elf_Data *syms, *opddata = NULL; GElf_Sym sym; - Elf_Scn *sec, *sec_strndx, *opdsec; + Elf_Scn *sec, *sec_strndx; Elf *elf; int nr = 0; - size_t opdidx = 0; - dso->symtab_type = ss->type; + dso->symtab_type = syms_ss->type; - if (!ss->symtab) { - ss->symtab = ss->dynsym; - ss->symshdr = ss->dynshdr; + if (!syms_ss->symtab) { + syms_ss->symtab = syms_ss->dynsym; + syms_ss->symshdr = syms_ss->dynshdr; } - elf = ss->elf; - ehdr = ss->ehdr; - sec = ss->symtab; - shdr = ss->symshdr; + elf = syms_ss->elf; + ehdr = syms_ss->ehdr; + sec = syms_ss->symtab; + shdr = syms_ss->symshdr; - opdsec = ss->opdsec; - opdshdr = ss->opdshdr; - opdidx = ss->opdidx; - if (opdsec) - opddata = elf_rawdata(opdsec, NULL); + if (runtime_ss->opdsec) + opddata = elf_rawdata(runtime_ss->opdsec, NULL); syms = elf_getdata(sec, NULL); if (syms == NULL) @@ -679,13 +676,14 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - dso->adjust_symbols = ss->adjust_symbols; + dso->adjust_symbols = runtime_ss->adjust_symbols; elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { struct symbol *f; const char *elf_name = elf_sym__name(&sym, symstrs); char *demangled = NULL; int is_label = elf_sym__is_label(&sym); const char *section_name; + bool used_opd = false; if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name && strcmp(elf_name, kmap->ref_reloc_sym->name) == 0) @@ -704,14 +702,16 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, continue; } - if (opdsec && sym.st_shndx == opdidx) { - u32 offset = sym.st_value - opdshdr.sh_addr; + if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) { + u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr; u64 *opd = opddata->d_buf + offset; sym.st_value = DSO__SWAP(dso, u64, *opd); - sym.st_shndx = elf_addr_to_index(elf, sym.st_value); + sym.st_shndx = 
elf_addr_to_index(runtime_ss->elf, + sym.st_value); + used_opd = true; } - sec = elf_getscn(elf, sym.st_shndx); + sec = elf_getscn(runtime_ss->elf, sym.st_shndx); if (!sec) goto out_elf_end; @@ -777,7 +777,8 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, goto new_symbol; } - if (curr_dso->adjust_symbols && sym.st_value) { + if ((used_opd && runtime_ss->adjust_symbols) + || (!used_opd && syms_ss->adjust_symbols)) { pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, (u64)sym.st_value, (u64)shdr.sh_addr, diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 11c2c606ce7..7747ea6d7e9 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -280,6 +280,7 @@ int dso__synthesize_plt_symbols(struct dso *dso __used, } int dso__load_sym(struct dso *dso, struct map *map __used, struct symsrc *ss, + struct symsrc *runtime_ss __used, symbol_filter_t filter __used, int kmodule __used) { unsigned char *build_id[BUILD_ID_SIZE]; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8e7d74ff6ba..739e5a32366 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1094,7 +1094,7 @@ restart: continue; } - ret = dso__load_sym(dso, map, &ss, filter, 0); + ret = dso__load_sym(dso, map, &ss, &ss, filter, 0); /* * Some people seem to have debuginfo files _WITHOUT_ debug @@ -1380,7 +1380,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) return -1; - err = dso__load_sym(dso, map, &ss, filter, 0); + err = dso__load_sym(dso, map, &ss, &ss, filter, 0); symsrc__destroy(&ss); if (err > 0) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index fa9f6b1cdc4..1f3eed21f35 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -369,8 +369,9 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size); int dso__test_data(void); -int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *ss, - symbol_filter_t filter, int kmodule); +int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + struct symsrc *runtime_ss, symbol_filter_t filter, + int kmodule); int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map, symbol_filter_t filter); From 3aafe5ae08f2aed378e06d78b207883879d25cbe Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 10 Aug 2012 15:23:02 -0700 Subject: [PATCH 078/282] perf symbols: Use both runtime and debug images We keep both a 'runtime' elf image as well as a 'debug' elf image around and generate symbols by looking at both of these. This eliminates the need for the want_symtab/goto restart mechanism combined with iterating over and reopening the elf images a second time. Also give dso__synthsize_plt_symbols() the runtime image (which has dynsyms) instead of the symbol image (which may only have a symtab and no dynsyms). Previously if a debug image was found all runtime images were ignored. This fixes 2 issues: - Symbol resolution to failure on PowerPC systems with debug symbols installed, as the debug images lack a '.opd' section which contains function descriptors. - On all archs, plt synthesis failed when a debug image was loaded and that debug image lacks a dynsym section while a runtime image has a dynsym section. 
Assumptions: - If a .opd section exists, it is contained in the highest priority image with a dynsym section. - This generally implies that the debug image lacks a dynsym section (ie: it is marked as NO_BITS). Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Ingo Molnar Cc: Matt Hellsley Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1344637382-22789-17-git-send-email-cody@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 5 +++ tools/perf/util/symbol-minimal.c | 6 +++ tools/perf/util/symbol.c | 77 ++++++++++++++++++-------------- tools/perf/util/symbol.h | 1 + 4 files changed, 56 insertions(+), 33 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 36e4a458e7a..5b37e13f08f 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -525,6 +525,11 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata) return 0; } +bool symsrc__possibly_runtime(struct symsrc *ss) +{ + return ss->dynsym || ss->opdsec; +} + bool symsrc__has_symtab(struct symsrc *ss) { return ss->symtab != NULL; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 7747ea6d7e9..6738ea128c9 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -260,6 +260,12 @@ out_close: return -1; } +bool symsrc__possibly_runtime(struct symsrc *ss __used) +{ + /* Assume all sym sources could be a runtime image. */ + return true; +} + bool symsrc__has_symtab(struct symsrc *ss __used) { return false; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 739e5a32366..2293a4a5af9 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1026,11 +1026,12 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) { char *name; int ret = -1; - struct symsrc ss; u_int i; struct machine *machine; char *root_dir = (char *) ""; - int want_symtab; + int ss_pos = 0; + struct symsrc ss_[2]; + struct symsrc *syms_ss = NULL, *runtime_ss = NULL; dso__set_loaded(dso, map->type); @@ -1072,12 +1073,12 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) root_dir = machine->root_dir; /* Iterate over candidate debug images. - * On the first pass, only load images if they have a full symtab. - * Failing that, do a second pass where we accept .dynsym also + * Keep track of "interesting" ones (those which have a symtab, dynsym, + * and/or opd section) for processing. */ - want_symtab = 1; -restart: for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) { + struct symsrc *ss = &ss_[ss_pos]; + bool next_slot = false; enum dso_binary_type symtab_type = binary_type_symtab[i]; @@ -1086,45 +1087,55 @@ restart: continue; /* Name is now the name of the next image to try */ - if (symsrc__init(&ss, dso, name, symtab_type) < 0) + if (symsrc__init(ss, dso, name, symtab_type) < 0) continue; - if (want_symtab && !symsrc__has_symtab(&ss)) { - symsrc__destroy(&ss); - continue; + if (!syms_ss && symsrc__has_symtab(ss)) { + syms_ss = ss; + next_slot = true; } - ret = dso__load_sym(dso, map, &ss, &ss, filter, 0); - - /* - * Some people seem to have debuginfo files _WITHOUT_ debug - * info!?!? 
- */ - if (!ret) { - symsrc__destroy(&ss); - continue; + if (!runtime_ss && symsrc__possibly_runtime(ss)) { + runtime_ss = ss; + next_slot = true; } - if (ret > 0) { - int nr_plt; + if (next_slot) { + ss_pos++; - nr_plt = dso__synthesize_plt_symbols(dso, &ss, map, filter); - if (nr_plt > 0) - ret += nr_plt; - symsrc__destroy(&ss); - break; + if (syms_ss && runtime_ss) + break; } + } - /* - * If we wanted a full symtab but no image had one, - * relax our requirements and repeat the search. - */ - if (ret <= 0 && want_symtab) { - want_symtab = 0; - goto restart; + if (!runtime_ss && !syms_ss) + goto out_free; + + if (runtime_ss && !syms_ss) { + syms_ss = runtime_ss; } + /* We'll have to hope for the best */ + if (!runtime_ss && syms_ss) + runtime_ss = syms_ss; + + if (syms_ss) + ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, 0); + else + ret = -1; + + if (ret > 0 && runtime_ss->dynsym) { + int nr_plt; + + nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map, filter); + if (nr_plt > 0) + ret += nr_plt; + } + + for (; ss_pos > 0; ss_pos--) + symsrc__destroy(&ss_[ss_pos - 1]); +out_free: free(name); if (ret < 0 && strstr(dso->name, " (deleted)") != NULL) return 0; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 1f3eed21f35..fc4b1e630fd 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -253,6 +253,7 @@ void symsrc__destroy(struct symsrc *ss); int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type); bool symsrc__has_symtab(struct symsrc *ss); +bool symsrc__possibly_runtime(struct symsrc *ss); #define DSO__SWAP(dso, type, val) \ ({ \ From 0fe7d7e9761ec7e23350b5543ddac470bb3cde1e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Aug 2012 08:31:00 -0700 Subject: [PATCH 079/282] perf symbols: Add description of JIT interface Add a description of the JIT interface in the perf symbol resolution code. I reverse engineered the format from the source. Signed-off-by: Andi Kleen Acked-by: Pekka Enberg Cc: Pekka Enberg Link: http://lkml.kernel.org/r/1344526260-18721-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/jit-interface.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tools/perf/Documentation/jit-interface.txt diff --git a/tools/perf/Documentation/jit-interface.txt b/tools/perf/Documentation/jit-interface.txt new file mode 100644 index 00000000000..a8656f56491 --- /dev/null +++ b/tools/perf/Documentation/jit-interface.txt @@ -0,0 +1,15 @@ +perf supports a simple JIT interface to resolve symbols for dynamic code generated +by a JIT. + +The JIT has to write a /tmp/perf-%d.map (%d = pid of process) file + +This is a text file. + +Each line has the following format, fields separated with spaces: + +START SIZE symbolname + +START and SIZE are hex numbers without 0x. +symbolname is the rest of the line, so it could contain special characters. + +The ownership of the file has to match the process. From d0d3913895e5b50623bd4dafd876e41c1169031d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 14 Aug 2012 10:57:03 +0900 Subject: [PATCH 080/282] perf script: Fix a NULL pointer dereference If 'perf script --gen-script' was called with a perf.data which contains no tracepoint event, it'd segfault due to NULL pevent pointer. Fix it. 
Signed-off-by: Namhyung Kim Cc: Feng Tang Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1344909423-26384-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 4cb7f3831f7..a5a554efeb5 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -293,7 +293,7 @@ struct event_format *trace_find_next_event(struct pevent *pevent, { static int idx; - if (!pevent->events) + if (!pevent || !pevent->events) return NULL; if (!event) { From a619183672aace2ce3de15728da922d303c0eef9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Aug 2012 14:17:30 -0300 Subject: [PATCH 081/282] perf tools: Add missing files to build the python binding Changeset 0f6a3015: "perf tools: Support user regs and stack in sample parsing" uses hweight_long in evsel.c, so we need to drag util/hweight.c to the python binding. Ditto for ee8dd3c: "perf tools: Change strlist to use the new rblist" where we need to add util/rblist.c. Now twatch.py works again: # export PYTHONPATH=~acme/git/build/perf/python/ # ~acme/git/linux/tools/perf/python/twatch.py cpu: 4, pid: 23639, tid: 23639 { type: fork, pid: 30659, ppid: 23639, tid: 30659, ptid: 23639, time: 36287872076780} cpu: 5, pid: 30659, tid: 30659 { type: comm, pid: 30659, tid: 30659, comm: ls } cpu: 5, pid: 30659, tid: 30659 { type: exit, pid: 30659, ppid: 30659, tid: 30659, ptid: 30659, time: 36287873681539} cpu: 4, pid: 23639, tid: 23639 { type: fork, pid: 30660, ppid: 23639, tid: 30660, ptid: 23639, time: 36291720420480} cpu: 5, pid: 30659, tid: 30659 { type: exit, pid: 30659, ppid: 30659, tid: 30659, ptid: 30659, time: 36287873685714} cpu: 5, pid: 30660, tid: 30660 { type: comm, pid: 30660, tid: 30660, comm: git } ^C KeyboardInterrupt Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-gmq82zp5blin9aml9g5tzokr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python-ext-sources | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 2884e67ee62..213362850ab 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,10 +10,12 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +util/hweight.c util/thread_map.c util/util.c util/xyarray.c util/cgroup.c util/debugfs.c +util/rblist.c util/strlist.c ../../lib/rbtree.c From 89efb029502d7f2d0993ed2aa9280c414336939c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Aug 2012 12:14:14 +0200 Subject: [PATCH 082/282] perf tools: Add support to parse event group syntax Adding scanner/parser bits to parse event groups. The grammar for group is: groups: groups ',' group | group group: group_name '{' events '}' group_mod group_name: name | empty group_mod: ':' group_mods | empty group_mods: event_mod It's possible to use standard event modifier as a modifier for group. It'll be used as an update to existing event modifiers. It's necessary to use quoting ("'\) when specifying group on command line, since {} characters are interpreted by most of the shells. 
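For instance, with a Bourne-style shell either of these keeps the braces away from the shell (illustrative invocations; the first matches the examples used later in this series):

    # perf record -e '{cycles,faults}' ls
    # perf record -e \{cycles,faults\} ls

Left unquoted, a shell with brace expansion would split {cycles,faults} into separate words before perf ever sees the event string.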
It is now possible to specify groups in event syntax like: '{cycles,faults}' - anonymous group 'group1{cycles,faults} - group with name 'group1' '{cycles,faults}:k - anonymous group with event modifier 'k' '{cpu-clock,task-clock},{minor-faults,major-faults}' - two anonymous groups The grouping functionality itself is coming shortly. Reviewed-by: Namhyung Kim Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ulrich Drepper Link: http://lkml.kernel.org/n/tip-p4j8bnvo879uokum4k4zk5q6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 14 ++++- tools/perf/util/parse-events.h | 4 +- tools/perf/util/parse-events.l | 2 + tools/perf/util/parse-events.y | 93 +++++++++++++++++++++++++++++----- 4 files changed, 97 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3ec4bfcd3f9..57d5809f286 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -611,19 +611,29 @@ int parse_events_add_pmu(struct list_head **list, int *idx, pmu_event_name(head_config)); } +int parse_events__modifier_group(struct list_head *list __used, + char *event_mod __used) +{ + return 0; +} + +void parse_events__group(char *name __used, struct list_head *list __used) +{ +} + void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all) { /* * Called for single event definition. Update the - * 'all event' list, and reinit the 'signle event' + * 'all event' list, and reinit the 'single event' * list, for next event definition. */ list_splice_tail(list_event, list_all); free(list_event); } -int parse_events_modifier(struct list_head *list, char *str) +int parse_events__modifier_event(struct list_head *list, char *str) { struct perf_evsel *evsel; int exclude = 0, exclude_GH = 0; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 00416d7fd01..c3bb04c6f02 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -79,7 +79,8 @@ int parse_events__term_str(struct parse_events__term **_term, int parse_events__term_clone(struct parse_events__term **new, struct parse_events__term *term); void parse_events__free_terms(struct list_head *terms); -int parse_events_modifier(struct list_head *list, char *str); +int parse_events__modifier_event(struct list_head *list, char *str); +int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_add_tracepoint(struct list_head **list, int *idx, char *sys, char *event); int parse_events_add_numeric(struct list_head **list, int *idx, @@ -91,6 +92,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx, void *ptr, char *type); int parse_events_add_pmu(struct list_head **list, int *idx, char *pmu , struct list_head *head_config); +void parse_events__group(char *name, struct list_head *list); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); void parse_events_error(void *data, void *scanner, char const *msg); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index e4abdf25d49..2c0d006d43d 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -151,6 +151,8 @@ r{num_raw_hex} { return raw(yyscanner); } - { return '-'; } , { return ','; } : { return ':'; } +"{" { return '{'; } +"}" { 
return '}'; } = { return '='; } \n { } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 423d3315146..15e6e97e5b3 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -30,7 +30,7 @@ do { \ %token PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT -%token PE_PREFIX_MEM PE_PREFIX_RAW +%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR %type PE_VALUE %type PE_VALUE_SYM_HW @@ -53,6 +53,11 @@ do { \ %type event_legacy_numeric %type event_legacy_raw %type event_def +%type event +%type events +%type group_def +%type group +%type groups %union { @@ -64,33 +69,95 @@ do { \ %% start: -PE_START_EVENTS events +PE_START_EVENTS start_events | -PE_START_TERMS terms +PE_START_TERMS start_terms + +start_events: groups +{ + struct parse_events_data__events *data = _data; + + parse_events_update_lists($1, &data->list); +} + +groups: +groups ',' group +{ + struct list_head *list = $1; + struct list_head *group = $3; + + parse_events_update_lists(group, list); + $$ = list; +} +| +groups ',' event +{ + struct list_head *list = $1; + struct list_head *event = $3; + + parse_events_update_lists(event, list); + $$ = list; +} +| +group +| +event + +group: +group_def ':' PE_MODIFIER_EVENT +{ + struct list_head *list = $1; + + ABORT_ON(parse_events__modifier_group(list, $3)); + $$ = list; +} +| +group_def + +group_def: +PE_NAME '{' events '}' +{ + struct list_head *list = $3; + + parse_events__group($1, list); + $$ = list; +} +| +'{' events '}' +{ + struct list_head *list = $2; + + parse_events__group(NULL, list); + $$ = list; +} events: -events ',' event | event +events ',' event +{ + struct list_head *event = $3; + struct list_head *list = $1; + + parse_events_update_lists(event, list); + $$ = list; +} +| +event event: event_def PE_MODIFIER_EVENT { - struct parse_events_data__events *data = _data; + struct list_head *list = $1; /* * Apply modifier on all events added by single event definition * (there could be more events added for multiple tracepoint * definitions via '*?'. */ - ABORT_ON(parse_events_modifier($1, $2)); - parse_events_update_lists($1, &data->list); + ABORT_ON(parse_events__modifier_event(list, $2)); + $$ = list; } | event_def -{ - struct parse_events_data__events *data = _data; - - parse_events_update_lists($1, &data->list); -} event_def: event_pmu | event_legacy_symbol | @@ -222,7 +289,7 @@ PE_RAW $$ = list; } -terms: event_config +start_terms: event_config { struct parse_events_data__terms *data = _data; data->terms = $1; From f5b1135bf79557563a814e53ecd610cce663c1e3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Aug 2012 12:21:54 +0200 Subject: [PATCH 083/282] perf tools: Add support to update event modifier Adding support to update already defined event's attribute with event modifier. This change will allow to use group modifier as an update to the existing event modifiers. Adding 'add' parameter to the parse_events__modifier_event function. Calling it with 'add' = false/true, the event modifier is initialized/updated respectively. Added exclude_GH flag to evsel struct, because we need to remember if one of 'GH' modifiers was used for event. The reason is that the default settings for exclude_guest is 1 and during the group modifier processing we have no other way of knowing if it was set by default or by event modifier. Keeping the current behaviour, that any event/group modifier reset the defaults for exclude_host (0) and exclude_guest (1). 
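As an illustration of the update semantics (following the same rule as the ':p' example in the later "Enable grouping logic" patch, not an example taken from this patch):

    # perf record -e '{cycles:u,instructions}:p' ls

Here the group-level ':p' is applied on top of each member's own modifier: 'cycles' effectively runs with ':up' (its per-event ':u' is kept, not reset), while 'instructions' runs with plain ':p'.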
Reviewed-by: Namhyung Kim Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ulrich Drepper Link: http://lkml.kernel.org/n/tip-8peaey3e2qc9dwtkvzbi4wmx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 2 + tools/perf/util/parse-events.c | 76 ++++++++++++++++++++++++++++------ tools/perf/util/parse-events.h | 2 +- tools/perf/util/parse-events.y | 2 +- 4 files changed, 67 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index a56c4574b3f..6a258c90e7c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -68,6 +68,8 @@ struct perf_evsel { } handler; unsigned int sample_size; bool supported; + /* parse modifier helper */ + int exclude_GH; }; struct cpu_map; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 57d5809f286..4364575ce6a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -633,14 +633,38 @@ void parse_events_update_lists(struct list_head *list_event, free(list_event); } -int parse_events__modifier_event(struct list_head *list, char *str) -{ - struct perf_evsel *evsel; - int exclude = 0, exclude_GH = 0; - int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0; +struct event_modifier { + int eu; + int ek; + int eh; + int eH; + int eG; + int precise; + int exclude_GH; +}; - if (str == NULL) - return 0; +static int get_event_modifier(struct event_modifier *mod, char *str, + struct perf_evsel *evsel) +{ + int eu = evsel ? evsel->attr.exclude_user : 0; + int ek = evsel ? evsel->attr.exclude_kernel : 0; + int eh = evsel ? evsel->attr.exclude_hv : 0; + int eH = evsel ? evsel->attr.exclude_host : 0; + int eG = evsel ? evsel->attr.exclude_guest : 0; + int precise = evsel ? evsel->attr.precise_ip : 0; + + int exclude = eu | ek | eh; + int exclude_GH = evsel ? evsel->exclude_GH : 0; + + /* + * We are here for group and 'GH' was not set as event + * modifier and whatever event/group modifier override + * default 'GH' setup. 
+ */ + if (evsel && !exclude_GH) + eH = eG = 0; + + memset(mod, 0, sizeof(*mod)); while (*str) { if (*str == 'u') { @@ -684,13 +708,39 @@ int parse_events__modifier_event(struct list_head *list, char *str) if (precise > 3) return -EINVAL; + mod->eu = eu; + mod->ek = ek; + mod->eh = eh; + mod->eH = eH; + mod->eG = eG; + mod->precise = precise; + mod->exclude_GH = exclude_GH; + return 0; +} + +int parse_events__modifier_event(struct list_head *list, char *str, bool add) +{ + struct perf_evsel *evsel; + struct event_modifier mod; + + if (str == NULL) + return 0; + + if (!add && get_event_modifier(&mod, str, NULL)) + return -EINVAL; + list_for_each_entry(evsel, list, node) { - evsel->attr.exclude_user = eu; - evsel->attr.exclude_kernel = ek; - evsel->attr.exclude_hv = eh; - evsel->attr.precise_ip = precise; - evsel->attr.exclude_host = eH; - evsel->attr.exclude_guest = eG; + + if (add && get_event_modifier(&mod, str, evsel)) + return -EINVAL; + + evsel->attr.exclude_user = mod.eu; + evsel->attr.exclude_kernel = mod.ek; + evsel->attr.exclude_hv = mod.eh; + evsel->attr.precise_ip = mod.precise; + evsel->attr.exclude_host = mod.eH; + evsel->attr.exclude_guest = mod.eG; + evsel->exclude_GH = mod.exclude_GH; } return 0; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index c3bb04c6f02..75a6800082a 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -79,7 +79,7 @@ int parse_events__term_str(struct parse_events__term **_term, int parse_events__term_clone(struct parse_events__term **new, struct parse_events__term *term); void parse_events__free_terms(struct list_head *terms); -int parse_events__modifier_event(struct list_head *list, char *str); +int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_add_tracepoint(struct list_head **list, int *idx, char *sys, char *event); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 15e6e97e5b3..084c35fc2d2 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -153,7 +153,7 @@ event_def PE_MODIFIER_EVENT * (there could be more events added for multiple tracepoint * definitions via '*?'. */ - ABORT_ON(parse_events__modifier_event(list, $2)); + ABORT_ON(parse_events__modifier_event(list, $2, false)); $$ = list; } | From 6a4bb04caacc8c2d06f345130e9086e3fea38ca7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Aug 2012 12:22:36 +0200 Subject: [PATCH 084/282] perf tools: Enable grouping logic for parsed events This patch adds a functionality that allows to create event groups based on the way they are specified on the command line. Adding functionality to the '{}' group syntax introduced in earlier patch. The current '--group/-g' option behaviour remains intact. If you specify it for record/stat/top command, all the specified events become members of a single group with the first event as a group leader. With the new '{}' group syntax you can create group like: # perf record -e '{cycles,faults}' ls resulting in single event group containing 'cycles' and 'faults' events, with cycles event as group leader. All groups are created with regards to threads and cpus. Thus recording an event group within a 2 threads on server with 4 CPUs will create 8 separate groups. 
Examples (first event in brackets is group leader): # 1 group (cpu-clock,task-clock) perf record --group -e cpu-clock,task-clock ls perf record -e '{cpu-clock,task-clock}' ls # 2 groups (cpu-clock,task-clock) (minor-faults,major-faults) perf record -e '{cpu-clock,task-clock},{minor-faults,major-faults}' ls # 1 group (cpu-clock,task-clock,minor-faults,major-faults) perf record --group -e cpu-clock,task-clock -e minor-faults,major-faults ls perf record -e '{cpu-clock,task-clock,minor-faults,major-faults}' ls # 2 groups (cpu-clock,task-clock) (minor-faults,major-faults) perf record -e '{cpu-clock,task-clock} -e '{minor-faults,major-faults}' \ -e instructions ls # 1 group # (cpu-clock,task-clock,minor-faults,major-faults,instructions) perf record --group -e cpu-clock,task-clock \ -e minor-faults,major-faults -e instructions ls perf record -e '{cpu-clock,task-clock,minor-faults,major-faults,instructions}' ls It's possible to use standard event modifier for a group, which spans over all events in the group and updates each event modifier settings, for example: # perf record -r '{faults:k,cache-references}:p' resulting in ':kp' modifier being used for 'faults' and ':p' modifier being used for 'cache-references' event. Reviewed-by: Namhyung Kim Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ulrich Drepper Link: http://lkml.kernel.org/n/tip-ho42u0wcr8mn1otkalqi13qp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 13 ++++----- tools/perf/builtin-stat.c | 13 ++++----- tools/perf/builtin-test.c | 8 +++--- tools/perf/builtin-top.c | 12 +++----- tools/perf/util/evlist.c | 20 ++++++------- tools/perf/util/evlist.h | 3 +- tools/perf/util/evsel.c | 51 ++++++++++++++++++++++------------ tools/perf/util/evsel.h | 11 ++++---- tools/perf/util/parse-events.c | 26 ++++++++++++++--- tools/perf/util/parse-events.h | 1 + tools/perf/util/python.c | 7 +++-- 11 files changed, 96 insertions(+), 69 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 22dd05d3680..f5b6137c0f7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -185,18 +185,18 @@ static bool perf_evlist__equal(struct perf_evlist *evlist, static void perf_record__open(struct perf_record *rec) { - struct perf_evsel *pos, *first; + struct perf_evsel *pos; struct perf_evlist *evlist = rec->evlist; struct perf_session *session = rec->session; struct perf_record_opts *opts = &rec->opts; - first = list_entry(evlist->entries.next, struct perf_evsel, node); - perf_evlist__config_attrs(evlist, opts); + if (opts->group) + perf_evlist__group(evlist); + list_for_each_entry(pos, &evlist->entries, node) { struct perf_event_attr *attr = &pos->attr; - struct xyarray *group_fd = NULL; /* * Check if parse_single_tracepoint_event has already asked for * PERF_SAMPLE_TIME. @@ -211,16 +211,13 @@ static void perf_record__open(struct perf_record *rec) */ bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; - if (opts->group && pos != first) - group_fd = first->fd; fallback_missing_features: if (opts->exclude_guest_missing) attr->exclude_guest = attr->exclude_host = 0; retry_sample_id: attr->sample_id_all = opts->sample_id_all_missing ? 
0 : 1; try_again: - if (perf_evsel__open(pos, evlist->cpus, evlist->threads, - opts->group, group_fd) < 0) { + if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { int err = errno; if (err == EPERM || err == EACCES) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 861f0aec77a..23908a85bba 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -281,13 +281,9 @@ static int create_perf_stat_counter(struct perf_evsel *evsel, struct perf_evsel *first) { struct perf_event_attr *attr = &evsel->attr; - struct xyarray *group_fd = NULL; bool exclude_guest_missing = false; int ret; - if (group && evsel != first) - group_fd = first->fd; - if (scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; @@ -299,8 +295,7 @@ retry: evsel->attr.exclude_guest = evsel->attr.exclude_host = 0; if (perf_target__has_cpu(&target)) { - ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus, - group, group_fd); + ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus); if (ret) goto check_ret; return 0; @@ -311,8 +306,7 @@ retry: attr->enable_on_exec = 1; } - ret = perf_evsel__open_per_thread(evsel, evsel_list->threads, - group, group_fd); + ret = perf_evsel__open_per_thread(evsel, evsel_list->threads); if (!ret) return 0; /* fall through */ @@ -483,6 +477,9 @@ static int run_perf_stat(int argc __used, const char **argv) close(child_ready_pipe[0]); } + if (group) + perf_evlist__group(evsel_list); + first = list_entry(evsel_list->entries.next, struct perf_evsel, node); list_for_each_entry(counter, &evsel_list->entries, node) { diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 1d592f5cbea..9a479b68fc9 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -294,7 +294,7 @@ static int test__open_syscall_event(void) goto out_thread_map_delete; } - if (perf_evsel__open_per_thread(evsel, threads, false, NULL) < 0) { + if (perf_evsel__open_per_thread(evsel, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -369,7 +369,7 @@ static int test__open_syscall_event_on_all_cpus(void) goto out_thread_map_delete; } - if (perf_evsel__open(evsel, cpus, threads, false, NULL) < 0) { + if (perf_evsel__open(evsel, cpus, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -533,7 +533,7 @@ static int test__basic_mmap(void) perf_evlist__add(evlist, evsels[i]); - if (perf_evsel__open(evsels[i], cpus, threads, false, NULL) < 0) { + if (perf_evsel__open(evsels[i], cpus, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -737,7 +737,7 @@ static int test__PERF_RECORD(void) * Call sys_perf_event_open on all the fds on all the evsels, * grouping them if asked to. 
*/ - err = perf_evlist__open(evlist, opts.group); + err = perf_evlist__open(evlist); if (err < 0) { pr_debug("perf_evlist__open: %s\n", strerror(errno)); goto out_delete_evlist; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e45a1ba6172..392d2192b75 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -886,17 +886,14 @@ static void perf_top__mmap_read(struct perf_top *top) static void perf_top__start_counters(struct perf_top *top) { - struct perf_evsel *counter, *first; + struct perf_evsel *counter; struct perf_evlist *evlist = top->evlist; - first = list_entry(evlist->entries.next, struct perf_evsel, node); + if (top->group) + perf_evlist__group(evlist); list_for_each_entry(counter, &evlist->entries, node) { struct perf_event_attr *attr = &counter->attr; - struct xyarray *group_fd = NULL; - - if (top->group && counter != first) - group_fd = first->fd; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; @@ -927,8 +924,7 @@ retry_sample_id: attr->sample_id_all = top->sample_id_all_missing ? 0 : 1; try_again: if (perf_evsel__open(counter, top->evlist->cpus, - top->evlist->threads, top->group, - group_fd) < 0) { + top->evlist->threads) < 0) { int err = errno; if (err == EPERM || err == EACCES) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 9b38681add9..feffee3f2bd 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -108,6 +108,12 @@ void perf_evlist__splice_list_tail(struct perf_evlist *evlist, evlist->nr_entries += nr_entries; } +void perf_evlist__group(struct perf_evlist *evlist) +{ + if (evlist->nr_entries) + parse_events__set_leader(&evlist->entries); +} + int perf_evlist__add_default(struct perf_evlist *evlist) { struct perf_event_attr attr = { @@ -757,21 +763,13 @@ void perf_evlist__set_selected(struct perf_evlist *evlist, evlist->selected = evsel; } -int perf_evlist__open(struct perf_evlist *evlist, bool group) +int perf_evlist__open(struct perf_evlist *evlist) { - struct perf_evsel *evsel, *first; + struct perf_evsel *evsel; int err, ncpus, nthreads; - first = list_entry(evlist->entries.next, struct perf_evsel, node); - list_for_each_entry(evsel, &evlist->entries, node) { - struct xyarray *group_fd = NULL; - - if (group && evsel != first) - group_fd = first->fd; - - err = perf_evsel__open(evsel, evlist->cpus, evlist->threads, - group, group_fd); + err = perf_evsel__open(evsel, evlist->cpus, evlist->threads); if (err < 0) goto out_err; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 528c1acd929..a19ccd7b51f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -85,7 +85,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); -int perf_evlist__open(struct perf_evlist *evlist, bool group); +int perf_evlist__open(struct perf_evlist *evlist); void perf_evlist__config_attrs(struct perf_evlist *evlist, struct perf_record_opts *opts); @@ -132,4 +132,5 @@ void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list, int nr_entries); +void perf_evlist__group(struct perf_evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9c54e8fc2df..f5b68e73d75 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -21,7 +21,6 @@ #include "perf_regs.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) -#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, 
cpu, 0)) static int __perf_evsel__sample_size(u64 sample_type) { @@ -493,6 +492,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) { perf_evsel__exit(evsel); close_cgroup(evsel->cgrp); + free(evsel->group_name); free(evsel->name); free(evsel); } @@ -568,9 +568,28 @@ int __perf_evsel__read(struct perf_evsel *evsel, return 0; } +static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) +{ + struct perf_evsel *leader = evsel->leader; + int fd; + + if (!leader) + return -1; + + /* + * Leader must be already processed/open, + * if not it's a bug. + */ + BUG_ON(!leader->fd); + + fd = FD(leader, cpu, thread); + BUG_ON(fd == -1); + + return fd; +} + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group, - struct xyarray *group_fds) + struct thread_map *threads) { int cpu, thread; unsigned long flags = 0; @@ -586,13 +605,15 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, } for (cpu = 0; cpu < cpus->nr; cpu++) { - int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1; for (thread = 0; thread < threads->nr; thread++) { + int group_fd; if (!evsel->cgrp) pid = threads->map[thread]; + group_fd = get_group_fd(evsel, cpu, thread); + FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], @@ -602,8 +623,9 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, goto out_close; } - if (group && group_fd == -1) - group_fd = FD(evsel, cpu, thread); + pr_debug("event cpu %d, thread %d, fd %d, group %d\n", + cpu, pid, FD(evsel, cpu, thread), + group_fd); } } @@ -647,8 +669,7 @@ static struct { }; int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group, - struct xyarray *group_fd) + struct thread_map *threads) { if (cpus == NULL) { /* Work around old compiler warnings about strict aliasing */ @@ -658,23 +679,19 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, if (threads == NULL) threads = &empty_thread_map.map; - return __perf_evsel__open(evsel, cpus, threads, group, group_fd); + return __perf_evsel__open(evsel, cpus, threads); } int perf_evsel__open_per_cpu(struct perf_evsel *evsel, - struct cpu_map *cpus, bool group, - struct xyarray *group_fd) + struct cpu_map *cpus) { - return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, - group_fd); + return __perf_evsel__open(evsel, cpus, &empty_thread_map.map); } int perf_evsel__open_per_thread(struct perf_evsel *evsel, - struct thread_map *threads, bool group, - struct xyarray *group_fd) + struct thread_map *threads) { - return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, - group_fd); + return __perf_evsel__open(evsel, &empty_cpu_map.map, threads); } static int perf_event__parse_id_sample(const union perf_event *event, u64 type, diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6a258c90e7c..c411b421c88 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -70,6 +70,8 @@ struct perf_evsel { bool supported; /* parse modifier helper */ int exclude_GH; + struct perf_evsel *leader; + char *group_name; }; struct cpu_map; @@ -109,14 +111,11 @@ void perf_evsel__free_id(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__open_per_cpu(struct perf_evsel *evsel, - struct cpu_map *cpus, bool group, - struct xyarray *group_fds); + struct cpu_map *cpus); int perf_evsel__open_per_thread(struct perf_evsel *evsel, 
- struct thread_map *threads, bool group, - struct xyarray *group_fds); + struct thread_map *threads); int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group, - struct xyarray *group_fds); + struct thread_map *threads); void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads); #define perf_evsel__match(evsel, t, c) \ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4364575ce6a..f6453cd414a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -611,14 +611,32 @@ int parse_events_add_pmu(struct list_head **list, int *idx, pmu_event_name(head_config)); } -int parse_events__modifier_group(struct list_head *list __used, - char *event_mod __used) +struct perf_evsel *parse_events__set_leader(struct list_head *list) { - return 0; + struct perf_evsel *evsel, *leader; + + leader = list_entry(list->next, struct perf_evsel, node); + leader->leader = NULL; + + list_for_each_entry(evsel, list, node) + if (evsel != leader) + evsel->leader = leader; + + return leader; } -void parse_events__group(char *name __used, struct list_head *list __used) +int parse_events__modifier_group(struct list_head *list, + char *event_mod) { + return parse_events__modifier_event(list, event_mod, true); +} + +void parse_events__group(char *name, struct list_head *list) +{ + struct perf_evsel *leader; + + leader = parse_events__set_leader(list); + leader->group_name = name ? strdup(name) : NULL; } void parse_events_update_lists(struct list_head *list_event, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 75a6800082a..e1a184c9e35 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -92,6 +92,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx, void *ptr, char *type); int parse_events_add_pmu(struct list_head **list, int *idx, char *pmu , struct list_head *head_config); +struct perf_evsel *parse_events__set_leader(struct list_head *list); void parse_events__group(char *name, struct list_head *list); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 0688bfb6d28..f5bba4b8eb9 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -627,7 +627,7 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, * This will group just the fds for this single evsel, to group * multiple events, use evlist.open(). */ - if (perf_evsel__open(evsel, cpus, threads, group, NULL) < 0) { + if (perf_evsel__open(evsel, cpus, threads) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } @@ -824,7 +824,10 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, &group)) return NULL; - if (perf_evlist__open(evlist, group) < 0) { + if (group) + perf_evlist__group(evlist); + + if (perf_evlist__open(evlist) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } From 905f5ee2f769c85eb6f0b6ee38f803829efd68a3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Aug 2012 12:23:52 +0200 Subject: [PATCH 085/282] perf test: Add automated tests for event group parsing Adding 5 more tests for new event group syntax. Tests are executed within the 'perf test parse' test suite. 
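For illustration only (not part of the patch; the exact test number and listing format differ across perf versions), the new checks can be exercised through the built-in self tests after rebuilding the tool:

  # run all self tests verbosely and inspect the "parse events" entry,
  # which now also covers group syntax such as '{cycles,instructions}:G'
  $ perf test -v

  # a single test can also be selected by the number printed by a bare
  # 'perf test' listing (the '5' below is a hypothetical example)
  $ perf test -v 5
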
Reviewed-by: Namhyung Kim Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ulrich Drepper Link: http://lkml.kernel.org/n/tip-dmhsv8mpoksx2wp97balqiem@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events-test.c | 302 +++++++++++++++++++++++++++- 1 file changed, 299 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 127d648cc54..429dd688281 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -513,6 +513,285 @@ static int test__checkterms_simple(struct list_head *terms) return 0; } +static int test__group1(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + + /* instructions:k */ + evsel = leader = list_entry(evlist->entries.next, + struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* cycles:upp */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + return 0; +} + +static int test__group2(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); + + /* faults + :ku modifier */ + evsel = leader = list_entry(evlist->entries.next, + struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* cache-references + :u modifier */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + 
PERF_COUNT_HW_CACHE_REFERENCES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* cycles:k */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + return 0; +} + +static int test__group3(struct perf_evlist *evlist __used) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); + + /* group1 syscalls:sys_enter_open:H */ + evsel = leader = list_entry(evlist->entries.next, + struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong sample_type", + PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); + TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + TEST_ASSERT_VAL("wrong group name", + !strcmp(leader->group_name, "group1")); + + /* group1 cycles:kppp */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 3); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + + /* group2 cycles + G modifier */ + evsel = leader = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", 
!evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + TEST_ASSERT_VAL("wrong group name", + !strcmp(leader->group_name, "group2")); + + /* group2 1:3 + G modifier */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 3 == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* instructions:u */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + return 0; +} + +static int test__group4(struct perf_evlist *evlist __used) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + + /* cycles:u + p */ + evsel = leader = list_entry(evlist->entries.next, + struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 1); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* instructions:kp + p */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + return 0; +} + +static int test__group5(struct perf_evlist *evlist __used) +{ + struct 
perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); + + /* cycles + G */ + evsel = leader = list_entry(evlist->entries.next, + struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* instructions + G */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* cycles:G */ + evsel = leader = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* instructions:G */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* cycles */ + evsel = list_entry(evsel->node.next, struct perf_evsel, node); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + 
TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + return 0; +} + struct test__event_st { const char *name; __u32 type; @@ -632,6 +911,26 @@ static struct test__event_st test__events[] = { .name = "mem:0:rw:kp", .check = test__checkevent_breakpoint_rw_modifier, }, + [28] = { + .name = "{instructions:k,cycles:upp}", + .check = test__group1, + }, + [29] = { + .name = "{faults:k,cache-references}:u,cycles:k", + .check = test__group2, + }, + [30] = { + .name = "group1{syscalls:sys_enter_open:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u", + .check = test__group3, + }, + [31] = { + .name = "{cycles:u,instructions:kp}:p", + .check = test__group4, + }, + [32] = { + .name = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles", + .check = test__group5, + }, }; static struct test__event_st test__events_pmu[] = { @@ -658,9 +957,6 @@ static struct test__term test__terms[] = { }, }; -#define TEST__TERMS_CNT (sizeof(test__terms) / \ - sizeof(struct test__term)) - static int test_event(struct test__event_st *e) { struct perf_evlist *evlist; From 63dab225f334e0e21f7106aed8d888b500b53ce6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Aug 2012 16:35:48 -0300 Subject: [PATCH 086/282] perf evlist: Rename __group method to __set_leader Just like was done for parse_events__set_leader. Also we need to have the list_entry set_leader method in evlist.c so that we don't grow another dep in the python binding: # ~acme/git/linux/tools/perf/python/twatch.py Traceback (most recent call last): File "/home/acme/git/linux/tools/perf/python/twatch.py", line 16, in import perf ImportError: /home/acme/git/build/perf/python/perf.so: undefined symbol: parse_events__set_leader And also remove a pr_debug from evsel.c so that we avoid this one too: # ~acme/git/linux/tools/perf/python/twatch.py Traceback (most recent call last): File "/home/acme/git/linux/tools/perf/python/twatch.py", line 16, in import perf ImportError: /home/acme/git/build/perf/python/perf.so: undefined symbol: eprintf Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-0hk9dazg9pora9jylkqngovm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/util/evlist.c | 17 +++++++++++++++-- tools/perf/util/evlist.h | 5 +++-- tools/perf/util/evsel.c | 4 ---- tools/perf/util/parse-events.c | 19 +++---------------- tools/perf/util/parse-events.h | 3 +-- tools/perf/util/parse-events.y | 4 ++-- tools/perf/util/python.c | 2 +- 10 files changed, 28 insertions(+), 32 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f5b6137c0f7..c4e3b683e79 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -193,7 +193,7 @@ static void perf_record__open(struct perf_record *rec) perf_evlist__config_attrs(evlist, opts); if (opts->group) - perf_evlist__group(evlist); + perf_evlist__set_leader(evlist); list_for_each_entry(pos, &evlist->entries, node) { struct perf_event_attr *attr = &pos->attr; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 23908a85bba..7b9c46341e0 100644 --- 
a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -478,7 +478,7 @@ static int run_perf_stat(int argc __used, const char **argv) } if (group) - perf_evlist__group(evsel_list); + perf_evlist__set_leader(evsel_list); first = list_entry(evsel_list->entries.next, struct perf_evsel, node); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 392d2192b75..5a097beb868 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -890,7 +890,7 @@ static void perf_top__start_counters(struct perf_top *top) struct perf_evlist *evlist = top->evlist; if (top->group) - perf_evlist__group(evlist); + perf_evlist__set_leader(evlist); list_for_each_entry(counter, &evlist->entries, node) { struct perf_event_attr *attr = &counter->attr; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index feffee3f2bd..6d09451430d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -108,10 +108,23 @@ void perf_evlist__splice_list_tail(struct perf_evlist *evlist, evlist->nr_entries += nr_entries; } -void perf_evlist__group(struct perf_evlist *evlist) +void __perf_evlist__set_leader(struct list_head *list) +{ + struct perf_evsel *evsel, *leader; + + leader = list_entry(list->next, struct perf_evsel, node); + leader->leader = NULL; + + list_for_each_entry(evsel, list, node) { + if (evsel != leader) + evsel->leader = leader; + } +} + +void perf_evlist__set_leader(struct perf_evlist *evlist) { if (evlist->nr_entries) - parse_events__set_leader(&evlist->entries); + __perf_evlist__set_leader(&evlist->entries); } int perf_evlist__add_default(struct perf_evlist *evlist) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a19ccd7b51f..7fe677e6c31 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -118,6 +118,9 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, void perf_evlist__delete_maps(struct perf_evlist *evlist); int perf_evlist__set_filters(struct perf_evlist *evlist); +void __perf_evlist__set_leader(struct list_head *list); +void perf_evlist__set_leader(struct perf_evlist *evlist); + u64 perf_evlist__sample_type(const struct perf_evlist *evlist); bool perf_evlist__sample_id_all(const const struct perf_evlist *evlist); u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist); @@ -131,6 +134,4 @@ bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list, int nr_entries); - -void perf_evlist__group(struct perf_evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f5b68e73d75..6c7dcc1fde5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -622,10 +622,6 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, err = -errno; goto out_close; } - - pr_debug("event cpu %d, thread %d, fd %d, group %d\n", - cpu, pid, FD(evsel, cpu, thread), - group_fd); } } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f6453cd414a..4393a6b65c5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -611,31 +611,18 @@ int parse_events_add_pmu(struct list_head **list, int *idx, pmu_event_name(head_config)); } -struct perf_evsel *parse_events__set_leader(struct list_head *list) -{ - struct perf_evsel *evsel, *leader; - - leader = list_entry(list->next, struct perf_evsel, node); - leader->leader = NULL; - - list_for_each_entry(evsel, list, node) - if (evsel != 
leader) - evsel->leader = leader; - - return leader; -} - int parse_events__modifier_group(struct list_head *list, char *event_mod) { return parse_events__modifier_event(list, event_mod, true); } -void parse_events__group(char *name, struct list_head *list) +void parse_events__set_leader(char *name, struct list_head *list) { struct perf_evsel *leader; - leader = parse_events__set_leader(list); + __perf_evlist__set_leader(list); + leader = list_entry(list->next, struct perf_evsel, node); leader->group_name = name ? strdup(name) : NULL; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index e1a184c9e35..0b9782dc3da 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -92,8 +92,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx, void *ptr, char *type); int parse_events_add_pmu(struct list_head **list, int *idx, char *pmu , struct list_head *head_config); -struct perf_evsel *parse_events__set_leader(struct list_head *list); -void parse_events__group(char *name, struct list_head *list); +void parse_events__set_leader(char *name, struct list_head *list); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); void parse_events_error(void *data, void *scanner, char const *msg); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 084c35fc2d2..66850f820df 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -119,7 +119,7 @@ PE_NAME '{' events '}' { struct list_head *list = $3; - parse_events__group($1, list); + parse_events__set_leader($1, list); $$ = list; } | @@ -127,7 +127,7 @@ PE_NAME '{' events '}' { struct list_head *list = $2; - parse_events__group(NULL, list); + parse_events__set_leader(NULL, list); $$ = list; } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index f5bba4b8eb9..27187f0b71f 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -825,7 +825,7 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, return NULL; if (group) - perf_evlist__group(evlist); + perf_evlist__set_leader(evlist); if (perf_evlist__open(evlist) < 0) { PyErr_SetFromErrno(PyExc_OSError); From 0c21f736e0a37c50f66ab248d2a52f711b28a4e4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Aug 2012 16:42:15 -0300 Subject: [PATCH 087/282] perf evlist: Introduce evsel list accessors To replace the longer list_entry constructs for things that are widely used: perf_evlist__{first,last}(evlist) perf_evsel__next(evsel) Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ng7azq26wg1jd801qqpcozwp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 +- tools/perf/builtin-stat.c | 2 +- tools/perf/builtin-test.c | 2 +- tools/perf/builtin-top.c | 6 +- tools/perf/util/evlist.c | 40 ++++----- tools/perf/util/evlist.h | 23 +++-- tools/perf/util/evsel.h | 5 ++ tools/perf/util/header.c | 4 +- tools/perf/util/parse-events-test.c | 129 +++++++++++----------------- tools/perf/util/parse-events.c | 2 +- tools/perf/util/top.c | 3 +- 11 files changed, 95 insertions(+), 125 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index c4e3b683e79..479ff2a038f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -172,12 +172,12 @@ static bool 
perf_evlist__equal(struct perf_evlist *evlist, if (evlist->nr_entries != other->nr_entries) return false; - pair = list_entry(other->entries.next, struct perf_evsel, node); + pair = perf_evlist__first(other); list_for_each_entry(pos, &evlist->entries, node) { if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0)) return false; - pair = list_entry(pair->node.next, struct perf_evsel, node); + pair = perf_evsel__next(pair); } return true; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7b9c46341e0..d53d8ab099b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -480,7 +480,7 @@ static int run_perf_stat(int argc __used, const char **argv) if (group) perf_evlist__set_leader(evsel_list); - first = list_entry(evsel_list->entries.next, struct perf_evsel, node); + first = perf_evlist__first(evsel_list); list_for_each_entry(counter, &evsel_list->entries, node) { if (create_perf_stat_counter(counter, first) < 0) { diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 9a479b68fc9..381d5ab8712 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -710,7 +710,7 @@ static int test__PERF_RECORD(void) /* * Config the evsels, setting attr->comm on the first one, etc. */ - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); + evsel = perf_evlist__first(evlist); evsel->attr.sample_type |= PERF_SAMPLE_CPU; evsel->attr.sample_type |= PERF_SAMPLE_TID; evsel->attr.sample_type |= PERF_SAMPLE_TIME; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5a097beb868..0513aaa659f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -509,7 +509,7 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) prompt_integer(&counter, "Enter details event counter"); if (counter >= top->evlist->nr_entries) { - top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); + top->sym_evsel = perf_evlist__first(top->evlist); fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel)); sleep(1); break; @@ -518,7 +518,7 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) if (top->sym_evsel->idx == counter) break; } else - top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); + top->sym_evsel = perf_evlist__first(top->evlist); break; case 'f': prompt_integer(&top->count_filter, "Enter display event count filter"); @@ -1326,7 +1326,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) pos->attr.sample_period = top.default_interval; } - top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); + top.sym_evsel = perf_evlist__first(top.evlist); symbol_conf.priv_size = sizeof(struct annotation); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6d09451430d..4774ac1e3d5 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -57,7 +57,7 @@ void perf_evlist__config_attrs(struct perf_evlist *evlist, if (evlist->cpus->map[0] < 0) opts->no_inherit = true; - first = list_entry(evlist->entries.next, struct perf_evsel, node); + first = perf_evlist__first(evlist); list_for_each_entry(evsel, &evlist->entries, node) { perf_evsel__config(evsel, opts, first); @@ -376,7 +376,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) int hash; if (evlist->nr_entries == 1) - return list_entry(evlist->entries.next, struct perf_evsel, node); + return perf_evlist__first(evlist); hash = hash_64(id, 
PERF_EVLIST__HLIST_BITS); head = &evlist->heads[hash]; @@ -386,7 +386,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return sid->evsel; if (!perf_evlist__sample_id_all(evlist)) - return list_entry(evlist->entries.next, struct perf_evsel, node); + return perf_evlist__first(evlist); return NULL; } @@ -694,11 +694,9 @@ int perf_evlist__set_filters(struct perf_evlist *evlist) return 0; } -bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist) +bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) { - struct perf_evsel *pos, *first; - - pos = first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; list_for_each_entry_continue(pos, &evlist->entries, node) { if (first->attr.sample_type != pos->attr.sample_type) @@ -708,23 +706,19 @@ bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist) return true; } -u64 perf_evlist__sample_type(const struct perf_evlist *evlist) +u64 perf_evlist__sample_type(struct perf_evlist *evlist) { - struct perf_evsel *first; - - first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist); return first->attr.sample_type; } -u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist) +u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) { - struct perf_evsel *first; + struct perf_evsel *first = perf_evlist__first(evlist); struct perf_sample *data; u64 sample_type; u16 size = 0; - first = list_entry(evlist->entries.next, struct perf_evsel, node); - if (!first->attr.sample_id_all) goto out; @@ -748,11 +742,9 @@ out: return size; } -bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist) +bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) { - struct perf_evsel *pos, *first; - - pos = first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; list_for_each_entry_continue(pos, &evlist->entries, node) { if (first->attr.sample_id_all != pos->attr.sample_id_all) @@ -762,11 +754,9 @@ bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist) return true; } -bool perf_evlist__sample_id_all(const struct perf_evlist *evlist) +bool perf_evlist__sample_id_all(struct perf_evlist *evlist) { - struct perf_evsel *first; - - first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist); return first->attr.sample_id_all; } @@ -896,6 +886,6 @@ int perf_evlist__start_workload(struct perf_evlist *evlist) int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample, bool swapped) { - struct perf_evsel *e = list_entry(evlist->entries.next, struct perf_evsel, node); - return perf_evsel__parse_sample(e, event, sample, swapped); + struct perf_evsel *evsel = perf_evlist__first(evlist); + return perf_evsel__parse_sample(evsel, event, sample, swapped); } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 7fe677e6c31..2ed255792c6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -5,6 +5,7 @@ #include #include "../perf.h" #include "event.h" +#include "evsel.h" #include "util.h" #include @@ -41,8 +42,6 @@ struct perf_evsel_str_handler { void *handler; }; -struct perf_evsel; - struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, struct thread_map *threads); void perf_evlist__init(struct perf_evlist *evlist, 
struct cpu_map *cpus, @@ -121,17 +120,27 @@ int perf_evlist__set_filters(struct perf_evlist *evlist); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct perf_evlist *evlist); -u64 perf_evlist__sample_type(const struct perf_evlist *evlist); -bool perf_evlist__sample_id_all(const const struct perf_evlist *evlist); -u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist); +u64 perf_evlist__sample_type(struct perf_evlist *evlist); +bool perf_evlist__sample_id_all(struct perf_evlist *evlist); +u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample, bool swapped); -bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); -bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); +bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); +bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list, int nr_entries); + +static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) +{ + return list_entry(evlist->entries.next, struct perf_evsel, node); +} + +static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) +{ + return list_entry(evlist->entries.prev, struct perf_evsel, node); +} #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c411b421c88..65f39fdf821 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -184,4 +184,9 @@ void hists__init(struct hists *hists); int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample, bool swapped); + +static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) +{ + return list_entry(evsel->node.next, struct perf_evsel, node); +} #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7e7d34fc557..77832b807ef 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1689,7 +1689,7 @@ int perf_session__write_header(struct perf_session *session, lseek(fd, sizeof(f_header), SEEK_SET); if (session->evlist != evlist) - pair = list_entry(session->evlist->entries.next, struct perf_evsel, node); + pair = perf_evlist__first(session->evlist); list_for_each_entry(attr, &evlist->entries, node) { attr->id_offset = lseek(fd, 0, SEEK_CUR); @@ -1704,7 +1704,7 @@ out_err_write: if (err < 0) goto out_err_write; attr->ids += pair->ids; - pair = list_entry(pair->node.next, struct perf_evsel, node); + pair = perf_evsel__next(pair); } } diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 429dd688281..bf055ce1916 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -18,8 +18,7 @@ do { \ static int test__checkevent_tracepoint(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); @@ -48,8 +47,7 @@ static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist) static int test__checkevent_raw(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct 
perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); @@ -59,8 +57,7 @@ static int test__checkevent_raw(struct perf_evlist *evlist) static int test__checkevent_numeric(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); @@ -70,8 +67,7 @@ static int test__checkevent_numeric(struct perf_evlist *evlist) static int test__checkevent_symbolic_name(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); @@ -82,8 +78,7 @@ static int test__checkevent_symbolic_name(struct perf_evlist *evlist) static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); @@ -100,8 +95,7 @@ static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist) static int test__checkevent_symbolic_alias(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); @@ -112,8 +106,7 @@ static int test__checkevent_symbolic_alias(struct perf_evlist *evlist) static int test__checkevent_genhw(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type); @@ -123,8 +116,7 @@ static int test__checkevent_genhw(struct perf_evlist *evlist) static int test__checkevent_breakpoint(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); @@ -138,8 +130,7 @@ static int test__checkevent_breakpoint(struct perf_evlist *evlist) static int test__checkevent_breakpoint_x(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); @@ -152,8 +143,7 @@ static int test__checkevent_breakpoint_x(struct perf_evlist *evlist) static int test__checkevent_breakpoint_r(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel 
= perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", @@ -168,8 +158,7 @@ static int test__checkevent_breakpoint_r(struct perf_evlist *evlist) static int test__checkevent_breakpoint_w(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", @@ -184,8 +173,7 @@ static int test__checkevent_breakpoint_w(struct perf_evlist *evlist) static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", @@ -200,8 +188,7 @@ static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist) static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -232,8 +219,7 @@ test__checkevent_tracepoint_multi_modifier(struct perf_evlist *evlist) static int test__checkevent_raw_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -245,8 +231,7 @@ static int test__checkevent_raw_modifier(struct perf_evlist *evlist) static int test__checkevent_numeric_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -258,8 +243,7 @@ static int test__checkevent_numeric_modifier(struct perf_evlist *evlist) static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -271,8 +255,7 @@ static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist) static int test__checkevent_exclude_host_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); @@ -282,8 +265,7 @@ static int test__checkevent_exclude_host_modifier(struct perf_evlist *evlist) static int test__checkevent_exclude_guest_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude 
guest", evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); @@ -293,8 +275,7 @@ static int test__checkevent_exclude_guest_modifier(struct perf_evlist *evlist) static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -306,8 +287,7 @@ static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist) static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -319,8 +299,7 @@ static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -334,8 +313,7 @@ static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist) static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -349,8 +327,7 @@ static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist) static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -364,8 +341,7 @@ static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist) static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -379,8 +355,7 @@ static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist) static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -395,8 +370,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist) static int test__checkevent_pmu(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, 
node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); @@ -410,12 +384,11 @@ static int test__checkevent_pmu(struct perf_evlist *evlist) static int test__checkevent_list(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); /* r1 */ - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1); @@ -426,7 +399,7 @@ static int test__checkevent_list(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); /* syscalls:sys_enter_open:k */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); @@ -437,7 +410,7 @@ static int test__checkevent_list(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); /* 1:1:hp */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); @@ -450,17 +423,16 @@ static int test__checkevent_list(struct perf_evlist *evlist) static int test__checkevent_pmu_name(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel = perf_evlist__first(evlist); /* cpu/config=1,name=krava/u */ - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava")); /* cpu/config=2/u" */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config); @@ -520,8 +492,7 @@ static int test__group1(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); /* instructions:k */ - evsel = leader = list_entry(evlist->entries.next, - struct perf_evsel, node); + evsel = leader = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); @@ -534,7 +505,7 @@ static int test__group1(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); /* cycles:upp */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); @@ -556,8 +527,7 @@ static int test__group2(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); /* faults + :ku modifier */ 
- evsel = leader = list_entry(evlist->entries.next, - struct perf_evsel, node); + evsel = leader = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config); @@ -570,7 +540,7 @@ static int test__group2(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); /* cache-references + :u modifier */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_REFERENCES == evsel->attr.config); @@ -583,7 +553,7 @@ static int test__group2(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); /* cycles:k */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); @@ -605,8 +575,7 @@ static int test__group3(struct perf_evlist *evlist __used) TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); /* group1 syscalls:sys_enter_open:H */ - evsel = leader = list_entry(evlist->entries.next, - struct perf_evsel, node); + evsel = leader = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); @@ -622,7 +591,7 @@ static int test__group3(struct perf_evlist *evlist __used) !strcmp(leader->group_name, "group1")); /* group1 cycles:kppp */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); @@ -636,7 +605,7 @@ static int test__group3(struct perf_evlist *evlist __used) TEST_ASSERT_VAL("wrong group name", !evsel->group_name); /* group2 cycles + G modifier */ - evsel = leader = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = leader = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); @@ -651,7 +620,7 @@ static int test__group3(struct perf_evlist *evlist __used) !strcmp(leader->group_name, "group2")); /* group2 1:3 + G modifier */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 3 == evsel->attr.config); TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); @@ -663,7 +632,7 @@ static int test__group3(struct perf_evlist *evlist __used) TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); /* instructions:u */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); @@ -685,8 +654,7 @@ static int test__group4(struct perf_evlist *evlist __used) TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); /* cycles:u + p */ - evsel = leader = list_entry(evlist->entries.next, - struct perf_evsel, node); + evsel = leader = perf_evlist__first(evlist); 
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); @@ -700,7 +668,7 @@ static int test__group4(struct perf_evlist *evlist __used) TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); /* instructions:kp + p */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); @@ -722,8 +690,7 @@ static int test__group5(struct perf_evlist *evlist __used) TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); /* cycles + G */ - evsel = leader = list_entry(evlist->entries.next, - struct perf_evsel, node); + evsel = leader = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); @@ -737,7 +704,7 @@ static int test__group5(struct perf_evlist *evlist __used) TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); /* instructions + G */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); @@ -750,7 +717,7 @@ static int test__group5(struct perf_evlist *evlist __used) TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); /* cycles:G */ - evsel = leader = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = leader = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); @@ -764,7 +731,7 @@ static int test__group5(struct perf_evlist *evlist __used) TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); /* instructions:G */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); @@ -777,7 +744,7 @@ static int test__group5(struct perf_evlist *evlist __used) TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); /* cycles */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4393a6b65c5..925784a930a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -834,7 +834,7 @@ int parse_filter(const struct option *opt, const char *str, struct perf_evsel *last = NULL; if (evlist->nr_entries > 0) - last = list_entry(evlist->entries.prev, struct perf_evsel, node); + last = perf_evlist__last(evlist); if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { fprintf(stderr, diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 7eeebcee291..884dde9b9bc 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -58,8 +58,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) } if (top->evlist->nr_entries == 1) { - struct perf_evsel *first; - first = list_entry(top->evlist->entries.next, struct perf_evsel, 
node); + struct perf_evsel *first = perf_evlist__first(top->evlist); ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ", (uint64_t)first->attr.sample_period, top->freq ? "Hz" : ""); From c883122acc0d97648d8b8f4726709017674e4420 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 13 Aug 2012 10:23:02 -0400 Subject: [PATCH 088/282] perf tools: Let O= makes handle relative paths When I did a compile of perf using a relative path for the output directory, the build failed when it tried to compile libtraceevent. This is because it continues to use the same relative path when the new working directory is in a different path. SUBDIR ../lib/traceevent/ /bin/sh: line 0: cd: ../../../nobackup/perf/: No such file or directory Makefile:74: *** output directory "../../../nobackup/perf/" does not exist. Stop. make: *** [../../../nobackup/perf///libtraceevent.a] Error 2 Make the path used an absolute path when building perf with O=. Boris: Teach Makefile to check whether the supplied O= directory exists and bail out if not. Reportedly, kernel dudes are idiots and need to be guarded so as not to shoot themselves in the foot when playing in the sandbox. Signed-off-by: Borislav Petkov Signed-off-by: Steven Rostedt Acked-by: Steven Rostedt Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120815163923.GD15989@aftab.osrc.amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/scripts/Makefile.include | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index bde8521d56b..96ce80a3743 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,6 +1,8 @@ ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/ - COMMAND_O := O=$(O) + dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) + ABSOLUTE_O := $(shell cd $(O) ; pwd) + OUTPUT := $(ABSOLUTE_O)/ + COMMAND_O := O=$(ABSOLUTE_O) endif ifneq ($(OUTPUT),) From e6e9046879493d8bf8f44ac1f2718c4a5628aa52 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Aug 2012 17:14:50 +0900 Subject: [PATCH 089/282] perf ui: Introduce struct ui_helpline Add struct ui_helpline in order to provide flexible implementation of helpline APIs. And convert existing TUI implementation to use it. 
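To make the indirection concrete, here is a minimal, self-contained sketch of the callback-struct dispatch this patch introduces. The struct shape mirrors the patch, but the demo back end, the wrapper names and main() are illustrative stand-ins, not the actual perf code:

#include <stdio.h>

/* Front-end specific helpline operations (same shape as in the patch). */
struct ui_helpline {
	void (*pop)(void);
	void (*push)(const char *msg);
};

/* Default no-op implementation, used until a front end registers itself. */
static void nop_pop(void) { }
static void nop_push(const char *msg) { (void)msg; }

static struct ui_helpline nop_helpline = { .pop = nop_pop, .push = nop_push };

/* Currently active implementation; a TUI or GTK front end swaps this pointer. */
static struct ui_helpline *helpline_fns = &nop_helpline;

/* Generic wrappers; callers never know which front end sits behind them. */
static void helpline_push(const char *msg) { helpline_fns->push(msg); }
static void helpline_pop(void) { helpline_fns->pop(); }

/* A stand-in "TUI" back end that just prints to stdout. */
static void demo_push(const char *msg) { printf("helpline: %s\n", msg); }
static void demo_pop(void) { printf("helpline cleared\n"); }

static struct ui_helpline demo_helpline = { .pop = demo_pop, .push = demo_push };

int main(void)
{
	helpline_push("silently ignored: no front end registered yet");

	helpline_fns = &demo_helpline;	/* roughly what the TUI init step does */
	helpline_push("Press '?' for help");
	helpline_pop();
	return 0;
}

The point of the indirection is that generic code only ever calls through helpline_fns, so each front end (TUI, and later GTK) can supply its own pop/push pair without #ifdefs in the callers.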
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345104894-14205-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 5 +-- tools/perf/ui/helpline.c | 60 +++++++++++++----------------------- tools/perf/ui/helpline.h | 10 +++++- tools/perf/ui/tui/helpline.c | 57 ++++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 42 deletions(-) create mode 100644 tools/perf/ui/tui/helpline.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index e457afa04b5..483fb69fa4a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -335,6 +335,7 @@ LIB_H += util/rblist.h LIB_H += util/intlist.h LIB_H += util/perf_regs.h LIB_H += util/unwind.h +LIB_H += ui/helpline.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -402,6 +403,7 @@ LIB_OBJS += $(OUTPUT)util/cgroup.o LIB_OBJS += $(OUTPUT)util/target.o LIB_OBJS += $(OUTPUT)util/rblist.o LIB_OBJS += $(OUTPUT)util/intlist.o +LIB_OBJS += $(OUTPUT)ui/helpline.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o @@ -567,14 +569,13 @@ else LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o LIB_OBJS += $(OUTPUT)ui/browsers/hists.o LIB_OBJS += $(OUTPUT)ui/browsers/map.o - LIB_OBJS += $(OUTPUT)ui/helpline.o LIB_OBJS += $(OUTPUT)ui/progress.o LIB_OBJS += $(OUTPUT)ui/util.o LIB_OBJS += $(OUTPUT)ui/tui/setup.o LIB_OBJS += $(OUTPUT)ui/tui/util.o + LIB_OBJS += $(OUTPUT)ui/tui/helpline.o LIB_H += ui/browser.h LIB_H += ui/browsers/map.h - LIB_H += ui/helpline.h LIB_H += ui/keysyms.h LIB_H += ui/libslang.h LIB_H += ui/progress.h diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c index 2f950c2641c..78ba28ac7a2 100644 --- a/tools/perf/ui/helpline.c +++ b/tools/perf/ui/helpline.c @@ -5,23 +5,32 @@ #include "../debug.h" #include "helpline.h" #include "ui.h" -#include "libslang.h" - -void ui_helpline__pop(void) -{ -} char ui_helpline__current[512]; +static void nop_helpline__pop(void) +{ +} + +static void nop_helpline__push(const char *msg __used) +{ +} + +static struct ui_helpline default_helpline_fns = { + .pop = nop_helpline__pop, + .push = nop_helpline__push, +}; + +struct ui_helpline *helpline_fns = &default_helpline_fns; + +void ui_helpline__pop(void) +{ + helpline_fns->pop(); +} + void ui_helpline__push(const char *msg) { - const size_t sz = sizeof(ui_helpline__current); - - SLsmg_gotorc(SLtt_Screen_Rows - 1, 0); - SLsmg_set_color(0); - SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols); - SLsmg_refresh(); - strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0'; + helpline_fns->push(msg); } void ui_helpline__vpush(const char *fmt, va_list ap) @@ -50,30 +59,3 @@ void ui_helpline__puts(const char *msg) ui_helpline__pop(); ui_helpline__push(msg); } - -void ui_helpline__init(void) -{ - ui_helpline__puts(" "); -} - -char ui_helpline__last_msg[1024]; - -int ui_helpline__show_help(const char *format, va_list ap) -{ - int ret; - static int backlog; - - pthread_mutex_lock(&ui__lock); - ret = vscnprintf(ui_helpline__last_msg + backlog, - sizeof(ui_helpline__last_msg) - backlog, format, ap); - backlog += ret; - - if (ui_helpline__last_msg[backlog - 1] == '\n') { - ui_helpline__puts(ui_helpline__last_msg); - SLsmg_refresh(); - backlog = 0; - } - pthread_mutex_unlock(&ui__lock); - - return ret; -} diff --git a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h index 7bab6b34e35..61118b2bc24 100644 --- a/tools/perf/ui/helpline.h +++ b/tools/perf/ui/helpline.h @@ -4,13 +4,21 @@ #include #include +struct ui_helpline { + void 
(*pop)(void); + void (*push)(const char *msg); +}; + +extern struct ui_helpline *helpline_fns; + void ui_helpline__init(void); + void ui_helpline__pop(void); void ui_helpline__push(const char *msg); void ui_helpline__vpush(const char *fmt, va_list ap); void ui_helpline__fpush(const char *fmt, ...); void ui_helpline__puts(const char *msg); -extern char ui_helpline__current[]; +extern char ui_helpline__current[512]; #endif /* _PERF_UI_HELPLINE_H_ */ diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c new file mode 100644 index 00000000000..2884d2f41e3 --- /dev/null +++ b/tools/perf/ui/tui/helpline.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include + +#include "../../util/debug.h" +#include "../helpline.h" +#include "../ui.h" +#include "../libslang.h" + +static void tui_helpline__pop(void) +{ +} + +static void tui_helpline__push(const char *msg) +{ + const size_t sz = sizeof(ui_helpline__current); + + SLsmg_gotorc(SLtt_Screen_Rows - 1, 0); + SLsmg_set_color(0); + SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols); + SLsmg_refresh(); + strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0'; +} + +struct ui_helpline tui_helpline_fns = { + .pop = tui_helpline__pop, + .push = tui_helpline__push, +}; + +void ui_helpline__init(void) +{ + helpline_fns = &tui_helpline_fns; + ui_helpline__puts(" "); +} + +char ui_helpline__last_msg[1024]; + +int ui_helpline__show_help(const char *format, va_list ap) +{ + int ret; + static int backlog; + + pthread_mutex_lock(&ui__lock); + ret = vscnprintf(ui_helpline__last_msg + backlog, + sizeof(ui_helpline__last_msg) - backlog, format, ap); + backlog += ret; + + if (ui_helpline__last_msg[backlog - 1] == '\n') { + ui_helpline__puts(ui_helpline__last_msg); + SLsmg_refresh(); + backlog = 0; + } + pthread_mutex_unlock(&ui__lock); + + return ret; +} From 4bb1646a80db65bb45c0a1bffb2435c6690c392e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Aug 2012 17:14:52 +0900 Subject: [PATCH 090/282] perf ui gtk: Implement helpline_fns Add helpline API implementation to GTK front-end. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345104894-14205-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 1 + tools/perf/ui/gtk/gtk.h | 2 ++ tools/perf/ui/gtk/helpline.c | 30 ++++++++++++++++++++++++++++++ tools/perf/ui/gtk/setup.c | 1 + 4 files changed, 34 insertions(+) create mode 100644 tools/perf/ui/gtk/helpline.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 483fb69fa4a..75af93dc52a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -600,6 +600,7 @@ else LIB_OBJS += $(OUTPUT)ui/gtk/browser.o LIB_OBJS += $(OUTPUT)ui/gtk/setup.o LIB_OBJS += $(OUTPUT)ui/gtk/util.o + LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o # Make sure that it'd be included only once. 
ifneq ($(findstring -DNO_NEWT_SUPPORT,$(BASIC_CFLAGS)),) LIB_OBJS += $(OUTPUT)ui/setup.o diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index a4d0f2b4a2d..793cb6116dd 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -29,6 +29,8 @@ static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx) struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window); int perf_gtk__deactivate_context(struct perf_gtk_context **ctx); +void perf_gtk__init_helpline(void); + #ifndef HAVE_GTK_INFO_BAR static inline GtkWidget *perf_gtk__setup_info_bar(void) { diff --git a/tools/perf/ui/gtk/helpline.c b/tools/perf/ui/gtk/helpline.c new file mode 100644 index 00000000000..2511b374679 --- /dev/null +++ b/tools/perf/ui/gtk/helpline.c @@ -0,0 +1,30 @@ +#include "gtk.h" +#include "../helpline.h" + +static void gtk_helpline_pop(void) +{ + if (!perf_gtk__is_active_context(pgctx)) + return; + + gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar), + pgctx->statbar_ctx_id); +} + +static void gtk_helpline_push(const char *msg) +{ + if (!perf_gtk__is_active_context(pgctx)) + return; + + gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar), + pgctx->statbar_ctx_id, msg); +} + +static struct ui_helpline gtk_helpline_fns = { + .pop = gtk_helpline_pop, + .push = gtk_helpline_push, +}; + +void perf_gtk__init_helpline(void) +{ + helpline_fns = >k_helpline_fns; +} diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index 92879ce61e2..ad40b3626fd 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -7,6 +7,7 @@ extern struct perf_error_ops perf_gtk_eops; int perf_gtk__init(void) { perf_error__register(&perf_gtk_eops); + perf_gtk__init_helpline(); return gtk_init_check(NULL, NULL) ? 0 : -1; } From ed70c609ae92e0cb03b746ab566c3cf8f2aaede4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Aug 2012 17:14:53 +0900 Subject: [PATCH 091/282] perf ui/gtk: Use helpline API in browser As we now have a helpline implementation, use it for displaying help messages. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345104894-14205-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/browser.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index ec12e0b4ded..26b5b652a8c 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -3,6 +3,7 @@ #include "../evsel.h" #include "../sort.h" #include "../hist.h" +#include "../helpline.h" #include "gtk.h" #include @@ -166,7 +167,7 @@ static GtkWidget *perf_gtk__setup_statusbar(void) } int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, - const char *help __used, + const char *help, void (*timer) (void *arg)__used, void *arg __used, int delay_secs __used) { @@ -233,6 +234,8 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER); + ui_helpline__push(help); + gtk_main(); perf_gtk__deactivate_context(&pgctx); From 0985a94891c73740dea1e2697f9d598a4a7810ab Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Aug 2012 17:14:54 +0900 Subject: [PATCH 092/282] perf ui gtk: Add perf_gtk__show_helpline() for pr_* Use helpline for printing error/debug messages. The code resembles a TUI counter part and only print the first line of the message. 
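To illustrate the "only the first line can be displayed" behaviour described above, here is a small self-contained sketch. It mirrors the shape of the accumulation logic but uses plain vsnprintf and a printf stand-in instead of the GTK status bar, so the buffer size and names are assumptions for the example only:

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

static char helpline_buf[512];

/* Stand-in for pushing a message onto a one-line status bar. */
static void helpline_puts(const char *msg)
{
	printf("[helpline] %s\n", msg);
}

/*
 * Accumulate printf-style fragments until a complete line is available,
 * then display only the first line; anything after it is dropped because
 * a one-line status bar cannot show it.
 */
static int show_helpline(const char *fmt, ...)
{
	static int backlog;
	va_list ap;
	char *nl;
	int ret;

	va_start(ap, fmt);
	ret = vsnprintf(helpline_buf + backlog,
			sizeof(helpline_buf) - backlog, fmt, ap);
	va_end(ap);

	/* Clamp to what actually fits (the real code uses vscnprintf). */
	if (ret < 0)
		ret = 0;
	if ((size_t)ret >= sizeof(helpline_buf) - backlog)
		ret = (int)(sizeof(helpline_buf) - backlog) - 1;
	backlog += ret;

	nl = strchr(helpline_buf, '\n');
	if (nl && (nl - helpline_buf) <= backlog) {
		*nl = '\0';
		helpline_puts(helpline_buf);
		backlog = 0;
	}
	return ret;
}

int main(void)
{
	show_helpline("failed to open ");	/* nothing shown yet */
	show_helpline("%s: %s\nthis second line is dropped\n",
		      "perf.data", "No such file");	/* shows first line */
	return 0;
}

The static backlog offset lets a message arrive in several fragments; nothing is displayed until a newline shows up, and everything after the first newline is deliberately discarded.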
Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345104894-14205-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/helpline.c | 26 ++++++++++++++++++++++++++ tools/perf/ui/gtk/util.c | 5 ----- tools/perf/ui/helpline.h | 23 +++++++++++++++++++++++ tools/perf/ui/setup.c | 4 ++++ tools/perf/ui/tui/setup.c | 2 -- tools/perf/util/debug.c | 4 +++- tools/perf/util/debug.h | 8 +------- 7 files changed, 57 insertions(+), 15 deletions(-) diff --git a/tools/perf/ui/gtk/helpline.c b/tools/perf/ui/gtk/helpline.c index 2511b374679..5db4432ff12 100644 --- a/tools/perf/ui/gtk/helpline.c +++ b/tools/perf/ui/gtk/helpline.c @@ -1,5 +1,10 @@ +#include +#include + #include "gtk.h" +#include "../ui.h" #include "../helpline.h" +#include "../../util/debug.h" static void gtk_helpline_pop(void) { @@ -28,3 +33,24 @@ void perf_gtk__init_helpline(void) { helpline_fns = >k_helpline_fns; } + +int perf_gtk__show_helpline(const char *fmt, va_list ap) +{ + int ret; + char *ptr; + static int backlog; + + ret = vscnprintf(ui_helpline__current + backlog, + sizeof(ui_helpline__current) - backlog, fmt, ap); + backlog += ret; + + /* only first line can be displayed */ + ptr = strchr(ui_helpline__current, '\n'); + if (ptr && (ptr - ui_helpline__current) <= backlog) { + *ptr = '\0'; + ui_helpline__puts(ui_helpline__current); + backlog = 0; + } + + return ret; +} diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index 0ead373c0df..b8efb966f94 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -117,11 +117,6 @@ struct perf_error_ops perf_gtk_eops = { * For now, just add stubs for NO_NEWT=1 build. */ #ifdef NO_NEWT_SUPPORT -int ui_helpline__show_help(const char *format __used, va_list ap __used) -{ - return 0; -} - void ui_progress__update(u64 curr __used, u64 total __used, const char *title __used) { diff --git a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h index 61118b2bc24..a2487f93aa4 100644 --- a/tools/perf/ui/helpline.h +++ b/tools/perf/ui/helpline.h @@ -4,6 +4,8 @@ #include #include +#include "../util/cache.h" + struct ui_helpline { void (*pop)(void); void (*push)(const char *msg); @@ -21,4 +23,25 @@ void ui_helpline__puts(const char *msg); extern char ui_helpline__current[512]; +#ifdef NO_NEWT_SUPPORT +static inline int ui_helpline__show_help(const char *format __used, + va_list ap __used) +{ + return 0; +} +#else +extern char ui_helpline__last_msg[]; +int ui_helpline__show_help(const char *format, va_list ap); +#endif /* NO_NEWT_SUPPORT */ + +#ifdef NO_GTK2_SUPPORT +static inline int perf_gtk__show_helpline(const char *format __used, + va_list ap __used) +{ + return 0; +} +#else +int perf_gtk__show_helpline(const char *format, va_list ap); +#endif /* NO_GTK2_SUPPORT */ + #endif /* _PERF_UI_HELPLINE_H_ */ diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index 791fb15ce35..c7820e56966 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -1,7 +1,11 @@ +#include + #include "../cache.h" #include "../debug.h" +pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; + void setup_browser(bool fallback_to_pager) { if (!isatty(1) || dump_trace) diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index e813c1d1734..4c936e09931 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -11,8 +11,6 @@ #include "../libslang.h" #include "../keysyms.h" -pthread_mutex_t ui__lock = 
PTHREAD_MUTEX_INITIALIZER; - static volatile int ui__need_resize; extern struct perf_error_ops perf_tui_eops; diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 4dfe0bb3c32..66eb3828ceb 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -23,8 +23,10 @@ int eprintf(int level, const char *fmt, ...) if (verbose >= level) { va_start(args, fmt); - if (use_browser > 0) + if (use_browser == 1) ret = ui_helpline__show_help(fmt, args); + else if (use_browser == 2) + ret = perf_gtk__show_helpline(fmt, args); else ret = vfprintf(stderr, fmt, args); va_end(args); diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 015c91dbc09..05e660cbf7e 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -4,6 +4,7 @@ #include #include "event.h" +#include "../ui/helpline.h" extern int verbose; extern bool quiet, dump_trace; @@ -15,11 +16,6 @@ struct ui_progress; struct perf_error_ops; #if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT) -static inline int ui_helpline__show_help(const char *format __used, va_list ap __used) -{ - return 0; -} - static inline void ui_progress__update(u64 curr __used, u64 total __used, const char *title __used) {} @@ -39,8 +35,6 @@ perf_error__unregister(struct perf_error_ops *eops __used) #else /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */ -extern char ui_helpline__last_msg[]; -int ui_helpline__show_help(const char *format, va_list ap); #include "../ui/progress.h" int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); #include "../ui/util.h" From f4d834367cda98eee3769638da6ad687607c74e6 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Aug 2012 21:10:17 +0200 Subject: [PATCH 093/282] perf tools: Fix type for evsel->ids and add size check for ids Use the same type for ids everywhere. When writing to perf.data the size is u32. In pipe mode it is limited to header.size (less than u16). Add a size check here. Size overflow due to casting shouldn't actually happen in practice, but during development this may cause type mismatch warnings/errors; unifying types avoids this.
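As a concrete illustration of the overflow check being added, here is a minimal sketch. The struct and helper below are invented for the example (the real patch performs the equivalent test on ev->attr.header.size before calling process()); only the cast-and-compare idiom itself reflects the change:

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* A record header whose size field is only 16 bits wide. */
struct record_header {
	uint32_t type;
	uint16_t misc;
	uint16_t size;
};

/* Hypothetical helper: store a computed event size into the u16 field. */
static int set_event_size(struct record_header *hdr, size_t size)
{
	hdr->size = (uint16_t)size;

	/* If the cast truncated the value, the event cannot be represented. */
	if ((size_t)hdr->size != size)
		return -E2BIG;
	return 0;
}

int main(void)
{
	struct record_header hdr = { 0, 0, 0 };

	printf("4096 bytes  -> %d\n", set_event_size(&hdr, 4096));   /* 0 */
	printf("70000 bytes -> %d\n", set_event_size(&hdr, 70000));  /* negative: -E2BIG */
	return 0;
}

Because 70000 does not fit in 16 bits, the cast wraps and the comparison fails, so the caller can bail out instead of emitting a corrupt header.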
Signed-off-by: Robert Richter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345144224-27280-2-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 2 +- tools/perf/util/header.c | 11 +++++++---- tools/perf/util/header.h | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 65f39fdf821..94f6ba16747 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -53,7 +53,7 @@ struct perf_evsel { u64 *id; struct perf_counts *counts; int idx; - int ids; + u32 ids; struct hists hists; char *name; struct event_format *tp_format; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 77832b807ef..471b0c42db3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2240,7 +2240,7 @@ out_delete_evlist: } int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u16 ids, u64 *id, + struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process) { union perf_event *ev; @@ -2261,9 +2261,12 @@ int perf_event__synthesize_attr(struct perf_tool *tool, memcpy(ev->attr.id, id, ids * sizeof(u64)); ev->attr.header.type = PERF_RECORD_HEADER_ATTR; - ev->attr.header.size = size; + ev->attr.header.size = (u16)size; - err = process(tool, ev, NULL, NULL); + if (ev->attr.header.size == size) + err = process(tool, ev, NULL, NULL); + else + err = -E2BIG; free(ev); @@ -2292,7 +2295,7 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, int perf_event__process_attr(union perf_event *event, struct perf_evlist **pevlist) { - unsigned int i, ids, n_ids; + u32 i, ids, n_ids; struct perf_evsel *evsel; struct perf_evlist *evlist = *pevlist; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 2d42b3e1826..24962e707e5 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -99,7 +99,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u16 ids, u64 *id, + struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); int perf_event__synthesize_attrs(struct perf_tool *tool, struct perf_session *session, From ca1b145761e9355f90bfae1f42c9d9194702066b Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Aug 2012 21:10:18 +0200 Subject: [PATCH 094/282] perf tools: Report number of pmu type of unknown events If detection fails and an event name is unknown, report the type number. 
Example perf header output: # Samples: 10K of event 'unknown attr type: 7' Signed-off-by: Robert Richter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345144224-27280-3-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6c7dcc1fde5..7ff3c8fb736 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -319,7 +319,8 @@ const char *perf_evsel__name(struct perf_evsel *evsel) break; default: - scnprintf(bf, sizeof(bf), "%s", "unknown attr type"); + scnprintf(bf, sizeof(bf), "unknown attr type: %d", + evsel->attr.type); break; } From 6606f8733d7b635e09e2dc3a391a5e1f0feb218f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Aug 2012 21:10:19 +0200 Subject: [PATCH 095/282] perf tools: Rename some variables for better understanding Trivial patch to improve understanding of code. Varible attr is usually used for struct perf_event_attr. Using it in a different context is irritating. Thus, renaming it. Signed-off-by: Robert Richter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345144224-27280-4-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 471b0c42db3..e2e5129873f 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -608,11 +608,11 @@ static int write_nrcpus(int fd, struct perf_header *h __used, static int write_event_desc(int fd, struct perf_header *h __used, struct perf_evlist *evlist) { - struct perf_evsel *attr; + struct perf_evsel *evsel; u32 nre = 0, nri, sz; int ret; - list_for_each_entry(attr, &evlist->entries, node) + list_for_each_entry(evsel, &evlist->entries, node) nre++; /* @@ -625,14 +625,14 @@ static int write_event_desc(int fd, struct perf_header *h __used, /* * size of perf_event_attr struct */ - sz = (u32)sizeof(attr->attr); + sz = (u32)sizeof(evsel->attr); ret = do_write(fd, &sz, sizeof(sz)); if (ret < 0) return ret; - list_for_each_entry(attr, &evlist->entries, node) { + list_for_each_entry(evsel, &evlist->entries, node) { - ret = do_write(fd, &attr->attr, sz); + ret = do_write(fd, &evsel->attr, sz); if (ret < 0) return ret; /* @@ -642,7 +642,7 @@ static int write_event_desc(int fd, struct perf_header *h __used, * copy into an nri to be independent of the * type of ids, */ - nri = attr->ids; + nri = evsel->ids; ret = do_write(fd, &nri, sizeof(nri)); if (ret < 0) return ret; @@ -650,13 +650,13 @@ static int write_event_desc(int fd, struct perf_header *h __used, /* * write event string as passed on cmdline */ - ret = do_write_string(fd, perf_evsel__name(attr)); + ret = do_write_string(fd, perf_evsel__name(evsel)); if (ret < 0) return ret; /* * write unique ids for this event */ - ret = do_write(fd, attr->id, attr->ids * sizeof(u64)); + ret = do_write(fd, evsel->id, evsel->ids * sizeof(u64)); if (ret < 0) return ret; } @@ -1683,7 +1683,7 @@ int perf_session__write_header(struct perf_session *session, struct perf_file_header f_header; struct perf_file_attr f_attr; struct perf_header *header = &session->header; - struct perf_evsel *attr, *pair = NULL; + struct perf_evsel *evsel, *pair = NULL; int err; lseek(fd, sizeof(f_header), SEEK_SET); @@ -1691,9 +1691,9 @@ int 
perf_session__write_header(struct perf_session *session, if (session->evlist != evlist) pair = perf_evlist__first(session->evlist); - list_for_each_entry(attr, &evlist->entries, node) { - attr->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(fd, attr->id, attr->ids * sizeof(u64)); + list_for_each_entry(evsel, &evlist->entries, node) { + evsel->id_offset = lseek(fd, 0, SEEK_CUR); + err = do_write(fd, evsel->id, evsel->ids * sizeof(u64)); if (err < 0) { out_err_write: pr_debug("failed to write perf header\n"); @@ -1703,19 +1703,19 @@ out_err_write: err = do_write(fd, pair->id, pair->ids * sizeof(u64)); if (err < 0) goto out_err_write; - attr->ids += pair->ids; + evsel->ids += pair->ids; pair = perf_evsel__next(pair); } } header->attr_offset = lseek(fd, 0, SEEK_CUR); - list_for_each_entry(attr, &evlist->entries, node) { + list_for_each_entry(evsel, &evlist->entries, node) { f_attr = (struct perf_file_attr){ - .attr = attr->attr, + .attr = evsel->attr, .ids = { - .offset = attr->id_offset, - .size = attr->ids * sizeof(u64), + .offset = evsel->id_offset, + .size = evsel->ids * sizeof(u64), } }; err = do_write(fd, &f_attr, sizeof(f_attr)); @@ -2277,12 +2277,12 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process) { - struct perf_evsel *attr; + struct perf_evsel *evsel; int err = 0; - list_for_each_entry(attr, &session->evlist->entries, node) { - err = perf_event__synthesize_attr(tool, &attr->attr, attr->ids, - attr->id, process); + list_for_each_entry(evsel, &session->evlist->entries, node) { + err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids, + evsel->id, process); if (err) { pr_debug("failed to create perf header attribute\n"); return err; From db146f06ac0bbea63271b6c78f4341ee46843e10 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Aug 2012 21:10:20 +0200 Subject: [PATCH 096/282] perf tools: Rename global variable 'events' in util/header.c Trivial patch that renames global variable 'events' in util/header.c. Use a more specific naming to avoid conflicts. Same for variable 'event_count'. 
Signed-off-by: Robert Richter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345144224-27280-5-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e2e5129873f..1e5b6aa6052 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -23,8 +23,8 @@ static bool no_buildid_cache = false; -static int event_count; -static struct perf_trace_event_type *events; +static int trace_event_count; +static struct perf_trace_event_type *trace_events; static u32 header_argc; static const char **header_argv; @@ -36,24 +36,24 @@ int perf_header__push_event(u64 id, const char *name) if (strlen(name) > MAX_EVENT_NAME) pr_warning("Event %s will be truncated\n", name); - nevents = realloc(events, (event_count + 1) * sizeof(*events)); + nevents = realloc(trace_events, (trace_event_count + 1) * sizeof(*trace_events)); if (nevents == NULL) return -ENOMEM; - events = nevents; + trace_events = nevents; - memset(&events[event_count], 0, sizeof(struct perf_trace_event_type)); - events[event_count].event_id = id; - strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1); - event_count++; + memset(&trace_events[trace_event_count], 0, sizeof(struct perf_trace_event_type)); + trace_events[trace_event_count].event_id = id; + strncpy(trace_events[trace_event_count].name, name, MAX_EVENT_NAME - 1); + trace_event_count++; return 0; } char *perf_header__find_event(u64 id) { int i; - for (i = 0 ; i < event_count; i++) { - if (events[i].event_id == id) - return events[i].name; + for (i = 0 ; i < trace_event_count; i++) { + if (trace_events[i].event_id == id) + return trace_events[i].name; } return NULL; } @@ -1726,9 +1726,9 @@ out_err_write: } header->event_offset = lseek(fd, 0, SEEK_CUR); - header->event_size = event_count * sizeof(struct perf_trace_event_type); - if (events) { - err = do_write(fd, events, header->event_size); + header->event_size = trace_event_count * sizeof(struct perf_trace_event_type); + if (trace_events) { + err = do_write(fd, trace_events, header->event_size); if (err < 0) { pr_debug("failed to write perf header events\n"); return err; @@ -2211,13 +2211,13 @@ int perf_session__read_header(struct perf_session *session, int fd) if (f_header.event_types.size) { lseek(fd, f_header.event_types.offset, SEEK_SET); - events = malloc(f_header.event_types.size); - if (events == NULL) + trace_events = malloc(f_header.event_types.size); + if (trace_events == NULL) return -ENOMEM; - if (perf_header__getbuffer64(header, fd, events, + if (perf_header__getbuffer64(header, fd, trace_events, f_header.event_types.size)) goto out_errno; - event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); + trace_event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } perf_header__process_sections(header, fd, &session->pevent, @@ -2362,8 +2362,8 @@ int perf_event__synthesize_event_types(struct perf_tool *tool, struct perf_trace_event_type *type; int i, err = 0; - for (i = 0; i < event_count; i++) { - type = &events[i]; + for (i = 0; i < trace_event_count; i++) { + type = &trace_events[i]; err = perf_event__synthesize_event_type(tool, type->event_id, type->name, process, From 2708bf3a30b7bfa02d60a3f03603b6b92d093f1a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 18 Aug 2012 01:56:23 +0900 Subject: [PATCH 097/282] 
perf ui gtk: Ensure not to call gtk_main_quit() twice Currently the gtk_main_quit() is called twice when perf exits so the following warning is emitted: [penberg@tux perf]$ ./perf report --gtk ^Cperf: Interrupt (perf:4048): Gtk-CRITICAL **: IA__gtk_main_quit: assertion `main_loops != NULL' failed Fix it by not to call it unnecessarily. Reported-by: Pekka Enberg Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Ingo Molnar Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345222583-3964-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index ad40b3626fd..ec1ee26b485 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -13,6 +13,8 @@ int perf_gtk__init(void) void perf_gtk__exit(bool wait_for_ok __used) { + if (!perf_gtk__is_active_context(pgctx)) + return; perf_error__unregister(&perf_gtk_eops); gtk_main_quit(); } From e8d0f400ee3247ad22faa6afabfa7022260ef7a5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 19 Aug 2012 09:46:22 -0600 Subject: [PATCH 098/282] perf script perl/python: Fix libexec scripts path in Documentation The libexec path is /libexec/perf-core/scripts/*/Perf-Trace-Util. Signed-off-by: David Ahern Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1345391182-71825-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script-perl.txt | 4 ++-- tools/perf/Documentation/perf-script-python.txt | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index 3152cca1550..d00bef23134 100644 --- a/tools/perf/Documentation/perf-script-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -116,8 +116,8 @@ search path and 'use'ing a few support modules (see module descriptions below): ---- - use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib"; - use lib "./perf-script-Util/lib"; + use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; + use lib "./Perf-Trace-Util/lib"; use Perf::Trace::Core; use Perf::Trace::Context; use Perf::Trace::Util; diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 47102206911..a4027f221a5 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -129,7 +129,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -216,7 +216,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -279,7 +279,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -391,7 +391,7 @@ drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. 
drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin -rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py -drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util +drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py ---- @@ -518,7 +518,7 @@ descriptions below): import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * From 1c09bf4a79e808c13c02d4ca8221fb2957bf3ccb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 19 Aug 2012 09:46:42 -0600 Subject: [PATCH 099/282] perf: silence GTK2 probing errors If GTK2 development packages are not installed, make is rather noisy: $ make -C tools/perf O=/tmp/pbuild Package gtk+-2.0 was not found in the pkg-config search path. Perhaps you should add the directory containing `gtk+-2.0.pc' to the PKG_CONFIG_PATH environment variable No package 'gtk+-2.0' found make: Entering directory `/opt/sw/ahern/perf.git/tools/perf' Makefile:593: GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev PERF_VERSION = 3.6.rc1.205.gdb146f.dirty make: Leaving directory `/opt/sw/ahern/perf.git/tools/perf' Package gtk+-2.0 was not found in the pkg-config search path. Perhaps you should add the directory containing `gtk+-2.0.pc' to the PKG_CONFIG_PATH environment variable No package 'gtk+-2.0' found make: Entering directory `/opt/sw/ahern/perf.git/tools/perf' Makefile:593: GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev ... Silence the pkg-config errors. Aftewards: $ make -C tools/perf O=/tmp/pbuild make: Entering directory `/opt/sw/ahern/perf.git/tools/perf' Makefile:593: GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev PERF_VERSION = 3.6.rc1.206.gd43ff9.dirty make: Leaving directory `/opt/sw/ahern/perf.git/tools/perf' make: Entering directory `/opt/sw/ahern/perf.git/tools/perf' Makefile:593: GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev ... Signed-off-by: David Ahern Acked-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Namhyung Kim Cc: Pekka Enberg Link: http://lkml.kernel.org/r/1345391202-71865-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 75af93dc52a..8beff9944f4 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -587,7 +587,7 @@ endif ifdef NO_GTK2 BASIC_CFLAGS += -DNO_GTK2_SUPPORT else - FLAGS_GTK2=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0) + FLAGS_GTK2=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2)),y) msg := $(warning GTK2 not found, disables GTK2 support. 
Please install gtk2-devel or libgtk2.0-dev); BASIC_CFLAGS += -DNO_GTK2_SUPPORT @@ -595,8 +595,8 @@ else ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2)),y) BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR endif - BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0) - EXTLIBS += $(shell pkg-config --libs gtk+-2.0) + BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null) + EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null) LIB_OBJS += $(OUTPUT)ui/gtk/browser.o LIB_OBJS += $(OUTPUT)ui/gtk/setup.o LIB_OBJS += $(OUTPUT)ui/gtk/util.o From f47b58b75f5e2a424834eb15f7565a7458a12f44 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 19 Aug 2012 09:47:14 -0600 Subject: [PATCH 100/282] perf symbols: Fix builds with NO_LIBELF set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build currently fails: $ make -C tools/perf O=/tmp/pbuild NO_LIBELF=1 util/symbol.c: In function ‘dso__load’: util/symbol.c:1128:27: error: ‘struct symsrc’ has no member named ‘dynsym’ CC /tmp/pbuild/util/pager.o make: *** [/tmp/pbuild/util/symbol.o] Error 1 make: *** Waiting for unfinished jobs.... Moving the dynsym reference to symbol-elf.c reveals that NO_LIBELF requires NO_LIBUNWIND: $ make -C tools/perf O=/tmp/pbuild NO_LIBELF=1 LINK /tmp/pbuild/perf /tmp/pbuild/libperf.a(unwind.o): In function `elf_section_offset': /opt/sw/ahern/perf.git/tools/perf/util/unwind.c:176: undefined reference to `elf_begin' /opt/sw/ahern/perf.git/tools/perf/util/unwind.c:181: undefined reference to `gelf_getehdr' /tmp/pbuild/libperf.a(unwind.o): In function `elf_section_by_name': /opt/sw/ahern/perf.git/tools/perf/util/unwind.c:157: undefined reference to `elf_nextscn' /opt/sw/ahern/perf.git/tools/perf/util/unwind.c:160: undefined reference to `gelf_getshdr' /opt/sw/ahern/perf.git/tools/perf/util/unwind.c:161: undefined reference to `elf_strptr' /tmp/pbuild/libperf.a(unwind.o): In function `elf_section_offset': /opt/sw/ahern/perf.git/tools/perf/util/unwind.c:190: undefined reference to `elf_end' /tmp/pbuild/libperf.a(unwind.o): In function `read_unwind_spec': /opt/sw/ahern/perf.git/tools/perf/util/unwind.c:190: undefined reference to `elf_end' collect2: ld returned 1 exit status make: *** [/tmp/pbuild/perf] Error 1 make: Leaving directory `/opt/sw/ahern/perf.git/tools/perf' This patch fixes both. 
Reviewed-by: Namhyung Kim Signed-off-by: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1345391234-71906-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 1 + tools/perf/util/symbol-elf.c | 3 +++ tools/perf/util/symbol.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8beff9944f4..90cfecf7913 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -469,6 +469,7 @@ PYRF_OBJS += $(OUTPUT)util/xyarray.o ifdef NO_LIBELF NO_DWARF := 1 NO_DEMANGLE := 1 + NO_LIBUNWIND := 1 else FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 5b37e13f08f..db0cc92cf2e 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -183,6 +183,9 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * Elf *elf; int nr = 0, symidx, err = 0; + if (!ss->dynsym) + return 0; + elf = ss->elf; ehdr = ss->ehdr; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 2293a4a5af9..753699a20bc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1125,7 +1125,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) else ret = -1; - if (ret > 0 && runtime_ss->dynsym) { + if (ret > 0) { int nr_plt; nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map, filter); From ed7e2c2ec59422b68b643c479a5816197620016e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 20 Aug 2012 00:47:19 +0100 Subject: [PATCH 101/282] perf tools: Fix include order for bison/flex-generated C files When we use a separate output directory, we add util/ to the include path for the generated C files. However, this is currently added to the end of the path, behind /usr/include/slang and /usr/include/gtk-2.0 if use of the respective libraries is enabled. Thus the '#include "../perf.h"' in util/parse-events.l can actually include /usr/include/perf.h if it exists. Move '-Iutil/' ahead of all the other preprocessor options. Reported-by: Sedat Dilek Signed-off-by: Ben Hutchings Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sedat Dilek Link: http://lkml.kernel.org/r/1345420039.22400.80.camel@deadeye.wl.decadent.org.uk Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 90cfecf7913..77e7ae3ef1b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -841,10 +841,10 @@ $(OUTPUT)perf.o perf.spec \ # over the general rule for .o $(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Iutil/ -w $< + $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(ALL_CFLAGS) -w $< $(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -Iutil/ -w $< + $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(ALL_CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< $(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< From 7ccf4f9058ecff6eec11a271001d08d9024da8c0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 20 Aug 2012 13:52:05 +0900 Subject: [PATCH 102/282] perf hists: Separate out hist print functions Separate out those functions into ui/stdio/hist.c. This is required for upcoming changes. 
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345438331-20234-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 3 +- tools/perf/ui/stdio/hist.c | 648 +++++++++++++++++++++++++++++++++++ tools/perf/util/hist.c | 677 +------------------------------------ tools/perf/util/hist.h | 2 + 4 files changed, 669 insertions(+), 661 deletions(-) create mode 100644 tools/perf/ui/stdio/hist.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 77e7ae3ef1b..6bd888d04b6 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -404,11 +404,10 @@ LIB_OBJS += $(OUTPUT)util/target.o LIB_OBJS += $(OUTPUT)util/rblist.o LIB_OBJS += $(OUTPUT)util/intlist.o LIB_OBJS += $(OUTPUT)ui/helpline.o +LIB_OBJS += $(OUTPUT)ui/stdio/hist.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o - BUILTIN_OBJS += $(OUTPUT)builtin-bench.o - # Benchmark modules BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c new file mode 100644 index 00000000000..7881d625e17 --- /dev/null +++ b/tools/perf/ui/stdio/hist.c @@ -0,0 +1,648 @@ +#include +#include + +#include "../../util/util.h" +#include "../../util/hist.h" +#include "../../util/sort.h" + + +static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) +{ + int i; + int ret = fprintf(fp, " "); + + for (i = 0; i < left_margin; i++) + ret += fprintf(fp, " "); + + return ret; +} + +static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, + int left_margin) +{ + int i; + size_t ret = callchain__fprintf_left_margin(fp, left_margin); + + for (i = 0; i < depth; i++) + if (depth_mask & (1 << i)) + ret += fprintf(fp, "| "); + else + ret += fprintf(fp, " "); + + ret += fprintf(fp, "\n"); + + return ret; +} + +static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, + int depth, int depth_mask, int period, + u64 total_samples, u64 hits, + int left_margin) +{ + int i; + size_t ret = 0; + + ret += callchain__fprintf_left_margin(fp, left_margin); + for (i = 0; i < depth; i++) { + if (depth_mask & (1 << i)) + ret += fprintf(fp, "|"); + else + ret += fprintf(fp, " "); + if (!period && i == depth - 1) { + double percent; + + percent = hits * 100.0 / total_samples; + ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent); + } else + ret += fprintf(fp, "%s", " "); + } + if (chain->ms.sym) + ret += fprintf(fp, "%s\n", chain->ms.sym->name); + else + ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip); + + return ret; +} + +static struct symbol *rem_sq_bracket; +static struct callchain_list rem_hits; + +static void init_rem_hits(void) +{ + rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6); + if (!rem_sq_bracket) { + fprintf(stderr, "Not enough memory to display remaining hits\n"); + return; + } + + strcpy(rem_sq_bracket->name, "[...]"); + rem_hits.ms.sym = rem_sq_bracket; +} + +static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, + u64 total_samples, int depth, + int depth_mask, int left_margin) +{ + struct rb_node *node, *next; + struct callchain_node *child; + struct callchain_list *chain; + int new_depth_mask = depth_mask; + u64 remaining; + size_t ret = 0; + int i; + uint entries_printed = 0; + + remaining = total_samples; + + node = rb_first(root); + while (node) { + u64 new_total; + u64 cumul; + + child = rb_entry(node, struct callchain_node, rb_node); + cumul = callchain_cumul_hits(child); 
+ remaining -= cumul; + + /* + * The depth mask manages the output of pipes that show + * the depth. We don't want to keep the pipes of the current + * level for the last child of this depth. + * Except if we have remaining filtered hits. They will + * supersede the last child + */ + next = rb_next(node); + if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining)) + new_depth_mask &= ~(1 << (depth - 1)); + + /* + * But we keep the older depth mask for the line separator + * to keep the level link until we reach the last child + */ + ret += ipchain__fprintf_graph_line(fp, depth, depth_mask, + left_margin); + i = 0; + list_for_each_entry(chain, &child->val, list) { + ret += ipchain__fprintf_graph(fp, chain, depth, + new_depth_mask, i++, + total_samples, + cumul, + left_margin); + } + + if (callchain_param.mode == CHAIN_GRAPH_REL) + new_total = child->children_hit; + else + new_total = total_samples; + + ret += __callchain__fprintf_graph(fp, &child->rb_root, new_total, + depth + 1, + new_depth_mask | (1 << depth), + left_margin); + node = next; + if (++entries_printed == callchain_param.print_limit) + break; + } + + if (callchain_param.mode == CHAIN_GRAPH_REL && + remaining && remaining != total_samples) { + + if (!rem_sq_bracket) + return ret; + + new_depth_mask &= ~(1 << (depth - 1)); + ret += ipchain__fprintf_graph(fp, &rem_hits, depth, + new_depth_mask, 0, total_samples, + remaining, left_margin); + } + + return ret; +} + +static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, + u64 total_samples, int left_margin) +{ + struct callchain_node *cnode; + struct callchain_list *chain; + u32 entries_printed = 0; + bool printed = false; + struct rb_node *node; + int i = 0; + int ret = 0; + + /* + * If have one single callchain root, don't bother printing + * its percentage (100 % in fractal mode and the same percentage + * than the hist in graph mode). This also avoid one level of column. + */ + node = rb_first(root); + if (node && !rb_next(node)) { + cnode = rb_entry(node, struct callchain_node, rb_node); + list_for_each_entry(chain, &cnode->val, list) { + /* + * If we sort by symbol, the first entry is the same than + * the symbol. No need to print it otherwise it appears as + * displayed twice. 
+ */ + if (!i++ && sort__first_dimension == SORT_SYM) + continue; + if (!printed) { + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "|\n"); + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "---"); + left_margin += 3; + printed = true; + } else + ret += callchain__fprintf_left_margin(fp, left_margin); + + if (chain->ms.sym) + ret += fprintf(fp, " %s\n", chain->ms.sym->name); + else + ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); + + if (++entries_printed == callchain_param.print_limit) + break; + } + root = &cnode->rb_root; + } + + ret += __callchain__fprintf_graph(fp, root, total_samples, + 1, 1, left_margin); + ret += fprintf(fp, "\n"); + + return ret; +} + +static size_t __callchain__fprintf_flat(FILE *fp, + struct callchain_node *self, + u64 total_samples) +{ + struct callchain_list *chain; + size_t ret = 0; + + if (!self) + return 0; + + ret += __callchain__fprintf_flat(fp, self->parent, total_samples); + + + list_for_each_entry(chain, &self->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + if (chain->ms.sym) + ret += fprintf(fp, " %s\n", chain->ms.sym->name); + else + ret += fprintf(fp, " %p\n", + (void *)(long)chain->ip); + } + + return ret; +} + +static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *self, + u64 total_samples) +{ + size_t ret = 0; + u32 entries_printed = 0; + struct rb_node *rb_node; + struct callchain_node *chain; + + rb_node = rb_first(self); + while (rb_node) { + double percent; + + chain = rb_entry(rb_node, struct callchain_node, rb_node); + percent = chain->hit * 100.0 / total_samples; + + ret = percent_color_fprintf(fp, " %6.2f%%\n", percent); + ret += __callchain__fprintf_flat(fp, chain, total_samples); + ret += fprintf(fp, "\n"); + if (++entries_printed == callchain_param.print_limit) + break; + + rb_node = rb_next(rb_node); + } + + return ret; +} + +static size_t hist_entry_callchain__fprintf(struct hist_entry *he, + u64 total_samples, int left_margin, + FILE *fp) +{ + switch (callchain_param.mode) { + case CHAIN_GRAPH_REL: + return callchain__fprintf_graph(fp, &he->sorted_chain, he->period, + left_margin); + break; + case CHAIN_GRAPH_ABS: + return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, + left_margin); + break; + case CHAIN_FLAT: + return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); + break; + case CHAIN_NONE: + break; + default: + pr_err("Bad callchain mode\n"); + } + + return 0; +} + +static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s, + size_t size, struct hists *pair_hists, + bool show_displacement, long displacement, + bool color, u64 total_period) +{ + u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; + u64 nr_events; + const char *sep = symbol_conf.field_sep; + int ret; + + if (symbol_conf.exclude_other && !he->parent) + return 0; + + if (pair_hists) { + period = he->pair ? he->pair->period : 0; + nr_events = he->pair ? he->pair->nr_events : 0; + total = pair_hists->stats.total_period; + period_sys = he->pair ? he->pair->period_sys : 0; + period_us = he->pair ? he->pair->period_us : 0; + period_guest_sys = he->pair ? he->pair->period_guest_sys : 0; + period_guest_us = he->pair ? 
he->pair->period_guest_us : 0; + } else { + period = he->period; + nr_events = he->nr_events; + total = total_period; + period_sys = he->period_sys; + period_us = he->period_us; + period_guest_sys = he->period_guest_sys; + period_guest_us = he->period_guest_us; + } + + if (total) { + if (color) + ret = percent_color_snprintf(s, size, + sep ? "%.2f" : " %6.2f%%", + (period * 100.0) / total); + else + ret = scnprintf(s, size, sep ? "%.2f" : " %6.2f%%", + (period * 100.0) / total); + if (symbol_conf.show_cpu_utilization) { + ret += percent_color_snprintf(s + ret, size - ret, + sep ? "%.2f" : " %6.2f%%", + (period_sys * 100.0) / total); + ret += percent_color_snprintf(s + ret, size - ret, + sep ? "%.2f" : " %6.2f%%", + (period_us * 100.0) / total); + if (perf_guest) { + ret += percent_color_snprintf(s + ret, + size - ret, + sep ? "%.2f" : " %6.2f%%", + (period_guest_sys * 100.0) / + total); + ret += percent_color_snprintf(s + ret, + size - ret, + sep ? "%.2f" : " %6.2f%%", + (period_guest_us * 100.0) / + total); + } + } + } else + ret = scnprintf(s, size, sep ? "%" PRIu64 : "%12" PRIu64 " ", period); + + if (symbol_conf.show_nr_samples) { + if (sep) + ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events); + else + ret += scnprintf(s + ret, size - ret, "%11" PRIu64, nr_events); + } + + if (symbol_conf.show_total_period) { + if (sep) + ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); + else + ret += scnprintf(s + ret, size - ret, " %12" PRIu64, period); + } + + if (pair_hists) { + char bf[32]; + double old_percent = 0, new_percent = 0, diff; + + if (total > 0) + old_percent = (period * 100.0) / total; + if (total_period > 0) + new_percent = (he->period * 100.0) / total_period; + + diff = new_percent - old_percent; + + if (fabs(diff) >= 0.01) + scnprintf(bf, sizeof(bf), "%+4.2F%%", diff); + else + scnprintf(bf, sizeof(bf), " "); + + if (sep) + ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); + else + ret += scnprintf(s + ret, size - ret, "%11.11s", bf); + + if (show_displacement) { + if (displacement) + scnprintf(bf, sizeof(bf), "%+4ld", displacement); + else + scnprintf(bf, sizeof(bf), " "); + + if (sep) + ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); + else + ret += scnprintf(s + ret, size - ret, "%6.6s", bf); + } + } + + return ret; +} + +int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size, + struct hists *hists) +{ + const char *sep = symbol_conf.field_sep; + struct sort_entry *se; + int ret = 0; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + + ret += scnprintf(s + ret, size - ret, "%s", sep ?: " "); + ret += se->se_snprintf(he, s + ret, size - ret, + hists__col_len(hists, se->se_width_idx)); + } + + return ret; +} + +static int hist_entry__fprintf(struct hist_entry *he, size_t size, + struct hists *hists, struct hists *pair_hists, + bool show_displacement, long displacement, + u64 total_period, FILE *fp) +{ + char bf[512]; + int ret; + + if (size == 0 || size > sizeof(bf)) + size = sizeof(bf); + + ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists, + show_displacement, displacement, + true, total_period); + hist_entry__snprintf(he, bf + ret, size - ret, hists); + return fprintf(fp, "%s\n", bf); +} + +static size_t hist_entry__fprintf_callchain(struct hist_entry *he, + struct hists *hists, + u64 total_period, FILE *fp) +{ + int left_margin = 0; + + if (sort__first_dimension == SORT_COMM) { + struct sort_entry *se = list_first_entry(&hist_entry__sort_list, + typeof(*se), 
list); + left_margin = hists__col_len(hists, se->se_width_idx); + left_margin -= thread__comm_len(he->thread); + } + + return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); +} + +size_t hists__fprintf(struct hists *hists, struct hists *pair, + bool show_displacement, bool show_header, int max_rows, + int max_cols, FILE *fp) +{ + struct sort_entry *se; + struct rb_node *nd; + size_t ret = 0; + u64 total_period; + unsigned long position = 1; + long displacement = 0; + unsigned int width; + const char *sep = symbol_conf.field_sep; + const char *col_width = symbol_conf.col_width_list_str; + int nr_rows = 0; + + init_rem_hits(); + + if (!show_header) + goto print_entries; + + fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); + + if (symbol_conf.show_cpu_utilization) { + if (sep) { + ret += fprintf(fp, "%csys", *sep); + ret += fprintf(fp, "%cus", *sep); + if (perf_guest) { + ret += fprintf(fp, "%cguest sys", *sep); + ret += fprintf(fp, "%cguest us", *sep); + } + } else { + ret += fprintf(fp, " sys "); + ret += fprintf(fp, " us "); + if (perf_guest) { + ret += fprintf(fp, " guest sys "); + ret += fprintf(fp, " guest us "); + } + } + } + + if (symbol_conf.show_nr_samples) { + if (sep) + fprintf(fp, "%cSamples", *sep); + else + fputs(" Samples ", fp); + } + + if (symbol_conf.show_total_period) { + if (sep) + ret += fprintf(fp, "%cPeriod", *sep); + else + ret += fprintf(fp, " Period "); + } + + if (pair) { + if (sep) + ret += fprintf(fp, "%cDelta", *sep); + else + ret += fprintf(fp, " Delta "); + + if (show_displacement) { + if (sep) + ret += fprintf(fp, "%cDisplacement", *sep); + else + ret += fprintf(fp, " Displ"); + } + } + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + if (sep) { + fprintf(fp, "%c%s", *sep, se->se_header); + continue; + } + width = strlen(se->se_header); + if (symbol_conf.col_width_list_str) { + if (col_width) { + hists__set_col_len(hists, se->se_width_idx, + atoi(col_width)); + col_width = strchr(col_width, ','); + if (col_width) + ++col_width; + } + } + if (!hists__new_col_len(hists, se->se_width_idx, width)) + width = hists__col_len(hists, se->se_width_idx); + fprintf(fp, " %*s", width, se->se_header); + } + + fprintf(fp, "\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + + if (sep) + goto print_entries; + + fprintf(fp, "# ........"); + if (symbol_conf.show_cpu_utilization) + fprintf(fp, " ....... 
......."); + if (symbol_conf.show_nr_samples) + fprintf(fp, " .........."); + if (symbol_conf.show_total_period) + fprintf(fp, " ............"); + if (pair) { + fprintf(fp, " .........."); + if (show_displacement) + fprintf(fp, " ....."); + } + list_for_each_entry(se, &hist_entry__sort_list, list) { + unsigned int i; + + if (se->elide) + continue; + + fprintf(fp, " "); + width = hists__col_len(hists, se->se_width_idx); + if (width == 0) + width = strlen(se->se_header); + for (i = 0; i < width; i++) + fprintf(fp, "."); + } + + fprintf(fp, "\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + + fprintf(fp, "#\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + +print_entries: + total_period = hists->stats.total_period; + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + if (h->filtered) + continue; + + if (show_displacement) { + if (h->pair != NULL) + displacement = ((long)h->pair->position - + (long)position); + else + displacement = 0; + ++position; + } + ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, + displacement, total_period, fp); + + if (symbol_conf.use_callchain) + ret += hist_entry__fprintf_callchain(h, hists, total_period, fp); + if (max_rows && ++nr_rows >= max_rows) + goto out; + + if (h->ms.map == NULL && verbose > 1) { + __map_groups__fprintf_maps(&h->thread->mg, + MAP__FUNCTION, verbose, fp); + fprintf(fp, "%.10s end\n", graph_dotted_line); + } + } +out: + free(rem_sq_bracket); + + return ret; +} + +size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) +{ + int i; + size_t ret = 0; + + for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { + const char *name; + + if (hists->stats.nr_events[i] == 0) + continue; + + name = perf_event__name(i); + if (!strcmp(name, "UNKNOWN")) + continue; + + ret += fprintf(fp, "%16s events: %10d\n", name, + hists->stats.nr_events[i]); + } + + return ret; +} diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f247ef2789a..b1817f15bb8 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -45,7 +45,7 @@ bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len) return false; } -static void hists__reset_col_len(struct hists *hists) +void hists__reset_col_len(struct hists *hists) { enum hist_column col; @@ -63,7 +63,7 @@ static void hists__set_unres_dso_col_len(struct hists *hists, int dso) hists__set_col_len(hists, dso, unresolved_col_width); } -static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) +void hists__calc_col_len(struct hists *hists, struct hist_entry *h) { const unsigned int unresolved_col_width = BITS_PER_LONG / 4; u16 len; @@ -114,6 +114,22 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) } } +void hists__output_recalc_col_len(struct hists *hists, int max_rows) +{ + struct rb_node *next = rb_first(&hists->entries); + struct hist_entry *n; + int row = 0; + + hists__reset_col_len(hists); + + while (next && row++ < max_rows) { + n = rb_entry(next, struct hist_entry, rb_node); + if (!n->filtered) + hists__calc_col_len(hists, n); + next = rb_next(&n->rb_node); + } +} + static void hist_entry__add_cpumode_period(struct hist_entry *he, unsigned int cpumode, u64 period) { @@ -547,641 +563,6 @@ void hists__output_resort_threaded(struct hists *hists) return __hists__output_resort(hists, true); } -static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) -{ - int i; - int ret = fprintf(fp, " "); - - for (i = 0; i < 
left_margin; i++) - ret += fprintf(fp, " "); - - return ret; -} - -static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, - int left_margin) -{ - int i; - size_t ret = callchain__fprintf_left_margin(fp, left_margin); - - for (i = 0; i < depth; i++) - if (depth_mask & (1 << i)) - ret += fprintf(fp, "| "); - else - ret += fprintf(fp, " "); - - ret += fprintf(fp, "\n"); - - return ret; -} - -static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, - int depth, int depth_mask, int period, - u64 total_samples, u64 hits, - int left_margin) -{ - int i; - size_t ret = 0; - - ret += callchain__fprintf_left_margin(fp, left_margin); - for (i = 0; i < depth; i++) { - if (depth_mask & (1 << i)) - ret += fprintf(fp, "|"); - else - ret += fprintf(fp, " "); - if (!period && i == depth - 1) { - double percent; - - percent = hits * 100.0 / total_samples; - ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent); - } else - ret += fprintf(fp, "%s", " "); - } - if (chain->ms.sym) - ret += fprintf(fp, "%s\n", chain->ms.sym->name); - else - ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip); - - return ret; -} - -static struct symbol *rem_sq_bracket; -static struct callchain_list rem_hits; - -static void init_rem_hits(void) -{ - rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6); - if (!rem_sq_bracket) { - fprintf(stderr, "Not enough memory to display remaining hits\n"); - return; - } - - strcpy(rem_sq_bracket->name, "[...]"); - rem_hits.ms.sym = rem_sq_bracket; -} - -static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, - u64 total_samples, int depth, - int depth_mask, int left_margin) -{ - struct rb_node *node, *next; - struct callchain_node *child; - struct callchain_list *chain; - int new_depth_mask = depth_mask; - u64 remaining; - size_t ret = 0; - int i; - uint entries_printed = 0; - - remaining = total_samples; - - node = rb_first(root); - while (node) { - u64 new_total; - u64 cumul; - - child = rb_entry(node, struct callchain_node, rb_node); - cumul = callchain_cumul_hits(child); - remaining -= cumul; - - /* - * The depth mask manages the output of pipes that show - * the depth. We don't want to keep the pipes of the current - * level for the last child of this depth. - * Except if we have remaining filtered hits. 
They will - * supersede the last child - */ - next = rb_next(node); - if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining)) - new_depth_mask &= ~(1 << (depth - 1)); - - /* - * But we keep the older depth mask for the line separator - * to keep the level link until we reach the last child - */ - ret += ipchain__fprintf_graph_line(fp, depth, depth_mask, - left_margin); - i = 0; - list_for_each_entry(chain, &child->val, list) { - ret += ipchain__fprintf_graph(fp, chain, depth, - new_depth_mask, i++, - total_samples, - cumul, - left_margin); - } - - if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = child->children_hit; - else - new_total = total_samples; - - ret += __callchain__fprintf_graph(fp, &child->rb_root, new_total, - depth + 1, - new_depth_mask | (1 << depth), - left_margin); - node = next; - if (++entries_printed == callchain_param.print_limit) - break; - } - - if (callchain_param.mode == CHAIN_GRAPH_REL && - remaining && remaining != total_samples) { - - if (!rem_sq_bracket) - return ret; - - new_depth_mask &= ~(1 << (depth - 1)); - ret += ipchain__fprintf_graph(fp, &rem_hits, depth, - new_depth_mask, 0, total_samples, - remaining, left_margin); - } - - return ret; -} - -static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, - u64 total_samples, int left_margin) -{ - struct callchain_node *cnode; - struct callchain_list *chain; - u32 entries_printed = 0; - bool printed = false; - struct rb_node *node; - int i = 0; - int ret = 0; - - /* - * If have one single callchain root, don't bother printing - * its percentage (100 % in fractal mode and the same percentage - * than the hist in graph mode). This also avoid one level of column. - */ - node = rb_first(root); - if (node && !rb_next(node)) { - cnode = rb_entry(node, struct callchain_node, rb_node); - list_for_each_entry(chain, &cnode->val, list) { - /* - * If we sort by symbol, the first entry is the same than - * the symbol. No need to print it otherwise it appears as - * displayed twice. 
- */ - if (!i++ && sort__first_dimension == SORT_SYM) - continue; - if (!printed) { - ret += callchain__fprintf_left_margin(fp, left_margin); - ret += fprintf(fp, "|\n"); - ret += callchain__fprintf_left_margin(fp, left_margin); - ret += fprintf(fp, "---"); - left_margin += 3; - printed = true; - } else - ret += callchain__fprintf_left_margin(fp, left_margin); - - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); - - if (++entries_printed == callchain_param.print_limit) - break; - } - root = &cnode->rb_root; - } - - ret += __callchain__fprintf_graph(fp, root, total_samples, - 1, 1, left_margin); - ret += fprintf(fp, "\n"); - - return ret; -} - -static size_t __callchain__fprintf_flat(FILE *fp, - struct callchain_node *self, - u64 total_samples) -{ - struct callchain_list *chain; - size_t ret = 0; - - if (!self) - return 0; - - ret += __callchain__fprintf_flat(fp, self->parent, total_samples); - - - list_for_each_entry(chain, &self->val, list) { - if (chain->ip >= PERF_CONTEXT_MAX) - continue; - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", - (void *)(long)chain->ip); - } - - return ret; -} - -static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *self, - u64 total_samples) -{ - size_t ret = 0; - u32 entries_printed = 0; - struct rb_node *rb_node; - struct callchain_node *chain; - - rb_node = rb_first(self); - while (rb_node) { - double percent; - - chain = rb_entry(rb_node, struct callchain_node, rb_node); - percent = chain->hit * 100.0 / total_samples; - - ret = percent_color_fprintf(fp, " %6.2f%%\n", percent); - ret += __callchain__fprintf_flat(fp, chain, total_samples); - ret += fprintf(fp, "\n"); - if (++entries_printed == callchain_param.print_limit) - break; - - rb_node = rb_next(rb_node); - } - - return ret; -} - -static size_t hist_entry_callchain__fprintf(struct hist_entry *he, - u64 total_samples, int left_margin, - FILE *fp) -{ - switch (callchain_param.mode) { - case CHAIN_GRAPH_REL: - return callchain__fprintf_graph(fp, &he->sorted_chain, he->period, - left_margin); - break; - case CHAIN_GRAPH_ABS: - return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, - left_margin); - break; - case CHAIN_FLAT: - return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); - break; - case CHAIN_NONE: - break; - default: - pr_err("Bad callchain mode\n"); - } - - return 0; -} - -void hists__output_recalc_col_len(struct hists *hists, int max_rows) -{ - struct rb_node *next = rb_first(&hists->entries); - struct hist_entry *n; - int row = 0; - - hists__reset_col_len(hists); - - while (next && row++ < max_rows) { - n = rb_entry(next, struct hist_entry, rb_node); - if (!n->filtered) - hists__calc_col_len(hists, n); - next = rb_next(&n->rb_node); - } -} - -static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s, - size_t size, struct hists *pair_hists, - bool show_displacement, long displacement, - bool color, u64 total_period) -{ - u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; - u64 nr_events; - const char *sep = symbol_conf.field_sep; - int ret; - - if (symbol_conf.exclude_other && !he->parent) - return 0; - - if (pair_hists) { - period = he->pair ? he->pair->period : 0; - nr_events = he->pair ? he->pair->nr_events : 0; - total = pair_hists->stats.total_period; - period_sys = he->pair ? he->pair->period_sys : 0; - period_us = he->pair ? 
he->pair->period_us : 0; - period_guest_sys = he->pair ? he->pair->period_guest_sys : 0; - period_guest_us = he->pair ? he->pair->period_guest_us : 0; - } else { - period = he->period; - nr_events = he->nr_events; - total = total_period; - period_sys = he->period_sys; - period_us = he->period_us; - period_guest_sys = he->period_guest_sys; - period_guest_us = he->period_guest_us; - } - - if (total) { - if (color) - ret = percent_color_snprintf(s, size, - sep ? "%.2f" : " %6.2f%%", - (period * 100.0) / total); - else - ret = scnprintf(s, size, sep ? "%.2f" : " %6.2f%%", - (period * 100.0) / total); - if (symbol_conf.show_cpu_utilization) { - ret += percent_color_snprintf(s + ret, size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_sys * 100.0) / total); - ret += percent_color_snprintf(s + ret, size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_us * 100.0) / total); - if (perf_guest) { - ret += percent_color_snprintf(s + ret, - size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_guest_sys * 100.0) / - total); - ret += percent_color_snprintf(s + ret, - size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_guest_us * 100.0) / - total); - } - } - } else - ret = scnprintf(s, size, sep ? "%" PRIu64 : "%12" PRIu64 " ", period); - - if (symbol_conf.show_nr_samples) { - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events); - else - ret += scnprintf(s + ret, size - ret, "%11" PRIu64, nr_events); - } - - if (symbol_conf.show_total_period) { - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); - else - ret += scnprintf(s + ret, size - ret, " %12" PRIu64, period); - } - - if (pair_hists) { - char bf[32]; - double old_percent = 0, new_percent = 0, diff; - - if (total > 0) - old_percent = (period * 100.0) / total; - if (total_period > 0) - new_percent = (he->period * 100.0) / total_period; - - diff = new_percent - old_percent; - - if (fabs(diff) >= 0.01) - scnprintf(bf, sizeof(bf), "%+4.2F%%", diff); - else - scnprintf(bf, sizeof(bf), " "); - - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); - else - ret += scnprintf(s + ret, size - ret, "%11.11s", bf); - - if (show_displacement) { - if (displacement) - scnprintf(bf, sizeof(bf), "%+4ld", displacement); - else - scnprintf(bf, sizeof(bf), " "); - - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); - else - ret += scnprintf(s + ret, size - ret, "%6.6s", bf); - } - } - - return ret; -} - -int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size, - struct hists *hists) -{ - const char *sep = symbol_conf.field_sep; - struct sort_entry *se; - int ret = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - if (se->elide) - continue; - - ret += scnprintf(s + ret, size - ret, "%s", sep ?: " "); - ret += se->se_snprintf(he, s + ret, size - ret, - hists__col_len(hists, se->se_width_idx)); - } - - return ret; -} - -static int hist_entry__fprintf(struct hist_entry *he, size_t size, - struct hists *hists, struct hists *pair_hists, - bool show_displacement, long displacement, - u64 total_period, FILE *fp) -{ - char bf[512]; - int ret; - - if (size == 0 || size > sizeof(bf)) - size = sizeof(bf); - - ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists, - show_displacement, displacement, - true, total_period); - hist_entry__snprintf(he, bf + ret, size - ret, hists); - return fprintf(fp, "%s\n", bf); -} - -static size_t hist_entry__fprintf_callchain(struct hist_entry *he, - struct hists *hists, - u64 total_period, FILE *fp) -{ - int left_margin = 0; - - if 
(sort__first_dimension == SORT_COMM) { - struct sort_entry *se = list_first_entry(&hist_entry__sort_list, - typeof(*se), list); - left_margin = hists__col_len(hists, se->se_width_idx); - left_margin -= thread__comm_len(he->thread); - } - - return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); -} - -size_t hists__fprintf(struct hists *hists, struct hists *pair, - bool show_displacement, bool show_header, int max_rows, - int max_cols, FILE *fp) -{ - struct sort_entry *se; - struct rb_node *nd; - size_t ret = 0; - u64 total_period; - unsigned long position = 1; - long displacement = 0; - unsigned int width; - const char *sep = symbol_conf.field_sep; - const char *col_width = symbol_conf.col_width_list_str; - int nr_rows = 0; - - init_rem_hits(); - - if (!show_header) - goto print_entries; - - fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); - - if (symbol_conf.show_cpu_utilization) { - if (sep) { - ret += fprintf(fp, "%csys", *sep); - ret += fprintf(fp, "%cus", *sep); - if (perf_guest) { - ret += fprintf(fp, "%cguest sys", *sep); - ret += fprintf(fp, "%cguest us", *sep); - } - } else { - ret += fprintf(fp, " sys "); - ret += fprintf(fp, " us "); - if (perf_guest) { - ret += fprintf(fp, " guest sys "); - ret += fprintf(fp, " guest us "); - } - } - } - - if (symbol_conf.show_nr_samples) { - if (sep) - fprintf(fp, "%cSamples", *sep); - else - fputs(" Samples ", fp); - } - - if (symbol_conf.show_total_period) { - if (sep) - ret += fprintf(fp, "%cPeriod", *sep); - else - ret += fprintf(fp, " Period "); - } - - if (pair) { - if (sep) - ret += fprintf(fp, "%cDelta", *sep); - else - ret += fprintf(fp, " Delta "); - - if (show_displacement) { - if (sep) - ret += fprintf(fp, "%cDisplacement", *sep); - else - ret += fprintf(fp, " Displ"); - } - } - - list_for_each_entry(se, &hist_entry__sort_list, list) { - if (se->elide) - continue; - if (sep) { - fprintf(fp, "%c%s", *sep, se->se_header); - continue; - } - width = strlen(se->se_header); - if (symbol_conf.col_width_list_str) { - if (col_width) { - hists__set_col_len(hists, se->se_width_idx, - atoi(col_width)); - col_width = strchr(col_width, ','); - if (col_width) - ++col_width; - } - } - if (!hists__new_col_len(hists, se->se_width_idx, width)) - width = hists__col_len(hists, se->se_width_idx); - fprintf(fp, " %*s", width, se->se_header); - } - - fprintf(fp, "\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; - - if (sep) - goto print_entries; - - fprintf(fp, "# ........"); - if (symbol_conf.show_cpu_utilization) - fprintf(fp, " ....... 
......."); - if (symbol_conf.show_nr_samples) - fprintf(fp, " .........."); - if (symbol_conf.show_total_period) - fprintf(fp, " ............"); - if (pair) { - fprintf(fp, " .........."); - if (show_displacement) - fprintf(fp, " ....."); - } - list_for_each_entry(se, &hist_entry__sort_list, list) { - unsigned int i; - - if (se->elide) - continue; - - fprintf(fp, " "); - width = hists__col_len(hists, se->se_width_idx); - if (width == 0) - width = strlen(se->se_header); - for (i = 0; i < width; i++) - fprintf(fp, "."); - } - - fprintf(fp, "\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; - - fprintf(fp, "#\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; - -print_entries: - total_period = hists->stats.total_period; - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (h->filtered) - continue; - - if (show_displacement) { - if (h->pair != NULL) - displacement = ((long)h->pair->position - - (long)position); - else - displacement = 0; - ++position; - } - ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, - displacement, total_period, fp); - - if (symbol_conf.use_callchain) - ret += hist_entry__fprintf_callchain(h, hists, total_period, fp); - if (max_rows && ++nr_rows >= max_rows) - goto out; - - if (h->ms.map == NULL && verbose > 1) { - __map_groups__fprintf_maps(&h->thread->mg, - MAP__FUNCTION, verbose, fp); - fprintf(fp, "%.10s end\n", graph_dotted_line); - } - } -out: - free(rem_sq_bracket); - - return ret; -} - /* * See hists__fprintf to match the column widths */ @@ -1342,25 +723,3 @@ void hists__inc_nr_events(struct hists *hists, u32 type) ++hists->stats.nr_events[0]; ++hists->stats.nr_events[type]; } - -size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) -{ - int i; - size_t ret = 0; - - for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { - const char *name; - - if (hists->stats.nr_events[i] == 0) - continue; - - name = perf_event__name(i); - if (!strcmp(name, "UNKNOWN")) - continue; - - ret += fprintf(fp, "%16s events: %10d\n", name, - hists->stats.nr_events[i]); - } - - return ret; -} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0b096c27a41..69fab7d9abc 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -112,6 +112,8 @@ void hists__filter_by_symbol(struct hists *hists); u16 hists__col_len(struct hists *self, enum hist_column col); void hists__set_col_len(struct hists *self, enum hist_column col, u16 len); bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len); +void hists__reset_col_len(struct hists *hists); +void hists__calc_col_len(struct hists *hists, struct hist_entry *he); struct perf_evlist; From 000078bc3ee69efb1124b8478c7527389a826074 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 20 Aug 2012 13:52:06 +0900 Subject: [PATCH 103/282] perf hists: Rename and move some functions Rename functions for consistency and move callchain print function into hist_entry__fprintf(). 
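For reference, the renames in this patch (as seen in the diff below) are:

    hist_entry__snprintf()          -> hist_entry__sort_snprintf()
    hist_entry__pcnt_snprintf()     -> hist_entry__period_snprintf()
    hist_entry__fprintf_callchain() -> hist_entry__callchain_fprintf()

and hist_entry__fprintf() now prints the callchain itself when
symbol_conf.use_callchain is set, instead of the hists__fprintf() loop
doing it afterwards.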
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345438331-20234-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 4 +-- tools/perf/ui/stdio/hist.c | 53 +++++++++++++++++++--------------- tools/perf/util/hist.h | 4 +-- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index b8094692c15..81bd8c2af73 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -586,7 +586,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (row_offset == 0) { - hist_entry__snprintf(entry, s, sizeof(s), browser->hists); + hist_entry__sort_snprintf(entry, s, sizeof(s), browser->hists); percent = (entry->period * 100.0) / browser->hists->stats.total_period; ui_browser__set_percent_color(&browser->b, percent, current_entry); @@ -931,7 +931,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, if (symbol_conf.use_callchain) folded_sign = hist_entry__folded(he); - hist_entry__snprintf(he, s, sizeof(s), browser->hists); + hist_entry__sort_snprintf(he, s, sizeof(s), browser->hists); percent = (he->period * 100.0) / browser->hists->stats.total_period; if (symbol_conf.use_callchain) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 7881d625e17..9bf7e9e5a72 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -291,7 +291,7 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, return 0; } -static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s, +static int hist_entry__period_snprintf(struct hist_entry *he, char *s, size_t size, struct hists *pair_hists, bool show_displacement, long displacement, bool color, u64 total_period) @@ -404,8 +404,8 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s, return ret; } -int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size, - struct hists *hists) +int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size, + struct hists *hists) { const char *sep = symbol_conf.field_sep; struct sort_entry *se; @@ -423,25 +423,7 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size, return ret; } -static int hist_entry__fprintf(struct hist_entry *he, size_t size, - struct hists *hists, struct hists *pair_hists, - bool show_displacement, long displacement, - u64 total_period, FILE *fp) -{ - char bf[512]; - int ret; - - if (size == 0 || size > sizeof(bf)) - size = sizeof(bf); - - ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists, - show_displacement, displacement, - true, total_period); - hist_entry__snprintf(he, bf + ret, size - ret, hists); - return fprintf(fp, "%s\n", bf); -} - -static size_t hist_entry__fprintf_callchain(struct hist_entry *he, +static size_t hist_entry__callchain_fprintf(struct hist_entry *he, struct hists *hists, u64 total_period, FILE *fp) { @@ -457,6 +439,31 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *he, return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); } +static int hist_entry__fprintf(struct hist_entry *he, size_t size, + struct hists *hists, struct hists *pair_hists, + bool show_displacement, long displacement, + u64 total_period, FILE *fp) +{ + char bf[512]; + int ret; + + if (size == 0 || size > sizeof(bf)) + size = sizeof(bf); + + ret = hist_entry__period_snprintf(he, bf, size, pair_hists, + show_displacement, 
displacement, + true, total_period); + hist_entry__sort_snprintf(he, bf + ret, size - ret, hists); + + ret = fprintf(fp, "%s\n", bf); + + if (symbol_conf.use_callchain) + ret += hist_entry__callchain_fprintf(he, hists, + total_period, fp); + + return ret; +} + size_t hists__fprintf(struct hists *hists, struct hists *pair, bool show_displacement, bool show_header, int max_rows, int max_cols, FILE *fp) @@ -608,8 +615,6 @@ print_entries: ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, displacement, total_period, fp); - if (symbol_conf.use_callchain) - ret += hist_entry__fprintf_callchain(h, hists, total_period, fp); if (max_rows && ++nr_rows >= max_rows) goto out; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 69fab7d9abc..2e650ffb7d2 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,8 +75,8 @@ struct hist_entry *__hists__add_entry(struct hists *self, struct symbol *parent, u64 period); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); -int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, - struct hists *hists); +int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size, + struct hists *hists); void hist_entry__free(struct hist_entry *); struct hist_entry *__hists__add_branch_entry(struct hists *self, From d45a3e00687bb52a20d9256d3d1068eea271013f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 22 Aug 2012 10:38:12 +0200 Subject: [PATCH 104/282] perf tools: Fix 'No libunwind found' make warning message Changing error message when libunwind support is not found to inform properly to install libunwind-dev[el] package. Reported-by: Ingo Molnar Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Ben Hutchings Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Feng Tang Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Namhyung Kim Cc: Namhyung Kim Cc: Namhyung Kim Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Robert Richter Cc: Sedat Dilek Cc: Stephane Eranian Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/20120822083812.GC1003@krava.brq.redhat.com [ committer note: s/disable/disabling/g rewording suggested by Steven Rostedt ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 6bd888d04b6..722ddee61f9 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -493,7 +493,7 @@ endif FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(ALL_CFLAGS) $(LIBUNWIND_LDFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS) ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND)),y) - msg := $(warning No libunwind found. Please install libunwind >= 0.99); + msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99); NO_LIBUNWIND := 1 endif # Libunwind support endif # NO_LIBUNWIND From 9bfbbc6d1e6b4d055860231e232b807911bf8325 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 21 Aug 2012 20:03:15 +0200 Subject: [PATCH 105/282] perf test: Do not abort tests on error Run through all tests regardless of failures. On errors, return the first error code detected. 
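The approach, sketched below for illustration only (not part of the patch;
the real changes are in the diff that follows), is to record a failing
return code instead of breaking out of the loop:

    int ret = 0;

    for (i = 0; i < cnt; i++) {
        int err = test_event(&events[i]);

        if (err && !ret)
            ret = err;  /* remember the failure, keep running the rest */
    }

    return ret;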
Signed-off-by: Robert Richter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345572195-23857-2-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events-test.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index bf055ce1916..392308cef17 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -948,19 +948,19 @@ static int test_event(struct test__event_st *e) static int test_events(struct test__event_st *events, unsigned cnt) { - int ret = 0; + int ret1, ret2 = 0; unsigned i; for (i = 0; i < cnt; i++) { struct test__event_st *e = &events[i]; pr_debug("running test %d '%s'\n", i, e->name); - ret = test_event(e); - if (ret) - break; + ret1 = test_event(e); + if (ret1) + ret2 = ret1; } - return ret; + return ret2; } static int test_term(struct test__term *t) @@ -1021,13 +1021,13 @@ static int test_pmu(void) int parse_events__test(void) { - int ret; + int ret1, ret2 = 0; #define TEST_EVENTS(tests) \ do { \ - ret = test_events(tests, ARRAY_SIZE(tests)); \ - if (ret) \ - return ret; \ + ret1 = test_events(tests, ARRAY_SIZE(tests)); \ + if (!ret2) \ + ret2 = ret1; \ } while (0) TEST_EVENTS(test__events); @@ -1035,5 +1035,9 @@ do { \ if (test_pmu()) TEST_EVENTS(test__events_pmu); - return test_terms(test__terms, ARRAY_SIZE(test__terms)); + ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms)); + if (!ret2) + ret2 = ret1; + + return ret2; } From ac2ba9f36bb400755e411309f3e76dbf308a10e7 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Aug 2012 21:10:21 +0200 Subject: [PATCH 106/282] perf tools: Catch event names from command line Use command line string provided by the -e option to name events. This way we get unique events names that also support pmu event syntax (//). No need to reconstruct the name anymore from its attributes. We use the event_desc of the header to store the name in the perf.data header. Thus it is also available for perf report. Implemented by putting the parser in different states to parse events or configs. And since event names are now generated from the command line specification. Update event names in test cases accordingly. 
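As a concrete example of the new naming (taken from the updated test
expectations below), an event given as cpu/config=2/u on the command line
is now named "cpu/config=2/u" instead of the reconstructed "raw 0x2:u",
and a breakpoint given as mem:0:u is named "mem:0:u" instead of
"mem:0x0:rw:u".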
Signed-off-by: Robert Richter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345144224-27280-6-git-send-email-robert.richter@amd.com [ committer note: Folded patch fixing 'perf test' failure reported by Jiri Olsa ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events-test.c | 13 ++++---- tools/perf/util/parse-events.c | 12 +++++++ tools/perf/util/parse-events.h | 1 + tools/perf/util/parse-events.l | 50 +++++++++++++++++++++++------ tools/perf/util/parse-events.y | 20 ++++++++++-- 5 files changed, 78 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 392308cef17..bc8b65130ae 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -301,12 +301,13 @@ static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:rw:u")); + !strcmp(perf_evsel__name(evsel), "mem:0:u")); return test__checkevent_breakpoint(evlist); } @@ -320,7 +321,7 @@ static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:x:k")); + !strcmp(perf_evsel__name(evsel), "mem:0:x:k")); return test__checkevent_breakpoint_x(evlist); } @@ -334,7 +335,7 @@ static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:r:hp")); + !strcmp(perf_evsel__name(evsel), "mem:0:r:hp")); return test__checkevent_breakpoint_r(evlist); } @@ -348,7 +349,7 @@ static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:w:up")); + !strcmp(perf_evsel__name(evsel), "mem:0:w:up")); return test__checkevent_breakpoint_w(evlist); } @@ -362,7 +363,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:rw:kp")); + !strcmp(perf_evsel__name(evsel), "mem:0:rw:kp")); return test__checkevent_breakpoint_rw(evlist); } @@ -437,7 +438,7 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "raw 0x2:u")); + !strcmp(perf_evsel__name(evsel), "cpu/config=2/u")); return 0; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 925784a930a..b24630398b9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ 
-751,6 +751,18 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) return 0; } +int parse_events_name(struct list_head *list, char *name) +{ + struct perf_evsel *evsel; + + list_for_each_entry(evsel, list, node) { + if (!evsel->name) + evsel->name = strdup(name); + } + + return 0; +} + static int parse_events__scanner(const char *str, void *data, int start_token) { YY_BUFFER_STATE buffer; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 0b9782dc3da..c356e443448 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -81,6 +81,7 @@ int parse_events__term_clone(struct parse_events__term **new, void parse_events__free_terms(struct list_head *terms); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); +int parse_events_name(struct list_head *list, char *name); int parse_events_add_tracepoint(struct list_head **list, int *idx, char *sys, char *event); int parse_events_add_numeric(struct list_head **list, int *idx, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 2c0d006d43d..f5e28dc6827 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -70,6 +70,12 @@ static int term(yyscan_t scanner, int type) %} %x mem +%s config +%x event + +group [^,{}/]*[{][^}]*[}][^,{}/]* +event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* +event [^,{}/]+ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ @@ -84,7 +90,13 @@ modifier_bp [rwx]{1,3} { int start_token; - start_token = (int) parse_events_get_extra(yyscanner); + start_token = parse_events_get_extra(yyscanner); + + if (start_token == PE_START_TERMS) + BEGIN(config); + else if (start_token == PE_START_EVENTS) + BEGIN(event); + if (start_token) { parse_events_set_extra(NULL, yyscanner); return start_token; @@ -92,6 +104,26 @@ modifier_bp [rwx]{1,3} } %} +{ + +{group} { + BEGIN(INITIAL); yyless(0); + } + +{event_pmu} | +{event} { + str(yyscanner, PE_EVENT_NAME); + BEGIN(INITIAL); yyless(0); + return PE_EVENT_NAME; + } + +. | +<> { + BEGIN(INITIAL); yyless(0); + } + +} + cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } @@ -127,18 +159,16 @@ speculative-read|speculative-load | refs|Reference|ops|access | misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); } - /* - * These are event config hardcoded term names to be specified - * within xxx/.../ syntax. So far we dont clash with other names, - * so we can put them here directly. In case the we have a conflict - * in future, this needs to go into '//' condition block. 
- */ +{ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } +, { return ','; } +"/" { BEGIN(INITIAL); return '/'; } +} mem: { BEGIN(mem); return PE_PREFIX_MEM; } r{num_raw_hex} { return raw(yyscanner); } @@ -147,11 +177,11 @@ r{num_raw_hex} { return raw(yyscanner); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } {name} { return str(yyscanner, PE_NAME); } -"/" { return '/'; } +"/" { BEGIN(config); return '/'; } - { return '-'; } -, { return ','; } +, { BEGIN(event); return ','; } : { return ':'; } -"{" { return '{'; } +"{" { BEGIN(event); return '{'; } "}" { return '}'; } = { return '='; } \n { } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 66850f820df..42d9a17b83b 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -27,6 +27,7 @@ do { \ %token PE_START_EVENTS PE_START_TERMS %token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM +%token PE_EVENT_NAME %token PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT @@ -42,6 +43,7 @@ do { \ %type PE_NAME_CACHE_OP_RESULT %type PE_MODIFIER_EVENT %type PE_MODIFIER_BP +%type PE_EVENT_NAME %type value_sym %type event_config %type event_term @@ -53,6 +55,8 @@ do { \ %type event_legacy_numeric %type event_legacy_raw %type event_def +%type event_mod +%type event_name %type event %type events %type group_def @@ -143,8 +147,10 @@ events ',' event | event -event: -event_def PE_MODIFIER_EVENT +event: event_mod + +event_mod: +event_name PE_MODIFIER_EVENT { struct list_head *list = $1; @@ -157,6 +163,16 @@ event_def PE_MODIFIER_EVENT $$ = list; } | +event_name + +event_name: +PE_EVENT_NAME event_def +{ + ABORT_ON(parse_events_name($2, $1)); + free($1); + $$ = $2; +} +| event_def event_def: event_pmu | From 4e1b9c679fcb208275b55eb8fc46c2d58ef6a2ee Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Aug 2012 21:10:22 +0200 Subject: [PATCH 107/282] perf tools: Refactor print_event_desc() For later use we need a function read_event_desc() for processing the event_desc feature. Split it from print_event_desc(). 
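A rough sketch of how the split is meant to be consumed (illustrative
only; the real code is in the diff below, and use() stands in for any
hypothetical consumer of the data):

    struct perf_evsel *evsel, *events = read_event_desc(ph, fd);

    if (!events)
        return;     /* not available or unable to read */

    /* the returned array is terminated by an entry with attr.size == 0 */
    for (evsel = events; evsel->attr.size; evsel++)
        use(evsel->name, &evsel->attr);     /* hypothetical consumer */

    free_event_desc(events);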
Signed-off-by: Robert Richter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345144224-27280-7-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 134 +++++++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 41 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1e5b6aa6052..7454cf4eedb 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1148,12 +1148,29 @@ static void print_cpu_topology(struct perf_header *ph, int fd, FILE *fp) } } -static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) +static void free_event_desc(struct perf_evsel *events) { - struct perf_event_attr attr; - uint64_t id; + struct perf_evsel *evsel; + + if (!events) + return; + + for (evsel = events; evsel->attr.size; evsel++) { + if (evsel->name) + free(evsel->name); + if (evsel->id) + free(evsel->id); + } + + free(events); +} + +static struct perf_evsel * +read_event_desc(struct perf_header *ph, int fd) +{ + struct perf_evsel *evsel, *events = NULL; + u64 *id; void *buf = NULL; - char *str; u32 nre, sz, nr, i, j; ssize_t ret; size_t msz; @@ -1173,18 +1190,22 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) if (ph->needs_swap) sz = bswap_32(sz); - memset(&attr, 0, sizeof(attr)); - /* buffer to hold on file attr struct */ buf = malloc(sz); if (!buf) goto error; - msz = sizeof(attr); + /* the last event terminates with evsel->attr.size == 0: */ + events = calloc(nre + 1, sizeof(*events)); + if (!events) + goto error; + + msz = sizeof(evsel->attr); if (sz < msz) msz = sz; - for (i = 0 ; i < nre; i++) { + for (i = 0, evsel = events; i < nre; evsel++, i++) { + evsel->idx = i; /* * must read entire on-file attr struct to @@ -1197,7 +1218,7 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) if (ph->needs_swap) perf_event__attr_swap(buf); - memcpy(&attr, buf, msz); + memcpy(&evsel->attr, buf, msz); ret = read(fd, &nr, sizeof(nr)); if (ret != (ssize_t)sizeof(nr)) @@ -1206,51 +1227,82 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) if (ph->needs_swap) nr = bswap_32(nr); - str = do_read_string(fd, ph); - fprintf(fp, "# event : name = %s, ", str); - free(str); + evsel->name = do_read_string(fd, ph); + + if (!nr) + continue; + + id = calloc(nr, sizeof(*id)); + if (!id) + goto error; + evsel->ids = nr; + evsel->id = id; + + for (j = 0 ; j < nr; j++) { + ret = read(fd, id, sizeof(*id)); + if (ret != (ssize_t)sizeof(*id)) + goto error; + if (ph->needs_swap) + *id = bswap_64(*id); + id++; + } + } +out: + if (buf) + free(buf); + return events; +error: + if (events) + free_event_desc(events); + events = NULL; + goto out; +} + +static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) +{ + struct perf_evsel *evsel, *events = read_event_desc(ph, fd); + u32 j; + u64 *id; + + if (!events) { + fprintf(fp, "# event desc: not available or unable to read\n"); + return; + } + + for (evsel = events; evsel->attr.size; evsel++) { + fprintf(fp, "# event : name = %s, ", evsel->name); fprintf(fp, "type = %d, config = 0x%"PRIx64 ", config1 = 0x%"PRIx64", config2 = 0x%"PRIx64, - attr.type, - (u64)attr.config, - (u64)attr.config1, - (u64)attr.config2); + evsel->attr.type, + (u64)evsel->attr.config, + (u64)evsel->attr.config1, + (u64)evsel->attr.config2); fprintf(fp, ", excl_usr = %d, excl_kern = %d", - attr.exclude_user, - attr.exclude_kernel); + evsel->attr.exclude_user, + 
evsel->attr.exclude_kernel); fprintf(fp, ", excl_host = %d, excl_guest = %d", - attr.exclude_host, - attr.exclude_guest); + evsel->attr.exclude_host, + evsel->attr.exclude_guest); - fprintf(fp, ", precise_ip = %d", attr.precise_ip); + fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip); - if (nr) + if (evsel->ids) { fprintf(fp, ", id = {"); - - for (j = 0 ; j < nr; j++) { - ret = read(fd, &id, sizeof(id)); - if (ret != (ssize_t)sizeof(id)) - goto error; - - if (ph->needs_swap) - id = bswap_64(id); - - if (j) - fputc(',', fp); - - fprintf(fp, " %"PRIu64, id); - } - if (nr && j == nr) + for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) { + if (j) + fputc(',', fp); + fprintf(fp, " %"PRIu64, *id); + } fprintf(fp, " }"); + } + fputc('\n', fp); } - free(buf); - return; -error: - fprintf(fp, "# event desc: not available or unable to read\n"); + + free_event_desc(events); } static void print_total_mem(struct perf_header *h __used, int fd, FILE *fp) From 7c2f7afd36cc111dd08eb8eb8ef74fb6564fd131 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Aug 2012 21:10:23 +0200 Subject: [PATCH 108/282] perf report: Update event names from header description Name events based on the event description in the perf.data header. Example output: $ perf report | grep '^#.*event' # event : name = ibs_op/cnt_ctl=1/GH, type = 7, config = 0x80000, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, excl_host = 0, excl_guest = 0, precise_ip = 0, id = { 49, 50, 51, 52, 53, 54, 55, 56 } # event : name = ibs_fetch/config=0/, type = 6, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, excl_host = 0, excl_guest = 1, precise_ip = 0, id = { 57, 58, 59, 60, 61, 62, 63, 64 } # Samples: 20K of event 'ibs_op/cnt_ctl=1/GH' # Samples: 4K of event 'ibs_fetch/config=0/' Note the new pmu event syntax of the names. 
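Internally (see the diff below) this reuses read_event_desc() from the
previous patch: process_event_desc() walks the recorded events and copies
each stored name onto the matching evsel in the session's evlist, keyed
by the event index:

    for (evsel = events; evsel->attr.size; evsel++)
        perf_evlist__set_event_name(session->evlist, evsel);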
Signed-off-by: Robert Richter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345144224-27280-8-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 52 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7454cf4eedb..69374deeb4a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1556,6 +1556,56 @@ static int process_build_id(struct perf_file_section *section, return 0; } +static struct perf_evsel * +perf_evlist__find_by_index(struct perf_evlist *evlist, int idx) +{ + struct perf_evsel *evsel; + + list_for_each_entry(evsel, &evlist->entries, node) { + if (evsel->idx == idx) + return evsel; + } + + return NULL; +} + +static void +perf_evlist__set_event_name(struct perf_evlist *evlist, struct perf_evsel *event) +{ + struct perf_evsel *evsel; + + if (!event->name) + return; + + evsel = perf_evlist__find_by_index(evlist, event->idx); + if (!evsel) + return; + + if (evsel->name) + return; + + evsel->name = strdup(event->name); +} + +static int +process_event_desc(struct perf_file_section *section __unused, + struct perf_header *header, int feat __unused, int fd, + void *data __used) +{ + struct perf_session *session = container_of(header, struct perf_session, header); + struct perf_evsel *evsel, *events = read_event_desc(header, fd); + + if (!events) + return 0; + + for (evsel = events; evsel->attr.size; evsel++) + perf_evlist__set_event_name(session->evlist, evsel); + + free_event_desc(events); + + return 0; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); @@ -1589,7 +1639,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPA(HEADER_CPUDESC, cpudesc), FEAT_OPA(HEADER_CPUID, cpuid), FEAT_OPA(HEADER_TOTAL_MEM, total_mem), - FEAT_OPA(HEADER_EVENT_DESC, event_desc), + FEAT_OPP(HEADER_EVENT_DESC, event_desc), FEAT_OPA(HEADER_CMDLINE, cmdline), FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), From 50a9667c9383982d7ec4e6bbc707f67be0e163d2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Aug 2012 21:10:24 +0200 Subject: [PATCH 109/282] perf tools: Add pmu mappings to header information With dynamic pmu allocation there are also dynamically assigned pmu ids. These ids are used in event->attr.type to describe the pmu to be used for that event. The information is available in sysfs, e.g: /sys/bus/event_source/devices/breakpoint/type: 5 /sys/bus/event_source/devices/cpu/type: 4 /sys/bus/event_source/devices/ibs_fetch/type: 6 /sys/bus/event_source/devices/ibs_op/type: 7 /sys/bus/event_source/devices/software/type: 1 /sys/bus/event_source/devices/tracepoint/type: 2 These mappings are needed to know which samples belong to which pmu. If a pmu is added dynamically like for ibs_fetch or ibs_op the type value may vary. Now, when decoding samples from perf.data this information in sysfs might be no longer available or may have changed. We need to store it in perf.data. Using the header for this. 
Now the header information created with perf report contains an additional section looking like this: # pmu mappings: ibs_op = 7, ibs_fetch = 6, cpu = 4, breakpoint = 5, tracepoint = 2, software = 1 Signed-off-by: Robert Richter Acked-by: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345144224-27280-9-git-send-email-robert.richter@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 78 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/header.h | 1 + tools/perf/util/pmu.c | 50 ++++++++++++++++++++++++-- tools/perf/util/pmu.h | 2 ++ 4 files changed, 129 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 69374deeb4a..63aad3109e3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -20,6 +20,7 @@ #include "symbol.h" #include "debug.h" #include "cpumap.h" +#include "pmu.h" static bool no_buildid_cache = false; @@ -1003,6 +1004,45 @@ done: return ret; } +/* + * File format: + * + * struct pmu_mappings { + * u32 pmu_num; + * struct pmu_map { + * u32 type; + * char name[]; + * }[pmu_num]; + * }; + */ + +static int write_pmu_mappings(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + struct perf_pmu *pmu = NULL; + off_t offset = lseek(fd, 0, SEEK_CUR); + __u32 pmu_num = 0; + + /* write real pmu_num later */ + do_write(fd, &pmu_num, sizeof(pmu_num)); + + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name) + continue; + pmu_num++; + do_write(fd, &pmu->type, sizeof(pmu->type)); + do_write_string(fd, pmu->name); + } + + if (pwrite(fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) { + /* discard all */ + lseek(fd, offset, SEEK_SET); + return -1; + } + + return 0; +} + /* * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(ARCH)/util/header.c @@ -1389,6 +1429,43 @@ static void print_branch_stack(struct perf_header *ph __used, int fd __used, fprintf(fp, "# contains samples with branch stack\n"); } +static void print_pmu_mappings(struct perf_header *ph, int fd, FILE *fp) +{ + const char *delimiter = "# pmu mappings: "; + char *name; + int ret; + u32 pmu_num; + u32 type; + + ret = read(fd, &pmu_num, sizeof(pmu_num)); + if (ret != sizeof(pmu_num)) + goto error; + + if (!pmu_num) { + fprintf(fp, "# pmu mappings: not available\n"); + return; + } + + while (pmu_num) { + if (read(fd, &type, sizeof(type)) != sizeof(type)) + break; + name = do_read_string(fd, ph); + if (!name) + break; + pmu_num--; + fprintf(fp, "%s%s = %" PRIu32, delimiter, name, type); + free(name); + delimiter = ", "; + } + + fprintf(fp, "\n"); + + if (!pmu_num) + return; +error: + fprintf(fp, "# pmu mappings: unable to read\n"); +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -1644,6 +1721,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), + FEAT_OPA(HEADER_PMU_MAPPINGS, pmu_mappings), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 24962e707e5..9d5eedceda7 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -28,6 +28,7 @@ enum { HEADER_CPU_TOPOLOGY, HEADER_NUMA_TOPOLOGY, HEADER_BRANCH_STACK, + HEADER_PMU_MAPPINGS, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c 
index 67715a42cd6..6631d828db3 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -10,6 +10,8 @@ #include "pmu.h" #include "parse-events.h" +#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" + int perf_pmu_parse(struct list_head *list, char *name); extern FILE *perf_pmu_in; @@ -69,7 +71,7 @@ static int pmu_format(char *name, struct list_head *format) return -1; snprintf(path, PATH_MAX, - "%s/bus/event_source/devices/%s/format", sysfs, name); + "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name); if (stat(path, &st) < 0) return 0; /* no error if format does not exist */ @@ -206,7 +208,7 @@ static int pmu_type(char *name, __u32 *type) return -1; snprintf(path, PATH_MAX, - "%s/bus/event_source/devices/%s/type", sysfs, name); + "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name); if (stat(path, &st) < 0) return -1; @@ -222,6 +224,35 @@ static int pmu_type(char *name, __u32 *type) return ret; } +/* Add all pmus in sysfs to pmu list: */ +static void pmu_read_sysfs(void) +{ + char path[PATH_MAX]; + const char *sysfs; + DIR *dir; + struct dirent *dent; + + sysfs = sysfs_find_mountpoint(); + if (!sysfs) + return; + + snprintf(path, PATH_MAX, + "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); + + dir = opendir(path); + if (!dir) + return; + + while ((dent = readdir(dir))) { + if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) + continue; + /* add to static LIST_HEAD(pmus): */ + perf_pmu__find(dent->d_name); + } + + closedir(dir); +} + static struct perf_pmu *pmu_lookup(char *name) { struct perf_pmu *pmu; @@ -267,6 +298,21 @@ static struct perf_pmu *pmu_find(char *name) return NULL; } +struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) +{ + /* + * pmu iterator: If pmu is NULL, we start at the begin, + * otherwise return the next pmu. Returns NULL on end. + */ + if (!pmu) { + pmu_read_sysfs(); + pmu = list_prepare_entry(pmu, &pmus, list); + } + list_for_each_entry_continue(pmu, &pmus, list) + return pmu; + return NULL; +} + struct perf_pmu *perf_pmu__find(char *name) { struct perf_pmu *pmu; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 535f2c5258a..47f68d3cc5d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -46,5 +46,7 @@ int perf_pmu__new_format(struct list_head *list, char *name, int config, unsigned long *bits); void perf_pmu__set_format(unsigned long *bits, long from, long to); +struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); + int perf_pmu__test(void); #endif /* __PMU_H */ From 1659d129ed014b715b0b2120e6fd929bdd33ed03 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 22 Aug 2012 10:35:40 +0200 Subject: [PATCH 110/282] perf tools: Keep the perf_event_attr on version 3 Stashing version 4 under version 3 and removing version 4, because both version changes were within single patchset. Reported-by: Peter Zijlstra Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/20120822083540.GB1003@krava.brq.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 4 ++-- tools/perf/util/header.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 297ca3db6b4..28f9cee3fbc 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -205,8 +205,8 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */ -#define PERF_ATTR_SIZE_VER4 96 /* add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ + /* add: sample_stack_user */ /* * Hardware event_id to monitor via a performance monitoring event: diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 63aad3109e3..9696e64c9db 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2011,7 +2011,6 @@ static const int attr_file_abi_sizes[] = { [1] = PERF_ATTR_SIZE_VER1, [2] = PERF_ATTR_SIZE_VER2, [3] = PERF_ATTR_SIZE_VER3, - [4] = PERF_ATTR_SIZE_VER4, 0, }; From fd34f0b26c9d0f3c3c5c5f83207efa6324cd19f7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 22 Aug 2012 16:00:28 +0900 Subject: [PATCH 111/282] tools lib traceevent: Do not link broken field arg for an old ftrace event Defer linking a newly allocated arg to print_fmt.args until all of its field is setup so that later access to ->field.name cannot be NULL. Signed-off-by: Namhyung Kim Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1345618831-9148-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index b7c2c491f61..33fcd943f09 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4754,20 +4754,20 @@ int pevent_parse_event(struct pevent *pevent, struct print_arg *arg, **list; /* old ftrace had no args */ - list = &event->print_fmt.args; for (field = event->format.fields; field; field = field->next) { arg = alloc_arg(); - *list = arg; - list = &arg->next; arg->type = PRINT_FIELD; arg->field.name = strdup(field->name); if (!arg->field.name) { do_warning("failed to allocate field name"); event->flags |= EVENT_FL_FAILED; + free_arg(arg); return -1; } arg->field.field = field; + *list = arg; + list = &arg->next; } return 0; } From bffddffde7f9bd093909235a25dbb806fe639dde Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 22 Aug 2012 16:00:29 +0900 Subject: [PATCH 112/282] tools lib traceevent: Introduce pevent_errno Define and use error numbers for pevent_parse_event() and get rid of die() and do_warning() calls. If the function returns non-zero value, the caller can check the return code and do appropriate things. I chose the error numbers to be negative not to clash with standard errno, and as usual, 0 for success. 
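A minimal caller sketch (hypothetical wrapper; it assumes only the enum pevent_errno values and the pevent_parse_event() prototype added by this patch) showing how a command could branch on the new return codes instead of a bare -1:

#include <stdio.h>
#include "event-parse.h"

/* Hypothetical wrapper: parse one event format buffer and report failures. */
static int parse_one_event(struct pevent *pevent, const char *buf,
			   unsigned long size, const char *sys)
{
	enum pevent_errno err = pevent_parse_event(pevent, buf, size, sys);

	if (err == PEVENT_ERRNO__SUCCESS)
		return 0;

	/*
	 * Library codes are large negative values, so they never
	 * collide with standard (positive) errno values.
	 */
	if (err == PEVENT_ERRNO__MEM_ALLOC_FAILED)
		fprintf(stderr, "out of memory while parsing a %s event\n", sys);
	else
		fprintf(stderr, "failed to parse %s event (error %d)\n", sys, err);

	return -1;
}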
Signed-off-by: Namhyung Kim Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1345618831-9148-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 50 ++++++++++++++++++------------ tools/lib/traceevent/event-parse.h | 26 ++++++++++++++-- 2 files changed, 54 insertions(+), 22 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 33fcd943f09..a46cae701db 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4686,9 +4686,8 @@ static int find_event_handle(struct pevent *pevent, struct event_format *event) * * /sys/kernel/debug/tracing/events/.../.../format */ -int pevent_parse_event(struct pevent *pevent, - const char *buf, unsigned long size, - const char *sys) +enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, + unsigned long size, const char *sys) { struct event_format *event; int ret; @@ -4697,17 +4696,16 @@ int pevent_parse_event(struct pevent *pevent, event = alloc_event(); if (!event) - return -ENOMEM; + return PEVENT_ERRNO__MEM_ALLOC_FAILED; event->name = event_read_name(); if (!event->name) { /* Bad event? */ - free(event); - return -1; + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; + goto event_alloc_failed; } if (strcmp(sys, "ftrace") == 0) { - event->flags |= EVENT_FL_ISFTRACE; if (strcmp(event->name, "bprint") == 0) @@ -4715,20 +4713,28 @@ int pevent_parse_event(struct pevent *pevent, } event->id = event_read_id(); - if (event->id < 0) - die("failed to read event id"); + if (event->id < 0) { + ret = PEVENT_ERRNO__READ_ID_FAILED; + /* + * This isn't an allocation error actually. + * But as the ID is critical, just bail out. 
+ */ + goto event_alloc_failed; + } event->system = strdup(sys); - if (!event->system) - die("failed to allocate system"); + if (!event->system) { + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; + goto event_alloc_failed; + } /* Add pevent to event so that it can be referenced */ event->pevent = pevent; ret = event_read_format(event); if (ret < 0) { - do_warning("failed to read event format for %s", event->name); - goto event_failed; + ret = PEVENT_ERRNO__READ_FORMAT_FAILED; + goto event_parse_failed; } /* @@ -4740,10 +4746,9 @@ int pevent_parse_event(struct pevent *pevent, ret = event_read_print(event); if (ret < 0) { - do_warning("failed to read event print fmt for %s", - event->name); show_warning = 1; - goto event_failed; + ret = PEVENT_ERRNO__READ_PRINT_FAILED; + goto event_parse_failed; } show_warning = 1; @@ -4760,10 +4765,9 @@ int pevent_parse_event(struct pevent *pevent, arg->type = PRINT_FIELD; arg->field.name = strdup(field->name); if (!arg->field.name) { - do_warning("failed to allocate field name"); event->flags |= EVENT_FL_FAILED; free_arg(arg); - return -1; + return PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED; } arg->field.field = field; *list = arg; @@ -4778,11 +4782,17 @@ int pevent_parse_event(struct pevent *pevent, return 0; - event_failed: + event_parse_failed: event->flags |= EVENT_FL_FAILED; /* still add it even if it failed */ add_event(pevent, event); - return -1; + return ret; + + event_alloc_failed: + free(event->system); + free(event->name); + free(event); + return ret; } int get_field_val(struct trace_seq *s, struct format_field *field, diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 5772ad8cb38..3c48229a7bc 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -345,6 +345,28 @@ enum pevent_flag { PEVENT_NSEC_OUTPUT = 1, /* output in NSECS */ }; +enum pevent_errno { + PEVENT_ERRNO__SUCCESS = 0, + + /* + * Choose an arbitrary negative big number not to clash with standard + * errno since SUS requires the errno has distinct positive values. + * See 'Issue 6' in the link below. + * + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + */ + __PEVENT_ERRNO__START = -100000, + + PEVENT_ERRNO__MEM_ALLOC_FAILED = __PEVENT_ERRNO__START, + PEVENT_ERRNO__PARSE_EVENT_FAILED, + PEVENT_ERRNO__READ_ID_FAILED, + PEVENT_ERRNO__READ_FORMAT_FAILED, + PEVENT_ERRNO__READ_PRINT_FAILED, + PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED, + + __PEVENT_ERRNO__END, +}; + struct cmdline; struct cmdline_list; struct func_map; @@ -509,8 +531,8 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size, int long_size); -int pevent_parse_event(struct pevent *pevent, const char *buf, - unsigned long size, const char *sys); +enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, + unsigned long size, const char *sys); void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event, const char *name, struct pevent_record *record, From 2f197b9d7eeaa723a80243610956fe4a17e7b5a4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 22 Aug 2012 16:00:30 +0900 Subject: [PATCH 113/282] tools lib traceevent: Introduce pevent_strerror The pevent_strerror() sets @buf to a string that describes the (libtraceevent-specific) error condition that is passed via @errnum. This is similar to strerror_r() and does same thing if @errnum has a standard errno value. 
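As a usage sketch (hypothetical helper, assuming only the pevent_strerror() prototype added by this patch), a caller can turn either a libtraceevent code or a standard errno value into a message:

#include <stdio.h>
#include "event-parse.h"

/* Hypothetical helper: print a human-readable message for a pevent error. */
static void report_pevent_error(struct pevent *pevent, enum pevent_errno err)
{
	char buf[128];

	if (pevent_strerror(pevent, err, buf, sizeof(buf)) == 0)
		fprintf(stderr, "libtraceevent: %s\n", buf);
	else
		fprintf(stderr, "libtraceevent: unknown error %d\n", err);
}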
To sync error string with its code, define PEVENT_ERRORS with _PE() macro and use it as suggested by Steven. Signed-off-by: Namhyung Kim Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1345618831-9148-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 43 ++++++++++++++++++++++++++++++ tools/lib/traceevent/event-parse.h | 20 +++++++++----- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index a46cae701db..1373e4cf109 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4795,6 +4795,49 @@ enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, return ret; } +#undef _PE +#define _PE(code, str) str +static const char * const pevent_error_str[] = { + PEVENT_ERRORS +}; +#undef _PE + +int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, + char *buf, size_t buflen) +{ + int idx; + const char *msg; + + if (errnum >= 0) { + strerror_r(errnum, buf, buflen); + return 0; + } + + if (errnum <= __PEVENT_ERRNO__START || + errnum >= __PEVENT_ERRNO__END) + return -1; + + idx = errnum - __PEVENT_ERRNO__START; + msg = pevent_error_str[idx]; + + switch (errnum) { + case PEVENT_ERRNO__MEM_ALLOC_FAILED: + case PEVENT_ERRNO__PARSE_EVENT_FAILED: + case PEVENT_ERRNO__READ_ID_FAILED: + case PEVENT_ERRNO__READ_FORMAT_FAILED: + case PEVENT_ERRNO__READ_PRINT_FAILED: + case PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED: + snprintf(buf, buflen, "%s", msg); + break; + + default: + /* cannot reach here */ + break; + } + + return 0; +} + int get_field_val(struct trace_seq *s, struct format_field *field, const char *name, struct pevent_record *record, unsigned long long *val, int err) diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 3c48229a7bc..527df038a25 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -345,6 +345,16 @@ enum pevent_flag { PEVENT_NSEC_OUTPUT = 1, /* output in NSECS */ }; +#define PEVENT_ERRORS \ + _PE(MEM_ALLOC_FAILED, "failed to allocate memory"), \ + _PE(PARSE_EVENT_FAILED, "failed to parse event"), \ + _PE(READ_ID_FAILED, "failed to read event id"), \ + _PE(READ_FORMAT_FAILED, "failed to read event format"), \ + _PE(READ_PRINT_FAILED, "failed to read event print fmt"), \ + _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace") + +#undef _PE +#define _PE(__code, __str) PEVENT_ERRNO__ ## __code enum pevent_errno { PEVENT_ERRNO__SUCCESS = 0, @@ -357,15 +367,11 @@ enum pevent_errno { */ __PEVENT_ERRNO__START = -100000, - PEVENT_ERRNO__MEM_ALLOC_FAILED = __PEVENT_ERRNO__START, - PEVENT_ERRNO__PARSE_EVENT_FAILED, - PEVENT_ERRNO__READ_ID_FAILED, - PEVENT_ERRNO__READ_FORMAT_FAILED, - PEVENT_ERRNO__READ_PRINT_FAILED, - PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED, + PEVENT_ERRORS, __PEVENT_ERRNO__END, }; +#undef _PE struct cmdline; struct cmdline_list; @@ -583,6 +589,8 @@ int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec); const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid); void pevent_event_info(struct trace_seq *s, struct event_format *event, struct pevent_record *record); +int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, + char *buf, size_t buflen); struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_type); struct format_field 
**pevent_event_common_fields(struct event_format *event); From e1aa7c30c599e99b4544f9e5b4c275c8a5325bdc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 22 Aug 2012 16:00:31 +0900 Subject: [PATCH 114/282] tools lib traceevent: Fix strerror_r() use in pevent_strerror glibc-2.16 starts to mark the function with attribute warn_unused_result so that it can cause a build warning. Since GNU version of strerror_r() can return a pointer to a string without setting @buf, check the return value and copy/truncate it to our buffer if needed. Signed-off-by: Namhyung Kim Acked-by: Kirill A. Shutemov Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1345618831-9148-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 7 ++++++- tools/lib/traceevent/event-utils.h | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 1373e4cf109..f978c59f67b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4809,7 +4809,12 @@ int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, const char *msg; if (errnum >= 0) { - strerror_r(errnum, buf, buflen); + msg = strerror_r(errnum, buf, buflen); + if (msg != buf) { + size_t len = strlen(msg); + char *c = mempcpy(buf, msg, min(buflen-1, len)); + *c = '\0'; + } return 0; } diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h index 08296383d1e..bc075006966 100644 --- a/tools/lib/traceevent/event-utils.h +++ b/tools/lib/traceevent/event-utils.h @@ -39,6 +39,12 @@ void __vdie(const char *fmt, ...); void __vwarning(const char *fmt, ...); void __vpr_stat(const char *fmt, ...); +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + static inline char *strim(char *string) { char *ret; From f63fe79fa0b1362e8ea1c1e4996cf70d14917b02 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 23 Aug 2012 16:37:00 +0900 Subject: [PATCH 115/282] tools lib traceevent: Fix off-by-one bug in pevent_strerror() As pevent_errno is defined using PEVENT_ERRORS which uses _PE macro magic, the first errno is bigger than __PEVENT_ERRNO_START by 1. So we need to subtract the 1 also when calculating the index of the error strings. 
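A small standalone check of the corrected index arithmetic (the constants mirror the enum values quoted above; nothing here is part of the library itself):

#include <assert.h>

enum { START = -100000 };               /* stands in for __PEVENT_ERRNO__START */
enum { MEM_ALLOC_FAILED = START + 1 };  /* first _PE() entry, hence START + 1  */

int main(void)
{
	int errnum = MEM_ALLOC_FAILED;
	int idx = errnum - START - 1;   /* the fixed calculation */

	assert(idx == 0);               /* maps to pevent_error_str[0] */
	return 0;
}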
Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1345707420-21767-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index f978c59f67b..b5b4d806ffa 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4822,7 +4822,7 @@ int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, errnum >= __PEVENT_ERRNO__END) return -1; - idx = errnum - __PEVENT_ERRNO__START; + idx = errnum - __PEVENT_ERRNO__START - 1; msg = pevent_error_str[idx]; switch (errnum) { From 48bb5dc6cd9d30fe0d594947563da1f8bd9abada Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Feb 2011 13:13:23 -0500 Subject: [PATCH 116/282] ftrace: Make recordmcount.c handle __fentry__ With gcc 4.6.0 the -mfentry feature places the function profiling call at the start of the function. When this is used, the call is to __fentry__ and not mcount. Change recordmcount.c to record both callers to __fentry__ and mcount. Link: http://lkml.kernel.org/r/20120807194058.990674363@goodmis.org Acked-by: H. Peter Anvin Acked-by: John Reiser Acked-by: Ingo Molnar Cc: Andi Kleen Signed-off-by: Steven Rostedt --- scripts/recordmcount.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 54e35c1e594..9d1421e63ff 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -261,11 +261,13 @@ static unsigned get_mcountsym(Elf_Sym const *const sym0, &sym0[Elf_r_sym(relp)]; char const *symname = &str0[w(symp->st_name)]; char const *mcount = gpfx == '_' ? "_mcount" : "mcount"; + char const *fentry = "__fentry__"; if (symname[0] == '.') ++symname; /* ppc64 hack */ if (strcmp(mcount, symname) == 0 || - (altmcount && strcmp(altmcount, symname) == 0)) + (altmcount && strcmp(altmcount, symname) == 0) || + (strcmp(fentry, symname) == 0)) mcountsym = Elf_r_sym(relp); return mcountsym; From a2546fae01124fb8063747439300fcf39bac033a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Feb 2011 13:15:59 -0500 Subject: [PATCH 117/282] ftrace: Add -mfentry to Makefile on function tracer Thanks to Andi Kleen, gcc 4.6.0 now supports -mfentry for x86 (and hopefully soon for other archs). What this does is to have the function profiler start at the beginning of the function instead of after the stack is set up. As plain -pg (mcount) is called after the stack is set up, and in some cases can have issues with the function graph tracer. It also requires frame pointers to be enabled. The -mfentry now calls __fentry__ at the beginning of the function. This allows for compiling without frame pointers and even has the ability to access parameters if needed. If the architecture and the compiler both support -mfentry then use that instead. Link: http://lkml.kernel.org/r/20120807194059.392617243@goodmis.org Acked-by: H. 
Peter Anvin Acked-by: Ingo Molnar Cc: Michal Marek Cc: Andi Kleen Signed-off-by: Steven Rostedt --- Makefile | 6 +++++- kernel/trace/Kconfig | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ddf5be952e4..e7ca93f1785 100644 --- a/Makefile +++ b/Makefile @@ -609,7 +609,11 @@ KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) endif ifdef CONFIG_FUNCTION_TRACER -KBUILD_CFLAGS += -pg +ifdef CONFIG_HAVE_FENTRY +CC_USING_FENTRY := $(call cc-option, -mfentry -DCC_USING_FENTRY) +endif +KBUILD_CFLAGS += -pg $(CC_USING_FENTRY) +KBUILD_AFLAGS += $(CC_USING_FENTRY) ifdef CONFIG_DYNAMIC_FTRACE ifdef CONFIG_HAVE_C_RECORDMCOUNT BUILD_C_RECORDMCOUNT := y diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8c4c07071cc..9301a0e35e0 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS help See Documentation/trace/ftrace-design.txt +config HAVE_FENTRY + bool + help + Arch supports the gcc options -pg with -mfentry + config HAVE_C_RECORDMCOUNT bool help From 781d06248234e221edb560a18461d65808a8a942 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Feb 2011 13:27:22 -0500 Subject: [PATCH 118/282] ftrace: Do not test frame pointers if -mfentry is used The function graph has a test to check if the frame pointer is corrupted, which can happen with various options of gcc with mcount. But this is not an issue with -mfentry as -mfentry does not need nor use frame pointers for function graph tracing. Link: http://lkml.kernel.org/r/20120807194059.773895870@goodmis.org Acked-by: H. Peter Anvin Acked-by: Ingo Molnar Cc: Andi Kleen Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index ce27c8ba8d3..99b4378393d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -143,7 +143,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, return; } -#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST +#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY) /* * The arch may choose to record the frame pointer used * and check it here to make sure that it is what we expect it @@ -154,6 +154,9 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, * * Currently, x86_32 with optimize for size (-Os) makes the latest * gcc do the above. + * + * Note, -mfentry does not use frame pointers, and this test + * is not needed if CC_USING_FENTRY is set. */ if (unlikely(current->ret_stack[index].fp != frame_pointer)) { ftrace_graph_stop(); From d57c5d51a30152f3175d2344cb6395f08bf8ee0c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Feb 2011 13:32:18 -0500 Subject: [PATCH 119/282] ftrace/x86: Add support for -mfentry to x86_64 If the kernel is compiled with gcc 4.6.0 which supports -mfentry, then use that instead of mcount. 
With mcount, frame pointers are forced with the -pg option and we get something like: : 55 push %rbp 48 89 e5 mov %rsp,%rbp 53 push %rbx 41 51 push %r9 e8 fe 6a 39 00 callq ffffffff81483d00 31 c0 xor %eax,%eax 48 89 fb mov %rdi,%rbx 48 89 d7 mov %rdx,%rdi 48 33 73 30 xor 0x30(%rbx),%rsi 48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi With -mfentry, frame pointers are no longer forced and the call looks like this: : e8 33 af 37 00 callq ffffffff81461b40 <__fentry__> 53 push %rbx 48 89 fb mov %rdi,%rbx 31 c0 xor %eax,%eax 48 89 d7 mov %rdx,%rdi 41 51 push %r9 48 33 73 30 xor 0x30(%rbx),%rsi 48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi This adds the ftrace hook at the beginning of the function before a frame is set up, and allows the function callbacks to be able to access parameters. As kprobes now can use function tracing (at least on x86) this speeds up the kprobe hooks that are at the beginning of the function. Link: http://lkml.kernel.org/r/20120807194100.130477900@goodmis.org Acked-by: Ingo Molnar Reviewed-by: Masami Hiramatsu Cc: Andi Kleen Signed-off-by: Steven Rostedt --- arch/x86/Kconfig | 1 + arch/x86/include/asm/ftrace.h | 7 ++++++- arch/x86/kernel/entry_64.S | 32 +++++++++++++++++++++++++++----- arch/x86/kernel/x8664_ksyms_64.c | 6 +++++- 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a2d19ee750c..28dd891a0a1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -36,6 +36,7 @@ config X86 select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index a6cae0c1720..9a25b522d37 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -35,7 +35,11 @@ #endif #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(mcount)) +#ifdef CC_USING_FENTRY +# define MCOUNT_ADDR ((long)(__fentry__)) +#else +# define MCOUNT_ADDR ((long)(mcount)) +#endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ #ifdef CONFIG_DYNAMIC_FTRACE @@ -46,6 +50,7 @@ #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; +extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b7a81dcb736..ed767b747fe 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -68,10 +68,18 @@ .section .entry.text, "ax" #ifdef CONFIG_FUNCTION_TRACER + +#ifdef CC_USING_FENTRY +# define function_hook __fentry__ +#else +# define function_hook mcount +#endif + #ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(mcount) + +ENTRY(function_hook) retq -END(mcount) +END(function_hook) /* skip is set if stack has been adjusted */ .macro ftrace_caller_setup skip=0 @@ -84,7 +92,11 @@ END(mcount) movq RIP(%rsp), %rdi subq $MCOUNT_INSN_SIZE, %rdi /* Load the parent_ip into the second parameter */ +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else movq 8(%rbp), %rsi +#endif .endm ENTRY(ftrace_caller) @@ -177,7 +189,8 @@ END(ftrace_regs_caller) #else /* ! 
CONFIG_DYNAMIC_FTRACE */ -ENTRY(mcount) + +ENTRY(function_hook) cmpl $0, function_trace_stop jne ftrace_stub @@ -199,7 +212,11 @@ trace: MCOUNT_SAVE_FRAME movq RIP(%rsp), %rdi +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else movq 8(%rbp), %rsi +#endif subq $MCOUNT_INSN_SIZE, %rdi call *ftrace_trace_function @@ -207,7 +224,7 @@ trace: MCOUNT_RESTORE_FRAME jmp ftrace_stub -END(mcount) +END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ @@ -215,9 +232,14 @@ END(mcount) ENTRY(ftrace_graph_caller) MCOUNT_SAVE_FRAME +#ifdef CC_USING_FENTRY + leaq SS+16(%rsp), %rdi + movq $0, %rdx /* No framepointers needed */ +#else leaq 8(%rbp), %rdi - movq RIP(%rsp), %rsi movq (%rbp), %rdx +#endif + movq RIP(%rsp), %rsi subq $MCOUNT_INSN_SIZE, %rsi call prepare_ftrace_return diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 6020f6f5927..1330dd10295 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -13,9 +13,13 @@ #include #ifdef CONFIG_FUNCTION_TRACER -/* mcount is defined in assembly */ +/* mcount and __fentry__ are defined in assembly */ +#ifdef CC_USING_FENTRY +EXPORT_SYMBOL(__fentry__); +#else EXPORT_SYMBOL(mcount); #endif +#endif EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); From 61bccf191fe2d55b8d003b4ea3f94913745aaefa Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 22 Aug 2012 09:23:51 +0200 Subject: [PATCH 120/282] oprofile: Remove 'WQ on CPUx, prefer CPUy' warning Under certain workloads we see the following warnings: WQ on CPU0, prefer CPU1 WQ on CPU0, prefer CPU2 WQ on CPU0, prefer CPU3 It warns the user that the wq to access a per-cpu buffers runs not on the same cpu. This happens if the wq is rescheduled on a different cpu than where the buffer is located. This was probably implemented to detect performance issues. Not sure if there actually is one as the buffers are copied to a single buffer anyway which should be the actual bottleneck. We wont change WQ implementation. Since a user can do nothing the warning is pointless. Removing it. Cc: Andi Kleen Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index b8ef8ddcc29..8aa73fac6ad 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -451,14 +451,9 @@ static void wq_sync_buffer(struct work_struct *work) { struct oprofile_cpu_buffer *b = container_of(work, struct oprofile_cpu_buffer, work.work); - if (b->cpu != smp_processor_id()) { - printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n", - smp_processor_id(), b->cpu); - - if (!cpu_online(b->cpu)) { - cancel_delayed_work(&b->work); - return; - } + if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) { + cancel_delayed_work(&b->work); + return; } sync_buffer(b->cpu); From 61e1d394984110e2e76f25572d5b1b5d48796751 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 1 Jun 2012 14:49:50 +0530 Subject: [PATCH 121/282] uprobes: Remove redundant lock_page/unlock_page Since read_opcode() reads from the referenced page and doesnt modify the page contents nor the page attributes, there is no need to lock the page. 
Signed-off-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c08a22d02f7..7cff24c60dd 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -280,12 +280,10 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_ if (ret <= 0) return ret; - lock_page(page); vaddr_new = kmap_atomic(page); vaddr &= ~PAGE_MASK; memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE); kunmap_atomic(vaddr_new); - unlock_page(page); put_page(page); From 8bd874456e2ec49b9e64372ddc89a6f88901d184 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 7 Aug 2012 18:12:30 +0200 Subject: [PATCH 122/282] uprobes: Remove check for uprobe variable in handle_swbp() by the time we get here (after we pass cleanup_ret) uprobe is always is set. If it is NULL we leave very early in the code. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7cff24c60dd..0cefde27664 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1516,17 +1516,15 @@ cleanup_ret: utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; } - if (uprobe) { - if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) + if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) - /* - * cannot singlestep; cannot skip instruction; - * re-execute the instruction. - */ - instruction_pointer_set(regs, bp_vaddr); + /* + * cannot singlestep; cannot skip instruction; + * re-execute the instruction. + */ + instruction_pointer_set(regs, bp_vaddr); - put_uprobe(uprobe); - } + put_uprobe(uprobe); } /* From 647c42dfd40fec032a4c8525a755160f0765921f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 6 Aug 2012 13:15:09 +0200 Subject: [PATCH 123/282] uprobes: Kill uprobes_state->count uprobes_state->count is only needed to avoid the slow path in uprobe_pre_sstep_notifier(). It is also checked in uprobe_munmap() but ironically its only goal to decrement this counter. However, it is very broken. Just some examples: - uprobe_mmap() can race with uprobe_unregister() and wrongly increment the counter if it hits the non-uprobe "int3". Note that install_breakpoint() checks ->consumers first and returns -EEXIST if it is NULL. "atomic_sub() if error" in uprobe_mmap() looks obviously wrong too. - uprobe_munmap() can race with uprobe_register() and wrongly decrement the counter by the same reason. - Suppose an appication tries to increase the mmapped area via sys_mremap(). vma_adjust() does uprobe_munmap(whole_vma) first, this can nullify the counter temporarily and race with another thread which can hit the bp, the application will be killed by SIGTRAP. - Suppose an application mmaps 2 consecutive areas in the same file and one (or both) of these areas has uprobes. In the likely case mmap_region()->vma_merge() suceeds. Like above, this leads to uprobe_munmap/uprobe_mmap from vma_merge()->vma_adjust() but then mmap_region() does another uprobe_mmap(resulting_vma) and doubles the counter. This patch only removes this counter and fixes the compile errors, then we will try to cleanup the changed code and add something else instead. 
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 2 +- kernel/events/uprobes.c | 38 ++------------------------------------ 2 files changed, 3 insertions(+), 37 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index efe4b3308c7..03ae547c1c3 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -99,8 +99,8 @@ struct xol_area { struct uprobes_state { struct xol_area *xol_area; - atomic_t count; }; + extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 0cefde27664..6f1664d217d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -678,18 +678,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, uprobe->flags |= UPROBE_COPY_INSN; } - /* - * Ideally, should be updating the probe count after the breakpoint - * has been successfully inserted. However a thread could hit the - * breakpoint we just inserted even before the probe count is - * incremented. If this is the first breakpoint placed, breakpoint - * notifier might ignore uprobes and pass the trap to the thread. - * Hence increment before and decrement on failure. - */ - atomic_inc(&mm->uprobes_state.count); ret = set_swbp(&uprobe->arch, mm, vaddr); - if (ret) - atomic_dec(&mm->uprobes_state.count); return ret; } @@ -697,8 +686,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, static void remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { - if (!set_orig_insn(&uprobe->arch, mm, vaddr, true)) - atomic_dec(&mm->uprobes_state.count); + set_orig_insn(&uprobe->arch, mm, vaddr, true); } /* @@ -1051,13 +1039,6 @@ int uprobe_mmap(struct vm_area_struct *vma) if (!is_swbp_at_addr(vma->vm_mm, vaddr)) continue; - - /* - * Unable to insert a breakpoint, but - * breakpoint lies underneath. Increment the - * probe count. - */ - atomic_inc(&vma->vm_mm->uprobes_state.count); } if (!ret) @@ -1068,9 +1049,6 @@ int uprobe_mmap(struct vm_area_struct *vma) mutex_unlock(uprobes_mmap_hash(inode)); - if (ret) - atomic_sub(count, &vma->vm_mm->uprobes_state.count); - return ret; } @@ -1089,9 +1067,6 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ return; - if (!atomic_read(&vma->vm_mm->uprobes_state.count)) - return; - inode = vma->vm_file->f_mapping->host; if (!inode) return; @@ -1100,13 +1075,6 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon build_probe_list(inode, vma, start, end, &tmp_list); list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { - unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); - /* - * An unregister could have removed the probe before - * unmap. So check before we decrement the count. 
- */ - if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) - atomic_dec(&vma->vm_mm->uprobes_state.count); put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); @@ -1217,7 +1185,6 @@ void uprobe_clear_state(struct mm_struct *mm) void uprobe_reset_state(struct mm_struct *mm) { mm->uprobes_state.xol_area = NULL; - atomic_set(&mm->uprobes_state.count, 0); } /* @@ -1585,8 +1552,7 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { struct uprobe_task *utask; - if (!current->mm || !atomic_read(¤t->mm->uprobes_state.count)) - /* task is currently not uprobed */ + if (!current->mm) return 0; utask = current->utask; From f1a45d023193f7d8e55e384090b645d609325393 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 6 Aug 2012 14:13:23 +0200 Subject: [PATCH 124/282] uprobes: Kill dup_mmap()->uprobe_mmap(), simplify uprobe_mmap/munmap 1. Kill dup_mmap()->uprobe_mmap(), it was only needed to calculate new_mm->uprobes_state.count removed by the previous patch. If the forking process has a pending uprobe (int3) in vma, it will be copied by copy_page_range(), note that it checks vma->anon_vma so "Don't copy ptes" is not possible after install_breakpoint() which does anon_vma_prepare(). 2. Remove is_swbp_at_addr() and "int count" in uprobe_mmap(). Again, this was needed for uprobes_state.count. As a side effect this fixes the bug pointed out by Srikar, this code lacked the necessary put_uprobe(). 3. uprobe_munmap() becomes a nop after the previous patch. Remove the meaningless code but do not remove the helper, we will need it. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 30 +++--------------------------- kernel/fork.c | 3 --- 2 files changed, 3 insertions(+), 30 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 6f1664d217d..ce59c100d65 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1010,7 +1010,7 @@ int uprobe_mmap(struct vm_area_struct *vma) struct list_head tmp_list; struct uprobe *uprobe, *u; struct inode *inode; - int ret, count; + int ret; if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) return 0; @@ -1023,8 +1023,6 @@ int uprobe_mmap(struct vm_area_struct *vma) build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); ret = 0; - count = 0; - list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!ret) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); @@ -1034,19 +1032,11 @@ int uprobe_mmap(struct vm_area_struct *vma) * We can race against uprobe_register(), see the * comment near uprobe_hash(). */ - if (ret == -EEXIST) { + if (ret == -EEXIST) ret = 0; - - if (!is_swbp_at_addr(vma->vm_mm, vaddr)) - continue; - } - - if (!ret) - count++; } put_uprobe(uprobe); } - mutex_unlock(uprobes_mmap_hash(inode)); return ret; @@ -1057,27 +1047,13 @@ int uprobe_mmap(struct vm_area_struct *vma) */ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - struct list_head tmp_list; - struct uprobe *uprobe, *u; - struct inode *inode; - if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) return; if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? 
*/ return; - inode = vma->vm_file->f_mapping->host; - if (!inode) - return; - - mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, vma, start, end, &tmp_list); - - list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { - put_uprobe(uprobe); - } - mutex_unlock(uprobes_mmap_hash(inode)); + /* TODO: unmapping uprobe(s) will need more work */ } /* Slot allocation for XOL */ diff --git a/kernel/fork.c b/kernel/fork.c index 2c8857e1285..912b6f6fe5b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -454,9 +454,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; - - if (file) - uprobe_mmap(tmp); } /* a new mm has just been created */ arch_dup_mmap(oldmm, mm); From 5e5be71ab3fd8bd2076606923791ece1634c199c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 6 Aug 2012 14:49:56 +0200 Subject: [PATCH 125/282] uprobes: Change uprobe_mmap() to ignore the errors but check fatal_signal_pending() Once install_breakpoint() fails uprobe_mmap() "ignores" all other uprobes and returns the error. It was never really needed to to stop after the first error, and in fact it was always wrong at least in -ENOTSUPP case. Change uprobe_mmap() to ignore the errors and always return 0. This is not what we want in the long term, but until we teach the callers to handle the failure it would be better to remove the pointless complications. And this doesn't look too bad, the only "reasonable" error is ENOMEM but in this case the caller should be oom-killed in the likely case or the system has more serious problems. However it makes sense to stop if fatal_signal_pending() == T. In particular this helps if the task was oom-killed. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ce59c100d65..298fbbdf57e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -994,23 +994,16 @@ static void build_probe_list(struct inode *inode, } /* - * Called from mmap_region. - * called with mm->mmap_sem acquired. + * Called from mmap_region/vma_adjust with mm->mmap_sem acquired. * - * Return -ve no if we fail to insert probes and we cannot - * bail-out. - * Return 0 otherwise. i.e: - * - * - successful insertion of probes - * - (or) no possible probes to be inserted. - * - (or) insertion of probes failed but we can bail-out. + * Currently we ignore all errors and always return 0, the callers + * can't handle the failure anyway. */ int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; struct uprobe *uprobe, *u; struct inode *inode; - int ret; if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) return 0; @@ -1022,24 +1015,16 @@ int uprobe_mmap(struct vm_area_struct *vma) mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); - ret = 0; list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { - if (!ret) { + if (!fatal_signal_pending(current)) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); - - ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); - /* - * We can race against uprobe_register(), see the - * comment near uprobe_hash(). 
- */ - if (ret == -EEXIST) - ret = 0; + install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); } put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); - return ret; + return 0; } /* From 78f7411668aa0b2006d331f6a288416dd91b8e5d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Aug 2012 17:35:08 +0200 Subject: [PATCH 126/282] uprobes: Do not use -EEXIST in install_breakpoint() paths -EEXIST from install_breakpoint() no longer makes sense, all callers should simply treat it as "success". Change the code to return zero and simplify register_for_each_vma(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 298fbbdf57e..3e2996b809b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -332,7 +332,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned */ result = is_swbp_at_addr(mm, vaddr); if (result == 1) - return -EEXIST; + return 0; if (result) return result; @@ -657,7 +657,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, * Hence behave as if probe already existed. */ if (!uprobe->consumers) - return -EEXIST; + return 0; if (!(uprobe->flags & UPROBE_COPY_INSN)) { ret = copy_insn(uprobe, vma->vm_file); @@ -817,17 +817,11 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; - if (is_register) { + if (is_register) err = install_breakpoint(uprobe, mm, vma, info->vaddr); - /* - * We can race against uprobe_mmap(), see the - * comment near uprobe_hash(). - */ - if (err == -EEXIST) - err = 0; - } else { + else remove_breakpoint(uprobe, mm, info->vaddr); - } + unlock: up_write(&mm->mmap_sem); free: From f8ac4ec9c064b330dcc49e03c450fe74298c4622 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Aug 2012 17:11:42 +0200 Subject: [PATCH 127/282] uprobes: Introduce MMF_HAS_UPROBES Add the new MMF_HAS_UPROBES flag. It is set by install_breakpoint() and it is copied by dup_mmap(), uprobe_pre_sstep_notifier() checks it to avoid the slow path if the task was never probed. Perhaps it makes sense to check it in valid_vma(is_register => false) as well. This needs the new dup_mmap()->uprobe_dup_mmap() hook. We can't use uprobe_reset_state() or put MMF_HAS_UPROBES into MMF_INIT_MASK, we need oldmm->mmap_sem to avoid the race with uprobe_register() or mmap() from another thread. Currently we never clear this bit, it can be false-positive after uprobe_unregister() or uprobe_munmap() or if dup_mmap() hits the probed VM_DONTCOPY vma. But this is fine correctness-wise and has no effect unless the task hits the non-uprobe breakpoint. 
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/sched.h | 2 ++ include/linux/uprobes.h | 5 +++++ kernel/events/uprobes.c | 22 +++++++++++++++++++++- kernel/fork.c | 1 + 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b8c86648a2f..3667c332e61 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -446,6 +446,8 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +#define MMF_HAS_UPROBES 19 /* might have uprobes */ + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) struct sighand_struct { diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 03ae547c1c3..4a37ab15324 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -108,6 +108,7 @@ extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_con extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); @@ -138,6 +139,10 @@ static inline void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } +static inline void +uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) +{ +} static inline void uprobe_notify_resume(struct pt_regs *regs) { } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 3e2996b809b..33870b17e1d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -647,6 +647,7 @@ static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr) { + bool first_uprobe; int ret; /* @@ -678,7 +679,17 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, uprobe->flags |= UPROBE_COPY_INSN; } + /* + * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(), + * the task can hit this breakpoint right after __replace_page(). + */ + first_uprobe = !test_bit(MMF_HAS_UPROBES, &mm->flags); + if (first_uprobe) + set_bit(MMF_HAS_UPROBES, &mm->flags); + ret = set_swbp(&uprobe->arch, mm, vaddr); + if (ret && first_uprobe) + clear_bit(MMF_HAS_UPROBES, &mm->flags); return ret; } @@ -1032,6 +1043,9 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ return; + if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags)) + return; + /* TODO: unmapping uprobe(s) will need more work */ } @@ -1142,6 +1156,12 @@ void uprobe_reset_state(struct mm_struct *mm) mm->uprobes_state.xol_area = NULL; } +void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) +{ + if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) + set_bit(MMF_HAS_UPROBES, &newmm->flags); +} + /* * - search for a free slot. 
*/ @@ -1507,7 +1527,7 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { struct uprobe_task *utask; - if (!current->mm) + if (!current->mm || !test_bit(MMF_HAS_UPROBES, ¤t->mm->flags)) return 0; utask = current->utask; diff --git a/kernel/fork.c b/kernel/fork.c index 912b6f6fe5b..cbb5f9fcd3e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -353,6 +353,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) down_write(&oldmm->mmap_sem); flush_cache_dup_mm(oldmm); + uprobe_dup_mmap(oldmm, mm); /* * Not linked in yet - no deadlock potential: */ From 61559a8165da2b6bab7621ac36379c6280efacb6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Aug 2012 17:17:46 +0200 Subject: [PATCH 128/282] uprobes: Fold uprobe_reset_state() into uprobe_dup_mmap() Now that we have uprobe_dup_mmap() we can fold uprobe_reset_state() into the new hook and remove it. mmput()->uprobe_clear_state() can't be called before dup_mmap(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 4 ---- kernel/events/uprobes.c | 10 ++-------- kernel/fork.c | 2 -- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 4a37ab15324..30297f95c8a 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -118,7 +118,6 @@ extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); extern void uprobe_clear_state(struct mm_struct *mm); -extern void uprobe_reset_state(struct mm_struct *mm); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; @@ -163,8 +162,5 @@ static inline void uprobe_copy_process(struct task_struct *t) static inline void uprobe_clear_state(struct mm_struct *mm) { } -static inline void uprobe_reset_state(struct mm_struct *mm) -{ -} #endif /* !CONFIG_UPROBES */ #endif /* _LINUX_UPROBES_H */ diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 33870b17e1d..610e1c8050c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1148,16 +1148,10 @@ void uprobe_clear_state(struct mm_struct *mm) kfree(area); } -/* - * uprobe_reset_state - Free the area allocated for slots. - */ -void uprobe_reset_state(struct mm_struct *mm) -{ - mm->uprobes_state.xol_area = NULL; -} - void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { + newmm->uprobes_state.xol_area = NULL; + if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) set_bit(MMF_HAS_UPROBES, &newmm->flags); } diff --git a/kernel/fork.c b/kernel/fork.c index cbb5f9fcd3e..2343c9eaaaf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -837,8 +837,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) #ifdef CONFIG_TRANSPARENT_HUGEPAGE mm->pmd_huge_pte = NULL; #endif - uprobe_reset_state(mm); - if (!mm_init(mm, tsk)) goto fail_nomem; From ded86e7c8fc4404414c4700010c9962ea8bd083a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Aug 2012 18:07:03 +0200 Subject: [PATCH 129/282] uprobes: Remove "verify" argument from set_orig_insn() Nobody does set_orig_insn(verify => false), and I think nobody will. Remove this argument. IIUC set_orig_insn(verify => false) was needed to single-step without xol area. 
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 2 +- kernel/events/uprobes.c | 20 +++++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 30297f95c8a..6d4fe79a1a6 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -102,7 +102,7 @@ struct uprobes_state { }; extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); -extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify); +extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 610e1c8050c..1666632e6ed 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -345,24 +345,22 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned * @mm: the probed process address space. * @auprobe: arch specific probepoint information. * @vaddr: the virtual address to insert the opcode. - * @verify: if true, verify existance of breakpoint instruction. * * For mm @mm, restore the original opcode (opcode) at @vaddr. * Return 0 (success) or a negative errno. */ int __weak -set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, bool verify) +set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - if (verify) { - int result; + int result; - result = is_swbp_at_addr(mm, vaddr); - if (!result) - return -EINVAL; + result = is_swbp_at_addr(mm, vaddr); + if (!result) + return -EINVAL; + + if (result != 1) + return result; - if (result != 1) - return result; - } return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); } @@ -697,7 +695,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, static void remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { - set_orig_insn(&uprobe->arch, mm, vaddr, true); + set_orig_insn(&uprobe->arch, mm, vaddr); } /* From ff1a70e75fd005821ab5f2211312a8aa13bbf959 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Aug 2012 11:22:01 -0400 Subject: [PATCH 130/282] tools lib traceevent: Modify header to work in C++ programs Replace keyword "private" to "priv" in event-parse.h to allow it to be used in C++ programs. 
Signed-off-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1345735321.5069.62.camel@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 527df038a25..863a0bbda7f 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -49,7 +49,7 @@ struct pevent_record { int cpu; int ref_count; int locked; /* Do not free, even if ref_count is zero */ - void *private; + void *priv; #if DEBUG_RECORD struct pevent_record *prev; struct pevent_record *next; @@ -106,7 +106,7 @@ struct plugin_option { char *plugin_alias; char *description; char *value; - void *private; + void *priv; int set; }; From d25380cd3be38baff4ab31935b9d19b7f58ba7ac Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 26 Aug 2012 12:24:41 -0600 Subject: [PATCH 131/282] perf session: flush_sample_queue needs to handle errors from handlers Allows errors to propogate through event processing code and back to commands. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346005487-62961-2-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f7bb7ae328d..945375897c2 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -692,7 +692,7 @@ static int perf_session_deliver_event(struct perf_session *session, struct perf_tool *tool, u64 file_offset); -static void flush_sample_queue(struct perf_session *s, +static int flush_sample_queue(struct perf_session *s, struct perf_tool *tool) { struct ordered_samples *os = &s->ordered_samples; @@ -705,7 +705,7 @@ static void flush_sample_queue(struct perf_session *s, int ret; if (!tool->ordered_samples || !limit) - return; + return 0; list_for_each_entry_safe(iter, tmp, head, list) { if (iter->timestamp > limit) @@ -715,9 +715,12 @@ static void flush_sample_queue(struct perf_session *s, s->header.needs_swap); if (ret) pr_err("Can't parse sample, err = %d\n", ret); - else - perf_session_deliver_event(s, iter->event, &sample, tool, - iter->file_offset); + else { + ret = perf_session_deliver_event(s, iter->event, &sample, tool, + iter->file_offset); + if (ret) + return ret; + } os->last_flush = iter->timestamp; list_del(&iter->list); @@ -737,6 +740,8 @@ static void flush_sample_queue(struct perf_session *s, } os->nr_samples = 0; + + return 0; } /* @@ -782,10 +787,11 @@ static int process_finished_round(struct perf_tool *tool, union perf_event *event __used, struct perf_session *session) { - flush_sample_queue(session, tool); - session->ordered_samples.next_flush = session->ordered_samples.max_timestamp; + int ret = flush_sample_queue(session, tool); + if (!ret) + session->ordered_samples.next_flush = session->ordered_samples.max_timestamp; - return 0; + return ret; } /* The queue is ordered by time */ @@ -1443,7 +1449,7 @@ more: err = 0; /* do the final flush for ordered samples */ session->ordered_samples.next_flush = ULLONG_MAX; - flush_sample_queue(session, tool); + err = flush_sample_queue(session, tool); out_err: perf_session__warn_about_errors(session, tool); perf_session_free_sample_buffers(session); From 
1e6d53223884225f0c3f9f1a3ac54a224d97ab24 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 26 Aug 2012 12:24:42 -0600 Subject: [PATCH 132/282] perf tool: handle errors in synthesized event functions Handle error from process callback and propagate back to caller. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346005487-62961-3-git-send-email-dsahern@gmail.com Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 3a0f1a5da91..84ff6f160cd 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -120,7 +120,9 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, if (!full) { event->comm.tid = pid; - process(tool, event, &synth_sample, machine); + if (process(tool, event, &synth_sample, machine) != 0) + return -1; + goto out; } @@ -151,7 +153,10 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, event->comm.tid = pid; - process(tool, event, &synth_sample, machine); + if (process(tool, event, &synth_sample, machine) != 0) { + tgid = -1; + break; + } } closedir(tasks); @@ -167,6 +172,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, { char filename[PATH_MAX]; FILE *fp; + int rc = 0; snprintf(filename, sizeof(filename), "/proc/%d/maps", pid); @@ -231,18 +237,22 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, event->mmap.pid = tgid; event->mmap.tid = pid; - process(tool, event, &synth_sample, machine); + if (process(tool, event, &synth_sample, machine) != 0) { + rc = -1; + break; + } } } fclose(fp); - return 0; + return rc; } int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { + int rc = 0; struct rb_node *nd; struct map_groups *kmaps = &machine->kmaps; union perf_event *event = zalloc((sizeof(event->mmap) + @@ -284,11 +294,14 @@ int perf_event__synthesize_modules(struct perf_tool *tool, memcpy(event->mmap.filename, pos->dso->long_name, pos->dso->long_name_len + 1); - process(tool, event, &synth_sample, machine); + if (process(tool, event, &synth_sample, machine) != 0) { + rc = -1; + break; + } } free(event); - return 0; + return rc; } static int __event__synthesize_thread(union perf_event *comm_event, @@ -392,12 +405,16 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (*end) /* only interested in proper numerical dirents */ continue; - __event__synthesize_thread(comm_event, mmap_event, pid, 1, - process, tool, machine); + if (__event__synthesize_thread(comm_event, mmap_event, pid, 1, + process, tool, machine) != 0) { + err = -1; + goto out_closedir; + } } - closedir(proc); err = 0; +out_closedir: + closedir(proc); out_free_mmap: free(mmap_event); out_free_comm: From 33d6aef5136075930f7e9a05175bf4f772d8428e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 26 Aug 2012 12:24:43 -0600 Subject: [PATCH 133/282] perf lock: Remove use of die and handle errors Allows perf to clean up properly on exit. 
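The conversion pattern, as a minimal standalone sketch (hypothetical names; the real changes below touch thread_stat_findnew(), select_key() and friends): instead of terminating inside the helper with die(), report the problem and hand an error back so the caller can unwind and exit cleanly.

#include <stdio.h>
#include <stdlib.h>

struct thing { int id; };

/*
 * Before: a failed allocation would die() here, skipping all cleanup.
 * After: report the failure and return NULL for the caller to handle.
 */
static struct thing *thing_new(int id)
{
	struct thing *t = calloc(1, sizeof(*t));

	if (!t) {
		fprintf(stderr, "memory allocation failed\n");
		return NULL;
	}
	t->id = id;
	return t;
}

int main(void)
{
	struct thing *t = thing_new(1);

	if (!t)
		return EXIT_FAILURE;    /* caller's cleanup/exit path */
	free(t);
	return 0;
}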
Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346005487-62961-4-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 181 ++++++++++++++++++++++++++------------ 1 file changed, 124 insertions(+), 57 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 585aae2858b..75153c87e65 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -161,8 +161,10 @@ static struct thread_stat *thread_stat_findnew_after_first(u32 tid) return st; st = zalloc(sizeof(struct thread_stat)); - if (!st) - die("memory allocation failed\n"); + if (!st) { + pr_err("memory allocation failed\n"); + return NULL; + } st->tid = tid; INIT_LIST_HEAD(&st->seq_list); @@ -181,8 +183,10 @@ static struct thread_stat *thread_stat_findnew_first(u32 tid) struct thread_stat *st; st = zalloc(sizeof(struct thread_stat)); - if (!st) - die("memory allocation failed\n"); + if (!st) { + pr_err("memory allocation failed\n"); + return NULL; + } st->tid = tid; INIT_LIST_HEAD(&st->seq_list); @@ -248,18 +252,20 @@ struct lock_key keys[] = { { NULL, NULL } }; -static void select_key(void) +static int select_key(void) { int i; for (i = 0; keys[i].name; i++) { if (!strcmp(keys[i].name, sort_key)) { compare = keys[i].key; - return; + return 0; } } - die("Unknown compare key:%s\n", sort_key); + pr_err("Unknown compare key: %s\n", sort_key); + + return -1; } static void insert_to_result(struct lock_stat *st, @@ -324,7 +330,8 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name) return new; alloc_failed: - die("memory allocation failed\n"); + pr_err("memory allocation failed\n"); + return NULL; } static const char *input_name; @@ -356,16 +363,16 @@ struct trace_release_event { }; struct trace_lock_handler { - void (*acquire_event)(struct trace_acquire_event *, + int (*acquire_event)(struct trace_acquire_event *, const struct perf_sample *sample); - void (*acquired_event)(struct trace_acquired_event *, + int (*acquired_event)(struct trace_acquired_event *, const struct perf_sample *sample); - void (*contended_event)(struct trace_contended_event *, + int (*contended_event)(struct trace_contended_event *, const struct perf_sample *sample); - void (*release_event)(struct trace_release_event *, + int (*release_event)(struct trace_release_event *, const struct perf_sample *sample); }; @@ -379,8 +386,10 @@ static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr) } seq = zalloc(sizeof(struct lock_seq_stat)); - if (!seq) - die("Not enough memory\n"); + if (!seq) { + pr_err("memory allocation failed\n"); + return NULL; + } seq->state = SEQ_STATE_UNINITIALIZED; seq->addr = addr; @@ -403,7 +412,7 @@ enum acquire_flags { READ_LOCK = 2, }; -static void +static int report_lock_acquire_event(struct trace_acquire_event *acquire_event, const struct perf_sample *sample) { @@ -412,11 +421,18 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event, struct lock_seq_stat *seq; ls = lock_stat_findnew(acquire_event->addr, acquire_event->name); + if (!ls) + return -1; if (ls->discard) - return; + return 0; ts = thread_stat_findnew(sample->tid); + if (!ts) + return -1; + seq = get_seq(ts, acquire_event->addr); + if (!seq) + return -1; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: @@ -461,10 +477,10 @@ broken: ls->nr_acquire++; seq->prev_event_time = sample->time; end: - return; + return 0; } -static void +static int report_lock_acquired_event(struct 
trace_acquired_event *acquired_event, const struct perf_sample *sample) { @@ -475,16 +491,23 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event, u64 contended_term; ls = lock_stat_findnew(acquired_event->addr, acquired_event->name); + if (!ls) + return -1; if (ls->discard) - return; + return 0; ts = thread_stat_findnew(sample->tid); + if (!ts) + return -1; + seq = get_seq(ts, acquired_event->addr); + if (!seq) + return -1; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: /* orphan event, do nothing */ - return; + return 0; case SEQ_STATE_ACQUIRING: break; case SEQ_STATE_CONTENDED: @@ -515,10 +538,10 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event, ls->nr_acquired++; seq->prev_event_time = timestamp; end: - return; + return 0; } -static void +static int report_lock_contended_event(struct trace_contended_event *contended_event, const struct perf_sample *sample) { @@ -527,16 +550,23 @@ report_lock_contended_event(struct trace_contended_event *contended_event, struct lock_seq_stat *seq; ls = lock_stat_findnew(contended_event->addr, contended_event->name); + if (!ls) + return -1; if (ls->discard) - return; + return 0; ts = thread_stat_findnew(sample->tid); + if (!ts) + return -1; + seq = get_seq(ts, contended_event->addr); + if (!seq) + return -1; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: /* orphan event, do nothing */ - return; + return 0; case SEQ_STATE_ACQUIRING: break; case SEQ_STATE_RELEASED: @@ -559,10 +589,10 @@ report_lock_contended_event(struct trace_contended_event *contended_event, ls->nr_contended++; seq->prev_event_time = sample->time; end: - return; + return 0; } -static void +static int report_lock_release_event(struct trace_release_event *release_event, const struct perf_sample *sample) { @@ -571,11 +601,18 @@ report_lock_release_event(struct trace_release_event *release_event, struct lock_seq_stat *seq; ls = lock_stat_findnew(release_event->addr, release_event->name); + if (!ls) + return -1; if (ls->discard) - return; + return 0; ts = thread_stat_findnew(sample->tid); + if (!ts) + return -1; + seq = get_seq(ts, release_event->addr); + if (!seq) + return -1; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: @@ -609,7 +646,7 @@ free_seq: list_del(&seq->list); free(seq); end: - return; + return 0; } /* lock oriented handlers */ @@ -623,13 +660,14 @@ static struct trace_lock_handler report_lock_ops = { static struct trace_lock_handler *trace_handler; -static void perf_evsel__process_lock_acquire(struct perf_evsel *evsel, +static int perf_evsel__process_lock_acquire(struct perf_evsel *evsel, struct perf_sample *sample) { struct trace_acquire_event acquire_event; struct event_format *event = evsel->tp_format; void *data = sample->raw_data; u64 tmp; /* this is required for casting... 
*/ + int rc = 0; tmp = raw_field_value(event, "lockdep_addr", data); memcpy(&acquire_event.addr, &tmp, sizeof(void *)); @@ -637,70 +675,84 @@ static void perf_evsel__process_lock_acquire(struct perf_evsel *evsel, acquire_event.flag = (int)raw_field_value(event, "flag", data); if (trace_handler->acquire_event) - trace_handler->acquire_event(&acquire_event, sample); + rc = trace_handler->acquire_event(&acquire_event, sample); + + return rc; } -static void perf_evsel__process_lock_acquired(struct perf_evsel *evsel, +static int perf_evsel__process_lock_acquired(struct perf_evsel *evsel, struct perf_sample *sample) { struct trace_acquired_event acquired_event; struct event_format *event = evsel->tp_format; void *data = sample->raw_data; u64 tmp; /* this is required for casting... */ + int rc = 0; tmp = raw_field_value(event, "lockdep_addr", data); memcpy(&acquired_event.addr, &tmp, sizeof(void *)); acquired_event.name = (char *)raw_field_ptr(event, "name", data); - if (trace_handler->acquire_event) - trace_handler->acquired_event(&acquired_event, sample); + if (trace_handler->acquired_event) + rc = trace_handler->acquired_event(&acquired_event, sample); + + return rc; } -static void perf_evsel__process_lock_contended(struct perf_evsel *evsel, +static int perf_evsel__process_lock_contended(struct perf_evsel *evsel, struct perf_sample *sample) { struct trace_contended_event contended_event; struct event_format *event = evsel->tp_format; void *data = sample->raw_data; u64 tmp; /* this is required for casting... */ + int rc = 0; tmp = raw_field_value(event, "lockdep_addr", data); memcpy(&contended_event.addr, &tmp, sizeof(void *)); contended_event.name = (char *)raw_field_ptr(event, "name", data); - if (trace_handler->acquire_event) - trace_handler->contended_event(&contended_event, sample); + if (trace_handler->contended_event) + rc = trace_handler->contended_event(&contended_event, sample); + + return rc; } -static void perf_evsel__process_lock_release(struct perf_evsel *evsel, +static int perf_evsel__process_lock_release(struct perf_evsel *evsel, struct perf_sample *sample) { struct trace_release_event release_event; struct event_format *event = evsel->tp_format; void *data = sample->raw_data; u64 tmp; /* this is required for casting... 
*/ + int rc = 0; tmp = raw_field_value(event, "lockdep_addr", data); memcpy(&release_event.addr, &tmp, sizeof(void *)); release_event.name = (char *)raw_field_ptr(event, "name", data); - if (trace_handler->acquire_event) - trace_handler->release_event(&release_event, sample); + if (trace_handler->release_event) + rc = trace_handler->release_event(&release_event, sample); + + return rc; } -static void perf_evsel__process_lock_event(struct perf_evsel *evsel, +static int perf_evsel__process_lock_event(struct perf_evsel *evsel, struct perf_sample *sample) { struct event_format *event = evsel->tp_format; + int rc = 0; if (!strcmp(event->name, "lock_acquire")) - perf_evsel__process_lock_acquire(evsel, sample); + rc = perf_evsel__process_lock_acquire(evsel, sample); if (!strcmp(event->name, "lock_acquired")) - perf_evsel__process_lock_acquired(evsel, sample); + rc = perf_evsel__process_lock_acquired(evsel, sample); if (!strcmp(event->name, "lock_contended")) - perf_evsel__process_lock_contended(evsel, sample); + rc = perf_evsel__process_lock_contended(evsel, sample); if (!strcmp(event->name, "lock_release")) - perf_evsel__process_lock_release(evsel, sample); + rc = perf_evsel__process_lock_release(evsel, sample); + + return rc; } static void print_bad_events(int bad, int total) @@ -802,14 +854,20 @@ static void dump_map(void) } } -static void dump_info(void) +static int dump_info(void) { + int rc = 0; + if (info_threads) dump_threads(); else if (info_map) dump_map(); - else - die("Unknown type of information\n"); + else { + rc = -1; + pr_err("Unknown type of information\n"); + } + + return rc; } static int process_sample_event(struct perf_tool *tool __used, @@ -826,8 +884,7 @@ static int process_sample_event(struct perf_tool *tool __used, return -1; } - perf_evsel__process_lock_event(evsel, sample); - return 0; + return perf_evsel__process_lock_event(evsel, sample); } static struct perf_tool eops = { @@ -839,8 +896,10 @@ static struct perf_tool eops = { static int read_events(void) { session = perf_session__new(input_name, O_RDONLY, 0, false, &eops); - if (!session) - die("Initializing perf session failed\n"); + if (!session) { + pr_err("Initializing perf session failed\n"); + return -1; + } return perf_session__process_events(session, &eops); } @@ -857,13 +916,18 @@ static void sort_result(void) } } -static void __cmd_report(void) +static int __cmd_report(void) { setup_pager(); - select_key(); - read_events(); + + if ((select_key() != 0) || + (read_events() != 0)) + return -1; + sort_result(); print_result(); + + return 0; } static const char * const report_usage[] = { @@ -959,6 +1023,7 @@ static int __cmd_record(int argc, const char **argv) int cmd_lock(int argc, const char **argv, const char *prefix __used) { unsigned int i; + int rc = 0; symbol__init(); for (i = 0; i < LOCKHASH_SIZE; i++) @@ -993,11 +1058,13 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used) /* recycling report_lock_ops */ trace_handler = &report_lock_ops; setup_pager(); - read_events(); - dump_info(); + if (read_events() != 0) + rc = -1; + else + rc = dump_info(); } else { usage_with_options(lock_usage, lock_options); } - return 0; + return rc; } From fceda7feb4a7822feee9662bc64968230d8f37bf Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 26 Aug 2012 12:24:44 -0600 Subject: [PATCH 134/282] perf stat: Remove use of die/exit and handle errors Allows perf to clean up properly on program termination. 
Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346005487-62961-5-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d53d8ab099b..02f49eba677 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -428,7 +428,7 @@ static int run_perf_stat(int argc __used, const char **argv) if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { perror("failed to create pipes"); - exit(1); + return -1; } if (forks) { @@ -510,7 +510,8 @@ static int run_perf_stat(int argc __used, const char **argv) } if (child_pid != -1) kill(child_pid, SIGTERM); - die("Not all events could be opened.\n"); + + pr_err("Not all events could be opened.\n"); return -1; } counter->supported = true; @@ -1189,7 +1190,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) output = fopen(output_name, mode); if (!output) { perror("failed to create output file"); - exit(-1); + return -1; } clock_gettime(CLOCK_REALTIME, &tm); fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); From cc58482133296f52873be909a2795f6d934ecec9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 26 Aug 2012 12:24:45 -0600 Subject: [PATCH 135/282] perf help: Remove use of die and handle errors Allows perf to clean up properly on exit. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346005487-62961-6-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-help.c | 48 +++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 6d5a8a7faf4..f9daae5ac47 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -24,13 +24,14 @@ static struct man_viewer_info_list { } *man_viewer_info_list; enum help_format { + HELP_FORMAT_NONE, HELP_FORMAT_MAN, HELP_FORMAT_INFO, HELP_FORMAT_WEB, }; static bool show_all = false; -static enum help_format help_format = HELP_FORMAT_MAN; +static enum help_format help_format = HELP_FORMAT_NONE; static struct option builtin_help_options[] = { OPT_BOOLEAN('a', "all", &show_all, "print all available commands"), OPT_SET_UINT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN), @@ -54,7 +55,9 @@ static enum help_format parse_help_format(const char *format) return HELP_FORMAT_INFO; if (!strcmp(format, "web") || !strcmp(format, "html")) return HELP_FORMAT_WEB; - die("unrecognized help format '%s'", format); + + pr_err("unrecognized help format '%s'", format); + return HELP_FORMAT_NONE; } static const char *get_man_viewer_info(const char *name) @@ -259,6 +262,8 @@ static int perf_help_config(const char *var, const char *value, void *cb) if (!value) return config_error_nonbool(var); help_format = parse_help_format(value); + if (help_format == HELP_FORMAT_NONE) + return -1; return 0; } if (!strcmp(var, "man.viewer")) { @@ -352,7 +357,7 @@ static void exec_viewer(const char *name, const char *page) warning("'%s': unknown man viewer.", name); } -static void show_man_page(const char *perf_cmd) +static int show_man_page(const char *perf_cmd) { struct man_viewer_list *viewer; const char *page = cmd_to_page(perf_cmd); @@ -365,28 +370,35 @@ static void show_man_page(const char *perf_cmd) if (fallback) 
exec_viewer(fallback, page); exec_viewer("man", page); - die("no man viewer handled the request"); + + pr_err("no man viewer handled the request"); + return -1; } -static void show_info_page(const char *perf_cmd) +static int show_info_page(const char *perf_cmd) { const char *page = cmd_to_page(perf_cmd); setenv("INFOPATH", system_path(PERF_INFO_PATH), 1); execlp("info", "info", "perfman", page, NULL); + return -1; } -static void get_html_page_path(struct strbuf *page_path, const char *page) +static int get_html_page_path(struct strbuf *page_path, const char *page) { struct stat st; const char *html_path = system_path(PERF_HTML_PATH); /* Check that we have a perf documentation directory. */ if (stat(mkpath("%s/perf.html", html_path), &st) - || !S_ISREG(st.st_mode)) - die("'%s': not a documentation directory.", html_path); + || !S_ISREG(st.st_mode)) { + pr_err("'%s': not a documentation directory.", html_path); + return -1; + } strbuf_init(page_path, 0); strbuf_addf(page_path, "%s/%s.html", html_path, page); + + return 0; } /* @@ -401,19 +413,23 @@ static void open_html(const char *path) } #endif -static void show_html_page(const char *perf_cmd) +static int show_html_page(const char *perf_cmd) { const char *page = cmd_to_page(perf_cmd); struct strbuf page_path; /* it leaks but we exec bellow */ - get_html_page_path(&page_path, page); + if (get_html_page_path(&page_path, page) != 0) + return -1; open_html(page_path.buf); + + return 0; } int cmd_help(int argc, const char **argv, const char *prefix __used) { const char *alias; + int rc = 0; load_command_list("perf-", &main_cmds, &other_cmds); @@ -444,16 +460,20 @@ int cmd_help(int argc, const char **argv, const char *prefix __used) switch (help_format) { case HELP_FORMAT_MAN: - show_man_page(argv[0]); + rc = show_man_page(argv[0]); break; case HELP_FORMAT_INFO: - show_info_page(argv[0]); + rc = show_info_page(argv[0]); break; case HELP_FORMAT_WEB: - show_html_page(argv[0]); + rc = show_html_page(argv[0]); + break; + case HELP_FORMAT_NONE: + /* fall-through */ default: + rc = -1; break; } - return 0; + return rc; } From d54b1a9e0eaca92cde678d19bd82b9594ed00450 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 26 Aug 2012 12:24:46 -0600 Subject: [PATCH 136/282] perf script: Remove use of die/exit Allows perf to clean up properly on exit. Only exits left are exec failures which are appropriate and usage callbacks that list available options. 
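The exec failures mentioned above are safe places to keep exit() because the exec*() family only returns when it fails; on success the current process image is replaced and nothing after the call runs. A stand-alone sketch of that idiom (generic code, not the exact call sites in builtin-script.c):

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(void)
    {
            /* on success execlp() does not return: the process image is
             * replaced by the "true" binary and the code below never runs */
            execlp("true", "true", (char *)NULL);

            /* reached only if the exec itself failed */
            perror("failed to exec");
            exit(EXIT_FAILURE);
    }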
Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346005487-62961-7-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 60 +++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 2d6e3b226aa..c350cfee315 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1153,18 +1153,23 @@ static const struct option options[] = { OPT_END() }; -static bool have_cmd(int argc, const char **argv) +static int have_cmd(int argc, const char **argv) { char **__argv = malloc(sizeof(const char *) * argc); - if (!__argv) - die("malloc"); + if (!__argv) { + pr_err("malloc failed\n"); + return -1; + } + memcpy(__argv, argv, sizeof(const char *) * argc); argc = parse_options(argc, (const char **)__argv, record_options, NULL, PARSE_OPT_STOP_AT_NON_OPTION); free(__argv); - return argc != 0; + system_wide = (argc == 0); + + return 0; } int cmd_script(int argc, const char **argv, const char *prefix __used) @@ -1231,13 +1236,13 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (pipe(live_pipe) < 0) { perror("failed to create pipe"); - exit(-1); + return -1; } pid = fork(); if (pid < 0) { perror("failed to fork"); - exit(-1); + return -1; } if (!pid) { @@ -1249,13 +1254,18 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (is_top_script(argv[0])) { system_wide = true; } else if (!system_wide) { - system_wide = !have_cmd(argc - rep_args, - &argv[rep_args]); + if (have_cmd(argc - rep_args, &argv[rep_args]) != 0) { + err = -1; + goto out; + } } __argv = malloc((argc + 6) * sizeof(const char *)); - if (!__argv) - die("malloc"); + if (!__argv) { + pr_err("malloc failed\n"); + err = -ENOMEM; + goto out; + } __argv[j++] = "/bin/sh"; __argv[j++] = rec_script_path; @@ -1277,8 +1287,12 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) close(live_pipe[1]); __argv = malloc((argc + 4) * sizeof(const char *)); - if (!__argv) - die("malloc"); + if (!__argv) { + pr_err("malloc failed\n"); + err = -ENOMEM; + goto out; + } + j = 0; __argv[j++] = "/bin/sh"; __argv[j++] = rep_script_path; @@ -1303,12 +1317,20 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (!rec_script_path) system_wide = false; - else if (!system_wide) - system_wide = !have_cmd(argc - 1, &argv[1]); + else if (!system_wide) { + if (have_cmd(argc - 1, &argv[1]) != 0) { + err = -1; + goto out; + } + } __argv = malloc((argc + 2) * sizeof(const char *)); - if (!__argv) - die("malloc"); + if (!__argv) { + pr_err("malloc failed\n"); + err = -ENOMEM; + goto out; + } + __argv[j++] = "/bin/sh"; __argv[j++] = script_path; if (system_wide) @@ -1357,18 +1379,18 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) input = open(session->filename, O_RDONLY); /* input_name */ if (input < 0) { perror("failed to open file"); - exit(-1); + return -1; } err = fstat(input, &perf_stat); if (err < 0) { perror("failed to stat file"); - exit(-1); + return -1; } if (!perf_stat.st_size) { fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); + return 0; } scripting_ops = script_spec__lookup(generate_script_lang); From 8d3eca20b9f31cf10088e283d704f6a71b9a4ee2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 26 Aug 2012 12:24:47 -0600 Subject: [PATCH 137/282] perf record: Remove use of die/exit Allows perf to 
clean up properly on exit. If perf-record is exiting due to failure, the on_exit should not run as the session has been deleted. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346005487-62961-8-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 158 +++++++++++++++++++++++++----------- 1 file changed, 109 insertions(+), 49 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 479ff2a038f..7b8b891d4d5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -71,19 +71,23 @@ static void advance_output(struct perf_record *rec, size_t size) rec->bytes_written += size; } -static void write_output(struct perf_record *rec, void *buf, size_t size) +static int write_output(struct perf_record *rec, void *buf, size_t size) { while (size) { int ret = write(rec->output, buf, size); - if (ret < 0) - die("failed to write"); + if (ret < 0) { + pr_err("failed to write\n"); + return -1; + } size -= ret; buf += ret; rec->bytes_written += ret; } + + return 0; } static int process_synthesized_event(struct perf_tool *tool, @@ -92,11 +96,13 @@ static int process_synthesized_event(struct perf_tool *tool, struct machine *machine __used) { struct perf_record *rec = container_of(tool, struct perf_record, tool); - write_output(rec, event, event->header.size); + if (write_output(rec, event, event->header.size) < 0) + return -1; + return 0; } -static void perf_record__mmap_read(struct perf_record *rec, +static int perf_record__mmap_read(struct perf_record *rec, struct perf_mmap *md) { unsigned int head = perf_mmap__read_head(md); @@ -104,9 +110,10 @@ static void perf_record__mmap_read(struct perf_record *rec, unsigned char *data = md->base + rec->page_size; unsigned long size; void *buf; + int rc = 0; if (old == head) - return; + return 0; rec->samples++; @@ -117,17 +124,26 @@ static void perf_record__mmap_read(struct perf_record *rec, size = md->mask + 1 - (old & md->mask); old += size; - write_output(rec, buf, size); + if (write_output(rec, buf, size) < 0) { + rc = -1; + goto out; + } } buf = &data[old & md->mask]; size = head - old; old += size; - write_output(rec, buf, size); + if (write_output(rec, buf, size) < 0) { + rc = -1; + goto out; + } md->prev = old; perf_mmap__write_tail(md, old); + +out: + return rc; } static volatile int done = 0; @@ -183,12 +199,13 @@ static bool perf_evlist__equal(struct perf_evlist *evlist, return true; } -static void perf_record__open(struct perf_record *rec) +static int perf_record__open(struct perf_record *rec) { struct perf_evsel *pos; struct perf_evlist *evlist = rec->evlist; struct perf_session *session = rec->session; struct perf_record_opts *opts = &rec->opts; + int rc = 0; perf_evlist__config_attrs(evlist, opts); @@ -222,10 +239,13 @@ try_again: if (err == EPERM || err == EACCES) { ui__error_paranoid(); - exit(EXIT_FAILURE); + rc = -err; + goto out; } else if (err == ENODEV && opts->target.cpu_list) { - die("No such device - did you specify" - " an out-of-range profile CPU?\n"); + pr_err("No such device - did you specify" + " an out-of-range profile CPU?\n"); + rc = -err; + goto out; } else if (err == EINVAL) { if (!opts->exclude_guest_missing && (attr->exclude_guest || attr->exclude_host)) { @@ -272,7 +292,8 @@ try_again: if (err == ENOENT) { ui__error("The %s event is not supported.\n", perf_evsel__name(pos)); - exit(EXIT_FAILURE); + rc = -err; + goto out; } printf("\n"); @@ -280,34 +301,46 @@ 
try_again: err, strerror(err)); #if defined(__i386__) || defined(__x86_64__) - if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) - die("No hardware sampling interrupt available." - " No APIC? If so then you can boot the kernel" - " with the \"lapic\" boot parameter to" - " force-enable it.\n"); + if (attr->type == PERF_TYPE_HARDWARE && + err == EOPNOTSUPP) { + pr_err("No hardware sampling interrupt available." + " No APIC? If so then you can boot the kernel" + " with the \"lapic\" boot parameter to" + " force-enable it.\n"); + rc = -err; + goto out; + } #endif - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + rc = -err; + goto out; } } if (perf_evlist__set_filters(evlist)) { error("failed to set filter with %d (%s)\n", errno, strerror(errno)); - exit(-1); + rc = -1; + goto out; } if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { - if (errno == EPERM) - die("Permission error mapping pages.\n" - "Consider increasing " - "/proc/sys/kernel/perf_event_mlock_kb,\n" - "or try again with a smaller value of -m/--mmap_pages.\n" - "(current value: %d)\n", opts->mmap_pages); - else if (!is_power_of_2(opts->mmap_pages)) - die("--mmap_pages/-m value must be a power of two."); - - die("failed to mmap with %d (%s)\n", errno, strerror(errno)); + if (errno == EPERM) { + pr_err("Permission error mapping pages.\n" + "Consider increasing " + "/proc/sys/kernel/perf_event_mlock_kb,\n" + "or try again with a smaller value of -m/--mmap_pages.\n" + "(current value: %d)\n", opts->mmap_pages); + rc = -errno; + } else if (!is_power_of_2(opts->mmap_pages)) { + pr_err("--mmap_pages/-m value must be a power of two."); + rc = -EINVAL; + } else { + pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno)); + rc = -errno; + } + goto out; } if (rec->file_new) @@ -315,11 +348,14 @@ try_again: else { if (!perf_evlist__equal(session->evlist, evlist)) { fprintf(stderr, "incompatible append\n"); - exit(-1); + rc = -1; + goto out; } } perf_session__set_id_hdr_size(session); +out: + return rc; } static int process_buildids(struct perf_record *rec) @@ -335,10 +371,13 @@ static int process_buildids(struct perf_record *rec) size, &build_id__mark_dso_hit_ops); } -static void perf_record__exit(int status __used, void *arg) +static void perf_record__exit(int status, void *arg) { struct perf_record *rec = arg; + if (status != 0) + return; + if (!rec->opts.pipe_output) { rec->session->header.data_size += rec->bytes_written; @@ -393,17 +432,26 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; -static void perf_record__mmap_read_all(struct perf_record *rec) +static int perf_record__mmap_read_all(struct perf_record *rec) { int i; + int rc = 0; for (i = 0; i < rec->evlist->nr_mmaps; i++) { - if (rec->evlist->mmap[i].base) - perf_record__mmap_read(rec, &rec->evlist->mmap[i]); + if (rec->evlist->mmap[i].base) { + if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) { + rc = -1; + goto out; + } + } } if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA)) - write_output(rec, &finished_round_event, sizeof(finished_round_event)); + rc = write_output(rec, &finished_round_event, + sizeof(finished_round_event)); + +out: + return rc; } static int __cmd_record(struct perf_record *rec, int argc, const char **argv) @@ -463,7 +511,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) output = open(output_name, flags, S_IRUSR | S_IWUSR); if (output < 
0) { perror("failed to create output file"); - exit(-1); + return -1; } rec->output = output; @@ -503,7 +551,10 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) } } - perf_record__open(rec); + if (perf_record__open(rec) != 0) { + err = -1; + goto out_delete_session; + } /* * perf_session__delete(session) will be called at perf_record__exit() @@ -513,19 +564,20 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (opts->pipe_output) { err = perf_header__write_pipe(output); if (err < 0) - return err; + goto out_delete_session; } else if (rec->file_new) { err = perf_session__write_header(session, evsel_list, output, false); if (err < 0) - return err; + goto out_delete_session; } if (!rec->no_buildid && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { pr_err("Couldn't generate buildids. " "Use --no-buildid to profile anyway.\n"); - return -1; + err = -1; + goto out_delete_session; } rec->post_processing_offset = lseek(output, 0, SEEK_CUR); @@ -533,7 +585,8 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) machine = perf_session__find_host_machine(session); if (!machine) { pr_err("Couldn't find native kernel information.\n"); - return -1; + err = -1; + goto out_delete_session; } if (opts->pipe_output) { @@ -541,14 +594,14 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) process_synthesized_event); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); - return err; + goto out_delete_session; } err = perf_event__synthesize_event_types(tool, process_synthesized_event, machine); if (err < 0) { pr_err("Couldn't synthesize event_types.\n"); - return err; + goto out_delete_session; } if (have_tracepoints(&evsel_list->entries)) { @@ -564,7 +617,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) process_synthesized_event); if (err <= 0) { pr_err("Couldn't record tracing data.\n"); - return err; + goto out_delete_session; } advance_output(rec, err); } @@ -592,20 +645,24 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) perf_event__synthesize_guest_os); if (!opts->target.system_wide) - perf_event__synthesize_thread_map(tool, evsel_list->threads, + err = perf_event__synthesize_thread_map(tool, evsel_list->threads, process_synthesized_event, machine); else - perf_event__synthesize_threads(tool, process_synthesized_event, + err = perf_event__synthesize_threads(tool, process_synthesized_event, machine); + if (err != 0) + goto out_delete_session; + if (rec->realtime_prio) { struct sched_param param; param.sched_priority = rec->realtime_prio; if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { pr_err("Could not set realtime priority.\n"); - exit(-1); + err = -1; + goto out_delete_session; } } @@ -620,7 +677,10 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) for (;;) { int hits = rec->samples; - perf_record__mmap_read_all(rec); + if (perf_record__mmap_read_all(rec) < 0) { + err = -1; + goto out_delete_session; + } if (hits == rec->samples) { if (done) From 09a2f16a916178489fc4bf439de668d81fda7616 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 27 Aug 2012 13:05:54 -0600 Subject: [PATCH 138/282] perf tools: Fix x86 builds with ARCH specified on the command line e.g., compiling i386 on x86_64 using: $ make -C tools/perf ARCH=i386 fails with: CC /tmp/pbuild/util/evsel.o In file included from util/evsel.c:21:0: util/perf_regs.h:5:23: fatal error: perf_regs.h: No such file or directory 
compilation terminated. Adding V=1 you see that the include argument for the arch is '-Iarch/i386/include' is wrong. It is supposed to be -Iarch/x86/include per the redefinition of ARCH in the Makefile. According to the make manual, http://www.gnu.org/software/make/manual/make.html#Override-Directive: "If a variable has been set with a command argument (see Overriding Variables), then ordinary assignments in the makefile are ignored. If you want to set the variable in the makefile even though it was set with a command argument, you can use an override directive ..." Make it so. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346094354-74356-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 722ddee61f9..939cf6d898a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -64,12 +64,12 @@ AR = $(CROSS_COMPILE)ar # Additional ARCH settings for x86 ifeq ($(ARCH),i386) - ARCH := x86 + override ARCH := x86 NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-x86 endif ifeq ($(ARCH),x86_64) - ARCH := x86 + override ARCH := x86 IS_X86_64 := 0 ifeq (, $(findstring m32,$(EXTRA_CFLAGS))) IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1) From 60ebf328762914b80d3e4e5f07bda599043c8eda Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Fri, 31 Aug 2012 12:28:47 +0530 Subject: [PATCH 139/282] perf tools: Fix intlist node removal Similar to the one in : https://lkml.org/lkml/2012/8/29/27 Make sure we remove the node from the rblist before we delete the node. The rblist__remove_node() will invoke rblist->node_delete, which will take care of deleting the node with the suitable function provided by the user. Signed-off-by: Suzuki K Poulose Acked-by: David Ahern Cc: David Ahern Cc: Suzuki K Poulose Link: http://lkml.kernel.org/r/20120831065840.5167.90318.stgit@suzukikp.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c index fd530dced9c..77c504ff008 100644 --- a/tools/perf/util/intlist.c +++ b/tools/perf/util/intlist.c @@ -52,9 +52,9 @@ int intlist__add(struct intlist *ilist, int i) return rblist__add_node(&ilist->rblist, (void *)((long)i)); } -void intlist__remove(struct intlist *ilist __used, struct int_node *node) +void intlist__remove(struct intlist *ilist, struct int_node *node) { - int_node__delete(node); + rblist__remove_node(&ilist->rblist, &node->rb_node); } struct int_node *intlist__find(struct intlist *ilist, int i) From 4592281403e74dc4401d5803ec9948d43bbee7ae Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 29 Aug 2012 11:30:07 +0530 Subject: [PATCH 140/282] perf tools: Remove the node from rblist in strlist__remove The following commit: author David Ahern Tue, 31 Jul 2012 04:31:33 +0000 (22:31 -0600) committer Arnaldo Carvalho de Melo Fri, 3 Aug 2012 13:39:51 +0000 (10:39 -0300) commit ee8dd3ca43f151d9fbe1edeef68fb8a77eb9f047 causes a double free during a probe deletion as the node is never removed from the list via strlist__remove(), even though it gets 'deleted' (read free()'d). This causes a double free when we do strlist__delete() as the node is already deleted but present in the rblist. 
[suzukikp@suzukikp perf]$ sudo ./perf probe -a do_fork Added new event: probe:do_fork (on do_fork) You can now use it in all perf tools, such as: perf record -e probe:do_fork -aR sleep 1 [suzukikp@suzukikp perf]$ sudo ./perf probe -d do_fork Removed event: probe:do_fork *** glibc detected *** ./perf: double free or corruption (fasttop): 0x000000000133d600 *** ======= Backtrace: ========= /lib64/libc.so.6[0x38eec7dda6] ./perf(rblist__delete+0x5c)[0x47d3dc] ./perf(del_perf_probe_events+0xb6)[0x47b826] ./perf(cmd_probe+0x471)[0x42c8d1] ./perf[0x4150b3] ./perf(main+0x501)[0x4148e1] /lib64/libc.so.6(__libc_start_main+0xed)[0x38eec2169d] ./perf[0x414a61] Make sure we remove the node from the rblist before we delete the node. The rblist__remove_node() will invoke rblist->node_delete, which will take care of deleting the node with the suitable function provided by the user. Reported-by: Ananth N. Mavinakayanahalli Signed-off-by: Suzuki K. Poulose Acked-by: David Ahern Cc: Ananth N Mavinakayanahalli Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120829055840.7802.1459.stgit@suzukikp.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/strlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 95856ff3dda..155d8b7078a 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -93,7 +93,7 @@ out: void strlist__remove(struct strlist *slist, struct str_node *snode) { - str_node__delete(snode, slist->dupstr); + rblist__remove_node(&slist->rblist, &snode->rb_node); } struct str_node *strlist__find(struct strlist *slist, const char *entry) From 12046099160e65cddb639f8b3dda2bd0701c09d6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 29 Aug 2012 09:55:32 -0600 Subject: [PATCH 141/282] perf tools: remove unneeded include of network header files perf does not have networking related functionality, and the inclusion of these headers is one of the causes of compile failures for Android: https://lkml.org/lkml/2012/8/23/316 https://lkml.org/lkml/2012/8/28/293 So, remove them. 
Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346255732-93246-1-git-send-email-dsahern@gmail.com [ committer note: fix trace-event-perl.c compile failure by reordering includes ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-perl.c | 8 ++++---- tools/perf/util/util.h | 5 ----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index d28001016fb..94e673643bc 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -25,16 +25,16 @@ #include #include -#include "../../perf.h" #include "../util.h" +#include +#include + +#include "../../perf.h" #include "../thread.h" #include "../event.h" #include "../trace-event.h" #include "../evsel.h" -#include -#include - void boot_Perf__Trace__Context(pTHX_ CV *cv); void boot_DynaLoader(pTHX_ CV *cv); typedef PerlInterpreter * INTERP; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 00a93a91a23..67a371355c7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -69,11 +69,6 @@ #include #include #include -#include -#include -#include -#include -#include #include #include "../../../include/linux/magic.h" #include "types.h" From 74ba9e11f02a4ce0c7708dc77d1756b93065e440 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 5 Sep 2012 14:02:47 +0900 Subject: [PATCH 142/282] perf header: Use evlist->nr_entries on write_event_desc() Number of events (evsels) in a evlist is kept on nr_entries field so that we don't need to recalculate it. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346821373-31621-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 9696e64c9db..a124b932817 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -610,11 +610,10 @@ static int write_event_desc(int fd, struct perf_header *h __used, struct perf_evlist *evlist) { struct perf_evsel *evsel; - u32 nre = 0, nri, sz; + u32 nre, nri, sz; int ret; - list_for_each_entry(evsel, &evlist->entries, node) - nre++; + nre = evlist->nr_entries; /* * write number of events From 618a3f1d30ea0ee2ff3a88661b8d6a4035123211 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 5 Sep 2012 14:02:48 +0900 Subject: [PATCH 143/282] perf header: Set tracepoint event name only if not set The event name can be set already by processing a event_desc data. So check it before setting to prevent possible leak. 
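In other words, the name is only assigned while it is still unset, so an earlier strdup()'d string is never overwritten and leaked. A minimal stand-alone sketch of that guard (set_name() is an invented example, not the evsel code itself):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int set_name(char **name, const char *value)
    {
            /* keep an existing name: overwriting it would leak the
             * string that was strdup()'d for it earlier */
            if (*name)
                    return 0;

            *name = strdup(value);
            return *name ? 0 : -1;
    }

    int main(void)
    {
            char *name = NULL;

            if (set_name(&name, "sched:sched_switch"))
                    return 1;
            set_name(&name, "something else");  /* ignored: already set */

            puts(name);
            free(name);
            return 0;
    }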
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346821373-31621-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a124b932817..05c9310c3da 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2314,7 +2314,7 @@ static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist, struct perf_evsel *pos; list_for_each_entry(pos, &evlist->entries, node) { - if (pos->attr.type == PERF_TYPE_TRACEPOINT && + if (pos->attr.type == PERF_TYPE_TRACEPOINT && !pos->name && perf_evsel__set_tracepoint_name(pos, pevent)) return -1; } From be4a2dedf6816871349fbddd018f266e93e3c22d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 5 Sep 2012 14:02:49 +0900 Subject: [PATCH 144/282] perf header: Swap pmu mapping numbers if needed Like others, the numbers can be saved in a different endian format than a host machine. Swap them if needed. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Link: http://lkml.kernel.org/r/1346821373-31621-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 05c9310c3da..43425b75f0c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1440,6 +1440,9 @@ static void print_pmu_mappings(struct perf_header *ph, int fd, FILE *fp) if (ret != sizeof(pmu_num)) goto error; + if (ph->needs_swap) + pmu_num = bswap_32(pmu_num); + if (!pmu_num) { fprintf(fp, "# pmu mappings: not available\n"); return; @@ -1448,6 +1451,9 @@ static void print_pmu_mappings(struct perf_header *ph, int fd, FILE *fp) while (pmu_num) { if (read(fd, &type, sizeof(type)) != sizeof(type)) break; + if (ph->needs_swap) + type = bswap_32(type); + name = do_read_string(fd, ph); if (!name) break; From 60ff92f515a4efb36931f1b5b042332016e0f123 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Wed, 29 Aug 2012 01:22:16 +0300 Subject: [PATCH 145/282] perf tools: Replace mempcpy with memcpy mempcpy is not supported by bionic in Android and will lead to compilation errors. Replacing mempcpy with memcpy so it will work in Android. 
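For reference, mempcpy(dst, src, n) copies like memcpy() but returns dst + n rather than dst, which is why the portable replacement has to place the terminating NUL explicitly. A small sketch of the equivalence (the mempcpy() half assumes glibc, where it is a GNU extension):

    #define _GNU_SOURCE             /* for mempcpy() on glibc */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            const char *err = "example error text";
            char buf[8];
            size_t len = strlen(err);
            size_t n = len < sizeof(buf) - 1 ? len : sizeof(buf) - 1;

            /* GNU extension: returns a pointer just past the copied bytes */
            char *end = mempcpy(buf, err, n);
            *end = '\0';

            /* portable form, as used by the replacement in this patch */
            memcpy(buf, err, n);
            *(buf + n) = '\0';

            puts(buf);
            return 0;
    }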
Signed-off-by: Irina Tirdea Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/CANg8OW+Y3ZMG-GdhYu2_yKOYH_XEMgw73PdCX_23UTnfYhmttA@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 051eaa68095..065528b7563 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -117,8 +117,8 @@ int perf_target__strerror(struct perf_target *target, int errnum, if (err != buf) { size_t len = strlen(err); - char *c = mempcpy(buf, err, min(buflen - 1, len)); - *c = '\0'; + memcpy(buf, err, min(buflen - 1, len)); + *(buf + min(buflen - 1, len)) = '\0'; } return 0; From 7a4ec938857cf534270b23545495300fbac7f5de Mon Sep 17 00:00:00 2001 From: Maciek Borzecki Date: Tue, 4 Sep 2012 12:32:30 +0200 Subject: [PATCH 146/282] perf tools: Allow user to indicate path to objdump in command line When analyzing perf data from hosts of other architecture than one of the local host it's useful to call objdump that is part of a toolchain for that architecture. Instead of calling regular objdump, call one that user specified in command line. Signed-off-by: Maciek Borzecki Acked-by: David Ahern Link: http://lkml.kernel.org/r/1346754750.16299.3.camel@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-annotate.txt | 3 +++ tools/perf/Documentation/perf-report.txt | 3 +++ tools/perf/builtin-annotate.c | 2 ++ tools/perf/builtin-report.c | 2 ++ tools/perf/util/annotate.c | 4 +++- tools/perf/util/annotate.h | 1 + 6 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index c89f9e1453f..c8ffd9fd5c6 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -85,6 +85,9 @@ OPTIONS -M:: --disassembler-style=:: Set disassembler style for objdump. +--objdump=:: + Path to objdump binary. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 495210a612c..f4d91bebd59 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -168,6 +168,9 @@ OPTIONS branch stacks and it will automatically switch to the branch view mode, unless --no-branch-stack is used. +--objdump=:: + Path to objdump binary. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 67522cf8740..2f3f0029c0f 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -282,6 +282,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. 
-M intel for intel syntax)"), + OPT_STRING(0, "objdump", &objdump_path, "path", + "objdump binary to use for disassembly and annotations"), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d61825371ad..1f8d11b4f7f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -638,6 +638,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) "Show a column with the sum of periods"), OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", "use branch records for histogram filling", parse_branch_mode), + OPT_STRING(0, "objdump", &objdump_path, "path", + "objdump binary to use for disassembly and annotations"), OPT_END() }; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3a282c0057d..51ef69c9841 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -17,6 +17,7 @@ #include const char *disassembler_style; +const char *objdump_path; static struct ins *ins__find(const char *name); static int disasm_line__parse(char *line, char **namep, char **rawp); @@ -820,9 +821,10 @@ fallback: dso, dso->long_name, sym, sym->name); snprintf(command, sizeof(command), - "objdump %s%s --start-address=0x%016" PRIx64 + "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -d %s %s -C %s|grep -v %s|expand", + objdump_path ? objdump_path : "objdump", disassembler_style ? "-M " : "", disassembler_style ? disassembler_style : "", map__rip_2objdump(map, sym->start), diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 78a5692dd71..a6d6bc5d716 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -152,5 +152,6 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, #endif extern const char *disassembler_style; +extern const char *objdump_path; #endif /* __PERF_ANNOTATE_H */ From eea9b6842950924876a1a21ca197f189f8bb335a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 5 Sep 2012 18:53:36 -0600 Subject: [PATCH 147/282] perf tools: Clean target should do clean for lib/traceevent too It's built as part of perf, so it should be cleaned too. Tested-by: Namhyung Kim Signed-off-by: David Ahern Cc: Namhyung Kim Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1346892816-61779-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 939cf6d898a..afd50757490 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -917,6 +917,9 @@ $(LIB_FILE): $(LIB_OBJS) $(LIBTRACEEVENT): $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a +$(LIBTRACEEVENT)-clean: + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean + help: @echo 'Perf make targets:' @echo ' doc - make *all* documentation (see below)' @@ -1056,7 +1059,7 @@ quick-install-html: ### Cleaning rules -clean: +clean: $(LIBTRACEEVENT)-clean $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(RM) $(ALL_PROGRAMS) perf $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* From ae42c6bb9300cf25990bd15f1bd6ee38598f7483 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 6 Sep 2012 11:10:45 +0900 Subject: [PATCH 148/282] perf header: Fix a typo on evsel For checking return value of the strdup, 'event' should be 'evsel'. 
Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346897446-16569-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 43425b75f0c..8b0b873c229 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2307,7 +2307,7 @@ static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel, snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name); evsel->name = strdup(bf); - if (event->name == NULL) + if (evsel->name == NULL) return -1; evsel->tp_format = event; From 831394bdd9dd3ac1661336505c7cbdfd786d8cd4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 6 Sep 2012 11:10:46 +0900 Subject: [PATCH 149/282] perf header: Prepare tracepoint events regardless of name Current perf_evlist__set_tracepoint_names is a misnomer because it finds and sets correspoding event_format in addition to the name. So skipping it when a event has set name already caused a trouble. Rename it and set name only a event doesn't have one. Reported-by: David Ahern Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346897446-16569-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 8b0b873c229..d07bc134e56 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2295,33 +2295,39 @@ static int read_attr(int fd, struct perf_header *ph, return ret <= 0 ? 
-1 : 0; } -static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel, - struct pevent *pevent) +static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel, + struct pevent *pevent) { - struct event_format *event = pevent_find_event(pevent, - evsel->attr.config); + struct event_format *event; char bf[128]; + /* already prepared */ + if (evsel->tp_format) + return 0; + + event = pevent_find_event(pevent, evsel->attr.config); if (event == NULL) return -1; - snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name); - evsel->name = strdup(bf); - if (evsel->name == NULL) - return -1; + if (!evsel->name) { + snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name); + evsel->name = strdup(bf); + if (evsel->name == NULL) + return -1; + } evsel->tp_format = event; return 0; } -static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist, - struct pevent *pevent) +static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist, + struct pevent *pevent) { struct perf_evsel *pos; list_for_each_entry(pos, &evlist->entries, node) { - if (pos->attr.type == PERF_TYPE_TRACEPOINT && !pos->name && - perf_evsel__set_tracepoint_name(pos, pevent)) + if (pos->attr.type == PERF_TYPE_TRACEPOINT && + perf_evsel__prepare_tracepoint_event(pos, pevent)) return -1; } @@ -2409,7 +2415,8 @@ int perf_session__read_header(struct perf_session *session, int fd) lseek(fd, header->data_offset, SEEK_SET); - if (perf_evlist__set_tracepoint_names(session->evlist, session->pevent)) + if (perf_evlist__prepare_tracepoint_events(session->evlist, + session->pevent)) goto out_delete_evlist; header->frozen = 1; @@ -2643,7 +2650,8 @@ int perf_event__process_tracing_data(union perf_event *event, if (size_read + padding != size) die("tracing data size mismatch"); - perf_evlist__set_tracepoint_names(session->evlist, session->pevent); + perf_evlist__prepare_tracepoint_events(session->evlist, + session->pevent); return size_read + padding; } From 8ad7013b252ba683055df19e657eb03d98f4f312 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Sep 2012 13:11:18 -0300 Subject: [PATCH 150/282] perf test: Add round trip test for sw and hw event names It basically traverses the hardware and software event name arrays creating an evlist with all events, then it uses perf_evsel__name to check that the name is the expected one. With it I noticed this problem: [root@sandy ~]# perf test 10 10: roundtrip evsel->name check:invalid or unsupported event: 'CPU-migrations' Run 'perf list' for a list of valid events FAILED! Changed it to "cpu-migrations" in the software event arrays and it worked. This is to catch problems like the one reported by Joel Uckelman in http://permalink.gmane.org/gmane.linux.kernel.perf.user/1016 Hardware cache events will be checked in the following patch. 
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-5jskfkuqvf2fi257zmni0ftz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 53 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.c | 6 ++--- tools/perf/util/evsel.h | 6 +++-- 3 files changed, 60 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 381d5ab8712..ba94fbe1fa4 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -1092,6 +1092,55 @@ static int test__perf_pmu(void) return perf_pmu__test(); } +static int __perf_evsel__name_array_test(const char *names[], int nr_names) +{ + int i, err; + struct perf_evsel *evsel; + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + + if (evlist == NULL) + return -ENOMEM; + + for (i = 0; i < nr_names; ++i) { + err = parse_events(evlist, names[i], 0); + if (err) { + pr_debug("failed to parse event '%s', err %d\n", + names[i], err); + goto out_delete_evlist; + } + } + + err = 0; + list_for_each_entry(evsel, &evlist->entries, node) { + if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) { + --err; + pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]); + } + } + +out_delete_evlist: + perf_evlist__delete(evlist); + return err; +} + +#define perf_evsel__name_array_test(names) \ + __perf_evsel__name_array_test(names, ARRAY_SIZE(names)) + +static int perf_evsel__roundtrip_name_test(void) +{ + int err = 0, ret = 0; + + err = perf_evsel__name_array_test(perf_evsel__hw_names); + if (err) + ret = err; + + err = perf_evsel__name_array_test(perf_evsel__sw_names); + if (err) + ret = err; + + return ret; +} + static struct test { const char *desc; int (*func)(void); @@ -1134,6 +1183,10 @@ static struct test { .desc = "Test dso data interface", .func = dso__test_data, }, + { + .desc = "roundtrip evsel->name check", + .func = perf_evsel__roundtrip_name_test, + }, { .func = NULL, }, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7ff3c8fb736..06f76441547 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -68,7 +68,7 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) return evsel; } -static const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { +const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { "cycles", "instructions", "cache-references", @@ -131,12 +131,12 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } -static const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { +const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { "cpu-clock", "task-clock", "page-faults", "context-switches", - "CPU-migrations", + "cpu-migrations", "minor-faults", "major-faults", "alignment-faults", diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 94f6ba16747..a3f562cec43 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -97,8 +97,10 @@ extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] [PERF_EVSEL__MAX_ALIASES]; extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] [PERF_EVSEL__MAX_ALIASES]; -const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] - [PERF_EVSEL__MAX_ALIASES]; +extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] + [PERF_EVSEL__MAX_ALIASES]; +extern const char 
*perf_evsel__hw_names[PERF_COUNT_HW_MAX]; +extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX]; int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *perf_evsel__name(struct perf_evsel *evsel); From 42e1fb776087713b5482cd7cf6cac998fbdd6544 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Sep 2012 14:43:28 -0300 Subject: [PATCH 151/282] perf tools: Remove extraneous newline when parsing hardware cache events Noticed while developing a 'perf test' entry to verify that perf_evsel__name works. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-xz6zgh38mp3cjnd2udh38z8f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b24630398b9..66d235e0cc9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -308,7 +308,7 @@ int parse_events_add_cache(struct list_head **list, int *idx, for (i = 0; (i < 2) && (op_result[i]); i++) { char *str = op_result[i]; - snprintf(name + n, MAX_NAME_LEN - n, "-%s\n", str); + snprintf(name + n, MAX_NAME_LEN - n, "-%s", str); if (cache_op == -1) { cache_op = parse_aliases(str, perf_evsel__hw_cache_op, From 78f067b38bed1adce6a2fa7868cf8ea37d61f537 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Sep 2012 14:54:11 -0300 Subject: [PATCH 152/282] perf evlist: Add fprintf method For debugging, etc. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-fjimge1ovgh976qlt8dtmlp0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 13 +++++++++++++ tools/perf/util/evlist.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4774ac1e3d5..892353729c7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -889,3 +889,16 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even struct perf_evsel *evsel = perf_evlist__first(evlist); return perf_evsel__parse_sample(evsel, event, sample, swapped); } + +size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) +{ + struct perf_evsel *evsel; + size_t printed = 0; + + list_for_each_entry(evsel, &evlist->entries, node) { + printed += fprintf(fp, "%s%s", evsel->idx ? 
", " : "", + perf_evsel__name(evsel)); + } + + return printed + fprintf(fp, "\n");; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 2ed255792c6..3f2e1e4ccdd 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -143,4 +143,6 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) { return list_entry(evlist->entries.prev, struct perf_evsel, node); } + +size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); #endif /* __PERF_EVLIST_H */ From 49f20d723e25a221fbcf1cbf4e51bb2942326e4f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Sep 2012 14:55:44 -0300 Subject: [PATCH 153/282] perf test: Add roundtrip test for hardware cache events That nicely catches the problem reported by Joel Uckelman in http://permalink.gmane.org/gmane.linux.kernel.perf.user/1016 : [root@sandy ~]# perf test 1: vmlinux symtab matches kallsyms: Ok 2: detect open syscall event: Ok 3: detect open syscall event on all cpus: Ok 4: read samples using the mmap interface: Ok 5: parse events tests: Ok 6: x86 rdpmc test: Ok 7: Validate PERF_RECORD_* events & perf_sample fields: Ok 8: Test perf pmu format parsing: Ok 9: Test dso data interface: Ok 10: roundtrip evsel->name check: FAILED! [root@sandy ~]# perf test -v 10 10: roundtrip evsel->name check: --- start --- L1-dcache-misses != L1-dcache-load-misses L1-dcache-misses != L1-dcache-store-misses L1-dcache-misses != L1-dcache-prefetch-misses L1-icache-misses != L1-icache-load-misses L1-icache-misses != L1-icache-prefetch-misses LLC-misses != LLC-load-misses LLC-misses != LLC-store-misses LLC-misses != LLC-prefetch-misses dTLB-misses != dTLB-load-misses dTLB-misses != dTLB-store-misses dTLB-misses != dTLB-prefetch-misses iTLB-misses != iTLB-load-misses branch-misses != branch-load-misses node-misses != node-load-misses node-misses != node-store-misses node-misses != node-prefetch-misses ---- end ---- roundtrip evsel->name check: FAILED! [root@sandy ~]# Now lemme apply Jiri's fix and try it again... 
Cc: David Ahern Cc: Frederic Weisbecker Cc: Joel Uckelman Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-bbewtxw0rfipp5qy1j3jtg5d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 61 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index ba94fbe1fa4..cf33e5081c3 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -1092,6 +1092,63 @@ static int test__perf_pmu(void) return perf_pmu__test(); } +static int perf_evsel__roundtrip_cache_name_test(void) +{ + char name[128]; + int type, op, err = 0, ret = 0, i, idx; + struct perf_evsel *evsel; + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + + if (evlist == NULL) + return -ENOMEM; + + for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + /* skip invalid cache type */ + if (!perf_evsel__is_cache_op_valid(type, op)) + continue; + + for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + __perf_evsel__hw_cache_type_op_res_name(type, op, i, + name, sizeof(name)); + err = parse_events(evlist, name, 0); + if (err) + ret = err; + } + } + } + + idx = 0; + evsel = perf_evlist__first(evlist); + + for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + /* skip invalid cache type */ + if (!perf_evsel__is_cache_op_valid(type, op)) + continue; + + for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + __perf_evsel__hw_cache_type_op_res_name(type, op, i, + name, sizeof(name)); + if (evsel->idx != idx) + continue; + + ++idx; + + if (strcmp(perf_evsel__name(evsel), name)) { + pr_debug("%s != %s\n", perf_evsel__name(evsel), name); + ret = -1; + } + + evsel = perf_evsel__next(evsel); + } + } + } + + perf_evlist__delete(evlist); + return ret; +} + static int __perf_evsel__name_array_test(const char *names[], int nr_names) { int i, err; @@ -1138,6 +1195,10 @@ static int perf_evsel__roundtrip_name_test(void) if (err) ret = err; + err = perf_evsel__roundtrip_cache_name_test(); + if (err) + ret = err; + return ret; } From 275ef3878f698941353780440fec6926107a320b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 5 Sep 2012 19:51:33 +0200 Subject: [PATCH 154/282] perf tools: Fix cache event name generation If the event name is specified with all 3 components, the last one overwrites the previous one during the name composing within the parse_events_add_cache function. Fixing this by properly adjusting the string index. 
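A self-contained illustration of the overwrite (hypothetical buffer size, not the tool's code): the op and result strings are both appended at offset n into name[], so unless n advances by the number of bytes written, the second snprintf() lands on top of the first:

    #include <stdio.h>

    int main(void)
    {
        char name[100];
        int n = snprintf(name, sizeof(name), "%s", "L1-dcache");

        /* before the fix: n never advances, "-misses" overwrites "-load" */
        snprintf(name + n, sizeof(name) - n, "-%s", "load");
        snprintf(name + n, sizeof(name) - n, "-%s", "misses");
        printf("%s\n", name);    /* prints "L1-dcache-misses" */

        /* with the fix: advance n by what each call wrote */
        n += snprintf(name + n, sizeof(name) - n, "-%s", "load");
        n += snprintf(name + n, sizeof(name) - n, "-%s", "misses");
        printf("%s\n", name);    /* prints "L1-dcache-load-misses" */

        return 0;
    }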
Reported-by: Joel Uckelman Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Joel Uckelman Cc: Paul Mackerras Cc: Peter Zijlstra LPU-Reference: 20120905175133.GA18352@krava.brq.redhat.com [ committer note: Remove the newline fix, done already in 42e1fb7 ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 66d235e0cc9..a031ee1f54f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -308,7 +308,7 @@ int parse_events_add_cache(struct list_head **list, int *idx, for (i = 0; (i < 2) && (op_result[i]); i++) { char *str = op_result[i]; - snprintf(name + n, MAX_NAME_LEN - n, "-%s", str); + n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str); if (cache_op == -1) { cache_op = parse_aliases(str, perf_evsel__hw_cache_op, From 9a2936878a198553a17d21d1e843aa574ad9fa38 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 7 Sep 2012 10:55:53 +0900 Subject: [PATCH 155/282] perf tools: Ignore compiled python binaries It prevents mindless git add from adding those files. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346982953-30824-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 26b823b61aa..8f8fbc227a4 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -21,3 +21,5 @@ config.mak config.mak.autogen *-bison.* *-flex.* +*.pyc +*.pyo From 245c5a18433090da0e1a799bdb0faa78552b5992 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 7 Sep 2012 11:49:45 +0900 Subject: [PATCH 156/282] tools lib traceevent: Get rid of die() from pretty_print() There are three cases that call die() in the pretty_print. 1. insufficient number of argument: cannot proceed anymore. 2. too long format conversion specifier: truncate and proceed. 3. bad size specifier in format string: skip and proceed. For all cases, convert die to do_warning, mark the event as EVENT_FL_FAILED and print error message at the last. Signed-off-by: Namhyung Kim Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1346986187-5170-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 43 ++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index b5b4d806ffa..6d5e75987a3 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3889,8 +3889,11 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event goto cont_process; case '*': /* The argument is the length. 
*/ - if (!arg) - die("no argument match"); + if (!arg) { + do_warning("no argument match"); + event->flags |= EVENT_FL_FAILED; + goto out_failed; + } len_arg = eval_num_arg(data, size, event, arg); len_as_arg = 1; arg = arg->next; @@ -3923,15 +3926,21 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event case 'x': case 'X': case 'u': - if (!arg) - die("no argument match"); + if (!arg) { + do_warning("no argument match"); + event->flags |= EVENT_FL_FAILED; + goto out_failed; + } len = ((unsigned long)ptr + 1) - (unsigned long)saveptr; /* should never happen */ - if (len > 31) - die("bad format!"); + if (len > 31) { + do_warning("bad format!"); + event->flags |= EVENT_FL_FAILED; + len = 31; + } memcpy(format, saveptr, len); format[len] = 0; @@ -3995,19 +4004,26 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event trace_seq_printf(s, format, (long long)val); break; default: - die("bad count (%d)", ls); + do_warning("bad count (%d)", ls); + event->flags |= EVENT_FL_FAILED; } break; case 's': - if (!arg) - die("no matching argument"); + if (!arg) { + do_warning("no matching argument"); + event->flags |= EVENT_FL_FAILED; + goto out_failed; + } len = ((unsigned long)ptr + 1) - (unsigned long)saveptr; /* should never happen */ - if (len > 31) - die("bad format!"); + if (len > 31) { + do_warning("bad format!"); + event->flags |= EVENT_FL_FAILED; + len = 31; + } memcpy(format, saveptr, len); format[len] = 0; @@ -4025,6 +4041,11 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event trace_seq_putc(s, *ptr); } + if (event->flags & EVENT_FL_FAILED) { +out_failed: + trace_seq_printf(s, "[FAILED TO PARSE]"); + } + if (args) { free_args(args); free(bprint_fmt); From 0ca8da00ad170c12c12350c3a2500591a7bec535 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 7 Sep 2012 11:49:46 +0900 Subject: [PATCH 157/282] tools lib traceevent: Get rid of die() from pevent_register_event_handler If memory allocation for handler fails, return gracefully instead of calling die(). Note that casts to void * are needed because gcc complained about discarding 'const' qualifier during implicit argument cast. Signed-off-by: Namhyung Kim Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1346986187-5170-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 6d5e75987a3..17fd01d46e6 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5183,7 +5183,12 @@ int pevent_register_event_handler(struct pevent *pevent, not_found: /* Save for later use. 
*/ - handle = malloc_or_die(sizeof(*handle)); + handle = malloc(sizeof(*handle)); + if (!handle) { + do_warning("Failed to allocate event handler"); + return PEVENT_ERRNO__MEM_ALLOC_FAILED; + } + memset(handle, 0, sizeof(*handle)); handle->id = id; if (event_name) @@ -5193,7 +5198,11 @@ int pevent_register_event_handler(struct pevent *pevent, if ((event_name && !handle->event_name) || (sys_name && !handle->sys_name)) { - die("Failed to allocate event/sys name"); + do_warning("Failed to allocate event/sys name"); + free((void *)handle->event_name); + free((void *)handle->sys_name); + free(handle); + return PEVENT_ERRNO__MEM_ALLOC_FAILED; } handle->func = func; From 67ed939c9eb029c28057eb75de456a9d0e899fd4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 7 Sep 2012 11:49:47 +0900 Subject: [PATCH 158/282] tools lib traceevent: Get rid of die() from pevent_register_print_function If memory allocation for handler fails or argument type is not match, return gracefully instead of calling die(). Also add an new error code for the later case. Signed-off-by: Namhyung Kim Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1346986187-5170-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 26 ++++++++++++++++++++------ tools/lib/traceevent/event-parse.h | 3 ++- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 17fd01d46e6..4595aeb3c43 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5080,6 +5080,7 @@ int pevent_register_print_function(struct pevent *pevent, struct pevent_func_params *param; enum pevent_func_arg_type type; va_list ap; + int ret; func_handle = find_func_handler(pevent, name); if (func_handle) { @@ -5092,14 +5093,21 @@ int pevent_register_print_function(struct pevent *pevent, remove_func_handler(pevent, name); } - func_handle = malloc_or_die(sizeof(*func_handle)); + func_handle = malloc(sizeof(*func_handle)); + if (!func_handle) { + do_warning("Failed to allocate function handler"); + return PEVENT_ERRNO__MEM_ALLOC_FAILED; + } memset(func_handle, 0, sizeof(*func_handle)); func_handle->ret_type = ret_type; func_handle->name = strdup(name); func_handle->func = func; - if (!func_handle->name) - die("Failed to allocate function name"); + if (!func_handle->name) { + do_warning("Failed to allocate function name"); + free(func_handle); + return PEVENT_ERRNO__MEM_ALLOC_FAILED; + } next_param = &(func_handle->params); va_start(ap, name); @@ -5109,11 +5117,17 @@ int pevent_register_print_function(struct pevent *pevent, break; if (type < 0 || type >= PEVENT_FUNC_ARG_MAX_TYPES) { - warning("Invalid argument type %d", type); + do_warning("Invalid argument type %d", type); + ret = PEVENT_ERRNO__INVALID_ARG_TYPE; goto out_free; } - param = malloc_or_die(sizeof(*param)); + param = malloc(sizeof(*param)); + if (!param) { + do_warning("Failed to allocate function param"); + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; + goto out_free; + } param->type = type; param->next = NULL; @@ -5131,7 +5145,7 @@ int pevent_register_print_function(struct pevent *pevent, out_free: va_end(ap); free_func_handle(func_handle); - return -1; + return ret; } /** diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 863a0bbda7f..3318963f1c9 100644 --- a/tools/lib/traceevent/event-parse.h +++ 
b/tools/lib/traceevent/event-parse.h @@ -351,7 +351,8 @@ enum pevent_flag { _PE(READ_ID_FAILED, "failed to read event id"), \ _PE(READ_FORMAT_FAILED, "failed to read event format"), \ _PE(READ_PRINT_FAILED, "failed to read event print fmt"), \ - _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace") + _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\ + _PE(INVALID_ARG_TYPE, "invalid argument type") #undef _PE #define _PE(__code, __str) PEVENT_ERRNO__ ## __code From 863e451f69ddf255e877567e325298edd3b21519 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 6 Sep 2012 17:46:55 +0200 Subject: [PATCH 159/282] perf diff: Make diff command work with evsel hists Putting 'perf diff' command back on track with the 'latest' evsel hists changes. Each evsel has its own 'hists' object gathering stats for the particular event. While currently counts are accumulated for the whole session regardless of the events diversification within compared sessions. The 'perf diff' command now outputs all matching events within compared sessions (with event name specified). The per event diff output stays the same. $ ./perf diff # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ .......... ................. .............................. # 0.00% +15.14% [kernel.kallsyms] [k] __wake_up 0.00% +13.38% [kernel.kallsyms] [k] ext4fs_dirhash ... SNIP 0.00% +0.42% [kernel.kallsyms] [k] local_clock 0.17% -0.05% [kernel.kallsyms] [k] native_write_msr_safe # Event 'faults' # # Baseline Delta Shared Object Symbol # ........ .......... ................. .............................. # 0.00% +79.12% ld-2.15.so [.] _dl_relocate_object 0.00% +11.62% ld-2.15.so [.] openaux Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346946426-13496-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-diff.txt | 3 + tools/perf/builtin-diff.c | 93 +++++++++++++++++--------- tools/perf/util/evsel.h | 7 ++ tools/perf/util/session.h | 4 +- 4 files changed, 72 insertions(+), 35 deletions(-) diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index 74d7481ed7a..ab7f667de1b 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -17,6 +17,9 @@ captured via perf record. If no parameters are passed it will assume perf.data.old and perf.data. +The differential profile is displayed only for events matching both +specified perf.data files. 
+ OPTIONS ------- -M:: diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index d29d350fb2b..e9933fdd256 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -10,6 +10,7 @@ #include "util/event.h" #include "util/hist.h" #include "util/evsel.h" +#include "util/evlist.h" #include "util/session.h" #include "util/tool.h" #include "util/sort.h" @@ -24,11 +25,6 @@ static char diff__default_sort_order[] = "dso,symbol"; static bool force; static bool show_displacement; -struct perf_diff { - struct perf_tool tool; - struct perf_session *session; -}; - static int hists__add_entry(struct hists *self, struct addr_location *al, u64 period) { @@ -37,14 +33,12 @@ static int hists__add_entry(struct hists *self, return -ENOMEM; } -static int diff__process_sample_event(struct perf_tool *tool, +static int diff__process_sample_event(struct perf_tool *tool __used, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __used, + struct perf_evsel *evsel, struct machine *machine) { - struct perf_diff *_diff = container_of(tool, struct perf_diff, tool); - struct perf_session *session = _diff->session; struct addr_location al; if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) { @@ -56,26 +50,24 @@ static int diff__process_sample_event(struct perf_tool *tool, if (al.filtered || al.sym == NULL) return 0; - if (hists__add_entry(&session->hists, &al, sample->period)) { + if (hists__add_entry(&evsel->hists, &al, sample->period)) { pr_warning("problem incrementing symbol period, skipping event\n"); return -1; } - session->hists.stats.total_period += sample->period; + evsel->hists.stats.total_period += sample->period; return 0; } -static struct perf_diff diff = { - .tool = { - .sample = diff__process_sample_event, - .mmap = perf_event__process_mmap, - .comm = perf_event__process_comm, - .exit = perf_event__process_task, - .fork = perf_event__process_task, - .lost = perf_event__process_lost, - .ordered_samples = true, - .ordering_requires_timestamps = true, - }, +static struct perf_tool tool = { + .sample = diff__process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .lost = perf_event__process_lost, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; static void perf_session__insert_hist_entry_by_name(struct rb_root *root, @@ -146,34 +138,71 @@ static void hists__match(struct hists *older, struct hists *newer) } } +static struct perf_evsel *evsel_match(struct perf_evsel *evsel, + struct perf_evlist *evlist) +{ + struct perf_evsel *e; + + list_for_each_entry(e, &evlist->entries, node) + if (perf_evsel__match2(evsel, e)) + return e; + + return NULL; +} + static int __cmd_diff(void) { int ret, i; #define older (session[0]) #define newer (session[1]) struct perf_session *session[2]; + struct perf_evlist *evlist_new, *evlist_old; + struct perf_evsel *evsel; + bool first = true; older = perf_session__new(input_old, O_RDONLY, force, false, - &diff.tool); + &tool); newer = perf_session__new(input_new, O_RDONLY, force, false, - &diff.tool); + &tool); if (session[0] == NULL || session[1] == NULL) return -ENOMEM; for (i = 0; i < 2; ++i) { - diff.session = session[i]; - ret = perf_session__process_events(session[i], &diff.tool); + ret = perf_session__process_events(session[i], &tool); if (ret) goto out_delete; - hists__output_resort(&session[i]->hists); } - if (show_displacement) - hists__resort_entries(&older->hists); 
+ evlist_old = older->evlist; + evlist_new = newer->evlist; + + list_for_each_entry(evsel, &evlist_new->entries, node) + hists__output_resort(&evsel->hists); + + list_for_each_entry(evsel, &evlist_old->entries, node) { + hists__output_resort(&evsel->hists); + + if (show_displacement) + hists__resort_entries(&evsel->hists); + } + + list_for_each_entry(evsel, &evlist_new->entries, node) { + struct perf_evsel *evsel_old; + + evsel_old = evsel_match(evsel, evlist_old); + if (!evsel_old) + continue; + + fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n", + perf_evsel__name(evsel)); + + first = false; + + hists__match(&evsel_old->hists, &evsel->hists); + hists__fprintf(&evsel->hists, &evsel_old->hists, + show_displacement, true, 0, 0, stdout); + } - hists__match(&older->hists, &newer->hists); - hists__fprintf(&newer->hists, &older->hists, - show_displacement, true, 0, 0, stdout); out_delete: for (i = 0; i < 2; ++i) perf_session__delete(session[i]); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index a3f562cec43..390690eb878 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -124,6 +124,13 @@ void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads); (evsel->attr.type == PERF_TYPE_##t && \ evsel->attr.config == PERF_COUNT_##c) +static inline bool perf_evsel__match2(struct perf_evsel *e1, + struct perf_evsel *e2) +{ + return (e1->attr.type == e2->attr.type) && + (e1->attr.config == e2->attr.config); +} + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 176a60902f5..aab414fbb64 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -36,9 +36,7 @@ struct perf_session { struct pevent *pevent; /* * FIXME: Need to split this up further, we need global - * stats + per event stats. 'perf diff' also needs - * to properly support multiple events in a single - * perf.data file. + * stats + per event stats. */ struct hists hists; int fd; From 0ca0c130419a4aa05d28fbecc5d360f051944251 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 6 Sep 2012 17:46:56 +0200 Subject: [PATCH 160/282] perf tools: Replace sort's standalone field_sep with symbol_conf.field_sep The repsep_snprintf function was still using standalone field_sep, which not even set anymore. Replacing it with 'symbol_conf.field_sep'. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346946426-13496-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 6 ++---- tools/perf/util/sort.h | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0f5a0a496bc..7a2fbd8855b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -12,8 +12,6 @@ int sort__branch_mode = -1; /* -1 = means not set */ enum sort_type sort__first_dimension; -char * field_sep; - LIST_HEAD(hist_entry__sort_list); static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) @@ -23,11 +21,11 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) 
va_start(ap, fmt); n = vsnprintf(bf, size, fmt, ap); - if (field_sep && n > 0) { + if (symbol_conf.field_sep && n > 0) { char *sep = bf; while (1) { - sep = strchr(sep, *field_sep); + sep = strchr(sep, *symbol_conf.field_sep); if (sep == NULL) break; *sep = '.'; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e724b26acd5..e459c981b03 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -32,7 +32,6 @@ extern const char default_sort_order[]; extern int sort__need_collapse; extern int sort__has_parent; extern int sort__branch_mode; -extern char *field_sep; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; extern struct sort_entry sort_sym; From b771a8306205f0261496e93574a71bc7106844dc Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Sat, 8 Sep 2012 03:43:17 +0300 Subject: [PATCH 161/282] perf tools: include basename for non-glibc systems perf uses the glibc version of basename(), by defining _GNU_SOURCE, including string.h and not including libgen.h. The glibc version of basename is better than the POSIX version since it does not modify its argument. Android has only one version of basename which is defined in libgen.h. This version is the same as the glibc version. Error on Android: util/annotate.c: In function 'symbol__annotate_printf': util/annotate.c:503:3: error: implicit declaration of function 'basename' [-Werror=implicit-function-declaration] util/annotate.c:503:3: error: nested extern declaration of 'basename' [-Werror=nested-externs] util/annotate.c:503:14: error: assignment makes pointer from integer without a cast [-Werror] On Android libgen.h should be included to define basename. Signed-off-by: Irina Tirdea Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347065004-15306-6-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index fc4b1e630fd..d3b330cbc3e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -10,6 +10,9 @@ #include #include #include +#if defined(__BIONIC__) +#include +#endif #ifndef NO_LIBELF_SUPPORT #include From 57ec0a942d6855cd7a5711fcf4adb57c51259659 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Sat, 8 Sep 2012 03:43:18 +0300 Subject: [PATCH 162/282] perf tools: fix missing winsize definition In Android, struct winsize is not defined in the headers already included in help.c. This leads to a compile error. Including termios.h fixes the compilation error since it defines struct winsize. 
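For reference, struct winsize is the type the TIOCGWINSZ ioctl fills in when querying terminal dimensions; a standalone check along these lines (not part of this patch) builds once the headers are pulled in explicitly:

    #include <stdio.h>
    #include <sys/ioctl.h>   /* TIOCGWINSZ */
    #include <termios.h>     /* bionic defines struct winsize here */

    int main(void)
    {
        struct winsize ws;

        if (ioctl(0, TIOCGWINSZ, &ws) == 0)
            printf("%hu rows x %hu cols\n", ws.ws_row, ws.ws_col);
        return 0;
    }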
Signed-off-by: Irina Tirdea Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347065004-15306-7-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/help.c | 1 + tools/perf/util/top.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c index 6f2975a0035..4fa764d8f7d 100644 --- a/tools/perf/util/help.c +++ b/tools/perf/util/help.c @@ -3,6 +3,7 @@ #include "exec_cmd.h" #include "levenshtein.h" #include "help.h" +#include void add_cmdname(struct cmdnames *cmds, const char *name, size_t len) { diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 33347ca89ee..86ff1b15059 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -5,6 +5,7 @@ #include "types.h" #include #include +#include struct perf_evlist; struct perf_evsel; From 27683dc5cc9286ace82b6c900ab794428ed5e487 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Sat, 8 Sep 2012 03:43:19 +0300 Subject: [PATCH 163/282] perf tools: include missing pthread.h header pthread variables are used in some files without explicitely including pthread.h. This leads to compile errors on Android. e.g.: in annotate.h, error: unknown type name 'pthread_mutex_t' Including pthread.h explicitely in files that use it to have all definitions included. Signed-off-by: Irina Tirdea Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347065004-15306-8-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 1 + tools/perf/util/annotate.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index e7840e50071..fb8578cfa03 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -14,6 +14,7 @@ #include "util/run-command.h" #include "util/parse-events.h" #include "util/debugfs.h" +#include const char perf_usage_string[] = "perf [--version] [--help] COMMAND [ARGS]"; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index a6d6bc5d716..62a6e7a7365 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -7,6 +7,7 @@ #include "symbol.h" #include #include +#include struct ins; From 7b45f21c2e42f265f6fd469e43857e98e2fdf01c Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Sat, 8 Sep 2012 03:43:21 +0300 Subject: [PATCH 164/282] perf tools: replace mkostemp with mkstemp The mkostemp function is only available in glibc. This leads to compile error in Android, since bionic is derived from BSD. Replacing mkostemp with mkstemp. mkstemp is available on both glibc and bionic. 
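For illustration, the portable pattern looks like this (a generic sketch, not the dso-test-data.c code; the template must end in XXXXXX and mkstemp() both creates and opens the file):

    #include <stdlib.h>
    #include <unistd.h>

    static int make_scratch_file(void)
    {
        char templ[] = "/tmp/perf-scratch-XXXXXX";
        int fd = mkstemp(templ);      /* opened O_RDWR|O_CREAT|O_EXCL */

        if (fd >= 0)
            unlink(templ);            /* the open descriptor keeps it alive */
        return fd;
    }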
Signed-off-by: Irina Tirdea Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347065004-15306-10-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso-test-data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/dso-test-data.c b/tools/perf/util/dso-test-data.c index 541cdc72c7d..c6caedeb1d6 100644 --- a/tools/perf/util/dso-test-data.c +++ b/tools/perf/util/dso-test-data.c @@ -23,7 +23,7 @@ static char *test_file(int size) int fd, i; unsigned char *buf; - fd = mkostemp(templ, O_CREAT|O_WRONLY|O_TRUNC); + fd = mkstemp(templ); buf = malloc(size); if (!buf) { From 9612ef6716efc20fac0f57d59452cda4e6846bda Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Sat, 8 Sep 2012 03:43:22 +0300 Subject: [PATCH 165/282] tools lib traceevent: replace mempcpy with memcpy The mempcpy function is not supported by bionic in Android and will lead to compilation errors. Replacing mempcpy with memcpy so it will work in Android. Signed-off-by: Irina Tirdea Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347065004-15306-11-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 4595aeb3c43..f4190b5764d 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4833,8 +4833,8 @@ int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, msg = strerror_r(errnum, buf, buflen); if (msg != buf) { size_t len = strlen(msg); - char *c = mempcpy(buf, msg, min(buflen-1, len)); - *c = '\0'; + memcpy(buf, msg, min(buflen - 1, len)); + *(buf + min(buflen - 1, len)) = '\0'; } return 0; } From c9f08bee50c71b06685ccff8110e56a7c05662b7 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Sat, 8 Sep 2012 03:43:23 +0300 Subject: [PATCH 166/282] perf tools: add NO_BACKTRACE for application self-debugging perf has support for self-debugging by defining dump_stack function. This function uses backtrace and backtrace_symbols functions defined as GNU extensions. In Android, bionic does not offer support for these functions and compilation will fail with the following error: target C: libperf <= tools/perf/util/util.c tools/perf/util/util.c:4:22: fatal error: execinfo.h: No such file or directory compilation terminated. Add a compile-time option (NO_BACKTRACE) to enable or disable self-debugging functionality in perf. This can also help in debugging since it offers the possibility to turn on/off printing the backtrace. 
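With this in place the feature can also be forced off from the command line, bypassing the automatic probe in config/feature-tests.mak, e.g.:

    $ make -C tools/perf NO_BACKTRACE=1

If the probe fails to compile the SOURCE_BACKTRACE test program, -DNO_BACKTRACE is added automatically and dump_stack() becomes a no-op.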
Signed-off-by: Irina Tirdea Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347065004-15306-12-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 8 ++++++++ tools/perf/config/feature-tests.mak | 14 ++++++++++++++ tools/perf/util/util.c | 6 ++++++ 3 files changed, 28 insertions(+) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index afd50757490..3eda4921573 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -755,6 +755,14 @@ else endif endif +ifdef NO_BACKTRACE + BASIC_CFLAGS += -DNO_BACKTRACE +else + ifneq ($(call try-cc,$(SOURCE_BACKTRACE),),y) + BASIC_CFLAGS += -DNO_BACKTRACE + endif +endif + ifdef ASCIIDOC8 export ASCIIDOC8 endif diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index 2f1156a62ab..116690a669d 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -179,3 +179,17 @@ int main(void) } endef endif + +ifndef NO_BACKTRACE +define SOURCE_BACKTRACE +#include +#include + +int main(void) +{ + backtrace(NULL, 0); + backtrace_symbols(NULL, 0); + return 0; +} +endef +endif diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 1b8775c3707..2055cf38041 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -1,7 +1,9 @@ #include "../perf.h" #include "util.h" #include +#ifndef NO_BACKTRACE #include +#endif #include #include @@ -163,6 +165,7 @@ size_t hex_width(u64 v) } /* Obtain a backtrace and print it to stdout. */ +#ifndef NO_BACKTRACE void dump_stack(void) { void *array[16]; @@ -177,3 +180,6 @@ void dump_stack(void) free(strings); } +#else +void dump_stack(void) {} +#endif From b155a09015135cf59ada8d48109ccbd9891c1b42 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 31 Aug 2012 10:49:27 +0300 Subject: [PATCH 167/282] perf tools: Fix build for another rbtree.c change Fixes: ../../lib/rbtree.c: In function 'rb_insert_color': ../../lib/rbtree.c:95:9: error: 'true' undeclared (first use in this function) ../../lib/rbtree.c:95:9: note: each undeclared identifier is reported only once for each function it appears in ../../lib/rbtree.c: In function '__rb_erase_color': ../../lib/rbtree.c:216:9: error: 'true' undeclared (first use in this function) ../../lib/rbtree.c: In function 'rb_erase': ../../lib/rbtree.c:368:2: error: unknown type name 'bool' make: *** [util/rbtree.o] Error 1 Signed-off-by: Adrian Hunter Cc: Michel Lespinasse Cc: Andrew Morton Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/50406F60.5040707@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/linux/rbtree.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h index 7a243a14303..2a030c5af3a 100644 --- a/tools/perf/util/include/linux/rbtree.h +++ b/tools/perf/util/include/linux/rbtree.h @@ -1 +1,2 @@ +#include #include "../../../../include/linux/rbtree.h" From 8bf98b89688c3d7ec071bf26d49761e38d846b47 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Sat, 8 Sep 2012 08:35:51 +0300 Subject: [PATCH 168/282] perf bench: fix assert when NDEBUG is defined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When NDEBUG is defined, the assert macro will be expanded to nothing. Some assert calls used in perf are also including some functionality (e.g. system calls), not only validity checks. 
Therefore, if NDEBUG is defined, this functionality will be removed along with the assert. Perf also defines BUG_ON based on assert, so it has the same problem. Define BUG_ON so that the condition will be executed when NDEBUG is defined. Replace the assert statements that have these side effects with BUG_ON. For defining BUG_ON, use "if (cond) {}" insted of "if (cond) ;" because in the latter case build fails with "error: suggest braces around empty body in an ‘if’ statement [-Werror=empty-body]" Suggested-by: Peter Zijlstra Signed-off-by: Irina Tirdea Reviewed-by: Namhyung Kim Reviewed-by: Pekka Enberg Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347082551-2394-1-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-pipe.c | 6 +++--- tools/perf/util/include/linux/kernel.h | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 0c7454f8b8a..15911e9c587 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -56,13 +56,13 @@ int bench_sched_pipe(int argc, const char **argv, * causes error in building environment for perf */ int __used ret, wait_stat; - pid_t pid, retpid; + pid_t pid, retpid __used; argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); - assert(!pipe(pipe_1)); - assert(!pipe(pipe_2)); + BUG_ON(pipe(pipe_1)); + BUG_ON(pipe(pipe_2)); pid = fork(); assert(pid >= 0); diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index b6842c1d02a..4af9a10cc2d 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -47,8 +47,12 @@ #endif #ifndef BUG_ON +#ifdef NDEBUG +#define BUG_ON(cond) do { if (cond) {} } while (0) +#else #define BUG_ON(cond) assert(!(cond)) #endif +#endif /* * Both need more care to handle endianness From ea251d51d2c7d7233790123227f787c477f567f5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 3 Sep 2012 11:53:06 +0900 Subject: [PATCH 169/282] perf hists: Introduce perf_hpp for hist period printing Current hist print functions are messy because it has to consider many of command line options and the code doing that is scattered around to places. So when someone wants to add an option to manipulate the hist output it'd very easy to miss to update all of them in sync. And things getting worse as more options/features are added continuously. So I'd like to refactor them using hpp formats and move common code to ui/hist.c in order to make it easy to maintain and to add new features. 
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346640790-17197-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 + tools/perf/builtin-diff.c | 1 + tools/perf/ui/hist.c | 340 +++++++++++++++++++++++++++++++++++++ tools/perf/ui/setup.c | 8 +- tools/perf/ui/stdio/hist.c | 238 +++++--------------------- tools/perf/util/hist.h | 37 ++++ 6 files changed, 426 insertions(+), 200 deletions(-) create mode 100644 tools/perf/ui/hist.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3eda4921573..e4b2e8f2606 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -403,7 +403,9 @@ LIB_OBJS += $(OUTPUT)util/cgroup.o LIB_OBJS += $(OUTPUT)util/target.o LIB_OBJS += $(OUTPUT)util/rblist.o LIB_OBJS += $(OUTPUT)util/intlist.o + LIB_OBJS += $(OUTPUT)ui/helpline.o +LIB_OBJS += $(OUTPUT)ui/hist.o LIB_OBJS += $(OUTPUT)ui/stdio/hist.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index e9933fdd256..c4c6d76b70e 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -264,6 +264,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix __used) if (symbol__init() < 0) return -1; + perf_hpp__init(true, show_displacement); setup_sorting(diff_usage, options); setup_pager(); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c new file mode 100644 index 00000000000..8ccd1f2330d --- /dev/null +++ b/tools/perf/ui/hist.c @@ -0,0 +1,340 @@ +#include + +#include "../util/hist.h" +#include "../util/util.h" +#include "../util/sort.h" + + +/* hist period print (hpp) functions */ +static int hpp__header_overhead(struct perf_hpp *hpp) +{ + if (hpp->ptr) + return scnprintf(hpp->buf, hpp->size, "Baseline"); + else + return scnprintf(hpp->buf, hpp->size, "Overhead"); +} + +static int hpp__width_overhead(struct perf_hpp *hpp __used) +{ + return 8; +} + +static int hpp__color_overhead(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period / hpp->total_period; + + if (hpp->ptr) { + struct hists *old_hists = hpp->ptr; + u64 total_period = old_hists->stats.total_period; + u64 base_period = he->pair ? he->pair->period : 0; + + if (total_period) + percent = 100.0 * base_period / total_period; + else + percent = 0.0; + } + + return percent_color_snprintf(hpp->buf, hpp->size, " %5.2f%%", percent); +} + +static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period / hpp->total_period; + + if (hpp->ptr) { + struct hists *old_hists = hpp->ptr; + u64 total_period = old_hists->stats.total_period; + u64 base_period = he->pair ? 
he->pair->period : 0; + + if (total_period) + percent = 100.0 * base_period / total_period; + else + percent = 0.0; + } + + return scnprintf(hpp->buf, hpp->size, " %5.2f%%", percent); +} + +static int hpp__header_overhead_sys(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, " sys "); +} + +static int hpp__width_overhead_sys(struct perf_hpp *hpp __used) +{ + return 6; +} + +static int hpp__color_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period_sys / hpp->total_period; + return percent_color_snprintf(hpp->buf, hpp->size, "%5.2f%%", percent); +} + +static int hpp__entry_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period_sys / hpp->total_period; + return scnprintf(hpp->buf, hpp->size, "%5.2f%%", percent); +} + +static int hpp__header_overhead_us(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, " user "); +} + +static int hpp__width_overhead_us(struct perf_hpp *hpp __used) +{ + return 6; +} + +static int hpp__color_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period_us / hpp->total_period; + return percent_color_snprintf(hpp->buf, hpp->size, "%5.2f%%", percent); +} + +static int hpp__entry_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period_us / hpp->total_period; + return scnprintf(hpp->buf, hpp->size, "%5.2f%%", percent); +} + +static int hpp__header_overhead_guest_sys(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, "guest sys"); +} + +static int hpp__width_overhead_guest_sys(struct perf_hpp *hpp __used) +{ + return 9; +} + +static int hpp__color_overhead_guest_sys(struct perf_hpp *hpp, + struct hist_entry *he) +{ + double percent = 100.0 * he->period_guest_sys / hpp->total_period; + return percent_color_snprintf(hpp->buf, hpp->size, " %5.2f%% ", percent); +} + +static int hpp__entry_overhead_guest_sys(struct perf_hpp *hpp, + struct hist_entry *he) +{ + double percent = 100.0 * he->period_guest_sys / hpp->total_period; + return scnprintf(hpp->buf, hpp->size, " %5.2f%% ", percent); +} + +static int hpp__header_overhead_guest_us(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, "guest usr"); +} + +static int hpp__width_overhead_guest_us(struct perf_hpp *hpp __used) +{ + return 9; +} + +static int hpp__color_overhead_guest_us(struct perf_hpp *hpp, + struct hist_entry *he) +{ + double percent = 100.0 * he->period_guest_us / hpp->total_period; + return percent_color_snprintf(hpp->buf, hpp->size, " %5.2f%% ", percent); +} + +static int hpp__entry_overhead_guest_us(struct perf_hpp *hpp, + struct hist_entry *he) +{ + double percent = 100.0 * he->period_guest_us / hpp->total_period; + return scnprintf(hpp->buf, hpp->size, " %5.2f%% ", percent); +} + +static int hpp__header_samples(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, " Samples "); +} + +static int hpp__width_samples(struct perf_hpp *hpp __used) +{ + return 11; +} + +static int hpp__entry_samples(struct perf_hpp *hpp, struct hist_entry *he) +{ + return scnprintf(hpp->buf, hpp->size, "%11" PRIu64, he->nr_events); +} + +static int hpp__header_period(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, " Period "); +} + +static int hpp__width_period(struct perf_hpp *hpp __used) +{ + return 12; +} + +static int hpp__entry_period(struct perf_hpp *hpp, struct hist_entry *he) +{ + return scnprintf(hpp->buf, hpp->size, "%12" PRIu64, he->period); +} + +static int 
hpp__header_delta(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, " Delta "); +} + +static int hpp__width_delta(struct perf_hpp *hpp __used) +{ + return 7; +} + +static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he) +{ + struct hists *pair_hists = hpp->ptr; + u64 old_total, new_total; + double old_percent = 0, new_percent = 0; + double diff; + char buf[32]; + + old_total = pair_hists->stats.total_period; + if (old_total > 0 && he->pair) + old_percent = 100.0 * he->pair->period / old_total; + + new_total = hpp->total_period; + if (new_total > 0) + new_percent = 100.0 * he->period / new_total; + + diff = new_percent - old_percent; + if (fabs(diff) < 0.01) + return scnprintf(hpp->buf, hpp->size, " "); + + scnprintf(buf, sizeof(buf), "%+4.2F%%", diff); + return scnprintf(hpp->buf, hpp->size, "%7.7s", buf); +} + +static int hpp__header_displ(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, "Displ."); +} + +static int hpp__width_displ(struct perf_hpp *hpp __used) +{ + return 6; +} + +static int hpp__entry_displ(struct perf_hpp *hpp, struct hist_entry *he __used) +{ + char buf[32]; + + if (!hpp->displacement) + return scnprintf(hpp->buf, hpp->size, " "); + + scnprintf(buf, sizeof(buf), "%+4ld", hpp->displacement); + return scnprintf(hpp->buf, hpp->size, "%6.6s", buf); +} + +#define HPP__COLOR_PRINT_FNS(_name) \ + .header = hpp__header_ ## _name, \ + .width = hpp__width_ ## _name, \ + .color = hpp__color_ ## _name, \ + .entry = hpp__entry_ ## _name + +#define HPP__PRINT_FNS(_name) \ + .header = hpp__header_ ## _name, \ + .width = hpp__width_ ## _name, \ + .entry = hpp__entry_ ## _name + +struct perf_hpp_fmt perf_hpp__format[] = { + { .cond = true, HPP__COLOR_PRINT_FNS(overhead) }, + { .cond = false, HPP__COLOR_PRINT_FNS(overhead_sys) }, + { .cond = false, HPP__COLOR_PRINT_FNS(overhead_us) }, + { .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_sys) }, + { .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_us) }, + { .cond = false, HPP__PRINT_FNS(samples) }, + { .cond = false, HPP__PRINT_FNS(period) }, + { .cond = false, HPP__PRINT_FNS(delta) }, + { .cond = false, HPP__PRINT_FNS(displ) } +}; + +#undef HPP__COLOR_PRINT_FNS +#undef HPP__PRINT_FNS + +void perf_hpp__init(bool need_pair, bool show_displacement) +{ + if (symbol_conf.show_cpu_utilization) { + perf_hpp__format[PERF_HPP__OVERHEAD_SYS].cond = true; + perf_hpp__format[PERF_HPP__OVERHEAD_US].cond = true; + + if (perf_guest) { + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].cond = true; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].cond = true; + } + } + + if (symbol_conf.show_nr_samples) + perf_hpp__format[PERF_HPP__SAMPLES].cond = true; + + if (symbol_conf.show_total_period) + perf_hpp__format[PERF_HPP__PERIOD].cond = true; + + if (need_pair) { + perf_hpp__format[PERF_HPP__DELTA].cond = true; + + if (show_displacement) + perf_hpp__format[PERF_HPP__DISPL].cond = true; + } +} + +static inline void advance_hpp(struct perf_hpp *hpp, int inc) +{ + hpp->buf += inc; + hpp->size -= inc; +} + +int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he, + bool color) +{ + const char *sep = symbol_conf.field_sep; + char *start = hpp->buf; + int i, ret; + + if (symbol_conf.exclude_other && !he->parent) + return 0; + + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; + + if (!sep || i > 0) { + ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " "); + advance_hpp(hpp, ret); + } + + if (color && perf_hpp__format[i].color) + ret = 
perf_hpp__format[i].color(hpp, he); + else + ret = perf_hpp__format[i].entry(hpp, he); + + advance_hpp(hpp, ret); + } + + return hpp->buf - start; +} + +int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size, + struct hists *hists) +{ + const char *sep = symbol_conf.field_sep; + struct sort_entry *se; + int ret = 0; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + + ret += scnprintf(s + ret, size - ret, "%s", sep ?: " "); + ret += se->se_snprintf(he, s + ret, size - ret, + hists__col_len(hists, se->se_width_idx)); + } + + return ret; +} diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index c7820e56966..bd7d460f844 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -1,8 +1,8 @@ #include -#include "../cache.h" -#include "../debug.h" - +#include "../util/cache.h" +#include "../util/debug.h" +#include "../util/hist.h" pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; @@ -29,6 +29,8 @@ void setup_browser(bool fallback_to_pager) use_browser = 0; if (fallback_to_pager) setup_pager(); + + perf_hpp__init(false, false); break; } } diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 9bf7e9e5a72..4228b4c6b72 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -1,5 +1,4 @@ #include -#include #include "../../util/util.h" #include "../../util/hist.h" @@ -291,138 +290,6 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, return 0; } -static int hist_entry__period_snprintf(struct hist_entry *he, char *s, - size_t size, struct hists *pair_hists, - bool show_displacement, long displacement, - bool color, u64 total_period) -{ - u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; - u64 nr_events; - const char *sep = symbol_conf.field_sep; - int ret; - - if (symbol_conf.exclude_other && !he->parent) - return 0; - - if (pair_hists) { - period = he->pair ? he->pair->period : 0; - nr_events = he->pair ? he->pair->nr_events : 0; - total = pair_hists->stats.total_period; - period_sys = he->pair ? he->pair->period_sys : 0; - period_us = he->pair ? he->pair->period_us : 0; - period_guest_sys = he->pair ? he->pair->period_guest_sys : 0; - period_guest_us = he->pair ? he->pair->period_guest_us : 0; - } else { - period = he->period; - nr_events = he->nr_events; - total = total_period; - period_sys = he->period_sys; - period_us = he->period_us; - period_guest_sys = he->period_guest_sys; - period_guest_us = he->period_guest_us; - } - - if (total) { - if (color) - ret = percent_color_snprintf(s, size, - sep ? "%.2f" : " %6.2f%%", - (period * 100.0) / total); - else - ret = scnprintf(s, size, sep ? "%.2f" : " %6.2f%%", - (period * 100.0) / total); - if (symbol_conf.show_cpu_utilization) { - ret += percent_color_snprintf(s + ret, size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_sys * 100.0) / total); - ret += percent_color_snprintf(s + ret, size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_us * 100.0) / total); - if (perf_guest) { - ret += percent_color_snprintf(s + ret, - size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_guest_sys * 100.0) / - total); - ret += percent_color_snprintf(s + ret, - size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_guest_us * 100.0) / - total); - } - } - } else - ret = scnprintf(s, size, sep ? 
"%" PRIu64 : "%12" PRIu64 " ", period); - - if (symbol_conf.show_nr_samples) { - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events); - else - ret += scnprintf(s + ret, size - ret, "%11" PRIu64, nr_events); - } - - if (symbol_conf.show_total_period) { - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); - else - ret += scnprintf(s + ret, size - ret, " %12" PRIu64, period); - } - - if (pair_hists) { - char bf[32]; - double old_percent = 0, new_percent = 0, diff; - - if (total > 0) - old_percent = (period * 100.0) / total; - if (total_period > 0) - new_percent = (he->period * 100.0) / total_period; - - diff = new_percent - old_percent; - - if (fabs(diff) >= 0.01) - scnprintf(bf, sizeof(bf), "%+4.2F%%", diff); - else - scnprintf(bf, sizeof(bf), " "); - - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); - else - ret += scnprintf(s + ret, size - ret, "%11.11s", bf); - - if (show_displacement) { - if (displacement) - scnprintf(bf, sizeof(bf), "%+4ld", displacement); - else - scnprintf(bf, sizeof(bf), " "); - - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); - else - ret += scnprintf(s + ret, size - ret, "%6.6s", bf); - } - } - - return ret; -} - -int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size, - struct hists *hists) -{ - const char *sep = symbol_conf.field_sep; - struct sort_entry *se; - int ret = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - if (se->elide) - continue; - - ret += scnprintf(s + ret, size - ret, "%s", sep ?: " "); - ret += se->se_snprintf(he, s + ret, size - ret, - hists__col_len(hists, se->se_width_idx)); - } - - return ret; -} - static size_t hist_entry__callchain_fprintf(struct hist_entry *he, struct hists *hists, u64 total_period, FILE *fp) @@ -441,18 +308,22 @@ static size_t hist_entry__callchain_fprintf(struct hist_entry *he, static int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, struct hists *pair_hists, - bool show_displacement, long displacement, - u64 total_period, FILE *fp) + long displacement, u64 total_period, FILE *fp) { char bf[512]; int ret; + struct perf_hpp hpp = { + .buf = bf, + .size = size, + .total_period = total_period, + .displacement = displacement, + .ptr = pair_hists, + }; if (size == 0 || size > sizeof(bf)) - size = sizeof(bf); + size = hpp.size = sizeof(bf); - ret = hist_entry__period_snprintf(he, bf, size, pair_hists, - show_displacement, displacement, - true, total_period); + ret = hist_entry__period_snprintf(&hpp, he, true); hist_entry__sort_snprintf(he, bf + ret, size - ret, hists); ret = fprintf(fp, "%s\n", bf); @@ -477,59 +348,29 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, unsigned int width; const char *sep = symbol_conf.field_sep; const char *col_width = symbol_conf.col_width_list_str; - int nr_rows = 0; + int idx, nr_rows = 0; + char bf[64]; + struct perf_hpp dummy_hpp = { + .buf = bf, + .size = sizeof(bf), + .ptr = pair, + }; init_rem_hits(); if (!show_header) goto print_entries; - fprintf(fp, "# %s", pair ? 
"Baseline" : "Overhead"); + fprintf(fp, "# "); + for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) { + if (!perf_hpp__format[idx].cond) + continue; - if (symbol_conf.show_cpu_utilization) { - if (sep) { - ret += fprintf(fp, "%csys", *sep); - ret += fprintf(fp, "%cus", *sep); - if (perf_guest) { - ret += fprintf(fp, "%cguest sys", *sep); - ret += fprintf(fp, "%cguest us", *sep); - } - } else { - ret += fprintf(fp, " sys "); - ret += fprintf(fp, " us "); - if (perf_guest) { - ret += fprintf(fp, " guest sys "); - ret += fprintf(fp, " guest us "); - } - } - } + if (idx) + fprintf(fp, "%s", sep ?: " "); - if (symbol_conf.show_nr_samples) { - if (sep) - fprintf(fp, "%cSamples", *sep); - else - fputs(" Samples ", fp); - } - - if (symbol_conf.show_total_period) { - if (sep) - ret += fprintf(fp, "%cPeriod", *sep); - else - ret += fprintf(fp, " Period "); - } - - if (pair) { - if (sep) - ret += fprintf(fp, "%cDelta", *sep); - else - ret += fprintf(fp, " Delta "); - - if (show_displacement) { - if (sep) - ret += fprintf(fp, "%cDisplacement", *sep); - else - ret += fprintf(fp, " Displ"); - } + perf_hpp__format[idx].header(&dummy_hpp); + fprintf(fp, "%s", bf); } list_for_each_entry(se, &hist_entry__sort_list, list) { @@ -561,18 +402,21 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, if (sep) goto print_entries; - fprintf(fp, "# ........"); - if (symbol_conf.show_cpu_utilization) - fprintf(fp, " ....... ......."); - if (symbol_conf.show_nr_samples) - fprintf(fp, " .........."); - if (symbol_conf.show_total_period) - fprintf(fp, " ............"); - if (pair) { - fprintf(fp, " .........."); - if (show_displacement) - fprintf(fp, " ....."); + fprintf(fp, "# "); + for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) { + unsigned int i; + + if (!perf_hpp__format[idx].cond) + continue; + + if (idx) + fprintf(fp, "%s", sep ?: " "); + + width = perf_hpp__format[idx].width(&dummy_hpp); + for (i = 0; i < width; i++) + fprintf(fp, "."); } + list_for_each_entry(se, &hist_entry__sort_list, list) { unsigned int i; @@ -612,8 +456,8 @@ print_entries: displacement = 0; ++position; } - ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, - displacement, total_period, fp); + ret += hist_entry__fprintf(h, max_cols, hists, pair, displacement, + total_period, fp); if (max_rows && ++nr_rows >= max_rows) goto out; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2e650ffb7d2..4146f51124f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -115,6 +115,43 @@ bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len); void hists__reset_col_len(struct hists *hists); void hists__calc_col_len(struct hists *hists, struct hist_entry *he); +struct perf_hpp { + char *buf; + size_t size; + u64 total_period; + const char *sep; + long displacement; + void *ptr; +}; + +struct perf_hpp_fmt { + bool cond; + int (*header)(struct perf_hpp *hpp); + int (*width)(struct perf_hpp *hpp); + int (*color)(struct perf_hpp *hpp, struct hist_entry *he); + int (*entry)(struct perf_hpp *hpp, struct hist_entry *he); +}; + +extern struct perf_hpp_fmt perf_hpp__format[]; + +enum { + PERF_HPP__OVERHEAD, + PERF_HPP__OVERHEAD_SYS, + PERF_HPP__OVERHEAD_US, + PERF_HPP__OVERHEAD_GUEST_SYS, + PERF_HPP__OVERHEAD_GUEST_US, + PERF_HPP__SAMPLES, + PERF_HPP__PERIOD, + PERF_HPP__DELTA, + PERF_HPP__DISPL, + + PERF_HPP__MAX_INDEX +}; + +void perf_hpp__init(bool need_pair, bool show_displacement); +int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he, + bool color); + struct 
perf_evlist; #ifdef NO_NEWT_SUPPORT From 9ffad987ff565999d91fc2783dd77f08094a743b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 3 Sep 2012 11:53:07 +0900 Subject: [PATCH 170/282] perf hists: Handle field separator properly When a field separator is given, the output format doesn't need to be fancy like aligning to column length, coloring the percent value and so on. And since there's a slight difference to normal format, fix it not to break backward compatibility. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346640790-17197-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 74 ++++++++++++++++++++++++-------------- tools/perf/ui/stdio/hist.c | 3 +- 2 files changed, 50 insertions(+), 27 deletions(-) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 8ccd1f2330d..009adf206c8 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -8,10 +8,9 @@ /* hist period print (hpp) functions */ static int hpp__header_overhead(struct perf_hpp *hpp) { - if (hpp->ptr) - return scnprintf(hpp->buf, hpp->size, "Baseline"); - else - return scnprintf(hpp->buf, hpp->size, "Overhead"); + const char *fmt = hpp->ptr ? "Baseline" : "Overhead"; + + return scnprintf(hpp->buf, hpp->size, fmt); } static int hpp__width_overhead(struct perf_hpp *hpp __used) @@ -40,6 +39,7 @@ static int hpp__color_overhead(struct perf_hpp *hpp, struct hist_entry *he) static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period / hpp->total_period; + const char *fmt = symbol_conf.field_sep ? "%.2f" : " %5.2f%%"; if (hpp->ptr) { struct hists *old_hists = hpp->ptr; @@ -52,12 +52,14 @@ static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he) percent = 0.0; } - return scnprintf(hpp->buf, hpp->size, " %5.2f%%", percent); + return scnprintf(hpp->buf, hpp->size, fmt, percent); } static int hpp__header_overhead_sys(struct perf_hpp *hpp) { - return scnprintf(hpp->buf, hpp->size, " sys "); + const char *fmt = symbol_conf.field_sep ? "%s" : "%6s"; + + return scnprintf(hpp->buf, hpp->size, fmt, "sys"); } static int hpp__width_overhead_sys(struct perf_hpp *hpp __used) @@ -74,12 +76,16 @@ static int hpp__color_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) static int hpp__entry_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_sys / hpp->total_period; - return scnprintf(hpp->buf, hpp->size, "%5.2f%%", percent); + const char *fmt = symbol_conf.field_sep ? "%.2f" : "%5.2f%%"; + + return scnprintf(hpp->buf, hpp->size, fmt, percent); } static int hpp__header_overhead_us(struct perf_hpp *hpp) { - return scnprintf(hpp->buf, hpp->size, " user "); + const char *fmt = symbol_conf.field_sep ? "%s" : "%6s"; + + return scnprintf(hpp->buf, hpp->size, fmt, "user"); } static int hpp__width_overhead_us(struct perf_hpp *hpp __used) @@ -96,7 +102,9 @@ static int hpp__color_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) static int hpp__entry_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_us / hpp->total_period; - return scnprintf(hpp->buf, hpp->size, "%5.2f%%", percent); + const char *fmt = symbol_conf.field_sep ? 
"%.2f" : "%5.2f%%"; + + return scnprintf(hpp->buf, hpp->size, fmt, percent); } static int hpp__header_overhead_guest_sys(struct perf_hpp *hpp) @@ -120,7 +128,9 @@ static int hpp__entry_overhead_guest_sys(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_guest_sys / hpp->total_period; - return scnprintf(hpp->buf, hpp->size, " %5.2f%% ", percent); + const char *fmt = symbol_conf.field_sep ? "%.2f" : " %5.2f%% "; + + return scnprintf(hpp->buf, hpp->size, fmt, percent); } static int hpp__header_overhead_guest_us(struct perf_hpp *hpp) @@ -144,12 +154,16 @@ static int hpp__entry_overhead_guest_us(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_guest_us / hpp->total_period; - return scnprintf(hpp->buf, hpp->size, " %5.2f%% ", percent); + const char *fmt = symbol_conf.field_sep ? "%.2f" : " %5.2f%% "; + + return scnprintf(hpp->buf, hpp->size, fmt, percent); } static int hpp__header_samples(struct perf_hpp *hpp) { - return scnprintf(hpp->buf, hpp->size, " Samples "); + const char *fmt = symbol_conf.field_sep ? "%s" : "%11s"; + + return scnprintf(hpp->buf, hpp->size, fmt, "Samples"); } static int hpp__width_samples(struct perf_hpp *hpp __used) @@ -159,12 +173,16 @@ static int hpp__width_samples(struct perf_hpp *hpp __used) static int hpp__entry_samples(struct perf_hpp *hpp, struct hist_entry *he) { - return scnprintf(hpp->buf, hpp->size, "%11" PRIu64, he->nr_events); + const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%11" PRIu64; + + return scnprintf(hpp->buf, hpp->size, fmt, he->nr_events); } static int hpp__header_period(struct perf_hpp *hpp) { - return scnprintf(hpp->buf, hpp->size, " Period "); + const char *fmt = symbol_conf.field_sep ? "%s" : "%12s"; + + return scnprintf(hpp->buf, hpp->size, fmt, "Period"); } static int hpp__width_period(struct perf_hpp *hpp __used) @@ -174,12 +192,16 @@ static int hpp__width_period(struct perf_hpp *hpp __used) static int hpp__entry_period(struct perf_hpp *hpp, struct hist_entry *he) { - return scnprintf(hpp->buf, hpp->size, "%12" PRIu64, he->period); + const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%12" PRIu64; + + return scnprintf(hpp->buf, hpp->size, fmt, he->period); } static int hpp__header_delta(struct perf_hpp *hpp) { - return scnprintf(hpp->buf, hpp->size, " Delta "); + const char *fmt = symbol_conf.field_sep ? "%s" : "%7s"; + + return scnprintf(hpp->buf, hpp->size, fmt, "Delta"); } static int hpp__width_delta(struct perf_hpp *hpp __used) @@ -193,7 +215,8 @@ static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he) u64 old_total, new_total; double old_percent = 0, new_percent = 0; double diff; - char buf[32]; + const char *fmt = symbol_conf.field_sep ? 
"%s" : "%7.7s"; + char buf[32] = " "; old_total = pair_hists->stats.total_period; if (old_total > 0 && he->pair) @@ -204,11 +227,10 @@ static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he) new_percent = 100.0 * he->period / new_total; diff = new_percent - old_percent; - if (fabs(diff) < 0.01) - return scnprintf(hpp->buf, hpp->size, " "); + if (fabs(diff) >= 0.01) + scnprintf(buf, sizeof(buf), "%+4.2F%%", diff); - scnprintf(buf, sizeof(buf), "%+4.2F%%", diff); - return scnprintf(hpp->buf, hpp->size, "%7.7s", buf); + return scnprintf(hpp->buf, hpp->size, fmt, buf); } static int hpp__header_displ(struct perf_hpp *hpp) @@ -223,13 +245,13 @@ static int hpp__width_displ(struct perf_hpp *hpp __used) static int hpp__entry_displ(struct perf_hpp *hpp, struct hist_entry *he __used) { - char buf[32]; + const char *fmt = symbol_conf.field_sep ? "%s" : "%6.6s"; + char buf[32] = " "; - if (!hpp->displacement) - return scnprintf(hpp->buf, hpp->size, " "); + if (hpp->displacement) + scnprintf(buf, sizeof(buf), "%+4ld", hpp->displacement); - scnprintf(buf, sizeof(buf), "%+4ld", hpp->displacement); - return scnprintf(hpp->buf, hpp->size, "%6.6s", buf); + return scnprintf(hpp->buf, hpp->size, fmt, buf); } #define HPP__COLOR_PRINT_FNS(_name) \ diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 4228b4c6b72..882461a4283 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -319,11 +319,12 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, .displacement = displacement, .ptr = pair_hists, }; + bool color = !symbol_conf.field_sep; if (size == 0 || size > sizeof(bf)) size = hpp.size = sizeof(bf); - ret = hist_entry__period_snprintf(&hpp, he, true); + ret = hist_entry__period_snprintf(&hpp, he, color); hist_entry__sort_snprintf(he, bf + ret, size - ret, hists); ret = fprintf(fp, "%s\n", bf); From 7e62ef44e89e7b7f2c48090a048f2a5dffa838c7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 3 Sep 2012 11:53:08 +0900 Subject: [PATCH 171/282] perf hists: Use perf_hpp__format->width to calculate the column widths Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346640790-17197-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 27 +++++++++++++++++++++++++++ tools/perf/util/hist.c | 33 --------------------------------- 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 009adf206c8..031b349a3f8 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -360,3 +360,30 @@ int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size, return ret; } + +/* + * See hists__fprintf to match the column widths + */ +unsigned int hists__sort_list_width(struct hists *hists) +{ + struct sort_entry *se; + int i, ret = 0; + + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; + if (i) + ret += 2; + + ret += perf_hpp__format[i].width(NULL); + } + + list_for_each_entry(se, &hist_entry__sort_list, list) + if (!se->elide) + ret += 2 + hists__col_len(hists, se->se_width_idx); + + if (verbose) /* Addr + origin */ + ret += 3 + BITS_PER_LONG / 4; + + return ret; +} diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b1817f15bb8..0ba65ad07cd 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -563,39 +563,6 @@ void hists__output_resort_threaded(struct hists *hists) return __hists__output_resort(hists, 
true); } -/* - * See hists__fprintf to match the column widths - */ -unsigned int hists__sort_list_width(struct hists *hists) -{ - struct sort_entry *se; - int ret = 9; /* total % */ - - if (symbol_conf.show_cpu_utilization) { - ret += 7; /* count_sys % */ - ret += 6; /* count_us % */ - if (perf_guest) { - ret += 13; /* count_guest_sys % */ - ret += 12; /* count_guest_us % */ - } - } - - if (symbol_conf.show_nr_samples) - ret += 11; - - if (symbol_conf.show_total_period) - ret += 13; - - list_for_each_entry(se, &hist_entry__sort_list, list) - if (!se->elide) - ret += 2 + hists__col_len(hists, se->se_width_idx); - - if (verbose) /* Addr + origin */ - ret += 3 + BITS_PER_LONG / 4; - - return ret; -} - static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { From f5951d56a2ab4181b96806a4746895aaa5a3d72e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 3 Sep 2012 11:53:09 +0900 Subject: [PATCH 172/282] perf hists browser: Use perf_hpp__format functions Override hpp->color functions for TUI. Because line coloring is done outside of the function, it just sets the percent value and pass it. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346640790-17197-5-git-send-email-namhyung@kernel.org [ committer note: Keep previous layout by showing the overhead at column 1 ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 98 +++++++++++++++++++++++++--------- tools/perf/ui/tui/setup.c | 4 ++ 2 files changed, 78 insertions(+), 24 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 81bd8c2af73..5a5739bbe6a 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -28,6 +28,8 @@ struct hist_browser { bool has_symbols; }; +extern void hist_browser__init_hpp(void); + static int hists__browser_title(struct hists *hists, char *bf, size_t size, const char *ev_name); @@ -563,14 +565,47 @@ static int hist_browser__show_callchain(struct hist_browser *browser, return row - first_row; } +#define HPP__COLOR_FN(_name, _field) \ +static int hist_browser__hpp_color_ ## _name(struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + double percent = 100.0 * he->_field / hpp->total_period; \ + *(double *)hpp->ptr = percent; \ + return scnprintf(hpp->buf, hpp->size, "%5.2f%%", percent); \ +} + +HPP__COLOR_FN(overhead, period) +HPP__COLOR_FN(overhead_sys, period_sys) +HPP__COLOR_FN(overhead_us, period_us) +HPP__COLOR_FN(overhead_guest_sys, period_guest_sys) +HPP__COLOR_FN(overhead_guest_us, period_guest_us) + +#undef HPP__COLOR_FN + +void hist_browser__init_hpp(void) +{ + perf_hpp__init(false, false); + + perf_hpp__format[PERF_HPP__OVERHEAD].color = + hist_browser__hpp_color_overhead; + perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color = + hist_browser__hpp_color_overhead_sys; + perf_hpp__format[PERF_HPP__OVERHEAD_US].color = + hist_browser__hpp_color_overhead_us; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color = + hist_browser__hpp_color_overhead_guest_sys; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = + hist_browser__hpp_color_overhead_guest_us; +} + static int hist_browser__show_entry(struct hist_browser *browser, struct hist_entry *entry, unsigned short row) { char s[256]; double percent; - int printed = 0; - int width = browser->b.width - 6; /* The percentage */ + int i, printed = 0; + int width = browser->b.width - 1; char folded_sign = ' '; bool current_entry = 
ui_browser__is_current_entry(&browser->b, row); off_t row_offset = entry->row_offset; @@ -586,35 +621,50 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (row_offset == 0) { - hist_entry__sort_snprintf(entry, s, sizeof(s), browser->hists); - percent = (entry->period * 100.0) / browser->hists->stats.total_period; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .total_period = browser->hists->stats.total_period, + }; - ui_browser__set_percent_color(&browser->b, percent, current_entry); - ui_browser__gotorc(&browser->b, row, 0); - if (symbol_conf.use_callchain) { - slsmg_printf("%c ", folded_sign); - width -= 2; + ui_browser__gotorc(&browser->b, row, 1); + + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; + + if (i) { + slsmg_printf(" "); + width -= 2; + } + + if (perf_hpp__format[i].color) { + hpp.ptr = &percent; + /* It will set percent for us. See HPP__COLOR_FN above. */ + width -= perf_hpp__format[i].color(&hpp, entry); + + ui_browser__set_percent_color(&browser->b, percent, current_entry); + + if (i == 0 && symbol_conf.use_callchain) { + slsmg_printf("%c ", folded_sign); + width -= 2; + } + + slsmg_printf("%s", s); + + if (!current_entry || !browser->b.navkeypressed) + ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + } else { + width -= perf_hpp__format[i].entry(&hpp, entry); + slsmg_printf("%s", s); + } } - slsmg_printf(" %5.2f%%", percent); - /* The scroll bar isn't being used */ if (!browser->b.navkeypressed) width += 1; - if (!current_entry || !browser->b.navkeypressed) - ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); - - if (symbol_conf.show_nr_samples) { - slsmg_printf(" %11u", entry->nr_events); - width -= 12; - } - - if (symbol_conf.show_total_period) { - slsmg_printf(" %12" PRIu64, entry->period); - width -= 13; - } - + hist_entry__sort_snprintf(entry, s, sizeof(s), browser->hists); slsmg_write_nstring(s, width); ++row; ++printed; diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 4c936e09931..4dc0887c04f 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -15,6 +15,8 @@ static volatile int ui__need_resize; extern struct perf_error_ops perf_tui_eops; +extern void hist_browser__init_hpp(void); + void ui__refresh_dimensions(bool force) { if (force || ui__need_resize) { @@ -124,6 +126,8 @@ int ui__init(void) signal(SIGTERM, ui__signal); perf_error__register(&perf_tui_eops); + + hist_browser__init_hpp(); out: return err; } From 12ceaded6b276c13d905cf09f0c1a1322022ea58 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 3 Sep 2012 11:53:10 +0900 Subject: [PATCH 173/282] perf gtk/browser: Use perf_hpp__format functions Now we can support color using pango markup with this change. 
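For illustration, the formatting step behind that markup support amounts to wrapping the percentage in a Pango span before it reaches a GTK cell renderer bound to the "markup" attribute. The sketch below is self-contained and only shows the general pattern; the threshold values, colour names and function name are illustrative stand-ins, not code from this patch.

  /* Sketch only: format a percentage with Pango markup for a GTK
   * "markup" cell renderer.  Thresholds and colours are illustrative. */
  #include <stdio.h>

  #define MIN_RED_PCT   5.0
  #define MIN_GREEN_PCT 0.5

  static int fmt_percent_markup(char *buf, size_t size, double percent)
  {
          const char *open = NULL;
          int ret = 0;

          if (percent >= MIN_RED_PCT)
                  open = "<span fgcolor='red'>";
          else if (percent >= MIN_GREEN_PCT)
                  open = "<span fgcolor='dark green'>";

          if (open)
                  ret += snprintf(buf + ret, size - ret, "%s", open);
          ret += snprintf(buf + ret, size - ret, "%5.2f%%", percent);
          if (open)
                  ret += snprintf(buf + ret, size - ret, "</span>");

          return ret;
  }

  int main(void)
  {
          char buf[64];

          fmt_percent_markup(buf, sizeof(buf), 12.34);
          printf("%s\n", buf);  /* <span fgcolor='red'>12.34%</span> */
          return 0;
  }

In the patch itself the string is produced through the shared perf_hpp__format table, so the GTK browser only installs its own color callbacks and keeps the rest of the column handling common with the stdio and TUI front ends.
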
Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346640790-17197-6-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/browser.c | 101 ++++++++++++++++++++++++++++++------ tools/perf/ui/gtk/gtk.h | 1 + tools/perf/ui/gtk/setup.c | 1 + 3 files changed, 87 insertions(+), 16 deletions(-) diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index 26b5b652a8c..3c16ab50e0f 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -36,6 +36,57 @@ static void perf_gtk__resize_window(GtkWidget *window) gtk_window_resize(GTK_WINDOW(window), width, height); } +static const char *perf_gtk__get_percent_color(double percent) +{ + if (percent >= MIN_RED) + return ""; + if (percent >= MIN_GREEN) + return ""; + return NULL; +} + +#define HPP__COLOR_FN(_name, _field) \ +static int perf_gtk__hpp_color_ ## _name(struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + double percent = 100.0 * he->_field / hpp->total_period; \ + const char *markup; \ + int ret = 0; \ + \ + markup = perf_gtk__get_percent_color(percent); \ + if (markup) \ + ret += scnprintf(hpp->buf, hpp->size, "%s", markup); \ + ret += scnprintf(hpp->buf + ret, hpp->size - ret, "%5.2f%%", percent); \ + if (markup) \ + ret += scnprintf(hpp->buf + ret, hpp->size - ret, ""); \ + \ + return ret; \ +} + +HPP__COLOR_FN(overhead, period) +HPP__COLOR_FN(overhead_sys, period_sys) +HPP__COLOR_FN(overhead_us, period_us) +HPP__COLOR_FN(overhead_guest_sys, period_guest_sys) +HPP__COLOR_FN(overhead_guest_us, period_guest_us) + +#undef HPP__COLOR_FN + +void perf_gtk__init_hpp(void) +{ + perf_hpp__init(false, false); + + perf_hpp__format[PERF_HPP__OVERHEAD].color = + perf_gtk__hpp_color_overhead; + perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color = + perf_gtk__hpp_color_overhead_sys; + perf_hpp__format[PERF_HPP__OVERHEAD_US].color = + perf_gtk__hpp_color_overhead_us; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color = + perf_gtk__hpp_color_overhead_guest_sys; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = + perf_gtk__hpp_color_overhead_guest_us; +} + static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) { GType col_types[MAX_COLUMNS]; @@ -43,15 +94,25 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) struct sort_entry *se; GtkListStore *store; struct rb_node *nd; - u64 total_period; GtkWidget *view; - int col_idx; + int i, col_idx; int nr_cols; + char s[512]; + + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .total_period = hists->stats.total_period, + }; nr_cols = 0; - /* The percentage column */ - col_types[nr_cols++] = G_TYPE_STRING; + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; + + col_types[nr_cols++] = G_TYPE_STRING; + } list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) @@ -68,11 +129,17 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) col_idx = 0; - /* The percentage column */ - gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), - -1, "Overhead (%)", - renderer, "text", - col_idx++, NULL); + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; + + perf_hpp__format[i].header(&hpp); + + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, s, + renderer, "markup", + col_idx++, NULL); + } list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) @@ 
-88,13 +155,9 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) g_object_unref(GTK_TREE_MODEL(store)); - total_period = hists->stats.total_period; - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); GtkTreeIter iter; - double percent; - char s[512]; if (h->filtered) continue; @@ -103,11 +166,17 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) col_idx = 0; - percent = (h->period * 100.0) / total_period; + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; - snprintf(s, ARRAY_SIZE(s), "%.2f", percent); + if (perf_hpp__format[i].color) + perf_hpp__format[i].color(&hpp, h); + else + perf_hpp__format[i].entry(&hpp, h); - gtk_list_store_set(store, &iter, col_idx++, s, -1); + gtk_list_store_set(store, &iter, col_idx++, s, -1); + } list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 793cb6116dd..687af0bba18 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -30,6 +30,7 @@ struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window); int perf_gtk__deactivate_context(struct perf_gtk_context **ctx); void perf_gtk__init_helpline(void); +void perf_gtk__init_hpp(void); #ifndef HAVE_GTK_INFO_BAR static inline GtkWidget *perf_gtk__setup_info_bar(void) diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index ec1ee26b485..26429437e19 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -8,6 +8,7 @@ int perf_gtk__init(void) { perf_error__register(&perf_gtk_eops); perf_gtk__init_helpline(); + perf_gtk__init_hpp(); return gtk_init_check(NULL, NULL) ? 0 : -1; } From bfd14b9a7231e7cf77520bca1848c40c6b6360ae Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 8 Sep 2012 09:06:50 -0600 Subject: [PATCH 174/282] perf annotate: Make a copy of filename for passing to basename The basename function may modify the string passed to it, so the string should not be marked const. 
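POSIX allows basename() to write into the buffer it is given, which is why passing a const string such as dso->long_name is unsafe; the remedy is to operate on a heap copy, the same strdup()/free() pattern the hunk below adds. A standalone sketch of that pattern, with an illustrative path:

  /* Sketch: POSIX basename() may modify its argument, so duplicate any
   * string you do not own before handing it over. */
  #include <libgen.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  int main(void)
  {
          const char *long_name = "/usr/lib/debug/boot/vmlinux";  /* illustrative */
          char *copy = strdup(long_name);   /* basename() may scribble on this */

          if (!copy)
                  return 1;

          printf("%s\n", basename(copy));   /* safe: we own the writable copy */
          free(copy);
          return 0;
  }

glibc's GNU variant takes a const-qualified argument and leaves it untouched, so const-incorrect calls tend to go unnoticed until the tools are built against a strictly POSIX libgen.h such as bionic's.
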
Signed-off-by: David Ahern Acked-by: Pekka Enberg Cc: Irina Tirdea Cc: Pekka Enberg Link: http://lkml.kernel.org/r/1347116812-93646-2-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 51ef69c9841..04eafd3939d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -984,7 +984,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, int context) { struct dso *dso = map->dso; - const char *filename = dso->long_name, *d_filename; + char *filename; + const char *d_filename; struct annotation *notes = symbol__annotation(sym); struct disasm_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); @@ -992,6 +993,10 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, int more = 0; u64 len; + filename = strdup(dso->long_name); + if (!filename) + return -ENOMEM; + if (full_paths) d_filename = filename; else @@ -1042,6 +1047,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, } } + free(filename); + return more; } From 1fb89448871500e5d5e7ad93d6dae68ee4fa464a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 8 Sep 2012 09:06:51 -0600 Subject: [PATCH 175/282] perf probe: Make a copy of exec path for passing to basename The basename function may modify the string passed to it, so the string should not be marked const. Signed-off-by: David Ahern Acked-by: Pekka Enberg Cc: Irina Tirdea Cc: Pekka Enberg Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/1347116812-93646-3-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0dda25d82d0..e8c72de0f70 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2307,10 +2307,17 @@ static int convert_name_to_addr(struct perf_probe_event *pev, const char *exec) function = NULL; } if (!pev->group) { - char *ptr1, *ptr2; + char *ptr1, *ptr2, *exec_copy; pev->group = zalloc(sizeof(char *) * 64); - ptr1 = strdup(basename(exec)); + exec_copy = strdup(exec); + if (!exec_copy) { + ret = -ENOMEM; + pr_warning("Failed to copy exec string.\n"); + goto out; + } + + ptr1 = strdup(basename(exec_copy)); if (ptr1) { ptr2 = strpbrk(ptr1, "-._"); if (ptr2) @@ -2319,6 +2326,7 @@ static int convert_name_to_addr(struct perf_probe_event *pev, const char *exec) ptr1); free(ptr1); } + free(exec_copy); } free(pp->function); pp->function = zalloc(sizeof(char *) * MAX_PROBE_ARGS); From 6c7f631261064762a8ba1ee34fc2b76d117ef3fa Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 8 Sep 2012 09:06:52 -0600 Subject: [PATCH 176/282] perf symbols: Remove BIONIC wrapper around libgen.h Now that the 2 offenders are fixed, the BIONIC conditional around libgen.h can be removed. 
Signed-off-by: David Ahern Acked-by: Pekka Enberg Cc: Irina Tirdea Cc: Pekka Enberg Link: http://lkml.kernel.org/r/1347116812-93646-4-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index d3b330cbc3e..41a15dac412 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -10,9 +10,7 @@ #include #include #include -#if defined(__BIONIC__) #include -#endif #ifndef NO_LIBELF_SUPPORT #include From 32c7f7383a096a4fc878fdda686c7725945e8a8f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 8 Sep 2012 22:53:06 -0300 Subject: [PATCH 177/282] perf test: Remove die() calls Just use pr_err() + return -1 and let the other tests run as well and then the perf's main() exit doing whatever it needs. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-n5ahw26e94klmde9cz6rxsdf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index cf33e5081c3..6ae102eba5f 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -1023,14 +1023,16 @@ static int __test__rdpmc(void) fd = sys_perf_event_open(&attr, 0, -1, -1, 0); if (fd < 0) { - die("Error: sys_perf_event_open() syscall returned " - "with %d (%s)\n", fd, strerror(errno)); + pr_debug("Error: sys_perf_event_open() syscall returned " + "with %d (%s)\n", fd, strerror(errno)); + return -1; } addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); if (addr == (void *)(-1)) { - die("Error: mmap() syscall returned " - "with (%s)\n", strerror(errno)); + pr_debug("Error: mmap() syscall returned with (%s)\n", + strerror(errno)); + goto out_close; } for (n = 0; n < 6; n++) { @@ -1051,9 +1053,9 @@ static int __test__rdpmc(void) } munmap(addr, page_size); - close(fd); - pr_debug(" "); +out_close: + close(fd); if (!delta_sum) return -1; From a116e05dcf61c8d758e0f0aed40325534aee2c13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 8 Sep 2012 22:53:06 -0300 Subject: [PATCH 178/282] perf sched: Remove die() calls Just use pr_err() + return -1 and perf_session__process_events to abort when some event would call die(), then let the perf's main() exit doing whatever it needs. 
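The conversion itself is mechanical: every handler that could call die() becomes an int-returning function that reports the problem with pr_err()/pr_debug() and passes -1 back up, so the session code can stop processing instead of the library exiting the tool. A schematic before/after with stand-in names (pr_err here is just a macro over fprintf, not perf's real logging helper):

  /* Schematic sketch of the die() -> pr_err() + error-return conversion. */
  #include <stdio.h>
  #include <stdlib.h>

  #define pr_err(fmt, ...)  fprintf(stderr, fmt, ##__VA_ARGS__)

  /* Before: one malformed event kills the whole tool. */
  static void handle_event_old(long delta)
  {
          if (delta < 0) {
                  pr_err("hm, delta: %ld < 0 ?\n", delta);
                  exit(1);  /* roughly what die() boils down to */
          }
          /* ... account the event ... */
  }

  /* After: report the problem and let the caller unwind. */
  static int handle_event_new(long delta)
  {
          if (delta < 0) {
                  pr_err("hm, delta: %ld < 0 ?\n", delta);
                  return -1;
          }
          /* ... account the event ... */
          return 0;
  }

  int main(void)
  {
          handle_event_old(1);                   /* fine */
          return handle_event_new(-1) ? 1 : 0;   /* error is now propagated */
  }

Once every tracepoint handler returns int, perf_session__process_events() can abort on the first hard error while perf's main() still gets to clean up and pick the exit code.
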
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-88cwdogxqomsy9tfr8r0as58@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 281 +++++++++++++++++++++++-------------- 1 file changed, 179 insertions(+), 102 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index a25a023965b..782f66d3610 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -423,8 +423,8 @@ static int self_open_counters(void) fd = sys_perf_event_open(&attr, 0, -1, -1, 0); if (fd < 0) - die("Error: sys_perf_event_open() syscall returned" - "with %d (%s)\n", fd, strerror(errno)); + pr_debug("Error: sys_perf_event_open() syscall returned" + "with %d (%s)\n", fd, strerror(errno)); return fd; } @@ -450,7 +450,8 @@ static void *thread_func(void *ctx) sprintf(comm2, ":%s", this_task->comm); prctl(PR_SET_NAME, comm2); fd = self_open_counters(); - + if (fd < 0) + return NULL; again: ret = sem_post(&this_task->ready_for_work); BUG_ON(ret); @@ -726,30 +727,30 @@ struct trace_migrate_task_event { }; struct trace_sched_handler { - void (*switch_event)(struct trace_switch_event *, - struct machine *, - struct event_format *, + int (*switch_event)(struct trace_switch_event *event, + struct machine *machine, + struct event_format *tp_format, + struct perf_sample *sample); + + int (*runtime_event)(struct trace_runtime_event *event, + struct machine *machine, struct perf_sample *sample); - void (*runtime_event)(struct trace_runtime_event *, - struct machine *, - struct perf_sample *sample); + int (*wakeup_event)(struct trace_wakeup_event *event, + struct machine *machine, + struct event_format *tp_format, + struct perf_sample *sample); - void (*wakeup_event)(struct trace_wakeup_event *, - struct machine *, - struct event_format *, - struct perf_sample *sample); + int (*fork_event)(struct trace_fork_event *event, + struct event_format *tp_format); - void (*fork_event)(struct trace_fork_event *, - struct event_format *event); - - void (*migrate_task_event)(struct trace_migrate_task_event *, - struct machine *machine, - struct perf_sample *sample); + int (*migrate_task_event)(struct trace_migrate_task_event *event, + struct machine *machine, + struct perf_sample *sample); }; -static void +static int replay_wakeup_event(struct trace_wakeup_event *wakeup_event, struct machine *machine __used, struct event_format *event, struct perf_sample *sample) @@ -769,11 +770,12 @@ replay_wakeup_event(struct trace_wakeup_event *wakeup_event, wakee = register_pid(wakeup_event->pid, wakeup_event->comm); add_sched_event_wakeup(waker, sample->time, wakee); + return 0; } static u64 cpu_last_switched[MAX_CPUS]; -static void +static int replay_switch_event(struct trace_switch_event *switch_event, struct machine *machine __used, struct event_format *event, @@ -788,7 +790,7 @@ replay_switch_event(struct trace_switch_event *switch_event, printf("sched_switch event %p\n", event); if (cpu >= MAX_CPUS || cpu < 0) - return; + return 0; timestamp0 = cpu_last_switched[cpu]; if (timestamp0) @@ -796,8 +798,10 @@ replay_switch_event(struct trace_switch_event *switch_event, else delta = 0; - if (delta < 0) - die("hm, delta: %" PRIu64 " < 0 ?\n", delta); + if (delta < 0) { + pr_debug("hm, delta: %" PRIu64 " < 0 ?\n", delta); + return -1; + } if (verbose) { printf(" ... 
switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", @@ -813,10 +817,12 @@ replay_switch_event(struct trace_switch_event *switch_event, add_sched_event_run(prev, timestamp, delta); add_sched_event_sleep(prev, timestamp, switch_event->prev_state); + + return 0; } -static void +static int replay_fork_event(struct trace_fork_event *fork_event, struct event_format *event) { @@ -827,6 +833,7 @@ replay_fork_event(struct trace_fork_event *fork_event, } register_pid(fork_event->parent_pid, fork_event->parent_comm); register_pid(fork_event->child_pid, fork_event->child_comm); + return 0; } static struct trace_sched_handler replay_ops = { @@ -911,22 +918,26 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data, rb_insert_color(&data->node, root); } -static void thread_atoms_insert(struct thread *thread) +static int thread_atoms_insert(struct thread *thread) { struct work_atoms *atoms = zalloc(sizeof(*atoms)); - if (!atoms) - die("No memory"); + if (!atoms) { + pr_err("No memory at %s\n", __func__); + return -1; + } atoms->thread = thread; INIT_LIST_HEAD(&atoms->work_list); __thread_latency_insert(&atom_root, atoms, &cmp_pid); + return 0; } -static void +static int latency_fork_event(struct trace_fork_event *fork_event __used, struct event_format *event __used) { /* should insert the newcomer */ + return 0; } __used @@ -937,14 +948,16 @@ static char sched_out_state(struct trace_switch_event *switch_event) return str[switch_event->prev_state]; } -static void +static int add_sched_out_event(struct work_atoms *atoms, char run_state, u64 timestamp) { struct work_atom *atom = zalloc(sizeof(*atom)); - if (!atom) - die("Non memory"); + if (!atom) { + pr_err("Non memory at %s", __func__); + return -1; + } atom->sched_out_time = timestamp; @@ -954,6 +967,7 @@ add_sched_out_event(struct work_atoms *atoms, } list_add_tail(&atom->list, &atoms->work_list); + return 0; } static void @@ -1000,7 +1014,7 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) atoms->nb_atoms++; } -static void +static int latency_switch_event(struct trace_switch_event *switch_event, struct machine *machine, struct event_format *event __used, @@ -1021,38 +1035,49 @@ latency_switch_event(struct trace_switch_event *switch_event, else delta = 0; - if (delta < 0) - die("hm, delta: %" PRIu64 " < 0 ?\n", delta); - + if (delta < 0) { + pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); + return -1; + } sched_out = machine__findnew_thread(machine, switch_event->prev_pid); sched_in = machine__findnew_thread(machine, switch_event->next_pid); out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); if (!out_events) { - thread_atoms_insert(sched_out); + if (thread_atoms_insert(sched_out)) + return -1; out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); - if (!out_events) - die("out-event: Internal tree error"); + if (!out_events) { + pr_err("out-event: Internal tree error"); + return -1; + } } - add_sched_out_event(out_events, sched_out_state(switch_event), timestamp); + if (add_sched_out_event(out_events, sched_out_state(switch_event), timestamp)) + return -1; in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); if (!in_events) { - thread_atoms_insert(sched_in); + if (thread_atoms_insert(sched_in)) + return -1; in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); - if (!in_events) - die("in-event: Internal tree error"); + if (!in_events) { + pr_err("in-event: Internal tree error"); + return -1; + } /* * Take came in we have not heard about yet, * add in an initial atom in 
runnable state: */ - add_sched_out_event(in_events, 'R', timestamp); + if (add_sched_out_event(in_events, 'R', timestamp)) + return -1; } add_sched_in_event(in_events, timestamp); + + return 0; } -static void +static int latency_runtime_event(struct trace_runtime_event *runtime_event, struct machine *machine, struct perf_sample *sample) { @@ -1063,17 +1088,22 @@ latency_runtime_event(struct trace_runtime_event *runtime_event, BUG_ON(cpu >= MAX_CPUS || cpu < 0); if (!atoms) { - thread_atoms_insert(thread); + if (thread_atoms_insert(thread)) + return -1; atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); - if (!atoms) - die("in-event: Internal tree error"); - add_sched_out_event(atoms, 'R', timestamp); + if (!atoms) { + pr_debug("in-event: Internal tree error"); + return -1; + } + if (add_sched_out_event(atoms, 'R', timestamp)) + return -1; } add_runtime_event(atoms, runtime_event->runtime, timestamp); + return 0; } -static void +static int latency_wakeup_event(struct trace_wakeup_event *wakeup_event, struct machine *machine, struct event_format *event __used, struct perf_sample *sample) @@ -1085,16 +1115,20 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, /* Note for later, it may be interesting to observe the failing cases */ if (!wakeup_event->success) - return; + return 0; wakee = machine__findnew_thread(machine, wakeup_event->pid); atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); if (!atoms) { - thread_atoms_insert(wakee); + if (thread_atoms_insert(wakee)) + return -1; atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); - if (!atoms) - die("wakeup-event: Internal tree error"); - add_sched_out_event(atoms, 'S', timestamp); + if (!atoms) { + pr_debug("wakeup-event: Internal tree error"); + return -1; + } + if (add_sched_out_event(atoms, 'S', timestamp)) + return -1; } BUG_ON(list_empty(&atoms->work_list)); @@ -1112,14 +1146,15 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, nr_timestamps++; if (atom->sched_out_time > timestamp) { nr_unordered_timestamps++; - return; + return 0; } atom->state = THREAD_WAIT_CPU; atom->wake_up_time = timestamp; + return 0; } -static void +static int latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, struct machine *machine, struct perf_sample *sample) { @@ -1132,17 +1167,21 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, * Only need to worry about migration when profiling one CPU. 
*/ if (profile_cpu == -1) - return; + return 0; migrant = machine__findnew_thread(machine, migrate_task_event->pid); atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); if (!atoms) { - thread_atoms_insert(migrant); + if (thread_atoms_insert(migrant)) + return -1; register_pid(migrant->pid, migrant->comm); atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); - if (!atoms) - die("migration-event: Internal tree error"); - add_sched_out_event(atoms, 'R', timestamp); + if (!atoms) { + pr_debug("migration-event: Internal tree error"); + return -1; + } + if (add_sched_out_event(atoms, 'R', timestamp)) + return -1; } BUG_ON(list_empty(&atoms->work_list)); @@ -1154,6 +1193,8 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, if (atom->sched_out_time > timestamp) nr_unordered_timestamps++; + + return 0; } static struct trace_sched_handler lat_ops = { @@ -1328,7 +1369,7 @@ static void sort_lat(void) static struct trace_sched_handler *trace_handler; -static void +static int process_sched_wakeup_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, @@ -1337,6 +1378,7 @@ process_sched_wakeup_event(struct perf_tool *tool __used, { void *data = sample->raw_data; struct trace_wakeup_event wakeup_event; + int err = 0; FILL_COMMON_FIELDS(wakeup_event, event, data); @@ -1347,7 +1389,9 @@ process_sched_wakeup_event(struct perf_tool *tool __used, FILL_FIELD(wakeup_event, cpu, event, data); if (trace_handler->wakeup_event) - trace_handler->wakeup_event(&wakeup_event, machine, event, sample); + err = trace_handler->wakeup_event(&wakeup_event, machine, event, sample); + + return err; } /* @@ -1363,7 +1407,7 @@ static struct thread *curr_thread[MAX_CPUS]; static char next_shortname1 = 'A'; static char next_shortname2 = '0'; -static void +static int map_switch_event(struct trace_switch_event *switch_event, struct machine *machine, struct event_format *event __used, @@ -1387,9 +1431,10 @@ map_switch_event(struct trace_switch_event *switch_event, else delta = 0; - if (delta < 0) - die("hm, delta: %" PRIu64 " < 0 ?\n", delta); - + if (delta < 0) { + pr_debug("hm, delta: %" PRIu64 " < 0 ?\n", delta); + return -1; + } sched_out = machine__findnew_thread(machine, switch_event->prev_pid); sched_in = machine__findnew_thread(machine, switch_event->next_pid); @@ -1438,16 +1483,18 @@ map_switch_event(struct trace_switch_event *switch_event, } else { printf("\n"); } + + return 0; } -static void +static int process_sched_switch_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine, struct thread *thread __used) { - int this_cpu = sample->cpu; + int this_cpu = sample->cpu, err = 0; void *data = sample->raw_data; struct trace_switch_event switch_event; @@ -1470,12 +1517,13 @@ process_sched_switch_event(struct perf_tool *tool __used, nr_context_switch_bugs++; } if (trace_handler->switch_event) - trace_handler->switch_event(&switch_event, machine, event, sample); + err = trace_handler->switch_event(&switch_event, machine, event, sample); curr_pid[this_cpu] = switch_event.next_pid; + return err; } -static void +static int process_sched_runtime_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, @@ -1484,6 +1532,7 @@ process_sched_runtime_event(struct perf_tool *tool __used, { void *data = sample->raw_data; struct trace_runtime_event runtime_event; + int err = 0; FILL_ARRAY(runtime_event, comm, event, data); FILL_FIELD(runtime_event, pid, event, 
data); @@ -1491,10 +1540,12 @@ process_sched_runtime_event(struct perf_tool *tool __used, FILL_FIELD(runtime_event, vruntime, event, data); if (trace_handler->runtime_event) - trace_handler->runtime_event(&runtime_event, machine, sample); + err = trace_handler->runtime_event(&runtime_event, machine, sample); + + return err; } -static void +static int process_sched_fork_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, @@ -1503,6 +1554,7 @@ process_sched_fork_event(struct perf_tool *tool __used, { void *data = sample->raw_data; struct trace_fork_event fork_event; + int err = 0; FILL_COMMON_FIELDS(fork_event, event, data); @@ -1512,10 +1564,12 @@ process_sched_fork_event(struct perf_tool *tool __used, FILL_FIELD(fork_event, child_pid, event, data); if (trace_handler->fork_event) - trace_handler->fork_event(&fork_event, event); + err = trace_handler->fork_event(&fork_event, event); + + return err; } -static void +static int process_sched_exit_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample __used, @@ -1524,9 +1578,11 @@ process_sched_exit_event(struct perf_tool *tool __used, { if (verbose) printf("sched_exit event %p\n", event); + + return 0; } -static void +static int process_sched_migrate_task_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, @@ -1535,6 +1591,7 @@ process_sched_migrate_task_event(struct perf_tool *tool __used, { void *data = sample->raw_data; struct trace_migrate_task_event migrate_task_event; + int err = 0; FILL_COMMON_FIELDS(migrate_task_event, event, data); @@ -1544,13 +1601,16 @@ process_sched_migrate_task_event(struct perf_tool *tool __used, FILL_FIELD(migrate_task_event, cpu, event, data); if (trace_handler->migrate_task_event) - trace_handler->migrate_task_event(&migrate_task_event, machine, sample); + err = trace_handler->migrate_task_event(&migrate_task_event, machine, sample); + + return err; } -typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event_format *event, - struct perf_sample *sample, - struct machine *machine, - struct thread *thread); +typedef int (*tracepoint_handler)(struct perf_tool *tool, + struct event_format *tp_format, + struct perf_sample *sample, + struct machine *machine, + struct thread *thread); static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __used, union perf_event *event __used, @@ -1559,6 +1619,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __used, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, sample->pid); + int err = 0; if (thread == NULL) { pr_debug("problem processing %s event, skipping it.\n", @@ -1571,10 +1632,10 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __used, if (evsel->handler.func != NULL) { tracepoint_handler f = evsel->handler.func; - f(tool, evsel->tp_format, sample, machine, thread); + err = f(tool, evsel->tp_format, sample, machine, thread); } - return 0; + return err; } static struct perf_tool perf_sched = { @@ -1585,9 +1646,8 @@ static struct perf_tool perf_sched = { .ordered_samples = true, }; -static void read_events(bool destroy, struct perf_session **psession) +static int read_events(bool destroy, struct perf_session **psession) { - int err = -EINVAL; const struct perf_evsel_str_handler handlers[] = { { "sched:sched_switch", process_sched_switch_event, }, { "sched:sched_stat_runtime", process_sched_runtime_event, }, @@ -1600,16 +1660,20 @@ static 
void read_events(bool destroy, struct perf_session **psession) struct perf_session *session; session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_sched); - if (session == NULL) - die("No Memory"); + if (session == NULL) { + pr_debug("No Memory for session\n"); + return -1; + } - err = perf_session__set_tracepoints_handlers(session, handlers); - assert(err == 0); + if (perf_session__set_tracepoints_handlers(session, handlers)) + goto out_delete; if (perf_session__has_traces(session, "record -R")) { - err = perf_session__process_events(session, &perf_sched); - if (err) - die("Failed to process events, error %d", err); + int err = perf_session__process_events(session, &perf_sched); + if (err) { + pr_err("Failed to process events, error %d", err); + goto out_delete; + } nr_events = session->hists.stats.nr_events[0]; nr_lost_events = session->hists.stats.total_lost; @@ -1621,6 +1685,12 @@ static void read_events(bool destroy, struct perf_session **psession) if (psession) *psession = session; + + return 0; + +out_delete: + perf_session__delete(session); + return -1; } static void print_bad_events(void) @@ -1653,13 +1723,14 @@ static void print_bad_events(void) } } -static void __cmd_lat(void) +static int __cmd_lat(void) { struct rb_node *next; struct perf_session *session; setup_pager(); - read_events(false, &session); + if (read_events(false, &session)) + return -1; sort_lat(); printf("\n ---------------------------------------------------------------------------------------------------------------\n"); @@ -1686,6 +1757,7 @@ static void __cmd_lat(void) printf("\n"); perf_session__delete(session); + return 0; } static struct trace_sched_handler map_ops = { @@ -1695,16 +1767,18 @@ static struct trace_sched_handler map_ops = { .fork_event = NULL, }; -static void __cmd_map(void) +static int __cmd_map(void) { max_cpu = sysconf(_SC_NPROCESSORS_CONF); setup_pager(); - read_events(true, NULL); + if (read_events(true, NULL)) + return -1; print_bad_events(); + return 0; } -static void __cmd_replay(void) +static int __cmd_replay(void) { unsigned long i; @@ -1713,7 +1787,8 @@ static void __cmd_replay(void) test_calibrations(); - read_events(true, NULL); + if (read_events(true, NULL)) + return -1; printf("nr_run_events: %ld\n", nr_run_events); printf("nr_sleep_events: %ld\n", nr_sleep_events); @@ -1734,6 +1809,8 @@ static void __cmd_replay(void) printf("------------------------------------------------------------\n"); for (i = 0; i < replay_repeat; i++) run_one_test(); + + return 0; } @@ -1865,11 +1942,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used) usage_with_options(latency_usage, latency_options); } setup_sorting(); - __cmd_lat(); + return __cmd_lat(); } else if (!strcmp(argv[0], "map")) { trace_handler = &map_ops; setup_sorting(); - __cmd_map(); + return __cmd_map(); } else if (!strncmp(argv[0], "rep", 3)) { trace_handler = &replay_ops; if (argc) { @@ -1877,7 +1954,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(replay_usage, replay_options); } - __cmd_replay(); + return __cmd_replay(); } else { usage_with_options(sched_usage, sched_options); } From 2814eb05720baa54ffe0950714fd55a5bcc8a565 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 8 Sep 2012 22:53:06 -0300 Subject: [PATCH 179/282] perf kmem: Remove die() calls Just use pr_err() + return -1 and perf_session__process_events to abort when some event would call die(), then let the perf's main() exit doing whatever it needs. 
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-i7rhuqfwshjiwc9gr9m1vov4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 108 +++++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 41 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index fc6607b383f..ad9f5209738 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -58,41 +58,52 @@ static unsigned long nr_allocs, nr_cross_allocs; #define PATH_SYS_NODE "/sys/devices/system/node" -static void init_cpunode_map(void) +static int init_cpunode_map(void) { FILE *fp; - int i; + int i, err = -1; fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); if (!fp) { max_cpu_num = 4096; - return; + return 0; + } + + if (fscanf(fp, "%d", &max_cpu_num) < 1) { + pr_err("Failed to read 'kernel_max' from sysfs"); + goto out_close; } - if (fscanf(fp, "%d", &max_cpu_num) < 1) - die("Failed to read 'kernel_max' from sysfs"); max_cpu_num++; cpunode_map = calloc(max_cpu_num, sizeof(int)); - if (!cpunode_map) - die("calloc"); + if (!cpunode_map) { + pr_err("%s: calloc failed\n", __func__); + goto out_close; + } + for (i = 0; i < max_cpu_num; i++) cpunode_map[i] = -1; + + err = 0; +out_close: fclose(fp); + return err; } -static void setup_cpunode_map(void) +static int setup_cpunode_map(void) { struct dirent *dent1, *dent2; DIR *dir1, *dir2; unsigned int cpu, mem; char buf[PATH_MAX]; - init_cpunode_map(); + if (init_cpunode_map()) + return -1; dir1 = opendir(PATH_SYS_NODE); if (!dir1) - return; + return -1; while ((dent1 = readdir(dir1)) != NULL) { if (dent1->d_type != DT_DIR || @@ -112,10 +123,11 @@ static void setup_cpunode_map(void) closedir(dir2); } closedir(dir1); + return 0; } -static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, - int bytes_req, int bytes_alloc, int cpu) +static int insert_alloc_stat(unsigned long call_site, unsigned long ptr, + int bytes_req, int bytes_alloc, int cpu) { struct rb_node **node = &root_alloc_stat.rb_node; struct rb_node *parent = NULL; @@ -139,8 +151,10 @@ static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, data->bytes_alloc += bytes_alloc; } else { data = malloc(sizeof(*data)); - if (!data) - die("malloc"); + if (!data) { + pr_err("%s: malloc failed\n", __func__); + return -1; + } data->ptr = ptr; data->pingpong = 0; data->hit = 1; @@ -152,9 +166,10 @@ static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, } data->call_site = call_site; data->alloc_cpu = cpu; + return 0; } -static void insert_caller_stat(unsigned long call_site, +static int insert_caller_stat(unsigned long call_site, int bytes_req, int bytes_alloc) { struct rb_node **node = &root_caller_stat.rb_node; @@ -179,8 +194,10 @@ static void insert_caller_stat(unsigned long call_site, data->bytes_alloc += bytes_alloc; } else { data = malloc(sizeof(*data)); - if (!data) - die("malloc"); + if (!data) { + pr_err("%s: malloc failed\n", __func__); + return -1; + } data->call_site = call_site; data->pingpong = 0; data->hit = 1; @@ -190,11 +207,12 @@ static void insert_caller_stat(unsigned long call_site, rb_link_node(&data->node, parent, node); rb_insert_color(&data->node, &root_caller_stat); } + + return 0; } -static void perf_evsel__process_alloc_event(struct perf_evsel *evsel, - struct perf_sample *sample, - int node) +static int perf_evsel__process_alloc_event(struct 
perf_evsel *evsel, + struct perf_sample *sample, int node) { struct event_format *event = evsel->tp_format; void *data = sample->raw_data; @@ -209,8 +227,9 @@ static void perf_evsel__process_alloc_event(struct perf_evsel *evsel, bytes_req = raw_field_value(event, "bytes_req", data); bytes_alloc = raw_field_value(event, "bytes_alloc", data); - insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); - insert_caller_stat(call_site, bytes_req, bytes_alloc); + if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu) || + insert_caller_stat(call_site, bytes_req, bytes_alloc)) + return -1; total_requested += bytes_req; total_allocated += bytes_alloc; @@ -222,6 +241,7 @@ static void perf_evsel__process_alloc_event(struct perf_evsel *evsel, nr_cross_allocs++; } nr_allocs++; + return 0; } static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); @@ -252,8 +272,8 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr, return NULL; } -static void perf_evsel__process_free_event(struct perf_evsel *evsel, - struct perf_sample *sample) +static int perf_evsel__process_free_event(struct perf_evsel *evsel, + struct perf_sample *sample) { unsigned long ptr = raw_field_value(evsel->tp_format, "ptr", sample->raw_data); @@ -261,41 +281,43 @@ static void perf_evsel__process_free_event(struct perf_evsel *evsel, s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); if (!s_alloc) - return; + return 0; if ((short)sample->cpu != s_alloc->alloc_cpu) { s_alloc->pingpong++; s_caller = search_alloc_stat(0, s_alloc->call_site, &root_caller_stat, callsite_cmp); - assert(s_caller); + if (!s_caller) + return -1; s_caller->pingpong++; } s_alloc->alloc_cpu = -1; + + return 0; } -static void perf_evsel__process_kmem_event(struct perf_evsel *evsel, - struct perf_sample *sample) +static int perf_evsel__process_kmem_event(struct perf_evsel *evsel, + struct perf_sample *sample) { struct event_format *event = evsel->tp_format; if (!strcmp(event->name, "kmalloc") || !strcmp(event->name, "kmem_cache_alloc")) { - perf_evsel__process_alloc_event(evsel, sample, 0); - return; + return perf_evsel__process_alloc_event(evsel, sample, 0); } if (!strcmp(event->name, "kmalloc_node") || !strcmp(event->name, "kmem_cache_alloc_node")) { - perf_evsel__process_alloc_event(evsel, sample, 1); - return; + return perf_evsel__process_alloc_event(evsel, sample, 1); } if (!strcmp(event->name, "kfree") || !strcmp(event->name, "kmem_cache_free")) { - perf_evsel__process_free_event(evsel, sample); - return; + return perf_evsel__process_free_event(evsel, sample); } + + return 0; } static int process_sample_event(struct perf_tool *tool __used, @@ -314,8 +336,7 @@ static int process_sample_event(struct perf_tool *tool __used, dump_printf(" ... 
thread: %s:%d\n", thread->comm, thread->pid); - perf_evsel__process_kmem_event(evsel, sample); - return 0; + return perf_evsel__process_kmem_event(evsel, sample); } static struct perf_tool perf_kmem = { @@ -613,8 +634,10 @@ static int sort_dimension__add(const char *tok, struct list_head *list) for (i = 0; i < NUM_AVAIL_SORTS; i++) { if (!strcmp(avail_sorts[i]->name, tok)) { sort = malloc(sizeof(*sort)); - if (!sort) - die("malloc"); + if (!sort) { + pr_err("%s: malloc failed\n", __func__); + return -1; + } memcpy(sort, avail_sorts[i], sizeof(*sort)); list_add_tail(&sort->list, list); return 0; @@ -629,8 +652,10 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) char *tok; char *str = strdup(arg); - if (!str) - die("strdup"); + if (!str) { + pr_err("%s: strdup failed\n", __func__); + return -1; + } while (true) { tok = strsep(&str, ","); @@ -758,7 +783,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used) if (!strncmp(argv[0], "rec", 3)) { return __cmd_record(argc, argv); } else if (!strcmp(argv[0], "stat")) { - setup_cpunode_map(); + if (setup_cpunode_map()) + return -1; if (list_empty(&caller_sort)) setup_sorting(&caller_sort, default_sort_order); From f8fcd776212bcaa1e2359e39a43c1dbd752b8773 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Tue, 11 Sep 2012 01:14:58 +0300 Subject: [PATCH 180/282] perf tools: include wrapper for magic.h perf is currently including magic.h directly from the kernel. If the glibc magic.h is also included, this leads to warnings that the constants are redefined. This happens on some systems (e.g. Android). Redefinition errors on Android: In file included from util/util.h:79:0, from util/cache.h:5, from util/abspath.c:1: util/../../../include/linux/magic.h:5:0: error: "AFFS_SUPER_MAGIC" redefined [-Werror] bionic/libc/include/sys/vfs.h:53:0: note: this is the location of the previous definition util/../../../include/linux/magic.h:19:0: error: "EFS_SUPER_MAGIC" redefined [-Werror] bionic/libc/include/sys/vfs.h:61:0: note: this is the location of the previous definition util/../../../include/linux/magic.h:26:0: error: "HPFS_SUPER_MAGIC" redefined [-Werror] bionic/libc/include/sys/vfs.h:67:0: note: this is the location of the previous definition Only two constants from magic.h are used by perf (DEBUGFS_MAGIC and SYSFS_MAGIC). This fix provides a wrapper for magic.h that includes only these constants instead of including the kernel header file directly. 
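A minimal usage sketch of the guarded constants, assuming a debugfs mount at
/sys/kernel/debug (illustration only, not part of the patch); it checks a
path's filesystem magic via statfs(2), which is the kind of consumer the
wrapper header is meant to serve:

  #include <stdio.h>
  #include <sys/vfs.h>		/* statfs(), struct statfs */
  #include <linux/magic.h>	/* resolves to the perf wrapper in-tree */

  static int path_is_debugfs(const char *path)
  {
  	struct statfs fs;

  	if (statfs(path, &fs) < 0)
  		return 0;	/* treat errors as "not debugfs" */

  	return fs.f_type == DEBUGFS_MAGIC;
  }

  int main(void)
  {
  	const char *mnt = "/sys/kernel/debug";	/* assumed mount point */

  	printf("%s %s debugfs\n", mnt,
  	       path_is_debugfs(mnt) ? "is" : "is not");
  	return 0;
  }

Because the wrapper only defines DEBUGFS_MAGIC and SYSFS_MAGIC when they are
not already present, including it next to a libc header that also defines
them no longer triggers redefinition errors.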
Signed-off-by: Irina Tirdea Acked-by: Pekka Enberg Cc: David Ahern Cc: Ingo Molnar Cc: Irina Tirdea Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347315303-29906-2-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 1 + tools/perf/util/include/linux/magic.h | 12 ++++++++++++ tools/perf/util/util.h | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/include/linux/magic.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index e4b2e8f2606..1d2723c01b8 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -264,6 +264,7 @@ LIB_H += util/include/linux/ctype.h LIB_H += util/include/linux/kernel.h LIB_H += util/include/linux/list.h LIB_H += util/include/linux/export.h +LIB_H += util/include/linux/magic.h LIB_H += util/include/linux/poison.h LIB_H += util/include/linux/prefetch.h LIB_H += util/include/linux/rbtree.h diff --git a/tools/perf/util/include/linux/magic.h b/tools/perf/util/include/linux/magic.h new file mode 100644 index 00000000000..58b64ed4da1 --- /dev/null +++ b/tools/perf/util/include/linux/magic.h @@ -0,0 +1,12 @@ +#ifndef _PERF_LINUX_MAGIC_H_ +#define _PERF_LINUX_MAGIC_H_ + +#ifndef DEBUGFS_MAGIC +#define DEBUGFS_MAGIC 0x64626720 +#endif + +#ifndef SYSFS_MAGIC +#define SYSFS_MAGIC 0x62656572 +#endif + +#endif diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 67a371355c7..70fa70b535b 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -70,7 +70,7 @@ #include #include #include -#include "../../../include/linux/magic.h" +#include #include "types.h" #include From fe392c535644b8ee370a219ca63557611e0b0365 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Sep 2012 11:42:41 -0300 Subject: [PATCH 181/282] perf tools: Add missing perf_regs.h file to MANIFEST The 2bcd355 broke the perf-tar*-src-pkg generated tarballs builds, fix it. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-2ndz2o636rn4q175fwn18x32@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/MANIFEST | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index b4b572e8c10..05180799093 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -10,6 +10,7 @@ include/linux/stringify.h lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h +arch/*/include/asm/perf_regs.h arch/*/lib/memcpy*.S arch/*/lib/memset*.S include/linux/poison.h From 86d5a70c1eeb3d35bcadc94753fd9651df8835a8 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Tue, 11 Sep 2012 01:14:59 +0300 Subject: [PATCH 182/282] perf tools: Update types definitions for Android Some type definitions are missing from Android or are already defined in bionic and lead to redefinition errors. Android defines in types.h __le32. Since perf is wrapping with a local version, we need to define this constant in the local version too. 
Error in Android: In file included from bionic/libc/include/unistd.h:36:0, from external/perf/tools/perf/util/util.h:46, from external/perf/tools/perf/util/cache.h:5, from external/perf/tools/perf/util/abspath.c:1: bionic/libc/kernel/common/linux/capability.h:60:2: error: unknown type name '__le32' roundup() definition is missing: util/symbol.c: In function 'symbols__fixup_end': util/symbol.c:106: warning: implicit declaration of function 'roundup' util/symbol.c:106: warning: nested extern declaration of 'roundup' __force macro defined in perf is also defined in libc which leads to redefinition errors. In order to avoid these, we guard these definition with Signed-off-by: Irina Tirdea Acked-by: Pekka Enberg Cc: David Ahern Cc: Ingo Molnar Cc: Irina Tirdea Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347315303-29906-3-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/linux/compiler.h | 4 ++++ tools/perf/util/include/linux/kernel.h | 9 +++++++++ tools/perf/util/include/linux/types.h | 8 ++++++++ 3 files changed, 21 insertions(+) diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h index 2dc867128e4..ce2367b7b3f 100644 --- a/tools/perf/util/include/linux/compiler.h +++ b/tools/perf/util/include/linux/compiler.h @@ -12,4 +12,8 @@ #define __used __attribute__((__unused__)) #define __packed __attribute__((__packed__)) +#ifndef __force +#define __force +#endif + #endif diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index 4af9a10cc2d..a978f269d8d 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -46,6 +46,15 @@ _min1 < _min2 ? _min1 : _min2; }) #endif +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + #ifndef BUG_ON #ifdef NDEBUG #define BUG_ON(cond) do { if (cond) {} } while (0) diff --git a/tools/perf/util/include/linux/types.h b/tools/perf/util/include/linux/types.h index 12de3b8112f..eb464786c08 100644 --- a/tools/perf/util/include/linux/types.h +++ b/tools/perf/util/include/linux/types.h @@ -3,6 +3,14 @@ #include +#ifndef __bitwise +#define __bitwise +#endif + +#ifndef __le32 +typedef __u32 __bitwise __le32; +#endif + #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] From 3f34f6c0233ae055b592e8f8da23d873b82070bb Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Tue, 11 Sep 2012 01:15:00 +0300 Subject: [PATCH 183/282] perf tools: include __WORDSIZE definition __WORDSIZE is GLibC-specific and is not defined on all systems or glibc versions (e.g. Android's bionic does not define it). 
In file included from util/include/linux/bitmap.h:5:0, from util/header.h:10, from util/session.h:6, from util/build-id.h:4, from util/annotate.c:11: util/include/linux/bitops.h: In function 'set_bit': util/include/linux/bitops.h:25:12: error: '__WORDSIZE' undeclared (first use in this function) util/include/linux/bitops.h:25:12: note: each undeclared identifier is reported only once for each function it appears in util/include/linux/bitops.h:23:51: error: parameter 'addr' set but not used [-Werror=unused-but-set-parameter] util/include/linux/bitops.h: In function 'clear_bit': util/include/linux/bitops.h:30:12: error: '__WORDSIZE' undeclared (first use in this function) util/include/linux/bitops.h:28:53: error: parameter 'addr' set but not used [-Werror=unused-but-set-parameter] In file included from util/header.h:10:0, from util/session.h:6, from util/build-id.h:4, from util/annotate.c:11: util/include/linux/bitmap.h: In function 'bitmap_zero': util/include/linux/bitmap.h:22:6: error: '__WORDSIZE' undeclared (first use in this function) Defining __WORDSIZE in perf's headers if it is not already defined. Suggested-by: Peter Zijlstra Suggested-by: Pekka Enberg Signed-off-by: Irina Tirdea Cc: David Ahern Cc: Ingo Molnar Cc: Irina Tirdea Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347315303-29906-4-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/linux/bitops.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index 587a230d207..a55d8cf083c 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -5,6 +5,10 @@ #include #include +#ifndef __WORDSIZE +#define __WORDSIZE (__SIZEOF_LONG__ * 8) +#endif + #define BITS_PER_LONG __WORDSIZE #define BITS_PER_BYTE 8 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) From 9ac3e487f0eeef0fa058d72da7681398cc052ee9 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Tue, 11 Sep 2012 01:15:01 +0300 Subject: [PATCH 184/282] perf tools: fix ALIGN redefinition in system headers On some systems (e.g. Android), ALIGN is defined in system headers as ALIGN(p). The definition of ALIGN used in perf takes 2 parameters: ALIGN(x,a). This leads to redefinition conflicts. Redefinition error on Android: In file included from util/include/linux/list.h:1:0, from util/callchain.h:5, from util/hist.h:6, from util/session.h:4, from util/build-id.h:4, from util/annotate.c:11: util/include/linux/kernel.h:11:0: error: "ALIGN" redefined [-Werror] bionic/libc/include/sys/param.h:38:0: note: this is the location of the previous definition Conflics with system defined ALIGN in Android: util/event.c: In function 'perf_event__synthesize_comm': util/event.c:115:32: error: macro "ALIGN" passed 2 arguments, but takes just 1 util/event.c:115:9: error: 'ALIGN' undeclared (first use in this function) util/event.c:115:9: note: each undeclared identifier is reported only once for each function it appears in In order to avoid this redefinition, ALIGN is renamed to PERF_ALIGN. 
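For reference, a standalone sketch of what the renamed macro computes
(illustration only, not part of the patch): PERF_ALIGN(x, a) rounds x up to
the next multiple of a, where a is a power of two.

  #include <assert.h>
  #include <stdint.h>

  /* Same definition the patch introduces in util/include/linux/kernel.h. */
  #define PERF_ALIGN(x, a)		__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
  #define __PERF_ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))

  int main(void)
  {
  	/* e.g. string lengths padded to u64 boundaries, as in event.c */
  	assert(PERF_ALIGN((uint64_t)13, sizeof(uint64_t)) == 16);
  	assert(PERF_ALIGN((uint64_t)16, sizeof(uint64_t)) == 16);
  	return 0;
  }

Renaming, rather than guarding with #ifndef, avoids silently picking up the
incompatible single-argument system definition.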
Signed-off-by: Irina Tirdea Acked-by: Pekka Enberg Cc: David Ahern Cc: Ingo Molnar Cc: Irina Tirdea Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347315303-29906-5-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 10 +++++----- tools/perf/util/event.h | 2 +- tools/perf/util/header.c | 16 ++++++++-------- tools/perf/util/include/linux/kernel.h | 4 ++-- tools/perf/util/session.c | 4 ++-- tools/perf/util/symbol.c | 2 +- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 84ff6f160cd..f7f480503af 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -112,7 +112,7 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, event->comm.header.type = PERF_RECORD_COMM; size = strlen(event->comm.comm) + 1; - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); memset(event->comm.comm + size, 0, machine->id_hdr_size); event->comm.header.size = (sizeof(event->comm) - (sizeof(event->comm.comm) - size) + @@ -145,7 +145,7 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, sizeof(event->comm.comm)); size = strlen(event->comm.comm) + 1; - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); memset(event->comm.comm + size, 0, machine->id_hdr_size); event->comm.header.size = (sizeof(event->comm) - (sizeof(event->comm.comm) - size) + @@ -228,7 +228,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, size = strlen(execname); execname[size - 1] = '\0'; /* Remove \n */ memcpy(event->mmap.filename, execname, size); - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); event->mmap.len -= event->mmap.start; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size)); @@ -282,7 +282,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, if (pos->dso->kernel) continue; - size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); + size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size)); @@ -494,7 +494,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, map = machine->vmlinux_maps[MAP__FUNCTION]; size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), "%s%s", mmap_name, symbol_name) + 1; - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 0e088d046e5..21b99e741a8 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -101,7 +101,7 @@ struct perf_sample { struct build_id_event { struct perf_event_header header; pid_t pid; - u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; + u8 build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))]; char filename[]; }; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d07bc134e56..974e7589a6b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -129,7 +129,7 @@ static int do_write_string(int fd, const char *str) int ret; olen = strlen(str) + 1; - len = ALIGN(olen, NAME_ALIGN); + len = PERF_ALIGN(olen, NAME_ALIGN); /* write len, incl. 
\0 */ ret = do_write(fd, &len, sizeof(len)); @@ -220,7 +220,7 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, if (!pos->hit) continue; len = pos->long_name_len + 1; - len = ALIGN(len, NAME_ALIGN); + len = PERF_ALIGN(len, NAME_ALIGN); memset(&b, 0, sizeof(b)); memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); b.pid = pid; @@ -1532,7 +1532,7 @@ static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, struct perf_session *session = container_of(header, struct perf_session, header); struct { struct perf_event_header header; - u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; + u8 build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))]; char filename[0]; } old_bev; struct build_id_event bev; @@ -2439,7 +2439,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, int err; size = sizeof(struct perf_event_attr); - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); size += sizeof(struct perf_event_header); size += ids * sizeof(u64); @@ -2537,7 +2537,7 @@ int perf_event__synthesize_event_type(struct perf_tool *tool, ev.event_type.header.type = PERF_RECORD_HEADER_EVENT_TYPE; size = strlen(ev.event_type.event_type.name); - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); ev.event_type.header.size = sizeof(ev.event_type) - (sizeof(ev.event_type.event_type.name) - size); @@ -2606,7 +2606,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; size = tdata->size; - aligned_size = ALIGN(size, sizeof(u64)); + aligned_size = PERF_ALIGN(size, sizeof(u64)); padding = aligned_size - size; ev.tracing_data.header.size = sizeof(ev.tracing_data); ev.tracing_data.size = aligned_size; @@ -2637,7 +2637,7 @@ int perf_event__process_tracing_data(union perf_event *event, size_read = trace_report(session->fd, &session->pevent, session->repipe); - padding = ALIGN(size_read, sizeof(u64)) - size_read; + padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; if (read(session->fd, buf, padding) < 0) die("reading input file"); @@ -2671,7 +2671,7 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, memset(&ev, 0, sizeof(ev)); len = pos->long_name_len + 1; - len = ALIGN(len, NAME_ALIGN); + len = PERF_ALIGN(len, NAME_ALIGN); memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id)); ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; ev.build_id.header.misc = misc; diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index a978f269d8d..d8c927c868e 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -8,8 +8,8 @@ #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) -#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1) -#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) +#define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) +#define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) #ifndef offsetof #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 945375897c2..3806ea49f1f 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -527,7 +527,7 @@ static void perf_event__comm_swap(union perf_event *event, bool sample_id_all) if (sample_id_all) { void *data = &event->comm.comm; - data += ALIGN(strlen(data) + 1, sizeof(u64)); + data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); swap_sample_id_all(event, data); } } @@ -544,7 +544,7 @@ 
static void perf_event__mmap_swap(union perf_event *event, if (sample_id_all) { void *data = &event->mmap.filename; - data += ALIGN(strlen(data) + 1, sizeof(u64)); + data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); swap_sample_id_all(event, data); } } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 753699a20bc..ba85d4ffef0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1991,7 +1991,7 @@ int symbol__init(void) if (symbol_conf.initialized) return 0; - symbol_conf.priv_size = ALIGN(symbol_conf.priv_size, sizeof(u64)); + symbol_conf.priv_size = PERF_ALIGN(symbol_conf.priv_size, sizeof(u64)); symbol__elf_init(); From bdde37163e1fd474509aab90f5eaacee46100107 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 10 Sep 2012 18:50:16 +0200 Subject: [PATCH 185/282] perf tools: Do backtrace post unwind only if we regs and stack were captured Bail out without error if we want to do backtrace post unwind, but were not able to capture user registers or user stack during the record phase, which is possible and valid case. Signed-off-by: Jiri Olsa Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347295819-23177-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3806ea49f1f..0ecd62be209 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -388,6 +388,11 @@ int machine__resolve_callchain(struct machine *machine, (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER))) return 0; + /* Bail out if nothing was captured. */ + if ((!sample->user_regs.regs) || + (!sample->user_stack.size)) + return 0; + return unwind__get_entries(unwind_entry, &callchain_cursor, machine, thread, evsel->attr.sample_regs_user, sample); From b232e0732b1d763834c3d5b098d25d59337ba075 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 10 Sep 2012 18:50:17 +0200 Subject: [PATCH 186/282] perf tools: Add memdup function Adding memdup function to duplicate region of memory. 
void *memdup(const void *src, size_t len) Signed-off-by: Jiri Olsa Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347295819-23177-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/linux/string.h | 2 ++ tools/perf/util/string.c | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h index 3b2f5900276..6f19c548ecc 100644 --- a/tools/perf/util/include/linux/string.h +++ b/tools/perf/util/include/linux/string.h @@ -1 +1,3 @@ #include + +void *memdup(const void *src, size_t len); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 199bc4d8905..32170590892 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -1,5 +1,5 @@ #include "util.h" -#include "string.h" +#include "linux/string.h" #define K 1024LL /* @@ -335,3 +335,19 @@ char *rtrim(char *s) return s; } + +/** + * memdup - duplicate region of memory + * @src: memory region to duplicate + * @len: memory region length + */ +void *memdup(const void *src, size_t len) +{ + void *p; + + p = malloc(len); + if (p) + memcpy(p, src, len); + + return p; +} From 1c4be9ff5933e5c0f033ea98169cd89e22c90900 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 10 Sep 2012 18:50:18 +0200 Subject: [PATCH 187/282] perf symbols: Make dsos__find function globally available Changing dsos__find function from static to be globally available. Signed-off-by: Jiri Olsa Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347295819-23177-4-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 +- tools/perf/util/symbol.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ba85d4ffef0..bbb24e95165 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1596,7 +1596,7 @@ void dsos__add(struct list_head *head, struct dso *dso) list_add_tail(&dso->node, head); } -static struct dso *dsos__find(struct list_head *head, const char *name) +struct dso *dsos__find(struct list_head *head, const char *name) { struct dso *pos; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 41a15dac412..dde8a26f7be 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -294,6 +294,7 @@ static inline void dso__set_loaded(struct dso *dso, enum map_type type) void dso__sort_by_name(struct dso *dso, enum map_type type); void dsos__add(struct list_head *head, struct dso *dso); +struct dso *dsos__find(struct list_head *head, const char *name); struct dso *__dsos__findnew(struct list_head *head, const char *name); int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter); From 7dbf4dcfe2987c35c2c4675cd7ae1b6006979176 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 10 Sep 2012 18:50:19 +0200 Subject: [PATCH 188/282] perf tools: Back [vdso] DSO with real data Storing data for VDSO shared object, because we need it for the post unwind processing. The VDSO shared object is same for all process on a running system, so it makes no difference when we store it inside the tracer - perf. When [vdso] map memory is hit, we retrieve [vdso] DSO image and store it into temporary file. 
During the build-id processing phase, the [vdso] DSO image is stored in build-id db, and build-id reference is made inside perf.data. The build-id vdso file object is called '[vdso]'. We don't use temporary file name which gets removed when record is finished. During report phase the vdso build-id object is treated as any other build-id DSO object. Adding following API for vdso object: bool is_vdso_map(const char *filename) - returns true if the filename matches vdso map name struct dso *vdso__dso_findnew(struct list_head *head) - find/create proper vdso DSO object vdso__exit(void) - removes temporary VDSO image if there's any This change makes backtrace dwarf post unwind possible from [vdso] maps. Following output is current report of [vdso] sample dwarf backtrace: # Overhead Command Shared Object Symbol # ........ ....... ................. ............................. # 99.52% ex [vdso] [.] 0x00007fff3ace89af | --- 0x7fff3ace89af Following output is new report of [vdso] sample dwarf backtrace: # Overhead Command Shared Object Symbol # ........ ....... ................. ............................. # 99.52% ex [vdso] [.] 0x00000000000009af | --- 0x7fff3ace89af main __libc_start_main _start Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347295819-23177-5-git-send-email-jolsa@redhat.com [ committer note: s/ALIGN/PERF_ALIGN/g to cope with the android build changes ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 + tools/perf/builtin-buildid-cache.c | 3 +- tools/perf/util/header.c | 70 ++++++++++++------ tools/perf/util/header.h | 2 +- tools/perf/util/map.c | 12 +++- tools/perf/util/session.c | 2 + tools/perf/util/vdso.c | 111 +++++++++++++++++++++++++++++ tools/perf/util/vdso.h | 18 +++++ 8 files changed, 194 insertions(+), 26 deletions(-) create mode 100644 tools/perf/util/vdso.c create mode 100644 tools/perf/util/vdso.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 1d2723c01b8..209774bcee2 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -337,6 +337,7 @@ LIB_H += util/intlist.h LIB_H += util/perf_regs.h LIB_H += util/unwind.h LIB_H += ui/helpline.h +LIB_H += util/vdso.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -404,6 +405,7 @@ LIB_OBJS += $(OUTPUT)util/cgroup.o LIB_OBJS += $(OUTPUT)util/target.o LIB_OBJS += $(OUTPUT)util/rblist.o LIB_OBJS += $(OUTPUT)util/intlist.o +LIB_OBJS += $(OUTPUT)util/vdso.o LIB_OBJS += $(OUTPUT)ui/helpline.o LIB_OBJS += $(OUTPUT)ui/hist.o diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 29ad20e6791..995368e84e4 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -43,7 +43,8 @@ static int build_id_cache__add_file(const char *filename, const char *debugdir) } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__add_s(sbuild_id, debugdir, filename, false); + err = build_id_cache__add_s(sbuild_id, debugdir, filename, + false, false); if (verbose) pr_info("Adding %s %s: %s\n", sbuild_id, filename, err ? 
"FAIL" : "Ok"); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 974e7589a6b..87996cab21d 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -21,6 +21,7 @@ #include "debug.h" #include "cpumap.h" #include "pmu.h" +#include "vdso.h" static bool no_buildid_cache = false; @@ -207,6 +208,29 @@ perf_header__set_cmdline(int argc, const char **argv) continue; \ else +static int write_buildid(char *name, size_t name_len, u8 *build_id, + pid_t pid, u16 misc, int fd) +{ + int err; + struct build_id_event b; + size_t len; + + len = name_len + 1; + len = PERF_ALIGN(len, NAME_ALIGN); + + memset(&b, 0, sizeof(b)); + memcpy(&b.build_id, build_id, BUILD_ID_SIZE); + b.pid = pid; + b.header.misc = misc; + b.header.size = sizeof(b) + len; + + err = do_write(fd, &b, sizeof(b)); + if (err < 0) + return err; + + return write_padded(fd, name, name_len + 1, len); +} + static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, u16 misc, int fd) { @@ -214,24 +238,23 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, dsos__for_each_with_build_id(pos, head) { int err; - struct build_id_event b; - size_t len; + char *name; + size_t name_len; if (!pos->hit) continue; - len = pos->long_name_len + 1; - len = PERF_ALIGN(len, NAME_ALIGN); - memset(&b, 0, sizeof(b)); - memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); - b.pid = pid; - b.header.misc = misc; - b.header.size = sizeof(b) + len; - err = do_write(fd, &b, sizeof(b)); - if (err < 0) - return err; - err = write_padded(fd, pos->long_name, - pos->long_name_len + 1, len); - if (err < 0) + + if (is_vdso_map(pos->short_name)) { + name = (char *) VDSO__MAP_NAME; + name_len = sizeof(VDSO__MAP_NAME) + 1; + } else { + name = pos->long_name; + name_len = pos->long_name_len + 1; + } + + err = write_buildid(name, name_len, pos->build_id, + pid, misc, fd); + if (err) return err; } @@ -277,19 +300,20 @@ static int dsos__write_buildid_table(struct perf_header *header, int fd) } int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms) + const char *name, bool is_kallsyms, bool is_vdso) { const size_t size = PATH_MAX; char *realname, *filename = zalloc(size), *linkname = zalloc(size), *targetname; int len, err = -1; + bool slash = is_kallsyms || is_vdso; if (is_kallsyms) { if (symbol_conf.kptr_restrict) { pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); return 0; } - realname = (char *)name; + realname = (char *) name; } else realname = realpath(name, NULL); @@ -297,7 +321,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, goto out_free; len = scnprintf(filename, size, "%s%s%s", - debugdir, is_kallsyms ? "/" : "", realname); + debugdir, slash ? "/" : "", + is_vdso ? 
VDSO__MAP_NAME : realname); if (mkdir_p(filename, 0755)) goto out_free; @@ -333,13 +358,14 @@ out_free: static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, const char *name, const char *debugdir, - bool is_kallsyms) + bool is_kallsyms, bool is_vdso) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; build_id__sprintf(build_id, build_id_size, sbuild_id); - return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms); + return build_id_cache__add_s(sbuild_id, debugdir, name, + is_kallsyms, is_vdso); } int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) @@ -383,9 +409,11 @@ out_free: static int dso__cache_build_id(struct dso *dso, const char *debugdir) { bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; + bool is_vdso = is_vdso_map(dso->short_name); return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), - dso->long_name, debugdir, is_kallsyms); + dso->long_name, debugdir, + is_kallsyms, is_vdso); } static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 9d5eedceda7..209dad4fee2 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -96,7 +96,7 @@ int perf_header__process_sections(struct perf_header *header, int fd, int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms); + const char *name, bool is_kallsyms, bool is_vdso); int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); int perf_event__synthesize_attr(struct perf_tool *tool, diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 7d37159c1e9..b442ee49452 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -9,6 +9,7 @@ #include "map.h" #include "thread.h" #include "strlist.h" +#include "vdso.h" const char *map_type__name[MAP__NR_TYPES] = { [MAP__FUNCTION] = "Functions", @@ -23,7 +24,6 @@ static inline int is_anon_memory(const char *filename) static inline int is_no_dso_memory(const char *filename) { return !strcmp(filename, "[stack]") || - !strcmp(filename, "[vdso]") || !strcmp(filename, "[heap]"); } @@ -52,9 +52,10 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, if (self != NULL) { char newfilename[PATH_MAX]; struct dso *dso; - int anon, no_dso; + int anon, no_dso, vdso; anon = is_anon_memory(filename); + vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); if (anon) { @@ -62,7 +63,12 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, filename = newfilename; } - dso = __dsos__findnew(dsos__list, filename); + if (vdso) { + pgoff = 0; + dso = vdso__dso_findnew(dsos__list); + } else + dso = __dsos__findnew(dsos__list, filename); + if (dso == NULL) goto out_delete; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0ecd62be209..e0fd6c71cc5 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -17,6 +17,7 @@ #include "event-parse.h" #include "perf_regs.h" #include "unwind.h" +#include "vdso.h" static int perf_session__open(struct perf_session *self, bool force) { @@ -211,6 +212,7 @@ void perf_session__delete(struct perf_session *self) machine__exit(&self->host_machine); close(self->fd); free(self); + vdso__exit(); } void machine__remove_thread(struct machine *self, struct thread *th) diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c new file mode 100644 index 00000000000..e60951fcdb1 
--- /dev/null +++ b/tools/perf/util/vdso.c @@ -0,0 +1,111 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vdso.h" +#include "util.h" +#include "symbol.h" +#include "linux/string.h" + +static bool vdso_found; +static char vdso_file[] = "/tmp/perf-vdso.so-XXXXXX"; + +static int find_vdso_map(void **start, void **end) +{ + FILE *maps; + char line[128]; + int found = 0; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + pr_err("vdso: cannot open maps\n"); + return -1; + } + + while (!found && fgets(line, sizeof(line), maps)) { + int m = -1; + + /* We care only about private r-x mappings. */ + if (2 != sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", + start, end, &m)) + continue; + if (m < 0) + continue; + + if (!strncmp(&line[m], VDSO__MAP_NAME, + sizeof(VDSO__MAP_NAME) - 1)) + found = 1; + } + + fclose(maps); + return !found; +} + +static char *get_file(void) +{ + char *vdso = NULL; + char *buf = NULL; + void *start, *end; + size_t size; + int fd; + + if (vdso_found) + return vdso_file; + + if (find_vdso_map(&start, &end)) + return NULL; + + size = end - start; + + buf = memdup(start, size); + if (!buf) + return NULL; + + fd = mkstemp(vdso_file); + if (fd < 0) + goto out; + + if (size == (size_t) write(fd, buf, size)) + vdso = vdso_file; + + close(fd); + + out: + free(buf); + + vdso_found = (vdso != NULL); + return vdso; +} + +void vdso__exit(void) +{ + if (vdso_found) + unlink(vdso_file); +} + +struct dso *vdso__dso_findnew(struct list_head *head) +{ + struct dso *dso = dsos__find(head, VDSO__MAP_NAME); + + if (!dso) { + char *file; + + file = get_file(); + if (!file) + return NULL; + + dso = dso__new(VDSO__MAP_NAME); + if (dso != NULL) { + dsos__add(head, dso); + dso__set_long_name(dso, file); + } + } + + return dso; +} diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h new file mode 100644 index 00000000000..0f76e7caf6f --- /dev/null +++ b/tools/perf/util/vdso.h @@ -0,0 +1,18 @@ +#ifndef __PERF_VDSO__ +#define __PERF_VDSO__ + +#include +#include +#include + +#define VDSO__MAP_NAME "[vdso]" + +static inline bool is_vdso_map(const char *filename) +{ + return !strcmp(filename, VDSO__MAP_NAME); +} + +struct dso *vdso__dso_findnew(struct list_head *head); +void vdso__exit(void); + +#endif /* __PERF_VDSO__ */ From 1d037ca1648b775277fc96401ec2aa233724906c Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Tue, 11 Sep 2012 01:15:03 +0300 Subject: [PATCH 189/282] perf tools: Use __maybe_used for unused variables perf defines both __used and __unused variables to use for marking unused variables. The variable __used is defined to __attribute__((__unused__)), which contradicts the kernel definition to __attribute__((__used__)) for new gcc versions. On Android, __used is also defined in system headers and this leads to warnings like: warning: '__used__' attribute ignored __unused is not defined in the kernel and is not a standard definition. If __unused is included everywhere instead of __used, this leads to conflicts with glibc headers, since glibc has a variables with this name in its headers. The best approach is to use __maybe_unused, the definition used in the kernel for __attribute__((unused)). In this way there is only one definition in perf sources (instead of 2 definitions that point to the same thing: __used and __unused) and it works on both Linux and Android. This patch simply replaces all instances of __used and __unused with __maybe_unused. 
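As a brief illustration of the annotation this patch standardizes on (not
part of the patch itself; the callback type below is hypothetical),
parameters that a fixed signature requires but an implementation ignores
build cleanly under -Wunused-parameter:

  #include <stdio.h>

  #ifndef __maybe_unused
  #define __maybe_unused __attribute__((unused))
  #endif

  /* Hypothetical callback type whose signature every command must match. */
  typedef int (*cmd_fn_t)(int argc, const char **argv, const char *prefix);

  static int cmd_hello(int argc __maybe_unused,
  		     const char **argv __maybe_unused,
  		     const char *prefix __maybe_unused)
  {
  	puts("hello");		/* no -Wunused-parameter warnings here */
  	return 0;
  }

  int main(void)
  {
  	cmd_fn_t run = cmd_hello;

  	return run(0, NULL, NULL);
  }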
Signed-off-by: Irina Tirdea Acked-by: Pekka Enberg Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347315303-29906-7-git-send-email-irina.tirdea@intel.com [ committer note: fixed up conflict with a116e05 in builtin-sched.c ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 8 +- tools/lib/traceevent/event-parse.h | 4 +- tools/perf/bench/bench.h | 3 +- tools/perf/bench/mem-memcpy.c | 2 +- tools/perf/bench/mem-memset.c | 2 +- tools/perf/bench/sched-messaging.c | 2 +- tools/perf/bench/sched-pipe.c | 6 +- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-bench.c | 2 +- tools/perf/builtin-buildid-cache.c | 7 +- tools/perf/builtin-buildid-list.c | 3 +- tools/perf/builtin-diff.c | 4 +- tools/perf/builtin-evlist.c | 2 +- tools/perf/builtin-help.c | 2 +- tools/perf/builtin-inject.c | 24 +++-- tools/perf/builtin-kmem.c | 22 ++-- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-list.c | 2 +- tools/perf/builtin-lock.c | 4 +- tools/perf/builtin-probe.c | 24 ++--- tools/perf/builtin-record.c | 10 +- tools/perf/builtin-report.c | 11 +- tools/perf/builtin-sched.c | 102 +++++++++--------- tools/perf/builtin-script.c | 29 ++--- tools/perf/builtin-stat.c | 40 ++++--- tools/perf/builtin-test.c | 9 +- tools/perf/builtin-timechart.c | 30 +++--- tools/perf/builtin-top.c | 7 +- tools/perf/ui/browser.c | 7 +- tools/perf/ui/browsers/annotate.c | 6 +- tools/perf/ui/gtk/browser.c | 5 +- tools/perf/ui/gtk/setup.c | 2 +- tools/perf/ui/gtk/util.c | 4 +- tools/perf/ui/helpline.c | 2 +- tools/perf/ui/helpline.h | 8 +- tools/perf/ui/hist.c | 21 ++-- tools/perf/ui/tui/setup.c | 4 +- tools/perf/util/alias.c | 3 +- tools/perf/util/annotate.c | 6 +- tools/perf/util/annotate.h | 13 +-- tools/perf/util/build-id.c | 11 +- tools/perf/util/cache.h | 6 +- tools/perf/util/callchain.c | 6 +- tools/perf/util/cgroup.c | 4 +- tools/perf/util/config.c | 6 +- tools/perf/util/debug.h | 9 +- tools/perf/util/event.c | 19 ++-- tools/perf/util/header.c | 93 ++++++++-------- tools/perf/util/help.c | 3 +- tools/perf/util/hist.c | 2 +- tools/perf/util/hist.h | 32 +++--- tools/perf/util/include/linux/compiler.h | 4 +- tools/perf/util/intlist.c | 4 +- tools/perf/util/map.h | 2 +- tools/perf/util/parse-events-test.c | 6 +- tools/perf/util/parse-events.c | 7 +- tools/perf/util/parse-events.l | 2 +- tools/perf/util/parse-events.y | 4 +- tools/perf/util/parse-options.c | 3 +- tools/perf/util/perf_regs.h | 2 +- tools/perf/util/pmu.y | 6 +- tools/perf/util/probe-event.c | 21 ++-- tools/perf/util/probe-finder.c | 4 +- tools/perf/util/python.c | 8 +- .../util/scripting-engines/trace-event-perl.c | 8 +- .../scripting-engines/trace-event-python.c | 10 +- tools/perf/util/session.c | 50 +++++---- tools/perf/util/sort.c | 14 ++- tools/perf/util/symbol-minimal.c | 28 ++--- tools/perf/util/symbol.c | 2 +- tools/perf/util/symbol.h | 7 +- tools/perf/util/trace-event-parse.c | 4 +- tools/perf/util/trace-event-scripting.c | 33 +++--- tools/perf/util/unwind.c | 36 ++++--- tools/perf/util/unwind.h | 11 +- tools/perf/util/wrapper.c | 3 +- 76 files changed, 498 insertions(+), 418 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index f4190b5764d..2c54cdd8ae1 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1824,7 +1824,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) } static enum event_type -process_entry(struct 
event_format *event __unused, struct print_arg *arg, +process_entry(struct event_format *event __maybe_unused, struct print_arg *arg, char **tok) { enum event_type type; @@ -2458,7 +2458,8 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok) static enum event_type -process_str(struct event_format *event __unused, struct print_arg *arg, char **tok) +process_str(struct event_format *event __maybe_unused, struct print_arg *arg, + char **tok) { enum event_type type; char *token; @@ -3653,7 +3654,8 @@ static void free_args(struct print_arg *args) } static char * -get_bprint_format(void *data, int size __unused, struct event_format *event) +get_bprint_format(void *data, int size __maybe_unused, + struct event_format *event) { struct pevent *pevent = event->pevent; unsigned long long addr; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 3318963f1c9..a4bbe243792 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -24,8 +24,8 @@ #include #include -#ifndef __unused -#define __unused __attribute__ ((unused)) +#ifndef __maybe_unused +#define __maybe_unused __attribute__((unused)) #endif /* ----------------------- trace_seq ----------------------- */ diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index a09bece6dad..8f89998eeaf 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -3,7 +3,8 @@ extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); -extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); +extern int bench_mem_memcpy(int argc, const char **argv, + const char *prefix __maybe_unused); extern int bench_mem_memset(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 02dad5d3359..93c83e3cb4a 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -177,7 +177,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) } while (0) int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __used) + const char *prefix __maybe_unused) { int i; size_t len; diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index 350cc955726..c6e4bc52349 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -171,7 +171,7 @@ static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) } while (0) int bench_mem_memset(int argc, const char **argv, - const char *prefix __used) + const char *prefix __maybe_unused) { int i; size_t len; diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index d1d1b30f99c..cc1190a0849 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -267,7 +267,7 @@ static const char * const bench_sched_message_usage[] = { }; int bench_sched_messaging(int argc, const char **argv, - const char *prefix __used) + const char *prefix __maybe_unused) { unsigned int i, total_children; struct timeval start, stop, diff; diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 15911e9c587..69cfba8d4c6 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -43,7 +43,7 @@ static const char * const bench_sched_pipe_usage[] = { }; int bench_sched_pipe(int argc, const char 
**argv, - const char *prefix __used) + const char *prefix __maybe_unused) { int pipe_1[2], pipe_2[2]; int m = 0, i; @@ -55,8 +55,8 @@ int bench_sched_pipe(int argc, const char **argv, * discarding returned value of read(), write() * causes error in building environment for perf */ - int __used ret, wait_stat; - pid_t pid, retpid __used; + int __maybe_unused ret, wait_stat; + pid_t pid, retpid __maybe_unused; argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2f3f0029c0f..9ea38540b87 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -239,7 +239,7 @@ static const char * const annotate_usage[] = { NULL }; -int cmd_annotate(int argc, const char **argv, const char *prefix __used) +int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_annotate annotate = { .tool = { diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 1f310021644..cae9a5fd2ec 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -173,7 +173,7 @@ static void all_subsystem(void) all_suite(&subsystems[i]); } -int cmd_bench(int argc, const char **argv, const char *prefix __used) +int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) { int i, j, status = 0; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 995368e84e4..83654557e10 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -51,8 +51,8 @@ static int build_id_cache__add_file(const char *filename, const char *debugdir) return err; } -static int build_id_cache__remove_file(const char *filename __used, - const char *debugdir __used) +static int build_id_cache__remove_file(const char *filename __maybe_unused, + const char *debugdir __maybe_unused) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -120,7 +120,8 @@ static int __cmd_buildid_cache(void) return 0; } -int cmd_buildid_cache(int argc, const char **argv, const char *prefix __used) +int cmd_buildid_cache(int argc, const char **argv, + const char *prefix __maybe_unused) { argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 7d6842826a0..1159feeebb1 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -103,7 +103,8 @@ static int __cmd_buildid_list(void) return perf_session__list_build_ids(); } -int cmd_buildid_list(int argc, const char **argv, const char *prefix __used) +int cmd_buildid_list(int argc, const char **argv, + const char *prefix __maybe_unused) { argc = parse_options(argc, argv, options, buildid_list_usage, 0); setup_pager(); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index c4c6d76b70e..761f4197a9e 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -33,7 +33,7 @@ static int hists__add_entry(struct hists *self, return -ENOMEM; } -static int diff__process_sample_event(struct perf_tool *tool __used, +static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, @@ -242,7 +242,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_diff(int argc, const char **argv, const char *prefix __used) +int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) { 
sort_order = diff__default_sort_order; argc = parse_options(argc, argv, options, diff_usage, 0); diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 0dd5a058f76..1fb164164fd 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -113,7 +113,7 @@ static const char * const evlist_usage[] = { NULL }; -int cmd_evlist(int argc, const char **argv, const char *prefix __used) +int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_attr_details details = { .verbose = false, }; const char *input_name = NULL; diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index f9daae5ac47..25c8b942ff8 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -426,7 +426,7 @@ static int show_html_page(const char *perf_cmd) return 0; } -int cmd_help(int argc, const char **argv, const char *prefix __used) +int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) { const char *alias; int rc = 0; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 64d8ba2fb7b..1eaa6617c81 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -17,9 +17,9 @@ static char const *input_name = "-"; static bool inject_build_ids; -static int perf_event__repipe_synth(struct perf_tool *tool __used, +static int perf_event__repipe_synth(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct machine *machine __used) + struct machine *machine __maybe_unused) { uint32_t size; void *buf = event; @@ -40,7 +40,8 @@ static int perf_event__repipe_synth(struct perf_tool *tool __used, static int perf_event__repipe_op2_synth(struct perf_tool *tool, union perf_event *event, - struct perf_session *session __used) + struct perf_session *session + __maybe_unused) { return perf_event__repipe_synth(tool, event, NULL); } @@ -52,13 +53,14 @@ static int perf_event__repipe_event_type_synth(struct perf_tool *tool, } static int perf_event__repipe_tracing_data_synth(union perf_event *event, - struct perf_session *session __used) + struct perf_session *session + __maybe_unused) { return perf_event__repipe_synth(NULL, event, NULL); } static int perf_event__repipe_attr(union perf_event *event, - struct perf_evlist **pevlist __used) + struct perf_evlist **pevlist __maybe_unused) { int ret; ret = perf_event__process_attr(event, pevlist); @@ -70,7 +72,7 @@ static int perf_event__repipe_attr(union perf_event *event, static int perf_event__repipe(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample __maybe_unused, struct machine *machine) { return perf_event__repipe_synth(tool, event, machine); @@ -78,8 +80,8 @@ static int perf_event__repipe(struct perf_tool *tool, static int perf_event__repipe_sample(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __used, - struct perf_evsel *evsel __used, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, struct machine *machine) { return perf_event__repipe_synth(tool, event, machine); @@ -163,7 +165,7 @@ static int dso__inject_build_id(struct dso *self, struct perf_tool *tool, static int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __used, + struct perf_evsel *evsel __maybe_unused, struct machine *machine) { struct addr_location al; @@ -224,7 +226,7 @@ struct perf_tool perf_inject = { extern volatile int session_done; -static 
void sig_handler(int sig __attribute__((__unused__))) +static void sig_handler(int sig __maybe_unused) { session_done = 1; } @@ -267,7 +269,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_inject(int argc, const char **argv, const char *prefix __used) +int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) { argc = parse_options(argc, argv, options, report_usage, 0); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ad9f5209738..f5f8a6b745a 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -320,7 +320,7 @@ static int perf_evsel__process_kmem_event(struct perf_evsel *evsel, return 0; } -static int process_sample_event(struct perf_tool *tool __used, +static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, @@ -672,8 +672,8 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) return 0; } -static int parse_sort_opt(const struct option *opt __used, - const char *arg, int unset __used) +static int parse_sort_opt(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused) { if (!arg) return -1; @@ -686,22 +686,24 @@ static int parse_sort_opt(const struct option *opt __used, return 0; } -static int parse_caller_opt(const struct option *opt __used, - const char *arg __used, int unset __used) +static int parse_caller_opt(const struct option *opt __maybe_unused, + const char *arg __maybe_unused, + int unset __maybe_unused) { caller_flag = (alloc_flag + 1); return 0; } -static int parse_alloc_opt(const struct option *opt __used, - const char *arg __used, int unset __used) +static int parse_alloc_opt(const struct option *opt __maybe_unused, + const char *arg __maybe_unused, + int unset __maybe_unused) { alloc_flag = (caller_flag + 1); return 0; } -static int parse_line_opt(const struct option *opt __used, - const char *arg, int unset __used) +static int parse_line_opt(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused) { int lines; @@ -771,7 +773,7 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -int cmd_kmem(int argc, const char **argv, const char *prefix __used) +int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) { argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 9fc6e0fa3dc..4d2aa2cbeca 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -102,7 +102,7 @@ static int __cmd_buildid_list(int argc, const char **argv) return cmd_buildid_list(i, rec_argv, NULL); } -int cmd_kvm(int argc, const char **argv, const char *prefix __used) +int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) { perf_host = 0; perf_guest = 1; diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index bdcff81b532..1948eceb517 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -14,7 +14,7 @@ #include "util/parse-events.h" #include "util/cache.h" -int cmd_list(int argc, const char **argv, const char *prefix __used) +int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { setup_pager(); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 75153c87e65..a8035207a3d 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -870,7 +870,7 @@ static int dump_info(void) return rc; } -static 
int process_sample_event(struct perf_tool *tool __used, +static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, @@ -1020,7 +1020,7 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -int cmd_lock(int argc, const char **argv, const char *prefix __used) +int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) { unsigned int i; int rc = 0; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index e215ae61b2a..118aa894657 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -143,8 +143,8 @@ static int parse_probe_event_argv(int argc, const char **argv) return ret; } -static int opt_add_probe_event(const struct option *opt __used, - const char *str, int unset __used) +static int opt_add_probe_event(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { if (str) { params.mod_events = true; @@ -153,8 +153,8 @@ static int opt_add_probe_event(const struct option *opt __used, return 0; } -static int opt_del_probe_event(const struct option *opt __used, - const char *str, int unset __used) +static int opt_del_probe_event(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { if (str) { params.mod_events = true; @@ -166,7 +166,7 @@ static int opt_del_probe_event(const struct option *opt __used, } static int opt_set_target(const struct option *opt, const char *str, - int unset __used) + int unset __maybe_unused) { int ret = -ENOENT; @@ -188,8 +188,8 @@ static int opt_set_target(const struct option *opt, const char *str, } #ifdef DWARF_SUPPORT -static int opt_show_lines(const struct option *opt __used, - const char *str, int unset __used) +static int opt_show_lines(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { int ret = 0; @@ -209,8 +209,8 @@ static int opt_show_lines(const struct option *opt __used, return ret; } -static int opt_show_vars(const struct option *opt __used, - const char *str, int unset __used) +static int opt_show_vars(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { struct perf_probe_event *pev = ¶ms.events[params.nevents]; int ret; @@ -229,8 +229,8 @@ static int opt_show_vars(const struct option *opt __used, } #endif -static int opt_set_filter(const struct option *opt __used, - const char *str, int unset __used) +static int opt_set_filter(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { const char *err; @@ -327,7 +327,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_probe(int argc, const char **argv, const char *prefix __used) +int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) { int ret; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7b8b891d4d5..c643ed669ef 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -92,8 +92,8 @@ static int write_output(struct perf_record *rec, void *buf, size_t size) static int process_synthesized_event(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __used, - struct machine *machine __used) + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { struct perf_record *rec = container_of(tool, struct perf_record, tool); if (write_output(rec, event, event->header.size) < 0) @@ -159,7 +159,7 @@ static void sig_handler(int 
sig) signr = sig; } -static void perf_record__sig_exit(int exit_status __used, void *arg) +static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg) { struct perf_record *rec = arg; int status; @@ -827,7 +827,7 @@ static int get_stack_size(char *str, unsigned long *_size) #endif /* !NO_LIBUNWIND_SUPPORT */ static int -parse_callchain_opt(const struct option *opt __used, const char *arg, +parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg, int unset) { struct perf_record *rec = (struct perf_record *)opt->value; @@ -1003,7 +1003,7 @@ const struct option record_options[] = { OPT_END() }; -int cmd_record(int argc, const char **argv, const char *prefix __used) +int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) { int err = -ENOMEM; struct perf_evsel *pos; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1f8d11b4f7f..97b2e6300f4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -223,9 +223,9 @@ static int process_sample_event(struct perf_tool *tool, static int process_read_event(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample __maybe_unused, struct perf_evsel *evsel, - struct machine *machine __used) + struct machine *machine __maybe_unused) { struct perf_report *rep = container_of(tool, struct perf_report, tool); @@ -287,7 +287,7 @@ static int perf_report__setup_sample_type(struct perf_report *rep) extern volatile int session_done; -static void sig_handler(int sig __used) +static void sig_handler(int sig __maybe_unused) { session_done = 1; } @@ -533,13 +533,14 @@ setup: } static int -parse_branch_mode(const struct option *opt __used, const char *str __used, int unset) +parse_branch_mode(const struct option *opt __maybe_unused, + const char *str __maybe_unused, int unset) { sort__branch_mode = !unset; return 0; } -int cmd_report(int argc, const char **argv, const char *prefix __used) +int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_session *session; struct stat st; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 782f66d3610..82e8ec2c43b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -299,7 +299,7 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp, static void add_sched_event_sleep(struct task_desc *task, u64 timestamp, - u64 task_state __used) + u64 task_state __maybe_unused) { struct sched_atom *event = get_new_event(task, timestamp); @@ -369,8 +369,8 @@ static void add_cross_task_wakeups(void) } } -static void -process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) +static void process_sched_event(struct task_desc *this_task __maybe_unused, + struct sched_atom *atom) { int ret = 0; @@ -752,7 +752,7 @@ struct trace_sched_handler { static int replay_wakeup_event(struct trace_wakeup_event *wakeup_event, - struct machine *machine __used, + struct machine *machine __maybe_unused, struct event_format *event, struct perf_sample *sample) { struct task_desc *waker, *wakee; @@ -777,11 +777,11 @@ static u64 cpu_last_switched[MAX_CPUS]; static int replay_switch_event(struct trace_switch_event *switch_event, - struct machine *machine __used, + struct machine *machine __maybe_unused, struct event_format *event, struct perf_sample *sample) { - struct task_desc *prev, __used *next; + struct task_desc *prev, __maybe_unused *next; u64 timestamp0, timestamp = sample->time; int cpu = 
sample->cpu; s64 delta; @@ -932,15 +932,13 @@ static int thread_atoms_insert(struct thread *thread) return 0; } -static int -latency_fork_event(struct trace_fork_event *fork_event __used, - struct event_format *event __used) +static int latency_fork_event(struct trace_fork_event *fork_event __maybe_unused, + struct event_format *event __maybe_unused) { /* should insert the newcomer */ return 0; } -__used static char sched_out_state(struct trace_switch_event *switch_event) { const char *str = TASK_STATE_TO_CHAR_STR; @@ -971,7 +969,8 @@ add_sched_out_event(struct work_atoms *atoms, } static void -add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used) +add_runtime_event(struct work_atoms *atoms, u64 delta, + u64 timestamp __maybe_unused) { struct work_atom *atom; @@ -1017,7 +1016,7 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) static int latency_switch_event(struct trace_switch_event *switch_event, struct machine *machine, - struct event_format *event __used, + struct event_format *event __maybe_unused, struct perf_sample *sample) { struct work_atoms *out_events, *in_events; @@ -1105,7 +1104,8 @@ latency_runtime_event(struct trace_runtime_event *runtime_event, static int latency_wakeup_event(struct trace_wakeup_event *wakeup_event, - struct machine *machine, struct event_format *event __used, + struct machine *machine, + struct event_format *event __maybe_unused, struct perf_sample *sample) { struct work_atoms *atoms; @@ -1369,12 +1369,11 @@ static void sort_lat(void) static struct trace_sched_handler *trace_handler; -static int -process_sched_wakeup_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine, - struct thread *thread __used) +static int process_sched_wakeup_event(struct perf_tool *tool __maybe_unused, + struct event_format *event, + struct perf_sample *sample, + struct machine *machine, + struct thread *thread __maybe_unused) { void *data = sample->raw_data; struct trace_wakeup_event wakeup_event; @@ -1410,10 +1409,10 @@ static char next_shortname2 = '0'; static int map_switch_event(struct trace_switch_event *switch_event, struct machine *machine, - struct event_format *event __used, + struct event_format *event __maybe_unused, struct perf_sample *sample) { - struct thread *sched_out __used, *sched_in; + struct thread *sched_out __maybe_unused, *sched_in; int new_shortname; u64 timestamp0, timestamp = sample->time; s64 delta; @@ -1487,12 +1486,11 @@ map_switch_event(struct trace_switch_event *switch_event, return 0; } -static int -process_sched_switch_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine, - struct thread *thread __used) +static int process_sched_switch_event(struct perf_tool *tool __maybe_unused, + struct event_format *event, + struct perf_sample *sample, + struct machine *machine, + struct thread *thread __maybe_unused) { int this_cpu = sample->cpu, err = 0; void *data = sample->raw_data; @@ -1523,12 +1521,11 @@ process_sched_switch_event(struct perf_tool *tool __used, return err; } -static int -process_sched_runtime_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine, - struct thread *thread __used) +static int process_sched_runtime_event(struct perf_tool *tool __maybe_unused, + struct event_format *event, + struct perf_sample *sample, + struct machine *machine, + struct thread *thread __maybe_unused) { void *data = 
sample->raw_data; struct trace_runtime_event runtime_event; @@ -1545,12 +1542,11 @@ process_sched_runtime_event(struct perf_tool *tool __used, return err; } -static int -process_sched_fork_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine __used, - struct thread *thread __used) +static int process_sched_fork_event(struct perf_tool *tool __maybe_unused, + struct event_format *event, + struct perf_sample *sample, + struct machine *machine __maybe_unused, + struct thread *thread __maybe_unused) { void *data = sample->raw_data; struct trace_fork_event fork_event; @@ -1569,12 +1565,11 @@ process_sched_fork_event(struct perf_tool *tool __used, return err; } -static int -process_sched_exit_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample __used, - struct machine *machine __used, - struct thread *thread __used) +static int process_sched_exit_event(struct perf_tool *tool __maybe_unused, + struct event_format *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused, + struct thread *thread __maybe_unused) { if (verbose) printf("sched_exit event %p\n", event); @@ -1582,12 +1577,11 @@ process_sched_exit_event(struct perf_tool *tool __used, return 0; } -static int -process_sched_migrate_task_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine, - struct thread *thread __used) +static int process_sched_migrate_task_event(struct perf_tool *tool __maybe_unused, + struct event_format *event, + struct perf_sample *sample, + struct machine *machine, + struct thread *thread __maybe_unused) { void *data = sample->raw_data; struct trace_migrate_task_event migrate_task_event; @@ -1612,8 +1606,8 @@ typedef int (*tracepoint_handler)(struct perf_tool *tool, struct machine *machine, struct thread *thread); -static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __used, - union perf_event *event __used, +static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine) @@ -1918,7 +1912,7 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -int cmd_sched(int argc, const char **argv, const char *prefix __used) +int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) { argc = parse_options(argc, argv, sched_options, sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c350cfee315..6d98a83d5a6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -430,9 +430,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample, printf("\n"); } -static int default_start_script(const char *script __unused, - int argc __unused, - const char **argv __unused) +static int default_start_script(const char *script __maybe_unused, + int argc __maybe_unused, + const char **argv __maybe_unused) { return 0; } @@ -442,8 +442,8 @@ static int default_stop_script(void) return 0; } -static int default_generate_script(struct pevent *pevent __unused, - const char *outfile __unused) +static int default_generate_script(struct pevent *pevent __maybe_unused, + const char *outfile __maybe_unused) { return 0; } @@ -474,7 +474,7 @@ static int cleanup_scripting(void) static const char *input_name; 
-static int process_sample_event(struct perf_tool *tool __used, +static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, @@ -534,7 +534,7 @@ static struct perf_tool perf_script = { extern volatile int session_done; -static void sig_handler(int sig __unused) +static void sig_handler(int sig __maybe_unused) { session_done = 1; } @@ -644,8 +644,8 @@ static void list_available_languages(void) fprintf(stderr, "\n"); } -static int parse_scriptname(const struct option *opt __used, - const char *str, int unset __used) +static int parse_scriptname(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { char spec[PATH_MAX]; const char *script, *ext; @@ -690,8 +690,8 @@ static int parse_scriptname(const struct option *opt __used, return 0; } -static int parse_output_fields(const struct option *opt __used, - const char *arg, int unset __used) +static int parse_output_fields(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused) { char *tok; int i, imax = sizeof(all_output_options) / sizeof(struct output_option); @@ -982,8 +982,9 @@ static char *get_script_root(struct dirent *script_dirent, const char *suffix) return script_root; } -static int list_available_scripts(const struct option *opt __used, - const char *s __used, int unset __used) +static int list_available_scripts(const struct option *opt __maybe_unused, + const char *s __maybe_unused, + int unset __maybe_unused) { struct dirent *script_next, *lang_next, script_dirent, lang_dirent; char scripts_path[MAXPATHLEN]; @@ -1172,7 +1173,7 @@ static int have_cmd(int argc, const char **argv) return 0; } -int cmd_script(int argc, const char **argv, const char *prefix __used) +int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) { char *rec_script_path = NULL; char *rep_script_path = NULL; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 02f49eba677..dab347d7b01 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -417,7 +417,7 @@ static int read_counter(struct perf_evsel *counter) return 0; } -static int run_perf_stat(int argc __used, const char **argv) +static int run_perf_stat(int argc __maybe_unused, const char **argv) { unsigned long long t0, t1; struct perf_evsel *counter, *first; @@ -634,7 +634,9 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } -static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_stalled_cycles_frontend(int cpu, + struct perf_evsel *evsel + __maybe_unused, double avg) { double total, ratio = 0.0; const char *color; @@ -651,7 +653,9 @@ static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __us fprintf(output, " frontend cycles idle "); } -static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_stalled_cycles_backend(int cpu, + struct perf_evsel *evsel + __maybe_unused, double avg) { double total, ratio = 0.0; const char *color; @@ -668,7 +672,9 @@ static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __use fprintf(output, " backend cycles idle "); } -static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_branch_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -685,7 +691,9 @@ static 
void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double fprintf(output, " of all branches "); } -static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_l1_dcache_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -702,7 +710,9 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou fprintf(output, " of all L1-dcache hits "); } -static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_l1_icache_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -719,7 +729,9 @@ static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, dou fprintf(output, " of all L1-icache hits "); } -static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_dtlb_cache_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -736,7 +748,9 @@ static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, do fprintf(output, " of all dTLB cache hits "); } -static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_itlb_cache_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -753,7 +767,9 @@ static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, do fprintf(output, " of all iTLB cache hits "); } -static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_ll_cache_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -1059,8 +1075,8 @@ static const char * const stat_usage[] = { NULL }; -static int stat__set_big_num(const struct option *opt __used, - const char *s __used, int unset) +static int stat__set_big_num(const struct option *opt __maybe_unused, + const char *s __maybe_unused, int unset) { big_num_opt = unset ? 
0 : 1; return 0; @@ -1154,7 +1170,7 @@ static int add_default_attributes(void) return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); } -int cmd_stat(int argc, const char **argv, const char *prefix __used) +int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_evsel *pos; int status = -ENOMEM; diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 6ae102eba5f..d33143efefc 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -18,7 +18,8 @@ #include -static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) +static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused, + struct symbol *sym) { bool *visited = symbol__priv(sym); *visited = true; @@ -996,7 +997,9 @@ static u64 mmap_read_self(void *addr) /* * If the RDPMC instruction faults then signal this back to the test parent task: */ -static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used) +static void segfault_handler(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) { exit(-1); } @@ -1315,7 +1318,7 @@ static int perf_test__list(int argc, const char **argv) return 0; } -int cmd_test(int argc, const char **argv, const char *prefix __used) +int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) { const char * const test_usage[] = { "perf test [] [{list |[|]}]", diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 3b75b2e21ea..55a3a6c6b9e 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -275,28 +275,28 @@ static int cpus_cstate_state[MAX_CPUS]; static u64 cpus_pstate_start_times[MAX_CPUS]; static u64 cpus_pstate_state[MAX_CPUS]; -static int process_comm_event(struct perf_tool *tool __used, +static int process_comm_event(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, - struct machine *machine __used) + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { pid_set_comm(event->comm.tid, event->comm.comm); return 0; } -static int process_fork_event(struct perf_tool *tool __used, +static int process_fork_event(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, - struct machine *machine __used) + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); return 0; } -static int process_exit_event(struct perf_tool *tool __used, +static int process_exit_event(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, - struct machine *machine __used) + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { pid_exit(event->fork.pid, event->fork.time); return 0; @@ -491,11 +491,11 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te) } -static int process_sample_event(struct perf_tool *tool __used, - union perf_event *event __used, +static int process_sample_event(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, struct perf_sample *sample, struct perf_evsel *evsel, - struct machine *machine __used) + struct machine *machine __maybe_unused) { struct trace_entry *te; @@ -1081,7 +1081,8 @@ static int __cmd_record(int argc, const char **argv) } static int -parse_process(const struct option *opt __used, const char *arg, 
int __used unset) +parse_process(const struct option *opt __maybe_unused, const char *arg, + int __maybe_unused unset) { if (arg) add_process_filter(arg); @@ -1106,7 +1107,8 @@ static const struct option options[] = { }; -int cmd_timechart(int argc, const char **argv, const char *prefix __used) +int cmd_timechart(int argc, const char **argv, + const char *prefix __maybe_unused) { argc = parse_options(argc, argv, options, timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0513aaa659f..5550754c05f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -95,7 +95,8 @@ static void perf_top__update_print_entries(struct perf_top *top) top->print_entries -= 9; } -static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg) +static void perf_top__sig_winch(int sig __maybe_unused, + siginfo_t *info __maybe_unused, void *arg) { struct perf_top *top = arg; @@ -663,7 +664,7 @@ static const char *skip_symbols[] = { NULL }; -static int symbol_filter(struct map *map __used, struct symbol *sym) +static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym) { const char *name = sym->name; int i; @@ -1163,7 +1164,7 @@ static const char * const top_usage[] = { NULL }; -int cmd_top(int argc, const char **argv, const char *prefix __used) +int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_evsel *pos; int status; diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 1818a531f1d..4aeb7d5df93 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -269,7 +269,7 @@ int ui_browser__show(struct ui_browser *browser, const char *title, return err ? 0 : -1; } -void ui_browser__hide(struct ui_browser *browser __used) +void ui_browser__hide(struct ui_browser *browser __maybe_unused) { pthread_mutex_lock(&ui__lock); ui_helpline__pop(); @@ -518,7 +518,7 @@ static struct ui_browser__colorset { static int ui_browser__color_config(const char *var, const char *value, - void *data __used) + void *data __maybe_unused) { char *fg = NULL, *bg; int i; @@ -602,7 +602,8 @@ void __ui_browser__vline(struct ui_browser *browser, unsigned int column, SLsmg_set_char_set(0); } -void ui_browser__write_graph(struct ui_browser *browser __used, int graph) +void ui_browser__write_graph(struct ui_browser *browser __maybe_unused, + int graph) { SLsmg_set_char_set(1); SLsmg_write_char(graph); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 67a2703e666..8f8cd2d73b3 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -54,7 +54,8 @@ static inline struct browser_disasm_line *disasm_line__browser(struct disasm_lin return (struct browser_disasm_line *)(dl + 1); } -static bool disasm_line__filter(struct ui_browser *browser __used, void *entry) +static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, + void *entry) { if (annotate_browser__opts.hide_src_code) { struct disasm_line *dl = list_entry(entry, struct disasm_line, node); @@ -928,7 +929,8 @@ static int annotate_config__cmp(const void *name, const void *cfgp) return strcmp(name, cfg->name); } -static int annotate__config(const char *var, const char *value, void *data __used) +static int annotate__config(const char *var, const char *value, + void *data __maybe_unused) { struct annotate__config *cfg; const char *name; diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index 3c16ab50e0f..55acba6e0df 
100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -237,8 +237,9 @@ static GtkWidget *perf_gtk__setup_statusbar(void) int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help, - void (*timer) (void *arg)__used, - void *arg __used, int delay_secs __used) + void (*timer) (void *arg)__maybe_unused, + void *arg __maybe_unused, + int delay_secs __maybe_unused) { struct perf_evsel *pos; GtkWidget *vbox; diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index 26429437e19..3c4c6ef7828 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -12,7 +12,7 @@ int perf_gtk__init(void) return gtk_init_check(NULL, NULL) ? 0 : -1; } -void perf_gtk__exit(bool wait_for_ok __used) +void perf_gtk__exit(bool wait_for_ok __maybe_unused) { if (!perf_gtk__is_active_context(pgctx)) return; diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index b8efb966f94..8aada5b3c04 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -117,8 +117,8 @@ struct perf_error_ops perf_gtk_eops = { * For now, just add stubs for NO_NEWT=1 build. */ #ifdef NO_NEWT_SUPPORT -void ui_progress__update(u64 curr __used, u64 total __used, - const char *title __used) +void ui_progress__update(u64 curr __maybe_unused, u64 total __maybe_unused, + const char *title __maybe_unused) { } #endif diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c index 78ba28ac7a2..a49bcf3c190 100644 --- a/tools/perf/ui/helpline.c +++ b/tools/perf/ui/helpline.c @@ -12,7 +12,7 @@ static void nop_helpline__pop(void) { } -static void nop_helpline__push(const char *msg __used) +static void nop_helpline__push(const char *msg __maybe_unused) { } diff --git a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h index a2487f93aa4..2b667ee454c 100644 --- a/tools/perf/ui/helpline.h +++ b/tools/perf/ui/helpline.h @@ -24,8 +24,8 @@ void ui_helpline__puts(const char *msg); extern char ui_helpline__current[512]; #ifdef NO_NEWT_SUPPORT -static inline int ui_helpline__show_help(const char *format __used, - va_list ap __used) +static inline int ui_helpline__show_help(const char *format __maybe_unused, + va_list ap __maybe_unused) { return 0; } @@ -35,8 +35,8 @@ int ui_helpline__show_help(const char *format, va_list ap); #endif /* NO_NEWT_SUPPORT */ #ifdef NO_GTK2_SUPPORT -static inline int perf_gtk__show_helpline(const char *format __used, - va_list ap __used) +static inline int perf_gtk__show_helpline(const char *format __maybe_unused, + va_list ap __maybe_unused) { return 0; } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 031b349a3f8..407e855cccb 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -13,7 +13,7 @@ static int hpp__header_overhead(struct perf_hpp *hpp) return scnprintf(hpp->buf, hpp->size, fmt); } -static int hpp__width_overhead(struct perf_hpp *hpp __used) +static int hpp__width_overhead(struct perf_hpp *hpp __maybe_unused) { return 8; } @@ -62,7 +62,7 @@ static int hpp__header_overhead_sys(struct perf_hpp *hpp) return scnprintf(hpp->buf, hpp->size, fmt, "sys"); } -static int hpp__width_overhead_sys(struct perf_hpp *hpp __used) +static int hpp__width_overhead_sys(struct perf_hpp *hpp __maybe_unused) { return 6; } @@ -88,7 +88,7 @@ static int hpp__header_overhead_us(struct perf_hpp *hpp) return scnprintf(hpp->buf, hpp->size, fmt, "user"); } -static int hpp__width_overhead_us(struct perf_hpp *hpp __used) +static int hpp__width_overhead_us(struct perf_hpp *hpp __maybe_unused) { return 6; } @@ -112,7 +112,7 @@ static 
int hpp__header_overhead_guest_sys(struct perf_hpp *hpp) return scnprintf(hpp->buf, hpp->size, "guest sys"); } -static int hpp__width_overhead_guest_sys(struct perf_hpp *hpp __used) +static int hpp__width_overhead_guest_sys(struct perf_hpp *hpp __maybe_unused) { return 9; } @@ -138,7 +138,7 @@ static int hpp__header_overhead_guest_us(struct perf_hpp *hpp) return scnprintf(hpp->buf, hpp->size, "guest usr"); } -static int hpp__width_overhead_guest_us(struct perf_hpp *hpp __used) +static int hpp__width_overhead_guest_us(struct perf_hpp *hpp __maybe_unused) { return 9; } @@ -166,7 +166,7 @@ static int hpp__header_samples(struct perf_hpp *hpp) return scnprintf(hpp->buf, hpp->size, fmt, "Samples"); } -static int hpp__width_samples(struct perf_hpp *hpp __used) +static int hpp__width_samples(struct perf_hpp *hpp __maybe_unused) { return 11; } @@ -185,7 +185,7 @@ static int hpp__header_period(struct perf_hpp *hpp) return scnprintf(hpp->buf, hpp->size, fmt, "Period"); } -static int hpp__width_period(struct perf_hpp *hpp __used) +static int hpp__width_period(struct perf_hpp *hpp __maybe_unused) { return 12; } @@ -204,7 +204,7 @@ static int hpp__header_delta(struct perf_hpp *hpp) return scnprintf(hpp->buf, hpp->size, fmt, "Delta"); } -static int hpp__width_delta(struct perf_hpp *hpp __used) +static int hpp__width_delta(struct perf_hpp *hpp __maybe_unused) { return 7; } @@ -238,12 +238,13 @@ static int hpp__header_displ(struct perf_hpp *hpp) return scnprintf(hpp->buf, hpp->size, "Displ."); } -static int hpp__width_displ(struct perf_hpp *hpp __used) +static int hpp__width_displ(struct perf_hpp *hpp __maybe_unused) { return 6; } -static int hpp__entry_displ(struct perf_hpp *hpp, struct hist_entry *he __used) +static int hpp__entry_displ(struct perf_hpp *hpp, + struct hist_entry *he __maybe_unused) { const char *fmt = symbol_conf.field_sep ? 
"%s" : "%6.6s"; char buf[32] = " "; diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 4dc0887c04f..60debb81537 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -28,7 +28,7 @@ void ui__refresh_dimensions(bool force) } } -static void ui__sigwinch(int sig __used) +static void ui__sigwinch(int sig __maybe_unused) { ui__need_resize = 1; } @@ -88,7 +88,7 @@ int ui__getch(int delay_secs) return SLkp_getkey(); } -static void newt_suspend(void *d __used) +static void newt_suspend(void *d __maybe_unused) { newtSuspend(); raise(SIGTSTP); diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c index b8144e80bb1..e6d134773d0 100644 --- a/tools/perf/util/alias.c +++ b/tools/perf/util/alias.c @@ -3,7 +3,8 @@ static const char *alias_key; static char *alias_val; -static int alias_lookup_cb(const char *k, const char *v, void *cb __used) +static int alias_lookup_cb(const char *k, const char *v, + void *cb __maybe_unused) { if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { if (!v) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 04eafd3939d..f0a91037137 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -313,8 +313,8 @@ static struct ins_ops dec_ops = { .scnprintf = dec__scnprintf, }; -static int nop__scnprintf(struct ins *ins __used, char *bf, size_t size, - struct ins_operands *ops __used) +static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, + struct ins_operands *ops __maybe_unused) { return scnprintf(bf, size, "%-6.6s", "nop"); } @@ -416,7 +416,7 @@ static struct ins *ins__find(const char *name) return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__cmp); } -int symbol__annotate_init(struct map *map __used, struct symbol *sym) +int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); pthread_mutex_init(¬es->lock, NULL); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 62a6e7a7365..9b5b21e7b03 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -126,7 +126,7 @@ int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); -int symbol__annotate_init(struct map *map __used, struct symbol *sym); +int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym); int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, bool full_paths, int min_pcnt, int max_lines, int context); @@ -139,11 +139,12 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, int max_lines); #ifdef NO_NEWT_SUPPORT -static inline int symbol__tui_annotate(struct symbol *sym __used, - struct map *map __used, - int evidx __used, - void(*timer)(void *arg) __used, - void *arg __used, int delay_secs __used) +static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused, + struct map *map __maybe_unused, + int evidx __maybe_unused, + void(*timer)(void *arg) __maybe_unused, + void *arg __maybe_unused, + int delay_secs __maybe_unused) { return 0; } diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index fd9a5944b62..8e3a740ddbd 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -16,10 +16,10 @@ #include "session.h" #include "tool.h" -static int build_id__mark_dso_hit(struct perf_tool *tool __used, +static int build_id__mark_dso_hit(struct perf_tool 
*tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, - struct perf_evsel *evsel __used, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, struct machine *machine) { struct addr_location al; @@ -41,9 +41,10 @@ static int build_id__mark_dso_hit(struct perf_tool *tool __used, return 0; } -static int perf_event__exit_del_thread(struct perf_tool *tool __used, +static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample + __maybe_unused, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, event->fork.tid); diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index cff18c617d1..ab176942654 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -39,7 +39,7 @@ static inline void setup_browser(bool fallback_to_pager) if (fallback_to_pager) setup_pager(); } -static inline void exit_browser(bool wait_for_ok __used) {} +static inline void exit_browser(bool wait_for_ok __maybe_unused) {} #else void setup_browser(bool fallback_to_pager); void exit_browser(bool wait_for_ok); @@ -49,7 +49,7 @@ static inline int ui__init(void) { return -1; } -static inline void ui__exit(bool wait_for_ok __used) {} +static inline void ui__exit(bool wait_for_ok __maybe_unused) {} #else int ui__init(void); void ui__exit(bool wait_for_ok); @@ -60,7 +60,7 @@ static inline int perf_gtk__init(void) { return -1; } -static inline void perf_gtk__exit(bool wait_for_ok __used) {} +static inline void perf_gtk__exit(bool wait_for_ok __maybe_unused) {} #else int perf_gtk__init(void); void perf_gtk__exit(bool wait_for_ok); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3a6bff47614..d3b3f5d8213 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -93,7 +93,7 @@ __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, */ static void sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root, - u64 min_hit, struct callchain_param *param __used) + u64 min_hit, struct callchain_param *param __maybe_unused) { __sort_chain_flat(rb_root, &root->node, min_hit); } @@ -115,7 +115,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node, static void sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root, - u64 min_hit, struct callchain_param *param __used) + u64 min_hit, struct callchain_param *param __maybe_unused) { __sort_chain_graph_abs(&chain_root->node, min_hit); rb_root->rb_node = chain_root->node.rb_root.rb_node; @@ -140,7 +140,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node, static void sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root, - u64 min_hit __used, struct callchain_param *param) + u64 min_hit __maybe_unused, struct callchain_param *param) { __sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0); rb_root->rb_node = chain_root->node.rb_root.rb_node; diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index dbe2f16b1a1..96bbda1ddb8 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -138,8 +138,8 @@ void close_cgroup(struct cgroup_sel *cgrp) } } -int parse_cgroups(const struct option *opt __used, const char *str, - int unset __used) +int parse_cgroups(const struct option *opt __maybe_unused, const char *str, + int unset __maybe_unused) { struct perf_evlist *evlist = *(struct perf_evlist 
**)opt->value; const char *p, *e, *eos = str + strlen(str); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 6faa3a18bfb..3e0fdd369cc 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -342,13 +342,15 @@ const char *perf_config_dirname(const char *name, const char *value) return value; } -static int perf_default_core_config(const char *var __used, const char *value __used) +static int perf_default_core_config(const char *var __maybe_unused, + const char *value __maybe_unused) { /* Add other config variables here. */ return 0; } -int perf_default_config(const char *var, const char *value, void *dummy __used) +int perf_default_config(const char *var, const char *value, + void *dummy __maybe_unused) { if (!prefixcmp(var, "core.")) return perf_default_core_config(var, value); diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 05e660cbf7e..bb2e7d1007a 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -16,19 +16,20 @@ struct ui_progress; struct perf_error_ops; #if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT) -static inline void ui_progress__update(u64 curr __used, u64 total __used, - const char *title __used) {} +static inline void ui_progress__update(u64 curr __maybe_unused, + u64 total __maybe_unused, + const char *title __maybe_unused) {} #define ui__error(format, arg...) ui__warning(format, ##arg) static inline int -perf_error__register(struct perf_error_ops *eops __used) +perf_error__register(struct perf_error_ops *eops __maybe_unused) { return 0; } static inline int -perf_error__unregister(struct perf_error_ops *eops __used) +perf_error__unregister(struct perf_error_ops *eops __maybe_unused) { return 0; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f7f480503af..8202f5ca048 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -514,9 +514,9 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) return fprintf(fp, ": %s:%d\n", event->comm.comm, event->comm.tid); } -int perf_event__process_comm(struct perf_tool *tool __used, +int perf_event__process_comm(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample __maybe_unused, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, event->comm.tid); @@ -532,10 +532,10 @@ int perf_event__process_comm(struct perf_tool *tool __used, return 0; } -int perf_event__process_lost(struct perf_tool *tool __used, +int perf_event__process_lost(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, - struct machine *machine __used) + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n", event->lost.id, event->lost.lost); @@ -555,7 +555,8 @@ static void perf_event__set_kernel_mmap_len(union perf_event *event, maps[MAP__FUNCTION]->end = ~0ULL; } -static int perf_event__process_kernel_mmap(struct perf_tool *tool __used, +static int perf_event__process_kernel_mmap(struct perf_tool *tool + __maybe_unused, union perf_event *event, struct machine *machine) { @@ -657,7 +658,7 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample __maybe_unused, struct machine *machine) { struct thread *thread; @@ -701,9 +702,9 @@ size_t 
perf_event__fprintf_task(union perf_event *event, FILE *fp) event->fork.ppid, event->fork.ptid); } -int perf_event__process_task(struct perf_tool *tool __used, +int perf_event__process_task(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample __maybe_unused, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, event->fork.tid); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 87996cab21d..acbf6336199 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -475,7 +475,7 @@ static bool perf_session__read_build_ids(struct perf_session *session, bool with return ret; } -static int write_tracing_data(int fd, struct perf_header *h __used, +static int write_tracing_data(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist) { return read_tracing_data(fd, &evlist->entries); @@ -483,7 +483,7 @@ static int write_tracing_data(int fd, struct perf_header *h __used, static int write_build_id(int fd, struct perf_header *h, - struct perf_evlist *evlist __used) + struct perf_evlist *evlist __maybe_unused) { struct perf_session *session; int err; @@ -504,8 +504,8 @@ static int write_build_id(int fd, struct perf_header *h, return 0; } -static int write_hostname(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_hostname(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { struct utsname uts; int ret; @@ -517,8 +517,8 @@ static int write_hostname(int fd, struct perf_header *h __used, return do_write_string(fd, uts.nodename); } -static int write_osrelease(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_osrelease(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { struct utsname uts; int ret; @@ -530,8 +530,8 @@ static int write_osrelease(int fd, struct perf_header *h __used, return do_write_string(fd, uts.release); } -static int write_arch(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_arch(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { struct utsname uts; int ret; @@ -543,14 +543,14 @@ static int write_arch(int fd, struct perf_header *h __used, return do_write_string(fd, uts.machine); } -static int write_version(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_version(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return do_write_string(fd, perf_version_string); } -static int write_cpudesc(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_cpudesc(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { #ifndef CPUINFO_PROC #define CPUINFO_PROC NULL @@ -608,8 +608,8 @@ done: return ret; } -static int write_nrcpus(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_nrcpus(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { long nr; u32 nrc, nra; @@ -634,7 +634,7 @@ static int write_nrcpus(int fd, struct perf_header *h __used, return do_write(fd, &nra, sizeof(nra)); } -static int write_event_desc(int fd, struct perf_header *h __used, +static int write_event_desc(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist) { struct 
perf_evsel *evsel; @@ -691,8 +691,8 @@ static int write_event_desc(int fd, struct perf_header *h __used, return 0; } -static int write_cmdline(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_cmdline(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { char buf[MAXPATHLEN]; char proc[32]; @@ -860,8 +860,8 @@ static struct cpu_topo *build_cpu_topology(void) return tp; } -static int write_cpu_topology(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { struct cpu_topo *tp; u32 i; @@ -896,8 +896,8 @@ done: -static int write_total_mem(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_total_mem(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { char *buf = NULL; FILE *fp; @@ -982,8 +982,8 @@ done: return ret; } -static int write_numa_topology(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_numa_topology(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { char *buf = NULL; size_t len = 0; @@ -1043,8 +1043,8 @@ done: * }; */ -static int write_pmu_mappings(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { struct perf_pmu *pmu = NULL; off_t offset = lseek(fd, 0, SEEK_CUR); @@ -1074,13 +1074,14 @@ static int write_pmu_mappings(int fd, struct perf_header *h __used, * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(ARCH)/util/header.c */ -int __attribute__((weak)) get_cpuid(char *buffer __used, size_t sz __used) +int __attribute__ ((weak)) get_cpuid(char *buffer __maybe_unused, + size_t sz __maybe_unused) { return -1; } -static int write_cpuid(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_cpuid(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { char buffer[64]; int ret; @@ -1094,8 +1095,9 @@ write_it: return do_write_string(fd, buffer); } -static int write_branch_stack(int fd __used, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_branch_stack(int fd __maybe_unused, + struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return 0; } @@ -1372,7 +1374,8 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) free_event_desc(events); } -static void print_total_mem(struct perf_header *h __used, int fd, FILE *fp) +static void print_total_mem(struct perf_header *h __maybe_unused, int fd, + FILE *fp) { uint64_t mem; ssize_t ret; @@ -1390,7 +1393,8 @@ error: fprintf(fp, "# total memory : unknown\n"); } -static void print_numa_topology(struct perf_header *h __used, int fd, FILE *fp) +static void print_numa_topology(struct perf_header *h __maybe_unused, int fd, + FILE *fp) { ssize_t ret; u32 nr, c, i; @@ -1450,7 +1454,8 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) free(str); } -static void print_branch_stack(struct perf_header *ph __used, int fd __used, +static void print_branch_stack(struct perf_header *ph __maybe_unused, + int fd __maybe_unused, FILE *fp) { fprintf(fp, "# contains samples with branch stack\n"); @@ -1649,9 +1654,10 @@ 
out: return err; } -static int process_tracing_data(struct perf_file_section *section __unused, - struct perf_header *ph __unused, - int feat __unused, int fd, void *data) +static int process_tracing_data(struct perf_file_section *section + __maybe_unused, + struct perf_header *ph __maybe_unused, + int feat __maybe_unused, int fd, void *data) { trace_report(fd, data, false); return 0; @@ -1659,7 +1665,8 @@ static int process_tracing_data(struct perf_file_section *section __unused, static int process_build_id(struct perf_file_section *section, struct perf_header *ph, - int feat __unused, int fd, void *data __used) + int feat __maybe_unused, int fd, + void *data __maybe_unused) { if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) pr_debug("Failed to read buildids, continuing...\n"); @@ -1698,9 +1705,9 @@ perf_evlist__set_event_name(struct perf_evlist *evlist, struct perf_evsel *event } static int -process_event_desc(struct perf_file_section *section __unused, - struct perf_header *header, int feat __unused, int fd, - void *data __used) +process_event_desc(struct perf_file_section *section __maybe_unused, + struct perf_header *header, int feat __maybe_unused, int fd, + void *data __maybe_unused) { struct perf_session *session = container_of(header, struct perf_session, header); struct perf_evsel *evsel, *events = read_event_desc(header, fd); @@ -2596,7 +2603,7 @@ int perf_event__synthesize_event_types(struct perf_tool *tool, return err; } -int perf_event__process_event_type(struct perf_tool *tool __unused, +int perf_event__process_event_type(struct perf_tool *tool __maybe_unused, union perf_event *event) { if (perf_header__push_event(event->event_type.event_type.event_id, @@ -2613,7 +2620,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, union perf_event ev; struct tracing_data *tdata; ssize_t size = 0, aligned_size = 0, padding; - int err __used = 0; + int err __maybe_unused = 0; /* * We are going to store the size of the data followed @@ -2712,7 +2719,7 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, return err; } -int perf_event__process_build_id(struct perf_tool *tool __used, +int perf_event__process_build_id(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_session *session) { diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c index 4fa764d8f7d..8b1f6e891b8 100644 --- a/tools/perf/util/help.c +++ b/tools/perf/util/help.c @@ -332,7 +332,8 @@ const char *help_unknown_cmd(const char *cmd) exit(1); } -int cmd_version(int argc __used, const char **argv __used, const char *prefix __used) +int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused, + const char *prefix __maybe_unused) { printf("perf version %s\n", perf_version_string); return 0; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0ba65ad07cd..6ec5398de89 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -394,7 +394,7 @@ void hist_entry__free(struct hist_entry *he) * collapse the histogram */ -static bool hists__collapse_insert_entry(struct hists *hists __used, +static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, struct rb_root *root, struct hist_entry *he) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4146f51124f..f011ad4756e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -156,20 +156,22 @@ struct perf_evlist; #ifdef NO_NEWT_SUPPORT static inline -int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used, 
- const char *help __used, - void(*timer)(void *arg) __used, - void *arg __used, - int refresh __used) +int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, + const char *help __maybe_unused, + void(*timer)(void *arg) __maybe_unused, + void *arg __maybe_unused, + int refresh __maybe_unused) { return 0; } -static inline int hist_entry__tui_annotate(struct hist_entry *self __used, - int evidx __used, - void(*timer)(void *arg) __used, - void *arg __used, - int delay_secs __used) +static inline int hist_entry__tui_annotate(struct hist_entry *self + __maybe_unused, + int evidx __maybe_unused, + void(*timer)(void *arg) + __maybe_unused, + void *arg __maybe_unused, + int delay_secs __maybe_unused) { return 0; } @@ -187,11 +189,11 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, #ifdef NO_GTK2_SUPPORT static inline -int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __used, - const char *help __used, - void(*timer)(void *arg) __used, - void *arg __used, - int refresh __used) +int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused, + const char *help __maybe_unused, + void(*timer)(void *arg) __maybe_unused, + void *arg __maybe_unused, + int refresh __maybe_unused) { return 0; } diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h index ce2367b7b3f..96b919dae11 100644 --- a/tools/perf/util/include/linux/compiler.h +++ b/tools/perf/util/include/linux/compiler.h @@ -9,7 +9,9 @@ #define __attribute_const__ #endif -#define __used __attribute__((__unused__)) +#ifndef __maybe_unused +#define __maybe_unused __attribute__((unused)) +#endif #define __packed __attribute__((__packed__)) #ifndef __force diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c index 77c504ff008..9d0740024ba 100644 --- a/tools/perf/util/intlist.c +++ b/tools/perf/util/intlist.c @@ -11,7 +11,7 @@ #include "intlist.h" -static struct rb_node *intlist__node_new(struct rblist *rblist __used, +static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused, const void *entry) { int i = (int)((long)entry); @@ -31,7 +31,7 @@ static void int_node__delete(struct int_node *ilist) free(ilist); } -static void intlist__node_delete(struct rblist *rblist __used, +static void intlist__node_delete(struct rblist *rblist __maybe_unused, struct rb_node *rb_node) { struct int_node *node = container_of(rb_node, struct int_node, rb_node); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 25ab4cdbc44..d2250fc97e2 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -96,7 +96,7 @@ static inline u64 map__unmap_ip(struct map *map, u64 ip) return ip + map->start - map->pgoff; } -static inline u64 identity__map_ip(struct map *map __used, u64 ip) +static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip) { return ip; } diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index bc8b65130ae..d7244e55367 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -569,7 +569,7 @@ static int test__group2(struct perf_evlist *evlist) return 0; } -static int test__group3(struct perf_evlist *evlist __used) +static int test__group3(struct perf_evlist *evlist __maybe_unused) { struct perf_evsel *evsel, *leader; @@ -648,7 +648,7 @@ static int test__group3(struct perf_evlist *evlist __used) return 0; } -static int test__group4(struct perf_evlist *evlist __used) +static int test__group4(struct perf_evlist 
*evlist __maybe_unused) { struct perf_evsel *evsel, *leader; @@ -684,7 +684,7 @@ static int test__group4(struct perf_evlist *evlist __used) return 0; } -static int test__group5(struct perf_evlist *evlist __used) +static int test__group5(struct perf_evlist *evlist __maybe_unused) { struct perf_evsel *evsel, *leader; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a031ee1f54f..44afcf40f79 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -807,7 +807,8 @@ int parse_events_terms(struct list_head *terms, const char *str) return ret; } -int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) +int parse_events(struct perf_evlist *evlist, const char *str, + int unset __maybe_unused) { struct parse_events_data__events data = { .list = LIST_HEAD_INIT(data.list), @@ -833,14 +834,14 @@ int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) } int parse_events_option(const struct option *opt, const char *str, - int unset __used) + int unset __maybe_unused) { struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; return parse_events(evlist, str, unset); } int parse_filter(const struct option *opt, const char *str, - int unset __used) + int unset __maybe_unused) { struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; struct perf_evsel *last = NULL; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index f5e28dc6827..c87efc12579 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -207,7 +207,7 @@ r{num_raw_hex} { return raw(yyscanner); } %% -int parse_events_wrap(void *scanner __used) +int parse_events_wrap(void *scanner __maybe_unused) { return 1; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 42d9a17b83b..cd88209e3c5 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -391,7 +391,7 @@ sep_slash_dc: '/' | ':' | %% -void parse_events_error(void *data __used, void *scanner __used, - char const *msg __used) +void parse_events_error(void *data __maybe_unused, void *scanner __maybe_unused, + char const *msg __maybe_unused) { } diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 594f8fad5ec..443fc116512 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -557,7 +557,8 @@ int parse_options_usage(const char * const *usagestr, } -int parse_opt_verbosity_cb(const struct option *opt, const char *arg __used, +int parse_opt_verbosity_cb(const struct option *opt, + const char *arg __maybe_unused, int unset) { int *target = opt->value; diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 9bd6c4e069c..316dbe7f86e 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -6,7 +6,7 @@ #else #define PERF_REGS_MASK 0 -static inline const char *perf_reg_name(int id __used) +static inline const char *perf_reg_name(int id __maybe_unused) { return NULL; } diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y index 20ea77e9316..ec898047ebb 100644 --- a/tools/perf/util/pmu.y +++ b/tools/perf/util/pmu.y @@ -86,8 +86,8 @@ PP_VALUE %% -void perf_pmu_error(struct list_head *list __used, - char *name __used, - char const *msg __used) +void perf_pmu_error(struct list_head *list __maybe_unused, + char *name __maybe_unused, + char const *msg __maybe_unused) { } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 
e8c72de0f70..4ce04c2281d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -41,7 +41,7 @@ #include "symbol.h" #include "thread.h" #include "debugfs.h" -#include "trace-event.h" /* For __unused */ +#include "trace-event.h" /* For __maybe_unused */ #include "probe-event.h" #include "probe-finder.h" #include "session.h" @@ -647,8 +647,8 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, } static int try_to_find_probe_trace_events(struct perf_probe_event *pev, - struct probe_trace_event **tevs __unused, - int max_tevs __unused, const char *target) + struct probe_trace_event **tevs __maybe_unused, + int max_tevs __maybe_unused, const char *target) { if (perf_probe_event_need_dwarf(pev)) { pr_warning("Debuginfo-analysis is not supported.\n"); @@ -661,17 +661,18 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, return 0; } -int show_line_range(struct line_range *lr __unused, const char *module __unused) +int show_line_range(struct line_range *lr __maybe_unused, + const char *module __maybe_unused) { pr_warning("Debuginfo-analysis is not supported.\n"); return -ENOSYS; } -int show_available_vars(struct perf_probe_event *pevs __unused, - int npevs __unused, int max_vls __unused, - const char *module __unused, - struct strfilter *filter __unused, - bool externs __unused) +int show_available_vars(struct perf_probe_event *pevs __maybe_unused, + int npevs __maybe_unused, int max_vls __maybe_unused, + const char *module __maybe_unused, + struct strfilter *filter __maybe_unused, + bool externs __maybe_unused) { pr_warning("Debuginfo-analysis is not supported.\n"); return -ENOSYS; @@ -2183,7 +2184,7 @@ static struct strfilter *available_func_filter; * If a symbol corresponds to a function with global binding and * matches filter return 0. For all others return 1. */ -static int filter_available_functions(struct map *map __unused, +static int filter_available_functions(struct map *map __maybe_unused, struct symbol *sym) { if (sym->binding == STB_GLOBAL && diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index d448984ed78..526ba56e720 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -207,7 +207,7 @@ static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self, #else /* With older elfutils, this just support kernel module... 
*/ static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self, - Dwarf_Addr addr __used) + Dwarf_Addr addr __maybe_unused) { const char *path = kernel_get_module_path("kernel"); @@ -1419,7 +1419,7 @@ static int line_range_add_line(const char *src, unsigned int lineno, } static int line_range_walk_cb(const char *fname, int lineno, - Dwarf_Addr addr __used, + Dwarf_Addr addr __maybe_unused, void *data) { struct line_finder *lf = data; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 27187f0b71f..ca85444bcfb 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -672,7 +672,7 @@ struct pyrf_evlist { }; static int pyrf_evlist__init(struct pyrf_evlist *pevlist, - PyObject *args, PyObject *kwargs __used) + PyObject *args, PyObject *kwargs __maybe_unused) { PyObject *pcpus = NULL, *pthreads = NULL; struct cpu_map *cpus; @@ -733,7 +733,8 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist, } static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, - PyObject *args __used, PyObject *kwargs __used) + PyObject *args __maybe_unused, + PyObject *kwargs __maybe_unused) { struct perf_evlist *evlist = &pevlist->evlist; PyObject *list = PyList_New(0); @@ -765,7 +766,8 @@ free_list: static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist, - PyObject *args, PyObject *kwargs __used) + PyObject *args, + PyObject *kwargs __maybe_unused) { struct perf_evlist *evlist = &pevlist->evlist; PyObject *pevsel; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 94e673643bc..ffde3e4e34a 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -257,10 +257,10 @@ static inline struct event_format *find_cache_event(struct perf_evsel *evsel) return event; } -static void perl_process_tracepoint(union perf_event *perf_event __unused, +static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, struct perf_sample *sample, struct perf_evsel *evsel, - struct machine *machine __unused, + struct machine *machine __maybe_unused, struct addr_location *al) { struct format_field *field; @@ -349,8 +349,8 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, static void perl_process_event_generic(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct machine *machine __unused, - struct addr_location *al __unused) + struct machine *machine __maybe_unused, + struct addr_location *al __maybe_unused) { dSP; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index afba0972918..730c6630cba 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -221,10 +221,11 @@ static inline struct event_format *find_cache_event(struct perf_evsel *evsel) return event; } -static void python_process_tracepoint(union perf_event *perf_event __unused, +static void python_process_tracepoint(union perf_event *perf_event + __maybe_unused, struct perf_sample *sample, struct perf_evsel *evsel, - struct machine *machine __unused, + struct machine *machine __maybe_unused, struct addr_location *al) { PyObject *handler, *retval, *context, *t, *obj, *dict = NULL; @@ -339,10 +340,11 @@ static void python_process_tracepoint(union perf_event *perf_event __unused, Py_DECREF(t); } -static void python_process_general_event(union 
perf_event *perf_event __unused, +static void python_process_general_event(union perf_event *perf_event + __maybe_unused, struct perf_sample *sample, struct perf_evsel *evsel, - struct machine *machine __unused, + struct machine *machine __maybe_unused, struct addr_location *al) { PyObject *handler, *retval, *t, *dict; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e0fd6c71cc5..3049b0ae700 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -401,49 +401,53 @@ int machine__resolve_callchain(struct machine *machine, } -static int process_event_synth_tracing_data_stub(union perf_event *event __used, - struct perf_session *session __used) +static int process_event_synth_tracing_data_stub(union perf_event *event + __maybe_unused, + struct perf_session *session + __maybe_unused) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_synth_attr_stub(union perf_event *event __used, - struct perf_evlist **pevlist __used) +static int process_event_synth_attr_stub(union perf_event *event __maybe_unused, + struct perf_evlist **pevlist + __maybe_unused) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_sample_stub(struct perf_tool *tool __used, - union perf_event *event __used, - struct perf_sample *sample __used, - struct perf_evsel *evsel __used, - struct machine *machine __used) +static int process_event_sample_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct machine *machine __maybe_unused) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_stub(struct perf_tool *tool __used, - union perf_event *event __used, - struct perf_sample *sample __used, - struct machine *machine __used) +static int process_event_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { dump_printf(": unhandled!\n"); return 0; } -static int process_finished_round_stub(struct perf_tool *tool __used, - union perf_event *event __used, - struct perf_session *perf_session __used) +static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *perf_session + __maybe_unused) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_type_stub(struct perf_tool *tool __used, - union perf_event *event __used) +static int process_event_type_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -520,7 +524,7 @@ static void swap_sample_id_all(union perf_event *event, void *data) } static void perf_event__all64_swap(union perf_event *event, - bool sample_id_all __used) + bool sample_id_all __maybe_unused) { struct perf_event_header *hdr = &event->header; mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); @@ -631,7 +635,7 @@ void perf_event__attr_swap(struct perf_event_attr *attr) } static void perf_event__hdr_attr_swap(union perf_event *event, - bool sample_id_all __used) + bool sample_id_all __maybe_unused) { size_t size; @@ -643,14 +647,14 @@ static void perf_event__hdr_attr_swap(union perf_event *event, } static void perf_event__event_type_swap(union perf_event *event, - bool sample_id_all __used) + bool sample_id_all __maybe_unused) { event->event_type.event_type.event_id = 
bswap_64(event->event_type.event_type.event_id); } static void perf_event__tracing_data_swap(union perf_event *event, - bool sample_id_all __used) + bool sample_id_all __maybe_unused) { event->tracing_data.size = bswap_32(event->tracing_data.size); } @@ -791,7 +795,7 @@ static int flush_sample_queue(struct perf_session *s, * etc... */ static int process_finished_round(struct perf_tool *tool, - union perf_event *event __used, + union perf_event *event __maybe_unused, struct perf_session *session) { int ret = flush_sample_queue(session, tool); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7a2fbd8855b..0981bc7a291 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, u64 ip, char level, char *bf, size_t size, - unsigned int width __used) + unsigned int width __maybe_unused) { size_t ret = 0; @@ -205,7 +205,8 @@ struct sort_entry sort_dso = { }; static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) + size_t size, + unsigned int width __maybe_unused) { return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, self->level, bf, size, width); @@ -248,7 +249,8 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) } static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) + size_t size, + unsigned int width __maybe_unused) { FILE *fp; char cmd[PATH_MAX + 2], *path = self->srcline, *nl; @@ -397,7 +399,8 @@ sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) } static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) + size_t size, + unsigned int width __maybe_unused) { struct addr_map_symbol *from = &self->branch_info->from; return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, @@ -406,7 +409,8 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, } static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) + size_t size, + unsigned int width __maybe_unused) { struct addr_map_symbol *to = &self->branch_info->to; return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 6738ea128c9..259f8f2ea9c 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -69,8 +69,9 @@ static int read_build_id(void *note_data, size_t note_len, void *bf, return -1; } -int filename__read_debuglink(const char *filename __used, - char *debuglink __used, size_t size __used) +int filename__read_debuglink(const char *filename __maybe_unused, + char *debuglink __maybe_unused, + size_t size __maybe_unused) { return -1; } @@ -241,7 +242,8 @@ out: return ret; } -int symsrc__init(struct symsrc *ss, struct dso *dso __used, const char *name, +int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused, + const char *name, enum dso_binary_type type) { int fd = open(name, O_RDONLY); @@ -260,13 +262,13 @@ out_close: return -1; } -bool symsrc__possibly_runtime(struct symsrc *ss __used) +bool symsrc__possibly_runtime(struct symsrc *ss __maybe_unused) { /* Assume all sym sources could be a runtime image. 
*/ return true; } -bool symsrc__has_symtab(struct symsrc *ss __used) +bool symsrc__has_symtab(struct symsrc *ss __maybe_unused) { return false; } @@ -277,17 +279,19 @@ void symsrc__destroy(struct symsrc *ss) close(ss->fd); } -int dso__synthesize_plt_symbols(struct dso *dso __used, - struct symsrc *ss __used, - struct map *map __used, - symbol_filter_t filter __used) +int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused, + struct symsrc *ss __maybe_unused, + struct map *map __maybe_unused, + symbol_filter_t filter __maybe_unused) { return 0; } -int dso__load_sym(struct dso *dso, struct map *map __used, struct symsrc *ss, - struct symsrc *runtime_ss __used, - symbol_filter_t filter __used, int kmodule __used) +int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, + struct symsrc *ss, + struct symsrc *runtime_ss __maybe_unused, + symbol_filter_t filter __maybe_unused, + int kmodule __maybe_unused) { unsigned char *build_id[BUILD_ID_SIZE]; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index bbb24e95165..e2e8c697cff 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1755,7 +1755,7 @@ struct process_args { }; static int symbol__in_kernel(void *arg, const char *name, - char type __used, u64 start) + char type __maybe_unused, u64 start) { struct process_args *args = arg; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index dde8a26f7be..4ff45e30c72 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -21,14 +21,15 @@ #ifdef HAVE_CPLUS_DEMANGLE extern char *cplus_demangle(const char *, int); -static inline char *bfd_demangle(void __used *v, const char *c, int i) +static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i) { return cplus_demangle(c, i); } #else #ifdef NO_DEMANGLE -static inline char *bfd_demangle(void __used *v, const char __used *c, - int __used i) +static inline char *bfd_demangle(void __maybe_unused *v, + const char __maybe_unused *c, + int __maybe_unused i) { return NULL; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index a5a554efeb5..aa4c860a21d 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -221,7 +221,7 @@ void print_event(struct pevent *pevent, int cpu, void *data, int size, } void parse_proc_kallsyms(struct pevent *pevent, - char *file, unsigned int size __unused) + char *file, unsigned int size __maybe_unused) { unsigned long long addr; char *func; @@ -253,7 +253,7 @@ void parse_proc_kallsyms(struct pevent *pevent, } void parse_ftrace_printk(struct pevent *pevent, - char *file, unsigned int size __unused) + char *file, unsigned int size __maybe_unused) { unsigned long long addr; char *printk; diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 302ff262494..8715a1006d0 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -35,11 +35,11 @@ static int stop_script_unsupported(void) return 0; } -static void process_event_unsupported(union perf_event *event __unused, - struct perf_sample *sample __unused, - struct perf_evsel *evsel __unused, - struct machine *machine __unused, - struct addr_location *al __unused) +static void process_event_unsupported(union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct machine *machine __maybe_unused, + struct addr_location *al __maybe_unused) { } @@ -52,17 
+52,19 @@ static void print_python_unsupported_msg(void) "\n etc.\n"); } -static int python_start_script_unsupported(const char *script __unused, - int argc __unused, - const char **argv __unused) +static int python_start_script_unsupported(const char *script __maybe_unused, + int argc __maybe_unused, + const char **argv __maybe_unused) { print_python_unsupported_msg(); return -1; } -static int python_generate_script_unsupported(struct pevent *pevent __unused, - const char *outfile __unused) +static int python_generate_script_unsupported(struct pevent *pevent + __maybe_unused, + const char *outfile + __maybe_unused) { print_python_unsupported_msg(); @@ -114,17 +116,18 @@ static void print_perl_unsupported_msg(void) "\n etc.\n"); } -static int perl_start_script_unsupported(const char *script __unused, - int argc __unused, - const char **argv __unused) +static int perl_start_script_unsupported(const char *script __maybe_unused, + int argc __maybe_unused, + const char **argv __maybe_unused) { print_perl_unsupported_msg(); return -1; } -static int perl_generate_script_unsupported(struct pevent *pevent __unused, - const char *outfile __unused) +static int perl_generate_script_unsupported(struct pevent *pevent + __maybe_unused, + const char *outfile __maybe_unused) { print_perl_unsupported_msg(); diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c index 00a42aa8d5c..958723ba3d2 100644 --- a/tools/perf/util/unwind.c +++ b/tools/perf/util/unwind.c @@ -307,32 +307,36 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, need_unwind_info, arg); } -static int access_fpreg(unw_addr_space_t __used as, unw_regnum_t __used num, - unw_fpreg_t __used *val, int __used __write, - void __used *arg) +static int access_fpreg(unw_addr_space_t __maybe_unused as, + unw_regnum_t __maybe_unused num, + unw_fpreg_t __maybe_unused *val, + int __maybe_unused __write, + void __maybe_unused *arg) { pr_err("unwind: access_fpreg unsupported\n"); return -UNW_EINVAL; } -static int get_dyn_info_list_addr(unw_addr_space_t __used as, - unw_word_t __used *dil_addr, - void __used *arg) +static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as, + unw_word_t __maybe_unused *dil_addr, + void __maybe_unused *arg) { return -UNW_ENOINFO; } -static int resume(unw_addr_space_t __used as, unw_cursor_t __used *cu, - void __used *arg) +static int resume(unw_addr_space_t __maybe_unused as, + unw_cursor_t __maybe_unused *cu, + void __maybe_unused *arg) { pr_err("unwind: resume unsupported\n"); return -UNW_EINVAL; } static int -get_proc_name(unw_addr_space_t __used as, unw_word_t __used addr, - char __used *bufp, size_t __used buf_len, - unw_word_t __used *offp, void __used *arg) +get_proc_name(unw_addr_space_t __maybe_unused as, + unw_word_t __maybe_unused addr, + char __maybe_unused *bufp, size_t __maybe_unused buf_len, + unw_word_t __maybe_unused *offp, void __maybe_unused *arg) { pr_err("unwind: get_proc_name unsupported\n"); return -UNW_EINVAL; @@ -377,7 +381,7 @@ static int reg_value(unw_word_t *valp, struct regs_dump *regs, int id, return 0; } -static int access_mem(unw_addr_space_t __used as, +static int access_mem(unw_addr_space_t __maybe_unused as, unw_word_t addr, unw_word_t *valp, int __write, void *arg) { @@ -422,7 +426,7 @@ static int access_mem(unw_addr_space_t __used as, return 0; } -static int access_reg(unw_addr_space_t __used as, +static int access_reg(unw_addr_space_t __maybe_unused as, unw_regnum_t regnum, unw_word_t *valp, int __write, void *arg) { @@ -454,9 +458,9 @@ static 
int access_reg(unw_addr_space_t __used as, return 0; } -static void put_unwind_info(unw_addr_space_t __used as, - unw_proc_info_t *pi __used, - void *arg __used) +static void put_unwind_info(unw_addr_space_t __maybe_unused as, + unw_proc_info_t *pi __maybe_unused, + void *arg __maybe_unused) { pr_debug("unwind: put_unwind_info called\n"); } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index 919bd6ad850..a78c8b303bb 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -22,11 +22,12 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, int unwind__arch_reg_id(int regnum); #else static inline int -unwind__get_entries(unwind_entry_cb_t cb __used, void *arg __used, - struct machine *machine __used, - struct thread *thread __used, - u64 sample_uregs __used, - struct perf_sample *data __used) +unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, + void *arg __maybe_unused, + struct machine *machine __maybe_unused, + struct thread *thread __maybe_unused, + u64 sample_uregs __maybe_unused, + struct perf_sample *data __maybe_unused) { return 0; } diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c index 73e900edb5a..19f15b65070 100644 --- a/tools/perf/util/wrapper.c +++ b/tools/perf/util/wrapper.c @@ -7,7 +7,8 @@ * There's no pack memory to release - but stay close to the Git * version so wrap this away: */ -static inline void release_pack_memory(size_t size __used, int flag __used) +static inline void release_pack_memory(size_t size __maybe_unused, + int flag __maybe_unused) { } From 4218e6734197f3842fc9b6362f12973918d913aa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Sep 2012 13:18:47 -0300 Subject: [PATCH 190/282] perf sched: Remove unused thread parameter From the tracepoint handling routines. 
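For readers following the refactor, this patch and the next one (PATCH 191/282, "perf sched: Use perf_tool as ancestor") converge on a single handler shape: every tracepoint handler takes the tool, the sample and the machine (no per-sample thread), and per-run state is reached by embedding struct perf_tool as the first member of struct perf_sched and recovering it with container_of() inside the callback. The standalone sketch below only illustrates that calling convention; sample_tool, fake_sample, fake_machine and sched_state are simplified stand-ins for perf_tool, perf_sample, machine and perf_sched rather than the real perf definitions, and the actual handlers additionally receive the tracepoint's struct event_format.

/*
 * Minimal, self-contained sketch of the callback pattern the two patches
 * introduce.  Types are hypothetical stand-ins, not the perf ones.
 */
#include <stddef.h>
#include <stdio.h>

#ifndef container_of
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#endif

struct sample_tool  { const char *name; };             /* stands in for perf_tool   */
struct fake_sample  { int cpu; unsigned long long time; }; /* stands in for perf_sample */
struct fake_machine { int pid; };                      /* stands in for machine     */

/* State embeds the tool as its first member, like struct perf_sched does. */
struct sched_state {
	struct sample_tool tool;
	unsigned long nr_events;
};

/* Handler signature after the unused thread parameter is dropped. */
typedef int (*tp_handler)(struct sample_tool *tool,
			  struct fake_sample *sample,
			  struct fake_machine *machine);

static int handle_wakeup(struct sample_tool *tool,
			 struct fake_sample *sample,
			 struct fake_machine *machine)
{
	/* Recover the outer state from the embedded tool pointer. */
	struct sched_state *sched = container_of(tool, struct sched_state, tool);

	sched->nr_events++;
	printf("wakeup on cpu %d at %llu (machine pid %d), %lu events so far\n",
	       sample->cpu, sample->time, machine->pid, sched->nr_events);
	return 0;
}

int main(void)
{
	struct sched_state sched = { .tool = { .name = "sched" } };
	struct fake_sample sample = { .cpu = 1, .time = 123456789ULL };
	struct fake_machine machine = { .pid = 0 };
	tp_handler f = handle_wakeup;   /* dispatched like evsel->handler.func */

	return f(&sched.tool, &sample, &machine);
}

The design point is the same one the "Use perf_tool as ancestor" message hints at with its before/after size numbers: once the callbacks can get back to their own state via container_of(), the file-scope globals in builtin-sched.c become fields of struct perf_sched and can be dropped from the data/bss segments.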
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-mcqd9mv34z6he0wqiz4a3mh9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 82e8ec2c43b..af11b1aa1bd 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1372,8 +1372,7 @@ static struct trace_sched_handler *trace_handler; static int process_sched_wakeup_event(struct perf_tool *tool __maybe_unused, struct event_format *event, struct perf_sample *sample, - struct machine *machine, - struct thread *thread __maybe_unused) + struct machine *machine) { void *data = sample->raw_data; struct trace_wakeup_event wakeup_event; @@ -1489,8 +1488,7 @@ map_switch_event(struct trace_switch_event *switch_event, static int process_sched_switch_event(struct perf_tool *tool __maybe_unused, struct event_format *event, struct perf_sample *sample, - struct machine *machine, - struct thread *thread __maybe_unused) + struct machine *machine) { int this_cpu = sample->cpu, err = 0; void *data = sample->raw_data; @@ -1524,8 +1522,7 @@ static int process_sched_switch_event(struct perf_tool *tool __maybe_unused, static int process_sched_runtime_event(struct perf_tool *tool __maybe_unused, struct event_format *event, struct perf_sample *sample, - struct machine *machine, - struct thread *thread __maybe_unused) + struct machine *machine) { void *data = sample->raw_data; struct trace_runtime_event runtime_event; @@ -1545,8 +1542,7 @@ static int process_sched_runtime_event(struct perf_tool *tool __maybe_unused, static int process_sched_fork_event(struct perf_tool *tool __maybe_unused, struct event_format *event, struct perf_sample *sample, - struct machine *machine __maybe_unused, - struct thread *thread __maybe_unused) + struct machine *machine __maybe_unused) { void *data = sample->raw_data; struct trace_fork_event fork_event; @@ -1568,8 +1564,7 @@ static int process_sched_fork_event(struct perf_tool *tool __maybe_unused, static int process_sched_exit_event(struct perf_tool *tool __maybe_unused, struct event_format *event, struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused, - struct thread *thread __maybe_unused) + struct machine *machine __maybe_unused) { if (verbose) printf("sched_exit event %p\n", event); @@ -1580,8 +1575,7 @@ static int process_sched_exit_event(struct perf_tool *tool __maybe_unused, static int process_sched_migrate_task_event(struct perf_tool *tool __maybe_unused, struct event_format *event, struct perf_sample *sample, - struct machine *machine, - struct thread *thread __maybe_unused) + struct machine *machine) { void *data = sample->raw_data; struct trace_migrate_task_event migrate_task_event; @@ -1603,8 +1597,7 @@ static int process_sched_migrate_task_event(struct perf_tool *tool __maybe_unuse typedef int (*tracepoint_handler)(struct perf_tool *tool, struct event_format *tp_format, struct perf_sample *sample, - struct machine *machine, - struct thread *thread); + struct machine *machine); static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, @@ -1626,7 +1619,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ if (evsel->handler.func != NULL) { tracepoint_handler f = 
evsel->handler.func; - err = f(tool, evsel->tp_format, sample, machine, thread); + err = f(tool, evsel->tp_format, sample, machine); } return err; From 0e9b07e574e544c1e840c59dabf39fef120620ae Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Sep 2012 17:29:27 -0300 Subject: [PATCH 191/282] perf sched: Use perf_tool as ancestor So that we can remove all the globals. Before: text data bss dec hex filename 1586833 110368 1438600 3135801 2fd939 /tmp/oldperf After: text data bss dec hex filename 1629329 93568 848328 2571225 273bd9 /root/bin/perf Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-oph40vikij0crjz4eyapneov@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 1136 ++++++++++++++++++------------------ 1 file changed, 562 insertions(+), 574 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index af11b1aa1bd..79f88fa3f7a 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -23,26 +23,12 @@ #include #include -static const char *input_name; - -static char default_sort_order[] = "avg, max, switch, runtime"; -static const char *sort_order = default_sort_order; - -static int profile_cpu = -1; - #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 - -static u64 run_measurement_overhead; -static u64 sleep_measurement_overhead; - #define COMM_LEN 20 #define SYM_LEN 129 - #define MAX_PID 65536 -static unsigned long nr_tasks; - struct sched_atom; struct task_desc { @@ -80,44 +66,6 @@ struct sched_atom { struct task_desc *wakee; }; -static struct task_desc *pid_to_task[MAX_PID]; - -static struct task_desc **tasks; - -static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER; -static u64 start_time; - -static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER; - -static unsigned long nr_run_events; -static unsigned long nr_sleep_events; -static unsigned long nr_wakeup_events; - -static unsigned long nr_sleep_corrections; -static unsigned long nr_run_events_optimized; - -static unsigned long targetless_wakeups; -static unsigned long multitarget_wakeups; - -static u64 cpu_usage; -static u64 runavg_cpu_usage; -static u64 parent_cpu_usage; -static u64 runavg_parent_cpu_usage; - -static unsigned long nr_runs; -static u64 sum_runtime; -static u64 sum_fluct; -static u64 run_avg; - -static unsigned int replay_repeat = 10; -static unsigned long nr_timestamps; -static unsigned long nr_unordered_timestamps; -static unsigned long nr_state_machine_bugs; -static unsigned long nr_context_switch_bugs; -static unsigned long nr_events; -static unsigned long nr_lost_chunks; -static unsigned long nr_lost_events; - #define TASK_STATE_TO_CHAR_STR "RSDTtZX" enum thread_state { @@ -149,11 +97,169 @@ struct work_atoms { typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); -static struct rb_root atom_root, sorted_atom_root; +struct trace_switch_event { + u32 size; -static u64 all_runtime; -static u64 all_count; + u16 common_type; + u8 common_flags; + u8 common_preempt_count; + u32 common_pid; + u32 common_tgid; + char prev_comm[16]; + u32 prev_pid; + u32 prev_prio; + u64 prev_state; + char next_comm[16]; + u32 next_pid; + u32 next_prio; +}; + +struct trace_runtime_event { + u32 size; + + u16 common_type; + u8 common_flags; + u8 common_preempt_count; + u32 common_pid; + u32 common_tgid; + + char comm[16]; + u32 pid; + u64 runtime; 
+ u64 vruntime; +}; + +struct trace_wakeup_event { + u32 size; + + u16 common_type; + u8 common_flags; + u8 common_preempt_count; + u32 common_pid; + u32 common_tgid; + + char comm[16]; + u32 pid; + + u32 prio; + u32 success; + u32 cpu; +}; + +struct trace_fork_event { + u32 size; + + u16 common_type; + u8 common_flags; + u8 common_preempt_count; + u32 common_pid; + u32 common_tgid; + + char parent_comm[16]; + u32 parent_pid; + char child_comm[16]; + u32 child_pid; +}; + +struct trace_migrate_task_event { + u32 size; + + u16 common_type; + u8 common_flags; + u8 common_preempt_count; + u32 common_pid; + u32 common_tgid; + + char comm[16]; + u32 pid; + + u32 prio; + u32 cpu; +}; + +struct perf_sched; + +struct trace_sched_handler { + int (*switch_event)(struct perf_sched *sched, + struct trace_switch_event *event, + struct machine *machine, + struct event_format *tp_format, + struct perf_sample *sample); + + int (*runtime_event)(struct perf_sched *sched, + struct trace_runtime_event *event, + struct machine *machine, + struct perf_sample *sample); + + int (*wakeup_event)(struct perf_sched *sched, + struct trace_wakeup_event *event, + struct machine *machine, + struct event_format *tp_format, + struct perf_sample *sample); + + int (*fork_event)(struct perf_sched *sched, + struct trace_fork_event *event, + struct event_format *tp_format); + + int (*migrate_task_event)(struct perf_sched *sched, + struct trace_migrate_task_event *event, + struct machine *machine, + struct perf_sample *sample); +}; + +struct perf_sched { + struct perf_tool tool; + const char *input_name; + const char *sort_order; + unsigned long nr_tasks; + struct task_desc *pid_to_task[MAX_PID]; + struct task_desc **tasks; + const struct trace_sched_handler *tp_handler; + pthread_mutex_t start_work_mutex; + pthread_mutex_t work_done_wait_mutex; + int profile_cpu; +/* + * Track the current task - that way we can know whether there's any + * weird events, such as a task being switched away that is not current. 
+ */ + int max_cpu; + u32 curr_pid[MAX_CPUS]; + struct thread *curr_thread[MAX_CPUS]; + char next_shortname1; + char next_shortname2; + unsigned int replay_repeat; + unsigned long nr_run_events; + unsigned long nr_sleep_events; + unsigned long nr_wakeup_events; + unsigned long nr_sleep_corrections; + unsigned long nr_run_events_optimized; + unsigned long targetless_wakeups; + unsigned long multitarget_wakeups; + unsigned long nr_runs; + unsigned long nr_timestamps; + unsigned long nr_unordered_timestamps; + unsigned long nr_state_machine_bugs; + unsigned long nr_context_switch_bugs; + unsigned long nr_events; + unsigned long nr_lost_chunks; + unsigned long nr_lost_events; + u64 run_measurement_overhead; + u64 sleep_measurement_overhead; + u64 start_time; + u64 cpu_usage; + u64 runavg_cpu_usage; + u64 parent_cpu_usage; + u64 runavg_parent_cpu_usage; + u64 sum_runtime; + u64 sum_fluct; + u64 run_avg; + u64 all_runtime; + u64 all_count; + u64 cpu_last_switched[MAX_CPUS]; + struct rb_root atom_root, sorted_atom_root; + struct list_head sort_list, cmp_pid; +}; static u64 get_nsecs(void) { @@ -164,13 +270,13 @@ static u64 get_nsecs(void) return ts.tv_sec * 1000000000ULL + ts.tv_nsec; } -static void burn_nsecs(u64 nsecs) +static void burn_nsecs(struct perf_sched *sched, u64 nsecs) { u64 T0 = get_nsecs(), T1; do { T1 = get_nsecs(); - } while (T1 + run_measurement_overhead < T0 + nsecs); + } while (T1 + sched->run_measurement_overhead < T0 + nsecs); } static void sleep_nsecs(u64 nsecs) @@ -183,24 +289,24 @@ static void sleep_nsecs(u64 nsecs) nanosleep(&ts, NULL); } -static void calibrate_run_measurement_overhead(void) +static void calibrate_run_measurement_overhead(struct perf_sched *sched) { u64 T0, T1, delta, min_delta = 1000000000ULL; int i; for (i = 0; i < 10; i++) { T0 = get_nsecs(); - burn_nsecs(0); + burn_nsecs(sched, 0); T1 = get_nsecs(); delta = T1-T0; min_delta = min(min_delta, delta); } - run_measurement_overhead = min_delta; + sched->run_measurement_overhead = min_delta; printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); } -static void calibrate_sleep_measurement_overhead(void) +static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) { u64 T0, T1, delta, min_delta = 1000000000ULL; int i; @@ -213,7 +319,7 @@ static void calibrate_sleep_measurement_overhead(void) min_delta = min(min_delta, delta); } min_delta -= 10000; - sleep_measurement_overhead = min_delta; + sched->sleep_measurement_overhead = min_delta; printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); } @@ -246,8 +352,8 @@ static struct sched_atom *last_event(struct task_desc *task) return task->atoms[task->nr_events - 1]; } -static void -add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) +static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, + u64 timestamp, u64 duration) { struct sched_atom *event, *curr_event = last_event(task); @@ -256,7 +362,7 @@ add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) * to it: */ if (curr_event && curr_event->type == SCHED_EVENT_RUN) { - nr_run_events_optimized++; + sched->nr_run_events_optimized++; curr_event->duration += duration; return; } @@ -266,12 +372,11 @@ add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) event->type = SCHED_EVENT_RUN; event->duration = duration; - nr_run_events++; + sched->nr_run_events++; } -static void -add_sched_event_wakeup(struct task_desc *task, u64 timestamp, - struct task_desc *wakee) +static void 
add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, + u64 timestamp, struct task_desc *wakee) { struct sched_atom *event, *wakee_event; @@ -281,11 +386,11 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp, wakee_event = last_event(wakee); if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { - targetless_wakeups++; + sched->targetless_wakeups++; return; } if (wakee_event->wait_sem) { - multitarget_wakeups++; + sched->multitarget_wakeups++; return; } @@ -294,89 +399,89 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp, wakee_event->specific_wait = 1; event->wait_sem = wakee_event->wait_sem; - nr_wakeup_events++; + sched->nr_wakeup_events++; } -static void -add_sched_event_sleep(struct task_desc *task, u64 timestamp, - u64 task_state __maybe_unused) +static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, + u64 timestamp, u64 task_state __maybe_unused) { struct sched_atom *event = get_new_event(task, timestamp); event->type = SCHED_EVENT_SLEEP; - nr_sleep_events++; + sched->nr_sleep_events++; } -static struct task_desc *register_pid(unsigned long pid, const char *comm) +static struct task_desc *register_pid(struct perf_sched *sched, + unsigned long pid, const char *comm) { struct task_desc *task; BUG_ON(pid >= MAX_PID); - task = pid_to_task[pid]; + task = sched->pid_to_task[pid]; if (task) return task; task = zalloc(sizeof(*task)); task->pid = pid; - task->nr = nr_tasks; + task->nr = sched->nr_tasks; strcpy(task->comm, comm); /* * every task starts in sleeping state - this gets ignored * if there's no wakeup pointing to this sleep state: */ - add_sched_event_sleep(task, 0, 0); + add_sched_event_sleep(sched, task, 0, 0); - pid_to_task[pid] = task; - nr_tasks++; - tasks = realloc(tasks, nr_tasks*sizeof(struct task_task *)); - BUG_ON(!tasks); - tasks[task->nr] = task; + sched->pid_to_task[pid] = task; + sched->nr_tasks++; + sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_task *)); + BUG_ON(!sched->tasks); + sched->tasks[task->nr] = task; if (verbose) - printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm); + printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); return task; } -static void print_task_traces(void) +static void print_task_traces(struct perf_sched *sched) { struct task_desc *task; unsigned long i; - for (i = 0; i < nr_tasks; i++) { - task = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + task = sched->tasks[i]; printf("task %6ld (%20s:%10ld), nr_events: %ld\n", task->nr, task->comm, task->pid, task->nr_events); } } -static void add_cross_task_wakeups(void) +static void add_cross_task_wakeups(struct perf_sched *sched) { struct task_desc *task1, *task2; unsigned long i, j; - for (i = 0; i < nr_tasks; i++) { - task1 = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + task1 = sched->tasks[i]; j = i + 1; - if (j == nr_tasks) + if (j == sched->nr_tasks) j = 0; - task2 = tasks[j]; - add_sched_event_wakeup(task1, 0, task2); + task2 = sched->tasks[j]; + add_sched_event_wakeup(sched, task1, 0, task2); } } -static void process_sched_event(struct task_desc *this_task __maybe_unused, - struct sched_atom *atom) +static void perf_sched__process_event(struct perf_sched *sched, + struct sched_atom *atom) { int ret = 0; switch (atom->type) { case SCHED_EVENT_RUN: - burn_nsecs(atom->duration); + burn_nsecs(sched, atom->duration); break; case SCHED_EVENT_SLEEP: if (atom->wait_sem) @@ -439,14 +544,23 @@ static u64 get_cpu_usage_nsec_self(int fd) 
return runtime; } +struct sched_thread_parms { + struct task_desc *task; + struct perf_sched *sched; +}; + static void *thread_func(void *ctx) { - struct task_desc *this_task = ctx; + struct sched_thread_parms *parms = ctx; + struct task_desc *this_task = parms->task; + struct perf_sched *sched = parms->sched; u64 cpu_usage_0, cpu_usage_1; unsigned long i, ret; char comm2[22]; int fd; + free(parms); + sprintf(comm2, ":%s", this_task->comm); prctl(PR_SET_NAME, comm2); fd = self_open_counters(); @@ -455,16 +569,16 @@ static void *thread_func(void *ctx) again: ret = sem_post(&this_task->ready_for_work); BUG_ON(ret); - ret = pthread_mutex_lock(&start_work_mutex); + ret = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(ret); - ret = pthread_mutex_unlock(&start_work_mutex); + ret = pthread_mutex_unlock(&sched->start_work_mutex); BUG_ON(ret); cpu_usage_0 = get_cpu_usage_nsec_self(fd); for (i = 0; i < this_task->nr_events; i++) { this_task->curr_event = i; - process_sched_event(this_task, this_task->atoms[i]); + perf_sched__process_event(sched, this_task->atoms[i]); } cpu_usage_1 = get_cpu_usage_nsec_self(fd); @@ -472,15 +586,15 @@ again: ret = sem_post(&this_task->work_done_sem); BUG_ON(ret); - ret = pthread_mutex_lock(&work_done_wait_mutex); + ret = pthread_mutex_lock(&sched->work_done_wait_mutex); BUG_ON(ret); - ret = pthread_mutex_unlock(&work_done_wait_mutex); + ret = pthread_mutex_unlock(&sched->work_done_wait_mutex); BUG_ON(ret); goto again; } -static void create_tasks(void) +static void create_tasks(struct perf_sched *sched) { struct task_desc *task; pthread_attr_t attr; @@ -492,128 +606,129 @@ static void create_tasks(void) err = pthread_attr_setstacksize(&attr, (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); BUG_ON(err); - err = pthread_mutex_lock(&start_work_mutex); + err = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(err); - err = pthread_mutex_lock(&work_done_wait_mutex); + err = pthread_mutex_lock(&sched->work_done_wait_mutex); BUG_ON(err); - for (i = 0; i < nr_tasks; i++) { - task = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + struct sched_thread_parms *parms = malloc(sizeof(*parms)); + BUG_ON(parms == NULL); + parms->task = task = sched->tasks[i]; + parms->sched = sched; sem_init(&task->sleep_sem, 0, 0); sem_init(&task->ready_for_work, 0, 0); sem_init(&task->work_done_sem, 0, 0); task->curr_event = 0; - err = pthread_create(&task->thread, &attr, thread_func, task); + err = pthread_create(&task->thread, &attr, thread_func, parms); BUG_ON(err); } } -static void wait_for_tasks(void) +static void wait_for_tasks(struct perf_sched *sched) { u64 cpu_usage_0, cpu_usage_1; struct task_desc *task; unsigned long i, ret; - start_time = get_nsecs(); - cpu_usage = 0; - pthread_mutex_unlock(&work_done_wait_mutex); + sched->start_time = get_nsecs(); + sched->cpu_usage = 0; + pthread_mutex_unlock(&sched->work_done_wait_mutex); - for (i = 0; i < nr_tasks; i++) { - task = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + task = sched->tasks[i]; ret = sem_wait(&task->ready_for_work); BUG_ON(ret); sem_init(&task->ready_for_work, 0, 0); } - ret = pthread_mutex_lock(&work_done_wait_mutex); + ret = pthread_mutex_lock(&sched->work_done_wait_mutex); BUG_ON(ret); cpu_usage_0 = get_cpu_usage_nsec_parent(); - pthread_mutex_unlock(&start_work_mutex); + pthread_mutex_unlock(&sched->start_work_mutex); - for (i = 0; i < nr_tasks; i++) { - task = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + task = sched->tasks[i]; ret = sem_wait(&task->work_done_sem); BUG_ON(ret); 
sem_init(&task->work_done_sem, 0, 0); - cpu_usage += task->cpu_usage; + sched->cpu_usage += task->cpu_usage; task->cpu_usage = 0; } cpu_usage_1 = get_cpu_usage_nsec_parent(); - if (!runavg_cpu_usage) - runavg_cpu_usage = cpu_usage; - runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10; + if (!sched->runavg_cpu_usage) + sched->runavg_cpu_usage = sched->cpu_usage; + sched->runavg_cpu_usage = (sched->runavg_cpu_usage * 9 + sched->cpu_usage) / 10; - parent_cpu_usage = cpu_usage_1 - cpu_usage_0; - if (!runavg_parent_cpu_usage) - runavg_parent_cpu_usage = parent_cpu_usage; - runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 + - parent_cpu_usage)/10; + sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; + if (!sched->runavg_parent_cpu_usage) + sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; + sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * 9 + + sched->parent_cpu_usage)/10; - ret = pthread_mutex_lock(&start_work_mutex); + ret = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(ret); - for (i = 0; i < nr_tasks; i++) { - task = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + task = sched->tasks[i]; sem_init(&task->sleep_sem, 0, 0); task->curr_event = 0; } } -static void run_one_test(void) +static void run_one_test(struct perf_sched *sched) { u64 T0, T1, delta, avg_delta, fluct; T0 = get_nsecs(); - wait_for_tasks(); + wait_for_tasks(sched); T1 = get_nsecs(); delta = T1 - T0; - sum_runtime += delta; - nr_runs++; + sched->sum_runtime += delta; + sched->nr_runs++; - avg_delta = sum_runtime / nr_runs; + avg_delta = sched->sum_runtime / sched->nr_runs; if (delta < avg_delta) fluct = avg_delta - delta; else fluct = delta - avg_delta; - sum_fluct += fluct; - if (!run_avg) - run_avg = delta; - run_avg = (run_avg*9 + delta)/10; + sched->sum_fluct += fluct; + if (!sched->run_avg) + sched->run_avg = delta; + sched->run_avg = (sched->run_avg * 9 + delta) / 10; - printf("#%-3ld: %0.3f, ", - nr_runs, (double)delta/1000000.0); + printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0); - printf("ravg: %0.2f, ", - (double)run_avg/1e6); + printf("ravg: %0.2f, ", (double)sched->run_avg / 1e6); printf("cpu: %0.2f / %0.2f", - (double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6); + (double)sched->cpu_usage / 1e6, (double)sched->runavg_cpu_usage / 1e6); #if 0 /* * rusage statistics done by the parent, these are less - * accurate than the sum_exec_runtime based statistics: + * accurate than the sched->sum_exec_runtime based statistics: */ printf(" [%0.2f / %0.2f]", - (double)parent_cpu_usage/1e6, - (double)runavg_parent_cpu_usage/1e6); + (double)sched->parent_cpu_usage/1e6, + (double)sched->runavg_parent_cpu_usage/1e6); #endif printf("\n"); - if (nr_sleep_corrections) - printf(" (%ld sleep corrections)\n", nr_sleep_corrections); - nr_sleep_corrections = 0; + if (sched->nr_sleep_corrections) + printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); + sched->nr_sleep_corrections = 0; } -static void test_calibrations(void) +static void test_calibrations(struct perf_sched *sched) { u64 T0, T1; T0 = get_nsecs(); - burn_nsecs(1e6); + burn_nsecs(sched, 1e6); T1 = get_nsecs(); printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); @@ -643,115 +758,9 @@ do { \ FILL_FIELD(ptr, common_tgid, event, data); \ } while (0) - - -struct trace_switch_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char prev_comm[16]; - u32 prev_pid; - u32 prev_prio; - u64 prev_state; - char next_comm[16]; - 
u32 next_pid; - u32 next_prio; -}; - -struct trace_runtime_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char comm[16]; - u32 pid; - u64 runtime; - u64 vruntime; -}; - -struct trace_wakeup_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char comm[16]; - u32 pid; - - u32 prio; - u32 success; - u32 cpu; -}; - -struct trace_fork_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char parent_comm[16]; - u32 parent_pid; - char child_comm[16]; - u32 child_pid; -}; - -struct trace_migrate_task_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char comm[16]; - u32 pid; - - u32 prio; - u32 cpu; -}; - -struct trace_sched_handler { - int (*switch_event)(struct trace_switch_event *event, - struct machine *machine, - struct event_format *tp_format, - struct perf_sample *sample); - - int (*runtime_event)(struct trace_runtime_event *event, - struct machine *machine, - struct perf_sample *sample); - - int (*wakeup_event)(struct trace_wakeup_event *event, - struct machine *machine, - struct event_format *tp_format, - struct perf_sample *sample); - - int (*fork_event)(struct trace_fork_event *event, - struct event_format *tp_format); - - int (*migrate_task_event)(struct trace_migrate_task_event *event, - struct machine *machine, - struct perf_sample *sample); -}; - - static int -replay_wakeup_event(struct trace_wakeup_event *wakeup_event, +replay_wakeup_event(struct perf_sched *sched, + struct trace_wakeup_event *wakeup_event, struct machine *machine __maybe_unused, struct event_format *event, struct perf_sample *sample) { @@ -761,22 +770,19 @@ replay_wakeup_event(struct trace_wakeup_event *wakeup_event, printf("sched_wakeup event %p\n", event); printf(" ... 
pid %d woke up %s/%d\n", - wakeup_event->common_pid, - wakeup_event->comm, - wakeup_event->pid); + wakeup_event->common_pid, wakeup_event->comm, wakeup_event->pid); } - waker = register_pid(wakeup_event->common_pid, ""); - wakee = register_pid(wakeup_event->pid, wakeup_event->comm); + waker = register_pid(sched, wakeup_event->common_pid, ""); + wakee = register_pid(sched, wakeup_event->pid, wakeup_event->comm); - add_sched_event_wakeup(waker, sample->time, wakee); + add_sched_event_wakeup(sched, waker, sample->time, wakee); return 0; } -static u64 cpu_last_switched[MAX_CPUS]; - static int -replay_switch_event(struct trace_switch_event *switch_event, +replay_switch_event(struct perf_sched *sched, + struct trace_switch_event *switch_event, struct machine *machine __maybe_unused, struct event_format *event, struct perf_sample *sample) @@ -792,7 +798,7 @@ replay_switch_event(struct trace_switch_event *switch_event, if (cpu >= MAX_CPUS || cpu < 0) return 0; - timestamp0 = cpu_last_switched[cpu]; + timestamp0 = sched->cpu_last_switched[cpu]; if (timestamp0) delta = timestamp - timestamp0; else @@ -810,20 +816,19 @@ replay_switch_event(struct trace_switch_event *switch_event, delta); } - prev = register_pid(switch_event->prev_pid, switch_event->prev_comm); - next = register_pid(switch_event->next_pid, switch_event->next_comm); + prev = register_pid(sched, switch_event->prev_pid, switch_event->prev_comm); + next = register_pid(sched, switch_event->next_pid, switch_event->next_comm); - cpu_last_switched[cpu] = timestamp; + sched->cpu_last_switched[cpu] = timestamp; - add_sched_event_run(prev, timestamp, delta); - add_sched_event_sleep(prev, timestamp, switch_event->prev_state); + add_sched_event_run(sched, prev, timestamp, delta); + add_sched_event_sleep(sched, prev, timestamp, switch_event->prev_state); return 0; } - static int -replay_fork_event(struct trace_fork_event *fork_event, +replay_fork_event(struct perf_sched *sched, struct trace_fork_event *fork_event, struct event_format *event) { if (verbose) { @@ -831,25 +836,17 @@ replay_fork_event(struct trace_fork_event *fork_event, printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid); printf("... 
child: %s/%d\n", fork_event->child_comm, fork_event->child_pid); } - register_pid(fork_event->parent_pid, fork_event->parent_comm); - register_pid(fork_event->child_pid, fork_event->child_comm); + register_pid(sched, fork_event->parent_pid, fork_event->parent_comm); + register_pid(sched, fork_event->child_pid, fork_event->child_comm); return 0; } -static struct trace_sched_handler replay_ops = { - .wakeup_event = replay_wakeup_event, - .switch_event = replay_switch_event, - .fork_event = replay_fork_event, -}; - struct sort_dimension { const char *name; sort_fn_t cmp; struct list_head list; }; -static LIST_HEAD(cmp_pid); - static int thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) { @@ -918,7 +915,7 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data, rb_insert_color(&data->node, root); } -static int thread_atoms_insert(struct thread *thread) +static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) { struct work_atoms *atoms = zalloc(sizeof(*atoms)); if (!atoms) { @@ -928,11 +925,12 @@ static int thread_atoms_insert(struct thread *thread) atoms->thread = thread; INIT_LIST_HEAD(&atoms->work_list); - __thread_latency_insert(&atom_root, atoms, &cmp_pid); + __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); return 0; } -static int latency_fork_event(struct trace_fork_event *fork_event __maybe_unused, +static int latency_fork_event(struct perf_sched *sched __maybe_unused, + struct trace_fork_event *fork_event __maybe_unused, struct event_format *event __maybe_unused) { /* should insert the newcomer */ @@ -1014,7 +1012,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) } static int -latency_switch_event(struct trace_switch_event *switch_event, +latency_switch_event(struct perf_sched *sched, + struct trace_switch_event *switch_event, struct machine *machine, struct event_format *event __maybe_unused, struct perf_sample *sample) @@ -1027,8 +1026,8 @@ latency_switch_event(struct trace_switch_event *switch_event, BUG_ON(cpu >= MAX_CPUS || cpu < 0); - timestamp0 = cpu_last_switched[cpu]; - cpu_last_switched[cpu] = timestamp; + timestamp0 = sched->cpu_last_switched[cpu]; + sched->cpu_last_switched[cpu] = timestamp; if (timestamp0) delta = timestamp - timestamp0; else @@ -1042,11 +1041,11 @@ latency_switch_event(struct trace_switch_event *switch_event, sched_out = machine__findnew_thread(machine, switch_event->prev_pid); sched_in = machine__findnew_thread(machine, switch_event->next_pid); - out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); + out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); if (!out_events) { - if (thread_atoms_insert(sched_out)) + if (thread_atoms_insert(sched, sched_out)) return -1; - out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); + out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); if (!out_events) { pr_err("out-event: Internal tree error"); return -1; @@ -1055,11 +1054,11 @@ latency_switch_event(struct trace_switch_event *switch_event, if (add_sched_out_event(out_events, sched_out_state(switch_event), timestamp)) return -1; - in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); + in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); if (!in_events) { - if (thread_atoms_insert(sched_in)) + if (thread_atoms_insert(sched, sched_in)) return -1; - in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); + in_events = 
thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); if (!in_events) { pr_err("in-event: Internal tree error"); return -1; @@ -1077,19 +1076,20 @@ latency_switch_event(struct trace_switch_event *switch_event, } static int -latency_runtime_event(struct trace_runtime_event *runtime_event, +latency_runtime_event(struct perf_sched *sched, + struct trace_runtime_event *runtime_event, struct machine *machine, struct perf_sample *sample) { struct thread *thread = machine__findnew_thread(machine, runtime_event->pid); - struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); + struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); u64 timestamp = sample->time; int cpu = sample->cpu; BUG_ON(cpu >= MAX_CPUS || cpu < 0); if (!atoms) { - if (thread_atoms_insert(thread)) + if (thread_atoms_insert(sched, thread)) return -1; - atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); + atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); if (!atoms) { pr_debug("in-event: Internal tree error"); return -1; @@ -1103,7 +1103,8 @@ latency_runtime_event(struct trace_runtime_event *runtime_event, } static int -latency_wakeup_event(struct trace_wakeup_event *wakeup_event, +latency_wakeup_event(struct perf_sched *sched, + struct trace_wakeup_event *wakeup_event, struct machine *machine, struct event_format *event __maybe_unused, struct perf_sample *sample) @@ -1118,11 +1119,11 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, return 0; wakee = machine__findnew_thread(machine, wakeup_event->pid); - atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); + atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); if (!atoms) { - if (thread_atoms_insert(wakee)) + if (thread_atoms_insert(sched, wakee)) return -1; - atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); + atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); if (!atoms) { pr_debug("wakeup-event: Internal tree error"); return -1; @@ -1140,12 +1141,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, * one CPU, or are only looking at only one, so don't * make useless noise. */ - if (profile_cpu == -1 && atom->state != THREAD_SLEEPING) - nr_state_machine_bugs++; + if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) + sched->nr_state_machine_bugs++; - nr_timestamps++; + sched->nr_timestamps++; if (atom->sched_out_time > timestamp) { - nr_unordered_timestamps++; + sched->nr_unordered_timestamps++; return 0; } @@ -1155,7 +1156,8 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, } static int -latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, +latency_migrate_task_event(struct perf_sched *sched, + struct trace_migrate_task_event *migrate_task_event, struct machine *machine, struct perf_sample *sample) { u64 timestamp = sample->time; @@ -1166,16 +1168,16 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, /* * Only need to worry about migration when profiling one CPU. 
*/ - if (profile_cpu == -1) + if (sched->profile_cpu == -1) return 0; migrant = machine__findnew_thread(machine, migrate_task_event->pid); - atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); + atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); if (!atoms) { - if (thread_atoms_insert(migrant)) + if (thread_atoms_insert(sched, migrant)) return -1; - register_pid(migrant->pid, migrant->comm); - atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); + register_pid(sched, migrant->pid, migrant->comm); + atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); if (!atoms) { pr_debug("migration-event: Internal tree error"); return -1; @@ -1189,23 +1191,15 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, atom = list_entry(atoms->work_list.prev, struct work_atom, list); atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; - nr_timestamps++; + sched->nr_timestamps++; if (atom->sched_out_time > timestamp) - nr_unordered_timestamps++; + sched->nr_unordered_timestamps++; return 0; } -static struct trace_sched_handler lat_ops = { - .wakeup_event = latency_wakeup_event, - .switch_event = latency_switch_event, - .runtime_event = latency_runtime_event, - .fork_event = latency_fork_event, - .migrate_task_event = latency_migrate_task_event, -}; - -static void output_lat_thread(struct work_atoms *work_list) +static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) { int i; int ret; @@ -1219,8 +1213,8 @@ static void output_lat_thread(struct work_atoms *work_list) if (!strcmp(work_list->thread->comm, "swapper")) return; - all_runtime += work_list->total_runtime; - all_count += work_list->nb_atoms; + sched->all_runtime += work_list->total_runtime; + sched->all_count += work_list->nb_atoms; ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid); @@ -1246,11 +1240,6 @@ static int pid_cmp(struct work_atoms *l, struct work_atoms *r) return 0; } -static struct sort_dimension pid_sort_dimension = { - .name = "pid", - .cmp = pid_cmp, -}; - static int avg_cmp(struct work_atoms *l, struct work_atoms *r) { u64 avgl, avgr; @@ -1272,11 +1261,6 @@ static int avg_cmp(struct work_atoms *l, struct work_atoms *r) return 0; } -static struct sort_dimension avg_sort_dimension = { - .name = "avg", - .cmp = avg_cmp, -}; - static int max_cmp(struct work_atoms *l, struct work_atoms *r) { if (l->max_lat < r->max_lat) @@ -1287,11 +1271,6 @@ static int max_cmp(struct work_atoms *l, struct work_atoms *r) return 0; } -static struct sort_dimension max_sort_dimension = { - .name = "max", - .cmp = max_cmp, -}; - static int switch_cmp(struct work_atoms *l, struct work_atoms *r) { if (l->nb_atoms < r->nb_atoms) @@ -1302,11 +1281,6 @@ static int switch_cmp(struct work_atoms *l, struct work_atoms *r) return 0; } -static struct sort_dimension switch_sort_dimension = { - .name = "switch", - .cmp = switch_cmp, -}; - static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) { if (l->total_runtime < r->total_runtime) @@ -1317,28 +1291,38 @@ static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) return 0; } -static struct sort_dimension runtime_sort_dimension = { - .name = "runtime", - .cmp = runtime_cmp, -}; - -static struct sort_dimension *available_sorts[] = { - &pid_sort_dimension, - &avg_sort_dimension, - &max_sort_dimension, - &switch_sort_dimension, - &runtime_sort_dimension, -}; - -#define NB_AVAILABLE_SORTS (int)(sizeof(available_sorts) / sizeof(struct sort_dimension 
*)) - -static LIST_HEAD(sort_list); - static int sort_dimension__add(const char *tok, struct list_head *list) { - int i; + size_t i; + static struct sort_dimension avg_sort_dimension = { + .name = "avg", + .cmp = avg_cmp, + }; + static struct sort_dimension max_sort_dimension = { + .name = "max", + .cmp = max_cmp, + }; + static struct sort_dimension pid_sort_dimension = { + .name = "pid", + .cmp = pid_cmp, + }; + static struct sort_dimension runtime_sort_dimension = { + .name = "runtime", + .cmp = runtime_cmp, + }; + static struct sort_dimension switch_sort_dimension = { + .name = "switch", + .cmp = switch_cmp, + }; + struct sort_dimension *available_sorts[] = { + &pid_sort_dimension, + &avg_sort_dimension, + &max_sort_dimension, + &switch_sort_dimension, + &runtime_sort_dimension, + }; - for (i = 0; i < NB_AVAILABLE_SORTS; i++) { + for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { if (!strcmp(available_sorts[i]->name, tok)) { list_add_tail(&available_sorts[i]->list, list); @@ -1349,31 +1333,28 @@ static int sort_dimension__add(const char *tok, struct list_head *list) return -1; } -static void setup_sorting(void); - -static void sort_lat(void) +static void perf_sched__sort_lat(struct perf_sched *sched) { struct rb_node *node; for (;;) { struct work_atoms *data; - node = rb_first(&atom_root); + node = rb_first(&sched->atom_root); if (!node) break; - rb_erase(node, &atom_root); + rb_erase(node, &sched->atom_root); data = rb_entry(node, struct work_atoms, node); - __thread_latency_insert(&sorted_atom_root, data, &sort_list); + __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); } } -static struct trace_sched_handler *trace_handler; - -static int process_sched_wakeup_event(struct perf_tool *tool __maybe_unused, +static int process_sched_wakeup_event(struct perf_tool *tool, struct event_format *event, struct perf_sample *sample, struct machine *machine) { + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); void *data = sample->raw_data; struct trace_wakeup_event wakeup_event; int err = 0; @@ -1386,27 +1367,15 @@ static int process_sched_wakeup_event(struct perf_tool *tool __maybe_unused, FILL_FIELD(wakeup_event, success, event, data); FILL_FIELD(wakeup_event, cpu, event, data); - if (trace_handler->wakeup_event) - err = trace_handler->wakeup_event(&wakeup_event, machine, event, sample); + if (sched->tp_handler->wakeup_event) + err = sched->tp_handler->wakeup_event(sched, &wakeup_event, machine, event, sample); return err; } -/* - * Track the current task - that way we can know whether there's any - * weird events, such as a task being switched away that is not current. - */ -static int max_cpu; - -static u32 curr_pid[MAX_CPUS] = { [0 ... 
MAX_CPUS-1] = -1 }; - -static struct thread *curr_thread[MAX_CPUS]; - -static char next_shortname1 = 'A'; -static char next_shortname2 = '0'; - static int -map_switch_event(struct trace_switch_event *switch_event, +map_switch_event(struct perf_sched *sched, + struct trace_switch_event *switch_event, struct machine *machine, struct event_format *event __maybe_unused, struct perf_sample *sample) @@ -1419,11 +1388,11 @@ map_switch_event(struct trace_switch_event *switch_event, BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); - if (this_cpu > max_cpu) - max_cpu = this_cpu; + if (this_cpu > sched->max_cpu) + sched->max_cpu = this_cpu; - timestamp0 = cpu_last_switched[this_cpu]; - cpu_last_switched[this_cpu] = timestamp; + timestamp0 = sched->cpu_last_switched[this_cpu]; + sched->cpu_last_switched[this_cpu] = timestamp; if (timestamp0) delta = timestamp - timestamp0; else @@ -1437,37 +1406,37 @@ map_switch_event(struct trace_switch_event *switch_event, sched_out = machine__findnew_thread(machine, switch_event->prev_pid); sched_in = machine__findnew_thread(machine, switch_event->next_pid); - curr_thread[this_cpu] = sched_in; + sched->curr_thread[this_cpu] = sched_in; printf(" "); new_shortname = 0; if (!sched_in->shortname[0]) { - sched_in->shortname[0] = next_shortname1; - sched_in->shortname[1] = next_shortname2; + sched_in->shortname[0] = sched->next_shortname1; + sched_in->shortname[1] = sched->next_shortname2; - if (next_shortname1 < 'Z') { - next_shortname1++; + if (sched->next_shortname1 < 'Z') { + sched->next_shortname1++; } else { - next_shortname1='A'; - if (next_shortname2 < '9') { - next_shortname2++; + sched->next_shortname1='A'; + if (sched->next_shortname2 < '9') { + sched->next_shortname2++; } else { - next_shortname2='0'; + sched->next_shortname2='0'; } } new_shortname = 1; } - for (cpu = 0; cpu <= max_cpu; cpu++) { + for (cpu = 0; cpu <= sched->max_cpu; cpu++) { if (cpu != this_cpu) printf(" "); else printf("*"); - if (curr_thread[cpu]) { - if (curr_thread[cpu]->pid) - printf("%2s ", curr_thread[cpu]->shortname); + if (sched->curr_thread[cpu]) { + if (sched->curr_thread[cpu]->pid) + printf("%2s ", sched->curr_thread[cpu]->shortname); else printf(". "); } else @@ -1485,11 +1454,12 @@ map_switch_event(struct trace_switch_event *switch_event, return 0; } -static int process_sched_switch_event(struct perf_tool *tool __maybe_unused, +static int process_sched_switch_event(struct perf_tool *tool, struct event_format *event, struct perf_sample *sample, struct machine *machine) { + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); int this_cpu = sample->cpu, err = 0; void *data = sample->raw_data; struct trace_switch_event switch_event; @@ -1504,26 +1474,27 @@ static int process_sched_switch_event(struct perf_tool *tool __maybe_unused, FILL_FIELD(switch_event, next_pid, event, data); FILL_FIELD(switch_event, next_prio, event, data); - if (curr_pid[this_cpu] != (u32)-1) { + if (sched->curr_pid[this_cpu] != (u32)-1) { /* * Are we trying to switch away a PID that is * not current? 
*/ - if (curr_pid[this_cpu] != switch_event.prev_pid) - nr_context_switch_bugs++; + if (sched->curr_pid[this_cpu] != switch_event.prev_pid) + sched->nr_context_switch_bugs++; } - if (trace_handler->switch_event) - err = trace_handler->switch_event(&switch_event, machine, event, sample); + if (sched->tp_handler->switch_event) + err = sched->tp_handler->switch_event(sched, &switch_event, machine, event, sample); - curr_pid[this_cpu] = switch_event.next_pid; + sched->curr_pid[this_cpu] = switch_event.next_pid; return err; } -static int process_sched_runtime_event(struct perf_tool *tool __maybe_unused, +static int process_sched_runtime_event(struct perf_tool *tool, struct event_format *event, struct perf_sample *sample, struct machine *machine) { + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); void *data = sample->raw_data; struct trace_runtime_event runtime_event; int err = 0; @@ -1533,17 +1504,18 @@ static int process_sched_runtime_event(struct perf_tool *tool __maybe_unused, FILL_FIELD(runtime_event, runtime, event, data); FILL_FIELD(runtime_event, vruntime, event, data); - if (trace_handler->runtime_event) - err = trace_handler->runtime_event(&runtime_event, machine, sample); + if (sched->tp_handler->runtime_event) + err = sched->tp_handler->runtime_event(sched, &runtime_event, machine, sample); return err; } -static int process_sched_fork_event(struct perf_tool *tool __maybe_unused, +static int process_sched_fork_event(struct perf_tool *tool, struct event_format *event, struct perf_sample *sample, struct machine *machine __maybe_unused) { + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); void *data = sample->raw_data; struct trace_fork_event fork_event; int err = 0; @@ -1555,8 +1527,8 @@ static int process_sched_fork_event(struct perf_tool *tool __maybe_unused, FILL_ARRAY(fork_event, child_comm, event, data); FILL_FIELD(fork_event, child_pid, event, data); - if (trace_handler->fork_event) - err = trace_handler->fork_event(&fork_event, event); + if (sched->tp_handler->fork_event) + err = sched->tp_handler->fork_event(sched, &fork_event, event); return err; } @@ -1572,11 +1544,12 @@ static int process_sched_exit_event(struct perf_tool *tool __maybe_unused, return 0; } -static int process_sched_migrate_task_event(struct perf_tool *tool __maybe_unused, +static int process_sched_migrate_task_event(struct perf_tool *tool, struct event_format *event, struct perf_sample *sample, struct machine *machine) { + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); void *data = sample->raw_data; struct trace_migrate_task_event migrate_task_event; int err = 0; @@ -1588,8 +1561,8 @@ static int process_sched_migrate_task_event(struct perf_tool *tool __maybe_unuse FILL_FIELD(migrate_task_event, prio, event, data); FILL_FIELD(migrate_task_event, cpu, event, data); - if (trace_handler->migrate_task_event) - err = trace_handler->migrate_task_event(&migrate_task_event, machine, sample); + if (sched->tp_handler->migrate_task_event) + err = sched->tp_handler->migrate_task_event(sched, &migrate_task_event, machine, sample); return err; } @@ -1625,15 +1598,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ return err; } -static struct perf_tool perf_sched = { - .sample = perf_sched__process_tracepoint_sample, - .comm = perf_event__process_comm, - .lost = perf_event__process_lost, - .fork = perf_event__process_task, - .ordered_samples = true, -}; - -static int read_events(bool destroy, struct perf_session 
**psession) +static int perf_sched__read_events(struct perf_sched *sched, bool destroy, + struct perf_session **psession) { const struct perf_evsel_str_handler handlers[] = { { "sched:sched_switch", process_sched_switch_event, }, @@ -1646,7 +1612,7 @@ static int read_events(bool destroy, struct perf_session **psession) }; struct perf_session *session; - session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_sched); + session = perf_session__new(sched->input_name, O_RDONLY, 0, false, &sched->tool); if (session == NULL) { pr_debug("No Memory for session\n"); return -1; @@ -1656,15 +1622,15 @@ static int read_events(bool destroy, struct perf_session **psession) goto out_delete; if (perf_session__has_traces(session, "record -R")) { - int err = perf_session__process_events(session, &perf_sched); + int err = perf_session__process_events(session, &sched->tool); if (err) { pr_err("Failed to process events, error %d", err); goto out_delete; } - nr_events = session->hists.stats.nr_events[0]; - nr_lost_events = session->hists.stats.total_lost; - nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST]; + sched->nr_events = session->hists.stats.nr_events[0]; + sched->nr_lost_events = session->hists.stats.total_lost; + sched->nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST]; } if (destroy) @@ -1680,213 +1646,158 @@ out_delete: return -1; } -static void print_bad_events(void) +static void print_bad_events(struct perf_sched *sched) { - if (nr_unordered_timestamps && nr_timestamps) { + if (sched->nr_unordered_timestamps && sched->nr_timestamps) { printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", - (double)nr_unordered_timestamps/(double)nr_timestamps*100.0, - nr_unordered_timestamps, nr_timestamps); + (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0, + sched->nr_unordered_timestamps, sched->nr_timestamps); } - if (nr_lost_events && nr_events) { + if (sched->nr_lost_events && sched->nr_events) { printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", - (double)nr_lost_events/(double)nr_events*100.0, - nr_lost_events, nr_events, nr_lost_chunks); + (double)sched->nr_lost_events/(double)sched->nr_events * 100.0, + sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks); } - if (nr_state_machine_bugs && nr_timestamps) { + if (sched->nr_state_machine_bugs && sched->nr_timestamps) { printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)", - (double)nr_state_machine_bugs/(double)nr_timestamps*100.0, - nr_state_machine_bugs, nr_timestamps); - if (nr_lost_events) + (double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0, + sched->nr_state_machine_bugs, sched->nr_timestamps); + if (sched->nr_lost_events) printf(" (due to lost events?)"); printf("\n"); } - if (nr_context_switch_bugs && nr_timestamps) { + if (sched->nr_context_switch_bugs && sched->nr_timestamps) { printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", - (double)nr_context_switch_bugs/(double)nr_timestamps*100.0, - nr_context_switch_bugs, nr_timestamps); - if (nr_lost_events) + (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0, + sched->nr_context_switch_bugs, sched->nr_timestamps); + if (sched->nr_lost_events) printf(" (due to lost events?)"); printf("\n"); } } -static int __cmd_lat(void) +static int perf_sched__lat(struct perf_sched *sched) { struct rb_node *next; struct perf_session *session; setup_pager(); - if (read_events(false, &session)) + if (perf_sched__read_events(sched, false, &session)) return 
-1; - sort_lat(); + perf_sched__sort_lat(sched); printf("\n ---------------------------------------------------------------------------------------------------------------\n"); printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); printf(" ---------------------------------------------------------------------------------------------------------------\n"); - next = rb_first(&sorted_atom_root); + next = rb_first(&sched->sorted_atom_root); while (next) { struct work_atoms *work_list; work_list = rb_entry(next, struct work_atoms, node); - output_lat_thread(work_list); + output_lat_thread(sched, work_list); next = rb_next(next); } printf(" -----------------------------------------------------------------------------------------\n"); printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", - (double)all_runtime/1e6, all_count); + (double)sched->all_runtime / 1e6, sched->all_count); printf(" ---------------------------------------------------\n"); - print_bad_events(); + print_bad_events(sched); printf("\n"); perf_session__delete(session); return 0; } -static struct trace_sched_handler map_ops = { - .wakeup_event = NULL, - .switch_event = map_switch_event, - .runtime_event = NULL, - .fork_event = NULL, -}; - -static int __cmd_map(void) +static int perf_sched__map(struct perf_sched *sched) { - max_cpu = sysconf(_SC_NPROCESSORS_CONF); + sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); setup_pager(); - if (read_events(true, NULL)) + if (perf_sched__read_events(sched, true, NULL)) return -1; - print_bad_events(); + print_bad_events(sched); return 0; } -static int __cmd_replay(void) +static int perf_sched__replay(struct perf_sched *sched) { unsigned long i; - calibrate_run_measurement_overhead(); - calibrate_sleep_measurement_overhead(); + calibrate_run_measurement_overhead(sched); + calibrate_sleep_measurement_overhead(sched); - test_calibrations(); + test_calibrations(sched); - if (read_events(true, NULL)) + if (perf_sched__read_events(sched, true, NULL)) return -1; - printf("nr_run_events: %ld\n", nr_run_events); - printf("nr_sleep_events: %ld\n", nr_sleep_events); - printf("nr_wakeup_events: %ld\n", nr_wakeup_events); + printf("nr_run_events: %ld\n", sched->nr_run_events); + printf("nr_sleep_events: %ld\n", sched->nr_sleep_events); + printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events); - if (targetless_wakeups) - printf("target-less wakeups: %ld\n", targetless_wakeups); - if (multitarget_wakeups) - printf("multi-target wakeups: %ld\n", multitarget_wakeups); - if (nr_run_events_optimized) + if (sched->targetless_wakeups) + printf("target-less wakeups: %ld\n", sched->targetless_wakeups); + if (sched->multitarget_wakeups) + printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups); + if (sched->nr_run_events_optimized) printf("run atoms optimized: %ld\n", - nr_run_events_optimized); + sched->nr_run_events_optimized); - print_task_traces(); - add_cross_task_wakeups(); + print_task_traces(sched); + add_cross_task_wakeups(sched); - create_tasks(); + create_tasks(sched); printf("------------------------------------------------------------\n"); - for (i = 0; i < replay_repeat; i++) - run_one_test(); + for (i = 0; i < sched->replay_repeat; i++) + run_one_test(sched); return 0; } - -static const char * const sched_usage[] = { - "perf sched [] {record|latency|map|replay|script}", - NULL -}; - -static const struct option sched_options[] = { - OPT_STRING('i', "input", &input_name, "file", - "input file name"), - OPT_INCR('v', "verbose", &verbose, - "be more 
verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_END() -}; - -static const char * const latency_usage[] = { - "perf sched latency []", - NULL -}; - -static const struct option latency_options[] = { - OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): runtime, switch, avg, max"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_INTEGER('C', "CPU", &profile_cpu, - "CPU to profile on"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_END() -}; - -static const char * const replay_usage[] = { - "perf sched replay []", - NULL -}; - -static const struct option replay_options[] = { - OPT_UINTEGER('r', "repeat", &replay_repeat, - "repeat the workload replay N times (-1: infinite)"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_END() -}; - -static void setup_sorting(void) +static void setup_sorting(struct perf_sched *sched, const struct option *options, + const char * const usage_msg[]) { - char *tmp, *tok, *str = strdup(sort_order); + char *tmp, *tok, *str = strdup(sched->sort_order); for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { - if (sort_dimension__add(tok, &sort_list) < 0) { + if (sort_dimension__add(tok, &sched->sort_list) < 0) { error("Unknown --sort key: `%s'", tok); - usage_with_options(latency_usage, latency_options); + usage_with_options(usage_msg, options); } } free(str); - sort_dimension__add("pid", &cmp_pid); + sort_dimension__add("pid", &sched->cmp_pid); } -static const char *record_args[] = { - "record", - "-a", - "-R", - "-f", - "-m", "1024", - "-c", "1", - "-e", "sched:sched_switch", - "-e", "sched:sched_stat_wait", - "-e", "sched:sched_stat_sleep", - "-e", "sched:sched_stat_iowait", - "-e", "sched:sched_stat_runtime", - "-e", "sched:sched_process_exit", - "-e", "sched:sched_process_fork", - "-e", "sched:sched_wakeup", - "-e", "sched:sched_migrate_task", -}; - static int __cmd_record(int argc, const char **argv) { unsigned int rec_argc, i, j; const char **rec_argv; + const char * const record_args[] = { + "record", + "-a", + "-R", + "-f", + "-m", "1024", + "-c", "1", + "-e", "sched:sched_switch", + "-e", "sched:sched_stat_wait", + "-e", "sched:sched_stat_sleep", + "-e", "sched:sched_stat_iowait", + "-e", "sched:sched_stat_runtime", + "-e", "sched:sched_process_exit", + "-e", "sched:sched_process_fork", + "-e", "sched:sched_wakeup", + "-e", "sched:sched_migrate_task", + }; rec_argc = ARRAY_SIZE(record_args) + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); @@ -1907,6 +1818,83 @@ static int __cmd_record(int argc, const char **argv) int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) { + const char default_sort_order[] = "avg, max, switch, runtime"; + struct perf_sched sched = { + .tool = { + .sample = perf_sched__process_tracepoint_sample, + .comm = perf_event__process_comm, + .lost = perf_event__process_lost, + .fork = perf_event__process_task, + .ordered_samples = true, + }, + .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), + .sort_list = LIST_HEAD_INIT(sched.sort_list), + .start_work_mutex = PTHREAD_MUTEX_INITIALIZER, + .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER, + .curr_pid = { [0 ... 
MAX_CPUS - 1] = -1 }, + .sort_order = default_sort_order, + .replay_repeat = 10, + .profile_cpu = -1, + .next_shortname1 = 'A', + .next_shortname2 = '0', + }; + const struct option latency_options[] = { + OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", + "sort by key(s): runtime, switch, avg, max"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_INTEGER('C', "CPU", &sched.profile_cpu, + "CPU to profile on"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_END() + }; + const struct option replay_options[] = { + OPT_UINTEGER('r', "repeat", &sched.replay_repeat, + "repeat the workload replay N times (-1: infinite)"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_END() + }; + const struct option sched_options[] = { + OPT_STRING('i', "input", &sched.input_name, "file", + "input file name"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_END() + }; + const char * const latency_usage[] = { + "perf sched latency []", + NULL + }; + const char * const replay_usage[] = { + "perf sched replay []", + NULL + }; + const char * const sched_usage[] = { + "perf sched [] {record|latency|map|replay|script}", + NULL + }; + struct trace_sched_handler lat_ops = { + .wakeup_event = latency_wakeup_event, + .switch_event = latency_switch_event, + .runtime_event = latency_runtime_event, + .fork_event = latency_fork_event, + .migrate_task_event = latency_migrate_task_event, + }; + struct trace_sched_handler map_ops = { + .switch_event = map_switch_event, + }; + struct trace_sched_handler replay_ops = { + .wakeup_event = replay_wakeup_event, + .switch_event = replay_switch_event, + .fork_event = replay_fork_event, + }; + argc = parse_options(argc, argv, sched_options, sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) @@ -1922,26 +1910,26 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) if (!strncmp(argv[0], "rec", 3)) { return __cmd_record(argc, argv); } else if (!strncmp(argv[0], "lat", 3)) { - trace_handler = &lat_ops; + sched.tp_handler = &lat_ops; if (argc > 1) { argc = parse_options(argc, argv, latency_options, latency_usage, 0); if (argc) usage_with_options(latency_usage, latency_options); } - setup_sorting(); - return __cmd_lat(); + setup_sorting(&sched, latency_options, latency_usage); + return perf_sched__lat(&sched); } else if (!strcmp(argv[0], "map")) { - trace_handler = &map_ops; - setup_sorting(); - return __cmd_map(); + sched.tp_handler = &map_ops; + setup_sorting(&sched, latency_options, latency_usage); + return perf_sched__map(&sched); } else if (!strncmp(argv[0], "rep", 3)) { - trace_handler = &replay_ops; + sched.tp_handler = &replay_ops; if (argc) { argc = parse_options(argc, argv, replay_options, replay_usage, 0); if (argc) usage_with_options(replay_usage, replay_options); } - return __cmd_replay(); + return perf_sched__replay(&sched); } else { usage_with_options(sched_usage, sched_options); } From 5555ded44698ed82ffa3d8742ec2994f695127bc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Sep 2012 19:24:23 -0300 Subject: [PATCH 192/282] perf evsel: Introduce perf_evsel__{str,int}val methods Wrappers to the libtraceevent routines, so that we can further reduce the surface contact perf builtins have with 
it. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-rtmgzptvrifzjxqwb9vs6g1b@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 35 +++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 7 +++++++ 2 files changed, 42 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 06f76441547..1506ba0453f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -10,6 +10,7 @@ #include #include #include "asm/bug.h" +#include "event-parse.h" #include "evsel.h" #include "evlist.h" #include "util.h" @@ -1000,3 +1001,37 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, return 0; } + +char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample, + const char *name) +{ + struct format_field *field = pevent_find_field(evsel->tp_format, name); + int offset; + + if (!field) + return NULL; + + offset = field->offset; + + if (field->flags & FIELD_IS_DYNAMIC) { + offset = *(int *)(sample->raw_data + field->offset); + offset &= 0xffff; + } + + return sample->raw_data + offset; +} + +u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, + const char *name) +{ + struct format_field *field = pevent_find_field(evsel->tp_format, name); + u64 val; + + if (!field) + return 0; + + val = pevent_read_number(evsel->tp_format->pevent, + sample->raw_data + field->offset, field->size); + return val; + +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 390690eb878..dc40fe32210 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -120,6 +120,13 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads); void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads); +struct perf_sample; + +char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample, + const char *name); +u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, + const char *name); + #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ evsel->attr.config == PERF_COUNT_##c) From 2b7fcbc5a9c719a306af1c4986a9f5c2cbfcec65 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Sep 2012 19:29:17 -0300 Subject: [PATCH 193/282] perf sched: Use perf_evsel__{int,str}val This patch also stops reading the common fields, as they were not being used except for one ->common_pid case that was replaced by sample->tid, i.e. the info is already in the perf_sample struct. Also it only fills the _event structures when there is a handler. 
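With these accessors in place each handler boils down to direct named-field lookups on the sample. A minimal sketch of the resulting pattern, assuming the perf_evsel__{str,int}val declarations added to evsel.h above (the handler name and the printf are illustrative only; the accessors and the "comm"/"pid" field names are the ones this patch actually uses):

	static int sketch_wakeup_handler(struct perf_evsel *evsel,
					 struct perf_sample *sample)
	{
		const char *comm = perf_evsel__strval(evsel, sample, "comm");
		const u32 pid = perf_evsel__intval(evsel, sample, "pid");

		/* illustrative consumer of the decoded fields */
		printf("woke up %s/%d\n", comm, pid);
		return 0;
	}
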
[root@sandy ~]# perf sched record sleep 30s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 8.585 MB perf.data (~375063 samples) ] Before: [root@sandy ~]# perf stat -r 10 perf sched lat > /dev/null Performance counter stats for 'perf sched lat' (10 runs): 129.117838 task-clock # 0.994 CPUs utilized ( +- 0.28% ) 14 context-switches # 0.111 K/sec ( +- 2.10% ) 0 cpu-migrations # 0.002 K/sec ( +- 66.67% ) 7,654 page-faults # 0.059 M/sec ( +- 0.67% ) 438,121,661 cycles # 3.393 GHz ( +- 0.06% ) [83.06%] 150,808,605 stalled-cycles-frontend # 34.42% frontend cycles idle ( +- 0.14% ) [83.10%] 80,748,941 stalled-cycles-backend # 18.43% backend cycles idle ( +- 0.64% ) [66.73%] 758,605,879 instructions # 1.73 insns per cycle # 0.20 stalled cycles per insn ( +- 0.08% ) [83.54%] 162,164,321 branches # 1255.940 M/sec ( +- 0.10% ) [83.70%] 1,609,903 branch-misses # 0.99% of all branches ( +- 0.08% ) [83.62%] 0.129949153 seconds time elapsed ( +- 0.28% ) After: [root@sandy ~]# perf stat -r 10 perf sched lat > /dev/null Performance counter stats for 'perf sched lat' (10 runs): 103.592215 task-clock # 0.993 CPUs utilized ( +- 0.33% ) 12 context-switches # 0.114 K/sec ( +- 3.29% ) 0 cpu-migrations # 0.000 K/sec 7,605 page-faults # 0.073 M/sec ( +- 0.00% ) 345,796,112 cycles # 3.338 GHz ( +- 0.07% ) [82.90%] 106,876,796 stalled-cycles-frontend # 30.91% frontend cycles idle ( +- 0.38% ) [83.23%] 62,060,877 stalled-cycles-backend # 17.95% backend cycles idle ( +- 0.80% ) [67.14%] 628,246,586 instructions # 1.82 insns per cycle # 0.17 stalled cycles per insn ( +- 0.04% ) [83.64%] 134,962,057 branches # 1302.820 M/sec ( +- 0.10% ) [83.64%] 1,233,037 branch-misses # 0.91% of all branches ( +- 0.29% ) [83.41%] 0.104333272 seconds time elapsed ( +- 0.33% ) [root@sandy ~]# Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-weu9t63zkrfrazkn0gxj48xy@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 279 ++++++++++++++----------------------- 1 file changed, 105 insertions(+), 174 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 79f88fa3f7a..0df5e7a08c6 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -98,82 +98,40 @@ struct work_atoms { typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); struct trace_switch_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char prev_comm[16]; + char *prev_comm; u32 prev_pid; u32 prev_prio; u64 prev_state; - char next_comm[16]; + char *next_comm; u32 next_pid; u32 next_prio; }; struct trace_runtime_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char comm[16]; + char *comm; u32 pid; u64 runtime; u64 vruntime; }; struct trace_wakeup_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char comm[16]; + char *comm; u32 pid; - u32 prio; u32 success; u32 cpu; }; struct trace_fork_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char parent_comm[16]; + char *parent_comm; u32 parent_pid; - char child_comm[16]; + char *child_comm; u32 child_pid; }; struct trace_migrate_task_event { - u32 size; - - u16 
common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char comm[16]; + char *comm; u32 pid; - u32 prio; u32 cpu; }; @@ -184,7 +142,7 @@ struct trace_sched_handler { int (*switch_event)(struct perf_sched *sched, struct trace_switch_event *event, struct machine *machine, - struct event_format *tp_format, + struct perf_evsel *evsel, struct perf_sample *sample); int (*runtime_event)(struct perf_sched *sched, @@ -195,12 +153,12 @@ struct trace_sched_handler { int (*wakeup_event)(struct perf_sched *sched, struct trace_wakeup_event *event, struct machine *machine, - struct event_format *tp_format, + struct perf_evsel *evsel, struct perf_sample *sample); int (*fork_event)(struct perf_sched *sched, struct trace_fork_event *event, - struct event_format *tp_format); + struct perf_evsel *evsel); int (*migrate_task_event)(struct perf_sched *sched, struct trace_migrate_task_event *event, @@ -740,40 +698,22 @@ static void test_calibrations(struct perf_sched *sched) printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); } -#define FILL_FIELD(ptr, field, event, data) \ - ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data) - -#define FILL_ARRAY(ptr, array, event, data) \ -do { \ - void *__array = raw_field_ptr(event, #array, data); \ - memcpy(ptr.array, __array, sizeof(ptr.array)); \ -} while(0) - -#define FILL_COMMON_FIELDS(ptr, event, data) \ -do { \ - FILL_FIELD(ptr, common_type, event, data); \ - FILL_FIELD(ptr, common_flags, event, data); \ - FILL_FIELD(ptr, common_preempt_count, event, data); \ - FILL_FIELD(ptr, common_pid, event, data); \ - FILL_FIELD(ptr, common_tgid, event, data); \ -} while (0) - static int replay_wakeup_event(struct perf_sched *sched, struct trace_wakeup_event *wakeup_event, struct machine *machine __maybe_unused, - struct event_format *event, struct perf_sample *sample) + struct perf_evsel *evsel, struct perf_sample *sample) { struct task_desc *waker, *wakee; if (verbose) { - printf("sched_wakeup event %p\n", event); + printf("sched_wakeup event %p\n", evsel); printf(" ... pid %d woke up %s/%d\n", - wakeup_event->common_pid, wakeup_event->comm, wakeup_event->pid); + sample->tid, wakeup_event->comm, wakeup_event->pid); } - waker = register_pid(sched, wakeup_event->common_pid, ""); + waker = register_pid(sched, sample->tid, ""); wakee = register_pid(sched, wakeup_event->pid, wakeup_event->comm); add_sched_event_wakeup(sched, waker, sample->time, wakee); @@ -784,7 +724,7 @@ static int replay_switch_event(struct perf_sched *sched, struct trace_switch_event *switch_event, struct machine *machine __maybe_unused, - struct event_format *event, + struct perf_evsel *evsel, struct perf_sample *sample) { struct task_desc *prev, __maybe_unused *next; @@ -793,7 +733,7 @@ replay_switch_event(struct perf_sched *sched, s64 delta; if (verbose) - printf("sched_switch event %p\n", event); + printf("sched_switch event %p\n", evsel); if (cpu >= MAX_CPUS || cpu < 0) return 0; @@ -829,10 +769,10 @@ replay_switch_event(struct perf_sched *sched, static int replay_fork_event(struct perf_sched *sched, struct trace_fork_event *fork_event, - struct event_format *event) + struct perf_evsel *evsel) { if (verbose) { - printf("sched_fork event %p\n", event); + printf("sched_fork event %p\n", evsel); printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid); printf("... 
child: %s/%d\n", fork_event->child_comm, fork_event->child_pid); } @@ -931,7 +871,7 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) static int latency_fork_event(struct perf_sched *sched __maybe_unused, struct trace_fork_event *fork_event __maybe_unused, - struct event_format *event __maybe_unused) + struct perf_evsel *evsel __maybe_unused) { /* should insert the newcomer */ return 0; @@ -1015,7 +955,7 @@ static int latency_switch_event(struct perf_sched *sched, struct trace_switch_event *switch_event, struct machine *machine, - struct event_format *event __maybe_unused, + struct perf_evsel *evsel __maybe_unused, struct perf_sample *sample) { struct work_atoms *out_events, *in_events; @@ -1106,7 +1046,7 @@ static int latency_wakeup_event(struct perf_sched *sched, struct trace_wakeup_event *wakeup_event, struct machine *machine, - struct event_format *event __maybe_unused, + struct perf_evsel *evsel __maybe_unused, struct perf_sample *sample) { struct work_atoms *atoms; @@ -1350,34 +1290,32 @@ static void perf_sched__sort_lat(struct perf_sched *sched) } static int process_sched_wakeup_event(struct perf_tool *tool, - struct event_format *event, + struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine) { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - void *data = sample->raw_data; - struct trace_wakeup_event wakeup_event; - int err = 0; - FILL_COMMON_FIELDS(wakeup_event, event, data); + if (sched->tp_handler->wakeup_event) { + struct trace_wakeup_event event = { + .comm = perf_evsel__strval(evsel, sample, "comm"), + .pid = perf_evsel__intval(evsel, sample, "pid"), + .prio = perf_evsel__intval(evsel, sample, "prio"), + .success = perf_evsel__intval(evsel, sample, "success"), + .cpu = perf_evsel__intval(evsel, sample, "cpu"), + }; - FILL_ARRAY(wakeup_event, comm, event, data); - FILL_FIELD(wakeup_event, pid, event, data); - FILL_FIELD(wakeup_event, prio, event, data); - FILL_FIELD(wakeup_event, success, event, data); - FILL_FIELD(wakeup_event, cpu, event, data); + return sched->tp_handler->wakeup_event(sched, &event, machine, evsel, sample); + } - if (sched->tp_handler->wakeup_event) - err = sched->tp_handler->wakeup_event(sched, &wakeup_event, machine, event, sample); - - return err; + return 0; } static int map_switch_event(struct perf_sched *sched, struct trace_switch_event *switch_event, struct machine *machine, - struct event_format *event __maybe_unused, + struct perf_evsel *evsel __maybe_unused, struct perf_sample *sample) { struct thread *sched_out __maybe_unused, *sched_in; @@ -1455,120 +1393,113 @@ map_switch_event(struct perf_sched *sched, } static int process_sched_switch_event(struct perf_tool *tool, - struct event_format *event, + struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine) { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); int this_cpu = sample->cpu, err = 0; - void *data = sample->raw_data; - struct trace_switch_event switch_event; - - FILL_COMMON_FIELDS(switch_event, event, data); - - FILL_ARRAY(switch_event, prev_comm, event, data); - FILL_FIELD(switch_event, prev_pid, event, data); - FILL_FIELD(switch_event, prev_prio, event, data); - FILL_FIELD(switch_event, prev_state, event, data); - FILL_ARRAY(switch_event, next_comm, event, data); - FILL_FIELD(switch_event, next_pid, event, data); - FILL_FIELD(switch_event, next_prio, event, data); + u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), + next_pid = 
perf_evsel__intval(evsel, sample, "next_pid"); if (sched->curr_pid[this_cpu] != (u32)-1) { /* * Are we trying to switch away a PID that is * not current? */ - if (sched->curr_pid[this_cpu] != switch_event.prev_pid) + if (sched->curr_pid[this_cpu] != prev_pid) sched->nr_context_switch_bugs++; } - if (sched->tp_handler->switch_event) - err = sched->tp_handler->switch_event(sched, &switch_event, machine, event, sample); - sched->curr_pid[this_cpu] = switch_event.next_pid; + if (sched->tp_handler->switch_event) { + struct trace_switch_event event = { + .prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"), + .prev_pid = prev_pid, + .prev_prio = perf_evsel__intval(evsel, sample, "prev_prio"), + .prev_state = perf_evsel__intval(evsel, sample, "prev_state"), + .next_comm = perf_evsel__strval(evsel, sample, "next_comm"), + .next_pid = next_pid, + .next_prio = perf_evsel__intval(evsel, sample, "next_prio"), + }; + + err = sched->tp_handler->switch_event(sched, &event, machine, evsel, sample); + } + + sched->curr_pid[this_cpu] = next_pid; return err; } static int process_sched_runtime_event(struct perf_tool *tool, - struct event_format *event, + struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine) { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - void *data = sample->raw_data; - struct trace_runtime_event runtime_event; - int err = 0; - FILL_ARRAY(runtime_event, comm, event, data); - FILL_FIELD(runtime_event, pid, event, data); - FILL_FIELD(runtime_event, runtime, event, data); - FILL_FIELD(runtime_event, vruntime, event, data); - - if (sched->tp_handler->runtime_event) - err = sched->tp_handler->runtime_event(sched, &runtime_event, machine, sample); - - return err; -} - -static int process_sched_fork_event(struct perf_tool *tool, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine __maybe_unused) -{ - struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - void *data = sample->raw_data; - struct trace_fork_event fork_event; - int err = 0; - - FILL_COMMON_FIELDS(fork_event, event, data); - - FILL_ARRAY(fork_event, parent_comm, event, data); - FILL_FIELD(fork_event, parent_pid, event, data); - FILL_ARRAY(fork_event, child_comm, event, data); - FILL_FIELD(fork_event, child_pid, event, data); - - if (sched->tp_handler->fork_event) - err = sched->tp_handler->fork_event(sched, &fork_event, event); - - return err; -} - -static int process_sched_exit_event(struct perf_tool *tool __maybe_unused, - struct event_format *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) -{ - if (verbose) - printf("sched_exit event %p\n", event); + if (sched->tp_handler->runtime_event) { + struct trace_runtime_event event = { + .comm = perf_evsel__strval(evsel, sample, "comm"), + .pid = perf_evsel__intval(evsel, sample, "pid"), + .runtime = perf_evsel__intval(evsel, sample, "runtime"), + .vruntime = perf_evsel__intval(evsel, sample, "vruntime"), + }; + return sched->tp_handler->runtime_event(sched, &event, machine, sample); + } return 0; } +static int process_sched_fork_event(struct perf_tool *tool, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine __maybe_unused) +{ + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + + if (sched->tp_handler->fork_event) { + struct trace_fork_event event = { + .parent_comm = perf_evsel__strval(evsel, sample, "parent_comm"), + .child_comm = perf_evsel__strval(evsel, sample, 
"child_comm"), + .parent_pid = perf_evsel__intval(evsel, sample, "parent_pid"), + .child_pid = perf_evsel__intval(evsel, sample, "child_pid"), + }; + return sched->tp_handler->fork_event(sched, &event, evsel); + } + + return 0; +} + +static int process_sched_exit_event(struct perf_tool *tool __maybe_unused, + struct perf_evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + pr_debug("sched_exit event %p\n", evsel); + return 0; +} + static int process_sched_migrate_task_event(struct perf_tool *tool, - struct event_format *event, + struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine) { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - void *data = sample->raw_data; - struct trace_migrate_task_event migrate_task_event; - int err = 0; - FILL_COMMON_FIELDS(migrate_task_event, event, data); + if (sched->tp_handler->migrate_task_event) { + struct trace_migrate_task_event event = { + .comm = perf_evsel__strval(evsel, sample, "comm"), + .pid = perf_evsel__intval(evsel, sample, "pid"), + .prio = perf_evsel__intval(evsel, sample, "prio"), + .cpu = perf_evsel__intval(evsel, sample, "cpu"), + }; + return sched->tp_handler->migrate_task_event(sched, &event, machine, sample); + } - FILL_ARRAY(migrate_task_event, comm, event, data); - FILL_FIELD(migrate_task_event, pid, event, data); - FILL_FIELD(migrate_task_event, prio, event, data); - FILL_FIELD(migrate_task_event, cpu, event, data); - - if (sched->tp_handler->migrate_task_event) - err = sched->tp_handler->migrate_task_event(sched, &migrate_task_event, machine, sample); - - return err; + return 0; } typedef int (*tracepoint_handler)(struct perf_tool *tool, - struct event_format *tp_format, + struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine); @@ -1592,7 +1523,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ if (evsel->handler.func != NULL) { tracepoint_handler f = evsel->handler.func; - err = f(tool, evsel->tp_format, sample, machine); + err = f(tool, evsel, sample, machine); } return err; From 9ec3f4e437ede2f3b5087d412abe16a0219b3b99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Sep 2012 19:29:17 -0300 Subject: [PATCH 194/282] perf sched: Don't read all tracepoint variables in advance Do it just at the actual consumer of these fields, that way we avoid needless lookups: [root@sandy ~]# perf sched record sleep 30s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 8.585 MB perf.data (~375063 samples) ] Before: [root@sandy ~]# perf stat -r 10 perf sched lat > /dev/null Performance counter stats for 'perf sched lat' (10 runs): 103.592215 task-clock # 0.993 CPUs utilized ( +- 0.33% ) 12 context-switches # 0.114 K/sec ( +- 3.29% ) 0 cpu-migrations # 0.000 K/sec 7,605 page-faults # 0.073 M/sec ( +- 0.00% ) 345,796,112 cycles # 3.338 GHz ( +- 0.07% ) [82.90%] 106,876,796 stalled-cycles-frontend # 30.91% frontend cycles idle ( +- 0.38% ) [83.23%] 62,060,877 stalled-cycles-backend # 17.95% backend cycles idle ( +- 0.80% ) [67.14%] 628,246,586 instructions # 1.82 insns per cycle # 0.17 stalled cycles per insn ( +- 0.04% ) [83.64%] 134,962,057 branches # 1302.820 M/sec ( +- 0.10% ) [83.64%] 1,233,037 branch-misses # 0.91% of all branches ( +- 0.29% ) [83.41%] 0.104333272 seconds time elapsed ( +- 0.33% ) [root@sandy ~]# perf stat -r 10 perf sched lat > /dev/null Performance counter stats for 'perf sched lat' (10 runs): 98.848272 task-clock # 
0.993 CPUs utilized ( +- 0.48% ) 11 context-switches # 0.112 K/sec ( +- 2.83% ) 0 cpu-migrations # 0.003 K/sec ( +- 50.92% ) 7,604 page-faults # 0.077 M/sec ( +- 0.00% ) 332,216,085 cycles # 3.361 GHz ( +- 0.14% ) [82.87%] 100,623,710 stalled-cycles-frontend # 30.29% frontend cycles idle ( +- 0.53% ) [82.95%] 58,788,692 stalled-cycles-backend # 17.70% backend cycles idle ( +- 0.59% ) [67.15%] 609,402,433 instructions # 1.83 insns per cycle # 0.17 stalled cycles per insn ( +- 0.04% ) [83.76%] 131,277,138 branches # 1328.067 M/sec ( +- 0.06% ) [83.77%] 1,117,871 branch-misses # 0.85% of all branches ( +- 0.32% ) [83.51%] 0.099580430 seconds time elapsed ( +- 0.48% ) [root@sandy ~]# Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-kracdpw8wqlr0xjh75uk8g11@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 273 +++++++++++++------------------------ 1 file changed, 95 insertions(+), 178 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 0df5e7a08c6..af305f57bd2 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -97,73 +97,25 @@ struct work_atoms { typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); -struct trace_switch_event { - char *prev_comm; - u32 prev_pid; - u32 prev_prio; - u64 prev_state; - char *next_comm; - u32 next_pid; - u32 next_prio; -}; - -struct trace_runtime_event { - char *comm; - u32 pid; - u64 runtime; - u64 vruntime; -}; - -struct trace_wakeup_event { - char *comm; - u32 pid; - u32 prio; - u32 success; - u32 cpu; -}; - -struct trace_fork_event { - char *parent_comm; - u32 parent_pid; - char *child_comm; - u32 child_pid; -}; - -struct trace_migrate_task_event { - char *comm; - u32 pid; - u32 prio; - u32 cpu; -}; - struct perf_sched; struct trace_sched_handler { - int (*switch_event)(struct perf_sched *sched, - struct trace_switch_event *event, - struct machine *machine, - struct perf_evsel *evsel, - struct perf_sample *sample); + int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample, struct machine *machine); - int (*runtime_event)(struct perf_sched *sched, - struct trace_runtime_event *event, - struct machine *machine, - struct perf_sample *sample); + int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample, struct machine *machine); - int (*wakeup_event)(struct perf_sched *sched, - struct trace_wakeup_event *event, - struct machine *machine, - struct perf_evsel *evsel, - struct perf_sample *sample); + int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample, struct machine *machine); - int (*fork_event)(struct perf_sched *sched, - struct trace_fork_event *event, - struct perf_evsel *evsel); + int (*fork_event)(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample); int (*migrate_task_event)(struct perf_sched *sched, - struct trace_migrate_task_event *event, - struct machine *machine, - struct perf_sample *sample); + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine); }; struct perf_sched { @@ -700,33 +652,36 @@ static void test_calibrations(struct perf_sched *sched) static int replay_wakeup_event(struct perf_sched *sched, - struct trace_wakeup_event *wakeup_event, - struct machine *machine __maybe_unused, - struct perf_evsel *evsel, 
struct perf_sample *sample) + struct perf_evsel *evsel, struct perf_sample *sample, + struct machine *machine __maybe_unused) { + const char *comm = perf_evsel__strval(evsel, sample, "comm"); + const u32 pid = perf_evsel__intval(evsel, sample, "pid"); struct task_desc *waker, *wakee; if (verbose) { printf("sched_wakeup event %p\n", evsel); - printf(" ... pid %d woke up %s/%d\n", - sample->tid, wakeup_event->comm, wakeup_event->pid); + printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid); } waker = register_pid(sched, sample->tid, ""); - wakee = register_pid(sched, wakeup_event->pid, wakeup_event->comm); + wakee = register_pid(sched, pid, comm); add_sched_event_wakeup(sched, waker, sample->time, wakee); return 0; } -static int -replay_switch_event(struct perf_sched *sched, - struct trace_switch_event *switch_event, - struct machine *machine __maybe_unused, - struct perf_evsel *evsel, - struct perf_sample *sample) +static int replay_switch_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine __maybe_unused) { + const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"), + *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); + const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), + next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); struct task_desc *prev, __maybe_unused *next; u64 timestamp0, timestamp = sample->time; int cpu = sample->cpu; @@ -749,35 +704,36 @@ replay_switch_event(struct perf_sched *sched, return -1; } - if (verbose) { - printf(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", - switch_event->prev_comm, switch_event->prev_pid, - switch_event->next_comm, switch_event->next_pid, - delta); - } + pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", + prev_comm, prev_pid, next_comm, next_pid, delta); - prev = register_pid(sched, switch_event->prev_pid, switch_event->prev_comm); - next = register_pid(sched, switch_event->next_pid, switch_event->next_comm); + prev = register_pid(sched, prev_pid, prev_comm); + next = register_pid(sched, next_pid, next_comm); sched->cpu_last_switched[cpu] = timestamp; add_sched_event_run(sched, prev, timestamp, delta); - add_sched_event_sleep(sched, prev, timestamp, switch_event->prev_state); + add_sched_event_sleep(sched, prev, timestamp, prev_state); return 0; } -static int -replay_fork_event(struct perf_sched *sched, struct trace_fork_event *fork_event, - struct perf_evsel *evsel) +static int replay_fork_event(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample) { + const char *parent_comm = perf_evsel__strval(evsel, sample, "parent_comm"), + *child_comm = perf_evsel__strval(evsel, sample, "child_comm"); + const u32 parent_pid = perf_evsel__intval(evsel, sample, "parent_pid"), + child_pid = perf_evsel__intval(evsel, sample, "child_pid"); + if (verbose) { printf("sched_fork event %p\n", evsel); - printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid); - printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid); + printf("... parent: %s/%d\n", parent_comm, parent_pid); + printf("... 
child: %s/%d\n", child_comm, child_pid); } - register_pid(sched, fork_event->parent_pid, fork_event->parent_comm); - register_pid(sched, fork_event->child_pid, fork_event->child_comm); + + register_pid(sched, parent_pid, parent_comm); + register_pid(sched, child_pid, child_comm); return 0; } @@ -870,18 +826,18 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) } static int latency_fork_event(struct perf_sched *sched __maybe_unused, - struct trace_fork_event *fork_event __maybe_unused, - struct perf_evsel *evsel __maybe_unused) + struct perf_evsel *evsel __maybe_unused, + struct perf_sample *sample __maybe_unused) { /* should insert the newcomer */ return 0; } -static char sched_out_state(struct trace_switch_event *switch_event) +static char sched_out_state(u64 prev_state) { const char *str = TASK_STATE_TO_CHAR_STR; - return str[switch_event->prev_state]; + return str[prev_state]; } static int @@ -951,13 +907,14 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) atoms->nb_atoms++; } -static int -latency_switch_event(struct perf_sched *sched, - struct trace_switch_event *switch_event, - struct machine *machine, - struct perf_evsel *evsel __maybe_unused, - struct perf_sample *sample) +static int latency_switch_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { + const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), + next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); struct work_atoms *out_events, *in_events; struct thread *sched_out, *sched_in; u64 timestamp0, timestamp = sample->time; @@ -978,8 +935,8 @@ latency_switch_event(struct perf_sched *sched, return -1; } - sched_out = machine__findnew_thread(machine, switch_event->prev_pid); - sched_in = machine__findnew_thread(machine, switch_event->next_pid); + sched_out = machine__findnew_thread(machine, prev_pid); + sched_in = machine__findnew_thread(machine, next_pid); out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); if (!out_events) { @@ -991,7 +948,7 @@ latency_switch_event(struct perf_sched *sched, return -1; } } - if (add_sched_out_event(out_events, sched_out_state(switch_event), timestamp)) + if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp)) return -1; in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); @@ -1015,12 +972,14 @@ latency_switch_event(struct perf_sched *sched, return 0; } -static int -latency_runtime_event(struct perf_sched *sched, - struct trace_runtime_event *runtime_event, - struct machine *machine, struct perf_sample *sample) +static int latency_runtime_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { - struct thread *thread = machine__findnew_thread(machine, runtime_event->pid); + const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); + struct thread *thread = machine__findnew_thread(machine, pid); struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); u64 timestamp = sample->time; int cpu = sample->cpu; @@ -1038,27 +997,27 @@ latency_runtime_event(struct perf_sched *sched, return -1; } - add_runtime_event(atoms, runtime_event->runtime, timestamp); + add_runtime_event(atoms, runtime, timestamp); return 0; } -static int -latency_wakeup_event(struct perf_sched 
*sched, - struct trace_wakeup_event *wakeup_event, - struct machine *machine, - struct perf_evsel *evsel __maybe_unused, - struct perf_sample *sample) +static int latency_wakeup_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { + const u32 pid = perf_evsel__intval(evsel, sample, "pid"), + success = perf_evsel__intval(evsel, sample, "success"); struct work_atoms *atoms; struct work_atom *atom; struct thread *wakee; u64 timestamp = sample->time; /* Note for later, it may be interesting to observe the failing cases */ - if (!wakeup_event->success) + if (!success) return 0; - wakee = machine__findnew_thread(machine, wakeup_event->pid); + wakee = machine__findnew_thread(machine, pid); atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); if (!atoms) { if (thread_atoms_insert(sched, wakee)) @@ -1095,11 +1054,12 @@ latency_wakeup_event(struct perf_sched *sched, return 0; } -static int -latency_migrate_task_event(struct perf_sched *sched, - struct trace_migrate_task_event *migrate_task_event, - struct machine *machine, struct perf_sample *sample) +static int latency_migrate_task_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { + const u32 pid = perf_evsel__intval(evsel, sample, "pid"); u64 timestamp = sample->time; struct work_atoms *atoms; struct work_atom *atom; @@ -1111,7 +1071,7 @@ latency_migrate_task_event(struct perf_sched *sched, if (sched->profile_cpu == -1) return 0; - migrant = machine__findnew_thread(machine, migrate_task_event->pid); + migrant = machine__findnew_thread(machine, pid); atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); if (!atoms) { if (thread_atoms_insert(sched, migrant)) @@ -1296,28 +1256,17 @@ static int process_sched_wakeup_event(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - if (sched->tp_handler->wakeup_event) { - struct trace_wakeup_event event = { - .comm = perf_evsel__strval(evsel, sample, "comm"), - .pid = perf_evsel__intval(evsel, sample, "pid"), - .prio = perf_evsel__intval(evsel, sample, "prio"), - .success = perf_evsel__intval(evsel, sample, "success"), - .cpu = perf_evsel__intval(evsel, sample, "cpu"), - }; - - return sched->tp_handler->wakeup_event(sched, &event, machine, evsel, sample); - } + if (sched->tp_handler->wakeup_event) + return sched->tp_handler->wakeup_event(sched, evsel, sample, machine); return 0; } -static int -map_switch_event(struct perf_sched *sched, - struct trace_switch_event *switch_event, - struct machine *machine, - struct perf_evsel *evsel __maybe_unused, - struct perf_sample *sample) +static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample, struct machine *machine) { + const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), + next_pid = perf_evsel__intval(evsel, sample, "next_pid"); struct thread *sched_out __maybe_unused, *sched_in; int new_shortname; u64 timestamp0, timestamp = sample->time; @@ -1341,8 +1290,8 @@ map_switch_event(struct perf_sched *sched, return -1; } - sched_out = machine__findnew_thread(machine, switch_event->prev_pid); - sched_in = machine__findnew_thread(machine, switch_event->next_pid); + sched_out = machine__findnew_thread(machine, prev_pid); + sched_in = machine__findnew_thread(machine, next_pid); sched->curr_thread[this_cpu] = sched_in; @@ -1411,19 +1360,8 @@ static int process_sched_switch_event(struct 
perf_tool *tool, sched->nr_context_switch_bugs++; } - if (sched->tp_handler->switch_event) { - struct trace_switch_event event = { - .prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"), - .prev_pid = prev_pid, - .prev_prio = perf_evsel__intval(evsel, sample, "prev_prio"), - .prev_state = perf_evsel__intval(evsel, sample, "prev_state"), - .next_comm = perf_evsel__strval(evsel, sample, "next_comm"), - .next_pid = next_pid, - .next_prio = perf_evsel__intval(evsel, sample, "next_prio"), - }; - - err = sched->tp_handler->switch_event(sched, &event, machine, evsel, sample); - } + if (sched->tp_handler->switch_event) + err = sched->tp_handler->switch_event(sched, evsel, sample, machine); sched->curr_pid[this_cpu] = next_pid; return err; @@ -1436,15 +1374,8 @@ static int process_sched_runtime_event(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - if (sched->tp_handler->runtime_event) { - struct trace_runtime_event event = { - .comm = perf_evsel__strval(evsel, sample, "comm"), - .pid = perf_evsel__intval(evsel, sample, "pid"), - .runtime = perf_evsel__intval(evsel, sample, "runtime"), - .vruntime = perf_evsel__intval(evsel, sample, "vruntime"), - }; - return sched->tp_handler->runtime_event(sched, &event, machine, sample); - } + if (sched->tp_handler->runtime_event) + return sched->tp_handler->runtime_event(sched, evsel, sample, machine); return 0; } @@ -1456,15 +1387,8 @@ static int process_sched_fork_event(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - if (sched->tp_handler->fork_event) { - struct trace_fork_event event = { - .parent_comm = perf_evsel__strval(evsel, sample, "parent_comm"), - .child_comm = perf_evsel__strval(evsel, sample, "child_comm"), - .parent_pid = perf_evsel__intval(evsel, sample, "parent_pid"), - .child_pid = perf_evsel__intval(evsel, sample, "child_pid"), - }; - return sched->tp_handler->fork_event(sched, &event, evsel); - } + if (sched->tp_handler->fork_event) + return sched->tp_handler->fork_event(sched, evsel, sample); return 0; } @@ -1485,15 +1409,8 @@ static int process_sched_migrate_task_event(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - if (sched->tp_handler->migrate_task_event) { - struct trace_migrate_task_event event = { - .comm = perf_evsel__strval(evsel, sample, "comm"), - .pid = perf_evsel__intval(evsel, sample, "pid"), - .prio = perf_evsel__intval(evsel, sample, "prio"), - .cpu = perf_evsel__intval(evsel, sample, "cpu"), - }; - return sched->tp_handler->migrate_task_event(sched, &event, machine, sample); - } + if (sched->tp_handler->migrate_task_event) + return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine); return 0; } From 76bab1b78ab6f25d5f74165f94526c25fc93d984 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Mon, 27 Aug 2012 15:13:45 +0800 Subject: [PATCH 195/282] tracing: Skip printing "OK" if failed to disable event No acutal case found. But logically, we should skip "OK" in case any error met. 
Link: http://lkml.kernel.org/r/1346051625-25231-1-git-send-email-yuanhan.liu@linux.intel.com Signed-off-by: Yuanhan Liu Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6825d833a25..bbb0e63d78e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1646,9 +1646,11 @@ static __init void event_trace_self_tests(void) event_test_stuff(); ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0); - if (WARN_ON_ONCE(ret)) + if (WARN_ON_ONCE(ret)) { pr_warning("error disabling system %s\n", system->name); + continue; + } pr_cont("OK\n"); } From ea632e9f12033346cc68247faa3b924d54936b8b Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 2 Sep 2012 19:45:14 -0700 Subject: [PATCH 196/282] trace: Stop compiling in trace_clock unconditionally Commit 56449f437 "tracing: make the trace clocks available generally", in April 2009, made trace_clock available unconditionally, since CONFIG_X86_DS used it too. Commit faa4602e47 "x86, perf, bts, mm: Delete the never used BTS-ptrace code", in March 2010, removed CONFIG_X86_DS, and now only CONFIG_RING_BUFFER (split out from CONFIG_TRACING for general use) has a dependency on trace_clock. So, only compile in trace_clock with CONFIG_RING_BUFFER or CONFIG_TRACING enabled. Link: http://lkml.kernel.org/r/20120903024513.GA19583@leaf Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: "Eric W. Biederman" Cc: Al Viro Signed-off-by: Josh Triplett Signed-off-by: Steven Rostedt --- kernel/Makefile | 2 +- kernel/trace/Kconfig | 5 +++++ kernel/trace/Makefile | 6 +----- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/Makefile b/kernel/Makefile index c0cc67ad764..29d993be7db 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -98,7 +98,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ -obj-$(CONFIG_X86_DS) += trace/ +obj-$(CONFIG_TRACE_CLOCK) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9301a0e35e0..4cea4f41c1d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -62,8 +62,12 @@ config HAVE_C_RECORDMCOUNT config TRACER_MAX_TRACE bool +config TRACE_CLOCK + bool + config RING_BUFFER bool + select TRACE_CLOCK config FTRACE_NMI_ENTER bool @@ -114,6 +118,7 @@ config TRACING select NOP_TRACER select BINARY_PRINTF select EVENT_TRACING + select TRACE_CLOCK config GENERIC_TRACER bool diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 837090808aa..d7e2068e4b7 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -19,11 +19,7 @@ endif CFLAGS_trace_events_filter.o := -I$(src) -# -# Make the trace clocks available generally: it's infrastructure -# relied on by ptrace for example: -# -obj-y += trace_clock.o +obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o From a5e37863ab31d78faddff15675c89979792bc0bd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Sep 2012 23:31:00 +0900 Subject: [PATCH 197/282] ftrace/x86: Adjust x86 regs.ip as like as x86-64 Adjust x86 regs.ip to ip + MCOUNT_INSN_SIZE as like as on x86-64. This helps us to consolidate codes which use regs->ip on both of x86/x86-64. 
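For illustration only (this handler is made up for the example and is not part of the patch): once regs->ip is adjusted the same way on both architectures, a callback that receives pt_regs can recover the probed function address identically on x86 and x86-64:

	static void sample_handler(unsigned long ip, unsigned long parent_ip,
				   struct ftrace_ops *ops, struct pt_regs *regs)
	{
		/* regs->ip is now ip + MCOUNT_INSN_SIZE on x86 and x86-64 alike */
		unsigned long func = regs->ip - MCOUNT_INSN_SIZE;

		WARN_ON(func != ip);	/* holds on both architectures after this change */
	}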
Link: http://lkml.kernel.org/r/20120905143100.10329.60109.stgit@localhost.localdomain Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_32.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 061ac17ee97..f438a44bf8f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1148,7 +1148,6 @@ ENTRY(ftrace_regs_caller) * ip location, and move flags into the return ip location. */ pushl 4(%esp) /* save return ip into ip slot */ - subl $MCOUNT_INSN_SIZE, (%esp) /* Adjust ip */ pushl $0 /* Load 0 into orig_ax */ pushl %gs @@ -1169,6 +1168,7 @@ ENTRY(ftrace_regs_caller) movl $__KERNEL_CS,13*4(%esp) movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ pushl %esp /* Save pt_regs as 4th parameter */ @@ -1180,7 +1180,6 @@ GLOBAL(ftrace_regs_call) movl 14*4(%esp), %eax /* Move flags back into cs */ movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ movl 12*4(%esp), %eax /* Get return ip from regs->ip */ - addl $MCOUNT_INSN_SIZE, %eax movl %eax, 14*4(%esp) /* Put return ip back for ret */ popl %ebx From 4b036d54bf849a75d0103b33d92a53f89ecb9315 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Sep 2012 23:31:12 +0900 Subject: [PATCH 198/282] kprobes/x86: Fix kprobes to collectly handle IP on ftrace Current kprobe_ftrace_handler expects regs->ip == ip, but it is incorrect (originally on x86-64). Actually, ftrace handler sets regs->ip = ip + MCOUNT_INSN_SIZE. kprobe_ftrace_handler must take care for that. Link: http://lkml.kernel.org/r/20120905143112.10329.72069.stgit@localhost.localdomain Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/kernel/kprobes.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 47ae1023a93..f49f60cca40 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1072,7 +1072,8 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (kprobe_running()) { kprobes_inc_nmissed_count(p); } else { - regs->ip += sizeof(kprobe_opcode_t); + /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ + regs->ip = ip + sizeof(kprobe_opcode_t); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -1080,13 +1081,15 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, p->pre_handler(p, regs); if (unlikely(p->post_handler)) { - /* Emulate singlestep as if there is a 5byte nop */ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ regs->ip = ip + MCOUNT_INSN_SIZE; kcb->kprobe_status = KPROBE_HIT_SSDONE; p->post_handler(p, regs, 0); } __this_cpu_write(current_kprobe, NULL); - regs->ip = ip; /* Recover for next callback */ } end: local_irq_restore(flags); From 47d5a5f88b9d25d6464c9b60c28f391e84e3ed65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Sep 2012 23:31:18 +0900 Subject: [PATCH 199/282] ftrace/x86-64: Allow to change RIP in handlers Allow ftrace handlers to change RIP register (regs->ip) in handlers. 
This will allow handlers to call another function instead of original function. Link: http://lkml.kernel.org/r/20120905143118.10329.5078.stgit@localhost.localdomain Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_64.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index ed767b747fe..e9cc2b32bdf 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -165,6 +165,10 @@ GLOBAL(ftrace_regs_call) movq EFLAGS(%rsp), %rax movq %rax, SS(%rsp) + /* Handlers can change the RIP */ + movq RIP(%rsp), %rax + movq %rax, SS+8(%rsp) + /* restore the rest of pt_regs */ movq R15(%rsp), %r15 movq R14(%rsp), %r14 From c6aaf4d0bb86e2154ea31a33804cec300611255f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Sep 2012 23:31:25 +0900 Subject: [PATCH 200/282] kprobes/x86: Fix to support jprobes on ftrace-based kprobe Fix kprobes/x86 to support jprobes on ftrace-based kprobes. Because of -mfentry support of ftrace, ftrace is now put on the beginning of function where jprobes are put. Originally ftrace-based kprobes doesn't support jprobe because it will change regs->ip and ftrace doesn't support changing IP and ftrace itself doesn't conflict jprobe. However, ftrace -mfentry support moves mcount call on the top of functions where jprobes are put. This means that jprobe always conflicts with ftrace-based kprobe and fails. This patch allows ftrace-based kprobes to support jprobes by allowing to modify regs->ip and kprobes breakpoint handler also allows to skip singlestepping because there is a ftrace call (not an original instruction). Link: http://lkml.kernel.org/r/20120905143125.10329.90836.stgit@localhost.localdomain Reported-by: Fengguang Wu Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/kernel/kprobes.c | 42 +++++++++++++++++++++++++++------------ kernel/kprobes.c | 3 --- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index f49f60cca40..b7c2a85d192 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -541,6 +541,8 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb); /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. 
@@ -599,6 +601,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } else if (kprobe_running()) { p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { +#ifdef KPROBES_CAN_USE_FTRACE + if (kprobe_ftrace(p)) { + skip_singlestep(p, regs, kcb); + return 1; + } +#endif setup_singlestep(p, regs, kcb, 0); return 1; } @@ -1053,6 +1061,21 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } #ifdef KPROBES_CAN_USE_FTRACE +static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); +} + /* Ftrace callback handler for kprobes */ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) @@ -1077,19 +1100,12 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (p->pre_handler) - p->pre_handler(p, regs); - - if (unlikely(p->post_handler)) { - /* - * Emulate singlestep (and also recover regs->ip) - * as if there is a 5byte nop - */ - regs->ip = ip + MCOUNT_INSN_SIZE; - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - __this_cpu_write(current_kprobe, NULL); + if (!p->pre_handler || !p->pre_handler(p, regs)) + skip_singlestep(p, regs, kcb); + /* + * If pre_handler returns !0, it sets regs->ip and + * resets current kprobe. + */ } end: local_irq_restore(flags); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 35b4315d84f..098f396aa40 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1418,9 +1418,6 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p, /* Given address is not on the instruction boundary */ if ((unsigned long)p->addr != ftrace_addr) return -EILSEQ; - /* break_handler (jprobe) can not work with ftrace */ - if (p->break_handler) - return -EINVAL; p->flags |= KPROBE_FLAG_FTRACE; #else /* !KPROBES_CAN_USE_FTRACE */ return -EINVAL; From 7b0295b3db20a89b3296673871858b9ab6b68404 Mon Sep 17 00:00:00 2001 From: Hyeoncheol Lee Date: Wed, 12 Sep 2012 16:57:45 +0900 Subject: [PATCH 201/282] perf probe: Add union member access support Union members can be accessed with '.' 
or '->' like data structure member access Signed-off-by: Hyunchul Lee Acked-by: Masami Hiramatsu Cc: Masami Hiramatsu Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/CANFS6baeuSBxPGQ8SUZWZErJ2bWs-Nojg+FSo138E1QK8bJJig@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 526ba56e720..1daf5c14e75 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -525,8 +525,10 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, return -ENOENT; } /* Verify it is a data structure */ - if (dwarf_tag(&type) != DW_TAG_structure_type) { - pr_warning("%s is not a data structure.\n", varname); + tag = dwarf_tag(&type); + if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) { + pr_warning("%s is not a data structure nor an union.\n", + varname); return -EINVAL; } @@ -539,8 +541,9 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, *ref_ptr = ref; } else { /* Verify it is a data structure */ - if (tag != DW_TAG_structure_type) { - pr_warning("%s is not a data structure.\n", varname); + if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) { + pr_warning("%s is not a data structure nor an union.\n", + varname); return -EINVAL; } if (field->name[0] == '[') { @@ -567,10 +570,15 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, } /* Get the offset of the field */ - ret = die_get_data_member_location(die_mem, &offs); - if (ret < 0) { - pr_warning("Failed to get the offset of %s.\n", field->name); - return ret; + if (tag == DW_TAG_union_type) { + offs = 0; + } else { + ret = die_get_data_member_location(die_mem, &offs); + if (ret < 0) { + pr_warning("Failed to get the offset of %s.\n", + field->name); + return ret; + } } ref->offset += (long)offs; From bb77ac3a36f896bc6acdfcafdebfa1434f775ed4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 12 Sep 2012 11:11:05 +0900 Subject: [PATCH 202/282] perf test: Fixup for the die() removal The commit 32c7f7383a09 ("perf test: Remove die() calls") replaced die() call to pr_debug + return -1, but it should be pr_err otherwise it'll not show up unless -v option is given. Fix it. 
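For readers not familiar with the perf logging helpers, the distinction can be sketched roughly as follows (a simplification, not the actual perf definitions):

	/* always emitted */
	#define pr_err(fmt, ...)    fprintf(stderr, fmt, ##__VA_ARGS__)

	/* only emitted when the user passed -v */
	#define pr_debug(fmt, ...)  do {				\
		if (verbose)						\
			fprintf(stderr, fmt, ##__VA_ARGS__);		\
	} while (0)

so an error reported via pr_debug() is silently dropped in a default (non-verbose) run.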
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347415866-303-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index d33143efefc..4aed1553db5 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -1026,15 +1026,15 @@ static int __test__rdpmc(void) fd = sys_perf_event_open(&attr, 0, -1, -1, 0); if (fd < 0) { - pr_debug("Error: sys_perf_event_open() syscall returned " - "with %d (%s)\n", fd, strerror(errno)); + pr_err("Error: sys_perf_event_open() syscall returned " + "with %d (%s)\n", fd, strerror(errno)); return -1; } addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); if (addr == (void *)(-1)) { - pr_debug("Error: mmap() syscall returned with (%s)\n", - strerror(errno)); + pr_err("Error: mmap() syscall returned with (%s)\n", + strerror(errno)); goto out_close; } From 60b7d14af46ef07778e89556429ec9ab5a7fad0b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 12 Sep 2012 11:11:06 +0900 Subject: [PATCH 203/282] perf sched: Fixup for the die() removal The commit a116e05dcf61 ("perf sched: Remove die() calls") replaced die() call to pr_debug + return -1, but it should be pr_err otherwise it'll not show up unless -v option is given. Fix it. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347415866-303-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index af305f57bd2..9b9e32eaa80 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -438,8 +438,8 @@ static int self_open_counters(void) fd = sys_perf_event_open(&attr, 0, -1, -1, 0); if (fd < 0) - pr_debug("Error: sys_perf_event_open() syscall returned" - "with %d (%s)\n", fd, strerror(errno)); + pr_err("Error: sys_perf_event_open() syscall returned " + "with %d (%s)\n", fd, strerror(errno)); return fd; } @@ -700,7 +700,7 @@ static int replay_switch_event(struct perf_sched *sched, delta = 0; if (delta < 0) { - pr_debug("hm, delta: %" PRIu64 " < 0 ?\n", delta); + pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); return -1; } @@ -990,7 +990,7 @@ static int latency_runtime_event(struct perf_sched *sched, return -1; atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); if (!atoms) { - pr_debug("in-event: Internal tree error"); + pr_err("in-event: Internal tree error"); return -1; } if (add_sched_out_event(atoms, 'R', timestamp)) @@ -1024,7 +1024,7 @@ static int latency_wakeup_event(struct perf_sched *sched, return -1; atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); if (!atoms) { - pr_debug("wakeup-event: Internal tree error"); + pr_err("wakeup-event: Internal tree error"); return -1; } if (add_sched_out_event(atoms, 'S', timestamp)) @@ -1079,7 +1079,7 @@ static int latency_migrate_task_event(struct perf_sched *sched, register_pid(sched, migrant->pid, migrant->comm); atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); if (!atoms) { - pr_debug("migration-event: Internal tree error"); + pr_err("migration-event: Internal tree error"); return -1; } if (add_sched_out_event(atoms, 'R', timestamp)) @@ -1286,7 +1286,7 @@ static int 
map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, delta = 0; if (delta < 0) { - pr_debug("hm, delta: %" PRIu64 " < 0 ?\n", delta); + pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); return -1; } From c055875b7091a301e2b1107ef8914b7d24e12fb0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Sep 2012 14:29:40 -0300 Subject: [PATCH 204/282] tools lib traceevent: Define _GNU_SOURCE in Makefile For the reasons stated on: commit 0a84f00 Author: David Daney perf tools: Fix broken build by defining _GNU_SOURCE in Makefile Acked-by: Steven Rostedt Cc: David Ahern Cc: David Daney Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-e2nofbmj4uf0ykgsytxvt9pu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 2 +- tools/lib/traceevent/event-parse.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 14131cb0522..04d959fa022 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -129,7 +129,7 @@ CFLAGS ?= -g -Wall # Append required CFLAGS override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) -override CFLAGS += $(udis86-flags) +override CFLAGS += $(udis86-flags) -D_GNU_SOURCE ifeq ($(VERBOSE),1) Q = diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 2c54cdd8ae1..77ebeb8266f 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -24,7 +24,6 @@ * Frederic Weisbecker gave his permission to relicense the code to * the Lesser General Public License. */ -#define _GNU_SOURCE #include #include #include From 721b3112f5882a6de7b86b9bef5813c80d1223de Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 12 Sep 2012 15:35:05 +0900 Subject: [PATCH 205/282] perf hists browser: Fix output for 100.00% Current hpp format functions assume that the output will fit to 6 character including % sign (XX.YY%) so used "%5.2f%%" as a format string. However it might be the case if collapsing resulted in a single entry which has 100.00% (7 character) of period. In this case the output will be shifted by 1 character. 
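The overflow is easy to reproduce with a stand-alone printf (illustration only, not code from this patch):

	printf("|%5.2f%%|\n", 99.99);	/* |99.99%|  - 6 columns, fits the old format */
	printf("|%5.2f%%|\n", 100.0);	/* |100.00%| - 7 columns, overflows by one    */
	printf("|%6.2f%%|\n", 100.0);	/* |100.00%| - 7 columns, fits the new format */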
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347431706-7839-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/ui/gtk/browser.c | 2 +- tools/perf/ui/hist.c | 28 ++++++++++++++-------------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 5a5739bbe6a..11783170ac6 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -571,7 +571,7 @@ static int hist_browser__hpp_color_ ## _name(struct perf_hpp *hpp, \ { \ double percent = 100.0 * he->_field / hpp->total_period; \ *(double *)hpp->ptr = percent; \ - return scnprintf(hpp->buf, hpp->size, "%5.2f%%", percent); \ + return scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent); \ } HPP__COLOR_FN(overhead, period) diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index 55acba6e0df..7ff99ec1d95 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -56,7 +56,7 @@ static int perf_gtk__hpp_color_ ## _name(struct perf_hpp *hpp, \ markup = perf_gtk__get_percent_color(percent); \ if (markup) \ ret += scnprintf(hpp->buf, hpp->size, "%s", markup); \ - ret += scnprintf(hpp->buf + ret, hpp->size - ret, "%5.2f%%", percent); \ + ret += scnprintf(hpp->buf + ret, hpp->size - ret, "%6.2f%%", percent); \ if (markup) \ ret += scnprintf(hpp->buf + ret, hpp->size - ret, ""); \ \ diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 407e855cccb..e3f8cd46e7d 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -33,13 +33,13 @@ static int hpp__color_overhead(struct perf_hpp *hpp, struct hist_entry *he) percent = 0.0; } - return percent_color_snprintf(hpp->buf, hpp->size, " %5.2f%%", percent); + return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent); } static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period / hpp->total_period; - const char *fmt = symbol_conf.field_sep ? "%.2f" : " %5.2f%%"; + const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%"; if (hpp->ptr) { struct hists *old_hists = hpp->ptr; @@ -57,52 +57,52 @@ static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he) static int hpp__header_overhead_sys(struct perf_hpp *hpp) { - const char *fmt = symbol_conf.field_sep ? "%s" : "%6s"; + const char *fmt = symbol_conf.field_sep ? "%s" : "%7s"; return scnprintf(hpp->buf, hpp->size, fmt, "sys"); } static int hpp__width_overhead_sys(struct perf_hpp *hpp __maybe_unused) { - return 6; + return 7; } static int hpp__color_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_sys / hpp->total_period; - return percent_color_snprintf(hpp->buf, hpp->size, "%5.2f%%", percent); + return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent); } static int hpp__entry_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_sys / hpp->total_period; - const char *fmt = symbol_conf.field_sep ? "%.2f" : "%5.2f%%"; + const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%"; return scnprintf(hpp->buf, hpp->size, fmt, percent); } static int hpp__header_overhead_us(struct perf_hpp *hpp) { - const char *fmt = symbol_conf.field_sep ? "%s" : "%6s"; + const char *fmt = symbol_conf.field_sep ? 
"%s" : "%7s"; return scnprintf(hpp->buf, hpp->size, fmt, "user"); } static int hpp__width_overhead_us(struct perf_hpp *hpp __maybe_unused) { - return 6; + return 7; } static int hpp__color_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_us / hpp->total_period; - return percent_color_snprintf(hpp->buf, hpp->size, "%5.2f%%", percent); + return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent); } static int hpp__entry_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_us / hpp->total_period; - const char *fmt = symbol_conf.field_sep ? "%.2f" : "%5.2f%%"; + const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%"; return scnprintf(hpp->buf, hpp->size, fmt, percent); } @@ -121,14 +121,14 @@ static int hpp__color_overhead_guest_sys(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_guest_sys / hpp->total_period; - return percent_color_snprintf(hpp->buf, hpp->size, " %5.2f%% ", percent); + return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent); } static int hpp__entry_overhead_guest_sys(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_guest_sys / hpp->total_period; - const char *fmt = symbol_conf.field_sep ? "%.2f" : " %5.2f%% "; + const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% "; return scnprintf(hpp->buf, hpp->size, fmt, percent); } @@ -147,14 +147,14 @@ static int hpp__color_overhead_guest_us(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_guest_us / hpp->total_period; - return percent_color_snprintf(hpp->buf, hpp->size, " %5.2f%% ", percent); + return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent); } static int hpp__entry_overhead_guest_us(struct perf_hpp *hpp, struct hist_entry *he) { double percent = 100.0 * he->period_guest_us / hpp->total_period; - const char *fmt = symbol_conf.field_sep ? "%.2f" : " %5.2f%% "; + const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% "; return scnprintf(hpp->buf, hpp->size, fmt, percent); } From 67d2591656df7a76d4cabca04f1cbe1c7d1c9b3a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 12 Sep 2012 15:35:06 +0900 Subject: [PATCH 206/282] perf hists browser: Fix first column printing As a side effect of commit f5951d56a2ab ("perf hists browser: Use perf_hpp__format functions") perf report TUI got a problem of not refreshing the first character. Since the previous patch restores the column width of "overhead" to 7 we can start at column 0 now. 
Reported-by: Ingo Molnar Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347431706-7839-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 11783170ac6..a21f40bebba 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -605,7 +605,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, char s[256]; double percent; int i, printed = 0; - int width = browser->b.width - 1; + int width = browser->b.width; char folded_sign = ' '; bool current_entry = ui_browser__is_current_entry(&browser->b, row); off_t row_offset = entry->row_offset; @@ -627,7 +627,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, .total_period = browser->hists->stats.total_period, }; - ui_browser__gotorc(&browser->b, row, 1); + ui_browser__gotorc(&browser->b, row, 0); for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { if (!perf_hpp__format[i].cond) From 6d1d8dfa8b65831cfa9a528e3d17efa7e7f4226c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 30 Aug 2012 19:26:22 +0200 Subject: [PATCH 207/282] uprobes: Don't put NULL pointer in uprobe_register() alloc_uprobe() might return a NULL pointer, put_uprobe() can't deal with this. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1666632e6ed..336f06948de 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -897,7 +897,8 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * } mutex_unlock(uprobes_hash(inode)); - put_uprobe(uprobe); + if (uprobe) + put_uprobe(uprobe); return ret; } From 6f47caa0e1e4887aa2ddca8388d058d35725d815 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 18 Aug 2012 17:01:57 +0200 Subject: [PATCH 208/282] uprobes: uprobes_treelock should not disable irqs Nobody plays with uprobes_tree/uprobes_treelock in interrupt context, no need to disable irqs. 
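The rule applied here is the usual locking one (sketch only, not code from this patch): the _irqsave form is needed only when some path can acquire the lock from hard interrupt context; when every user runs in process context, the plain form is sufficient:

	spin_lock(&uprobes_treelock);			/* enough: no irq-context takers       */
	spin_lock_irqsave(&uprobes_treelock, flags);	/* only if an irq handler could race   */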
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 336f06948de..ba9f1e7c606 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -411,11 +411,10 @@ static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset) static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) { struct uprobe *uprobe; - unsigned long flags; - spin_lock_irqsave(&uprobes_treelock, flags); + spin_lock(&uprobes_treelock); uprobe = __find_uprobe(inode, offset); - spin_unlock_irqrestore(&uprobes_treelock, flags); + spin_unlock(&uprobes_treelock); return uprobe; } @@ -462,12 +461,11 @@ static struct uprobe *__insert_uprobe(struct uprobe *uprobe) */ static struct uprobe *insert_uprobe(struct uprobe *uprobe) { - unsigned long flags; struct uprobe *u; - spin_lock_irqsave(&uprobes_treelock, flags); + spin_lock(&uprobes_treelock); u = __insert_uprobe(uprobe); - spin_unlock_irqrestore(&uprobes_treelock, flags); + spin_unlock(&uprobes_treelock); /* For now assume that the instruction need not be single-stepped */ uprobe->flags |= UPROBE_SKIP_SSTEP; @@ -705,11 +703,9 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad */ static void delete_uprobe(struct uprobe *uprobe) { - unsigned long flags; - - spin_lock_irqsave(&uprobes_treelock, flags); + spin_lock(&uprobes_treelock); rb_erase(&uprobe->rb_node, &uprobes_tree); - spin_unlock_irqrestore(&uprobes_treelock, flags); + spin_unlock(&uprobes_treelock); iput(uprobe->inode); put_uprobe(uprobe); atomic_dec(&uprobe_events); @@ -968,7 +964,6 @@ static void build_probe_list(struct inode *inode, struct list_head *head) { loff_t min, max; - unsigned long flags; struct rb_node *n, *t; struct uprobe *u; @@ -976,7 +971,7 @@ static void build_probe_list(struct inode *inode, min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; - spin_lock_irqsave(&uprobes_treelock, flags); + spin_lock(&uprobes_treelock); n = find_node_in_range(inode, min, max); if (n) { for (t = n; t; t = rb_prev(t)) { @@ -994,7 +989,7 @@ static void build_probe_list(struct inode *inode, atomic_inc(&u->ref); } } - spin_unlock_irqrestore(&uprobes_treelock, flags); + spin_unlock(&uprobes_treelock); } /* From 9f68f672c47b9bd4cfe0a667ecb0b1382c61e2de Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 19 Aug 2012 16:15:09 +0200 Subject: [PATCH 209/282] uprobes: Introduce MMF_RECALC_UPROBES Add the new MMF_RECALC_UPROBES flag, it means that MMF_HAS_UPROBES can be false positive after remove_breakpoint() or uprobe_munmap(). It is also set by uprobe_dup_mmap(), this is not optimal but simple. We could add the new hook, uprobe_dup_vma(), to set MMF_HAS_UPROBES only if the new mm actually has uprobes, but I don't think this makes sense. The next patch will use this flag to clear MMF_HAS_UPROBES. 
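Roughly, the intended interplay of the two flags looks like this (a sketch of what the next patch adds, not code introduced here):

	if (test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
		mmf_recalc_uprobes(mm);		/* may clear MMF_HAS_UPROBES */

	/* only after that recheck is MMF_HAS_UPROBES reliable again */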
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/sched.h | 3 ++- kernel/events/uprobes.c | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 3667c332e61..255661d4883 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -446,7 +446,8 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ -#define MMF_HAS_UPROBES 19 /* might have uprobes */ +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ba9f1e7c606..9a7f08bab91 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -684,7 +684,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, set_bit(MMF_HAS_UPROBES, &mm->flags); ret = set_swbp(&uprobe->arch, mm, vaddr); - if (ret && first_uprobe) + if (!ret) + clear_bit(MMF_RECALC_UPROBES, &mm->flags); + else if (first_uprobe) clear_bit(MMF_HAS_UPROBES, &mm->flags); return ret; @@ -693,6 +695,11 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, static void remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { + /* can happen if uprobe_register() fails */ + if (!test_bit(MMF_HAS_UPROBES, &mm->flags)) + return; + + set_bit(MMF_RECALC_UPROBES, &mm->flags); set_orig_insn(&uprobe->arch, mm, vaddr); } @@ -1026,6 +1033,25 @@ int uprobe_mmap(struct vm_area_struct *vma) return 0; } +static bool +vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + loff_t min, max; + struct inode *inode; + struct rb_node *n; + + inode = vma->vm_file->f_mapping->host; + + min = vaddr_to_offset(vma, start); + max = min + (end - start) - 1; + + spin_lock(&uprobes_treelock); + n = find_node_in_range(inode, min, max); + spin_unlock(&uprobes_treelock); + + return !!n; +} + /* * Called in context of a munmap of a vma. */ @@ -1037,10 +1063,12 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ return; - if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags)) + if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) || + test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags)) return; - /* TODO: unmapping uprobe(s) will need more work */ + if (vma_has_uprobes(vma, start, end)) + set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags); } /* Slot allocation for XOL */ @@ -1146,8 +1174,11 @@ void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { newmm->uprobes_state.xol_area = NULL; - if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) + if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) { set_bit(MMF_HAS_UPROBES, &newmm->flags); + /* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */ + set_bit(MMF_RECALC_UPROBES, &newmm->flags); + } } /* From 499a4f3ec057a0f79636cc3c1e581bb6e977a30f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 19 Aug 2012 17:41:34 +0200 Subject: [PATCH 210/282] uprobes: Teach find_active_uprobe() to clear MMF_HAS_UPROBES The wrong MMF_HAS_UPROBES doesn't really hurt, just it triggers the "slow" and unnecessary handle_swbp() path if the task hits the non-uprobe breakpoint. 
So this patch changes find_active_uprobe() to check every valid vma and clear MMF_HAS_UPROBES if no uprobes were found. This is adds the slow O(n) path, but it is only called in unlikely case when the task hits the normal breakpoint first time after uprobe_unregister(). Note the "not strictly accurate" comment in mmf_recalc_uprobes(). We can fix this, we only need to teach vma_has_uprobes() to return a bit more more info, but I am not sure this worth the trouble. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 9a7f08bab91..e4a906ce2e1 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1396,6 +1396,25 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) return false; } +static void mmf_recalc_uprobes(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (!valid_vma(vma, false)) + continue; + /* + * This is not strictly accurate, we can race with + * uprobe_unregister() and see the already removed + * uprobe if delete_uprobe() was not yet called. + */ + if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) + return; + } + + clear_bit(MMF_HAS_UPROBES, &mm->flags); +} + static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) { struct mm_struct *mm = current->mm; @@ -1417,6 +1436,9 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) } else { *is_swbp = -EFAULT; } + + if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags)) + mmf_recalc_uprobes(mm); up_read(&mm->mmap_sem); return uprobe; From 9d778782266f95e5c6ec43ed8195ba331c821018 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 7 Aug 2012 18:12:28 +0200 Subject: [PATCH 211/282] uprobes: Introduce arch_uprobe_enable/disable_step() As Oleg pointed out in [0] uprobe should not use the ptrace interface for enabling/disabling single stepping. [0] http://lkml.kernel.org/r/20120730141638.GA5306@redhat.com Add the new "__weak arch" helpers which simply call user_*_single_step() as a preparation. This is only needed to not break the powerpc port, we will fold this logic into arch_uprobe_pre/post_xol() hooks later. We should also change handle_singlestep(), _disable_step(&uprobe->arch) should be called before put_uprobe(). 
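The __weak definitions are only the generic fallback; an architecture that needs different behaviour overrides them with a strong symbol, e.g. (sketch only, the real x86 version comes later in this series):

	/* arch/x86/kernel/uprobes.c */
	void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
	{
		/* arch-specific single-stepping setup instead of user_enable_single_step() */
	}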
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 2 ++ kernel/events/uprobes.c | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 6d4fe79a1a6..e6f0331e3d4 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -112,6 +112,8 @@ extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern void __weak arch_uprobe_enable_step(struct arch_uprobe *arch); +extern void __weak arch_uprobe_disable_step(struct arch_uprobe *arch); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index e4a906ce2e1..912ef48d28a 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1444,6 +1444,16 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) return uprobe; } +void __weak arch_uprobe_enable_step(struct arch_uprobe *arch) +{ + user_enable_single_step(current); +} + +void __weak arch_uprobe_disable_step(struct arch_uprobe *arch) +{ + user_disable_single_step(current); +} + /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. @@ -1490,7 +1500,7 @@ static void handle_swbp(struct pt_regs *regs) utask->state = UTASK_SSTEP; if (!pre_ssout(uprobe, regs, bp_vaddr)) { - user_enable_single_step(current); + arch_uprobe_enable_step(&uprobe->arch); return; } @@ -1526,10 +1536,10 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) else WARN_ON_ONCE(1); + arch_uprobe_disable_step(&uprobe->arch); put_uprobe(uprobe); utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; - user_disable_single_step(current); xol_free_insn_slot(current); spin_lock_irq(¤t->sighand->siglock); From bdc1e47217315be14ba04881b0a4c8ecb3ff320c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 20 Aug 2012 12:47:34 +0200 Subject: [PATCH 212/282] uprobes/x86: Implement x86 specific arch_uprobe_*_step The arch specific implementation behaves like user_enable_single_step() except that it does not disable single stepping if it was already enabled by ptrace. This allows the debugger to single step over an uprobe. The state of block stepping is not restored. It makes only sense together with TF and if that was enabled then the debugger is notified. Note: this is still not correct. For example, TIF_SINGLESTEP check is not right, the application itself can set X86_EFLAGS_TF. And otoh we leak TIF_SINGLESTEP (set by enable) if the probed insn is "popf". See the next patches, we need the changes in arch/x86/kernel/step.c first. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/include/asm/uprobes.h | 2 ++ arch/x86/kernel/uprobes.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index f3971bbcd1d..cee58624cb3 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -46,6 +46,8 @@ struct arch_uprobe_task { #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif +#define UPROBE_CLEAR_TF (1 << 0) + unsigned int restore_flags; }; extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 36fd42091fa..309a0e02b12 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -41,6 +41,9 @@ /* Adjust the return address of a call insn */ #define UPROBE_FIX_CALL 0x2 +/* Instruction will modify TF, don't change it */ +#define UPROBE_FIX_SETF 0x4 + #define UPROBE_FIX_RIP_AX 0x8000 #define UPROBE_FIX_RIP_CX 0x4000 @@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) insn_get_opcode(insn); /* should be a nop */ switch (OPCODE1(insn)) { + case 0x9d: + /* popf */ + auprobe->fixups |= UPROBE_FIX_SETF; + break; case 0xc3: /* ret/lret */ case 0xcb: case 0xc2: @@ -673,3 +680,29 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) } return false; } + +void arch_uprobe_enable_step(struct arch_uprobe *auprobe) +{ + struct uprobe_task *utask = current->utask; + struct arch_uprobe_task *autask = &utask->autask; + + autask->restore_flags = 0; + if (!test_tsk_thread_flag(current, TIF_SINGLESTEP) && + !(auprobe->fixups & UPROBE_FIX_SETF)) + autask->restore_flags |= UPROBE_CLEAR_TF; + /* + * The state of TIF_BLOCKSTEP is not saved. With the TF flag set we + * would to examine the opcode and the flags to make it right. Without + * TF block stepping makes no sense. + */ + user_enable_single_step(current); +} + +void arch_uprobe_disable_step(struct arch_uprobe *auprobe) +{ + struct uprobe_task *utask = current->utask; + struct arch_uprobe_task *autask = &utask->autask; + + if (autask->restore_flags & UPROBE_CLEAR_TF) + user_disable_single_step(current); +} From 848e8f5f0ad3169560c516fff6471be65f76e69f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 3 Aug 2012 17:31:46 +0200 Subject: [PATCH 213/282] ptrace/x86: Introduce set_task_blockstep() helper No functional changes, preparation for the next fix and for uprobes single-step fixes. Move the code playing with TIF_BLOCKSTEP/DEBUGCTLMSR_BTF into the new helper, set_task_blockstep(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/step.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c346d116148..7a514986ca0 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -157,6 +157,21 @@ static int enable_single_step(struct task_struct *child) return 1; } +static void set_task_blockstep(struct task_struct *task, bool on) +{ + unsigned long debugctl; + + debugctl = get_debugctlmsr(); + if (on) { + debugctl |= DEBUGCTLMSR_BTF; + set_tsk_thread_flag(task, TIF_BLOCKSTEP); + } else { + debugctl &= ~DEBUGCTLMSR_BTF; + clear_tsk_thread_flag(task, TIF_BLOCKSTEP); + } + update_debugctlmsr(debugctl); +} + /* * Enable single or block step. 
*/ @@ -169,19 +184,10 @@ static void enable_step(struct task_struct *child, bool block) * So no one should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ - if (enable_single_step(child) && block) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - set_tsk_thread_flag(child, TIF_BLOCKSTEP); - } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (enable_single_step(child) && block) + set_task_blockstep(child, true); + else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); } void user_enable_single_step(struct task_struct *child) @@ -199,13 +205,8 @@ void user_disable_single_step(struct task_struct *child) /* * Make sure block stepping (BTF) is disabled. */ - if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); From 95cf00fa5d5e2a200a2c044c84bde8389a237e02 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 11 Aug 2012 18:06:42 +0200 Subject: [PATCH 214/282] ptrace/x86: Partly fix set_task_blockstep()->update_debugctlmsr() logic Afaics the usage of update_debugctlmsr() and TIF_BLOCKSTEP in step.c was always very wrong. 1. update_debugctlmsr() was simply unneeded. The child sleeps TASK_TRACED, __switch_to_xtra(next_p => child) should notice TIF_BLOCKSTEP and set/clear DEBUGCTLMSR_BTF after resume if needed. 2. It is wrong. The state of DEBUGCTLMSR_BTF bit in CPU register should always match the state of current's TIF_BLOCKSTEP bit. 3. Even get_debugctlmsr() + update_debugctlmsr() itself does not look right. Irq can change other bits in MSR_IA32_DEBUGCTLMSR register or the caller can be preempted in between. 4. It is not safe to play with TIF_BLOCKSTEP if task != current. DEBUGCTLMSR_BTF and TIF_BLOCKSTEP should always match each other if the task is running. The tracee is stopped but it can be SIGKILL'ed right before set/clear_tsk_thread_flag(). However, now that uprobes uses user_enable_single_step(current) we can't simply remove update_debugctlmsr(). So this patch adds the additional "task == current" check and disables irqs to avoid the race with interrupts/preemption. Unfortunately this patch doesn't solve the last problem, we need another fix. Probably we should teach ptrace_stop() to set/clear single/block stepping after resume. And afaics there is yet another problem: perf can play with MSR_IA32_DEBUGCTLMSR from nmi, this obviously means that even __switch_to_xtra() has problems. Signed-off-by: Oleg Nesterov --- arch/x86/kernel/step.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 7a514986ca0..f89cdc6ccd5 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -161,6 +161,16 @@ static void set_task_blockstep(struct task_struct *task, bool on) { unsigned long debugctl; + /* + * Ensure irq/preemption can't change debugctl in between. + * Note also that both TIF_BLOCKSTEP and debugctl should + * be changed atomically wrt preemption. 
+ * FIXME: this means that set/clear TIF_BLOCKSTEP is simply + * wrong if task != current, SIGKILL can wakeup the stopped + * tracee and set/clear can play with the running task, this + * can confuse the next __switch_to_xtra(). + */ + local_irq_disable(); debugctl = get_debugctlmsr(); if (on) { debugctl |= DEBUGCTLMSR_BTF; @@ -169,7 +179,9 @@ static void set_task_blockstep(struct task_struct *task, bool on) debugctl &= ~DEBUGCTLMSR_BTF; clear_tsk_thread_flag(task, TIF_BLOCKSTEP); } - update_debugctlmsr(debugctl); + if (task == current) + update_debugctlmsr(debugctl); + local_irq_enable(); } /* From 9bd1190a11c9d2c59d35cb999b8d170ad52aab5f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 3 Sep 2012 15:24:17 +0200 Subject: [PATCH 215/282] uprobes/x86: Do not (ab)use TIF_SINGLESTEP/user_*_single_step() for single-stepping user_enable/disable_single_step() was designed for ptrace, it assumes a single user and does unnecessary and wrong things for uprobes. For example: - arch_uprobe_enable_step() can't trust TIF_SINGLESTEP, an application itself can set X86_EFLAGS_TF which must be preserved after arch_uprobe_disable_step(). - we do not want to set TIF_SINGLESTEP/TIF_FORCED_TF in arch_uprobe_enable_step(), this only makes sense for ptrace. - otoh we leak TIF_SINGLESTEP if arch_uprobe_disable_step() doesn't do user_disable_single_step(), the application will be killed after the next syscall. - arch_uprobe_enable_step() does access_process_vm() we do not need/want. Change arch_uprobe_enable/disable_step() to set/clear X86_EFLAGS_TF directly, this is much simpler and more correct. However, we need to clear TIF_BLOCKSTEP/DEBUGCTLMSR_BTF before executing the probed insn, add set_task_blockstep(false). Note: with or without this patch, there is another (hopefully minor) problem. A probed "pushf" insn can see the wrong X86_EFLAGS_TF set by uprobes. Perhaps we should change _disable to update the stack, or teach arch_uprobe_skip_sstep() to emulate this insn. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/include/asm/processor.h | 2 ++ arch/x86/kernel/step.c | 2 +- arch/x86/kernel/uprobes.c | 32 ++++++++++++++++++-------------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d048cad9bca..433d2e5c98a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -759,6 +759,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } +extern void set_task_blockstep(struct task_struct *task, bool on); + /* * from system description table in BIOS. 
Mostly for MCA use, but * others may find it useful: diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index f89cdc6ccd5..cd3b2438a98 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -157,7 +157,7 @@ static int enable_single_step(struct task_struct *child) return 1; } -static void set_task_blockstep(struct task_struct *task, bool on) +void set_task_blockstep(struct task_struct *task, bool on) { unsigned long debugctl; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 309a0e02b12..3b4aae68efe 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -683,26 +683,30 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) void arch_uprobe_enable_step(struct arch_uprobe *auprobe) { - struct uprobe_task *utask = current->utask; - struct arch_uprobe_task *autask = &utask->autask; + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + struct pt_regs *regs = task_pt_regs(task); autask->restore_flags = 0; - if (!test_tsk_thread_flag(current, TIF_SINGLESTEP) && - !(auprobe->fixups & UPROBE_FIX_SETF)) + if (!(regs->flags & X86_EFLAGS_TF) && + !(auprobe->fixups & UPROBE_FIX_SETF)) autask->restore_flags |= UPROBE_CLEAR_TF; - /* - * The state of TIF_BLOCKSTEP is not saved. With the TF flag set we - * would to examine the opcode and the flags to make it right. Without - * TF block stepping makes no sense. - */ - user_enable_single_step(current); + + regs->flags |= X86_EFLAGS_TF; + if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) + set_task_blockstep(task, false); } void arch_uprobe_disable_step(struct arch_uprobe *auprobe) { - struct uprobe_task *utask = current->utask; - struct arch_uprobe_task *autask = &utask->autask; - + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + struct pt_regs *regs = task_pt_regs(task); + /* + * The state of TIF_BLOCKSTEP was not saved so we can get an extra + * SIGTRAP if we do not clear TF. We need to examine the opcode to + * make it right. + */ if (autask->restore_flags & UPROBE_CLEAR_TF) - user_disable_single_step(current); + regs->flags &= ~X86_EFLAGS_TF; } From 3a4664aa8362d9fa9110828f55afa9f9fcd7e484 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 3 Sep 2012 16:05:10 +0200 Subject: [PATCH 216/282] uprobes/x86: Xol should send SIGTRAP if X86_EFLAGS_TF was set arch_uprobe_disable_step() correctly preserves X86_EFLAGS_TF and returns to user-mode. But this means the application gets SIGTRAP only after the next insn. This means that UPROBE_CLEAR_TF logic is not really right. _enable should only record the state of X86_EFLAGS_TF, and _disable should check it separately from UPROBE_FIX_SETF. Remove arch_uprobe_task->restore_flags, add ->saved_tf instead, and change enable/disable accordingly. This assumes that the probed insn was not trapped, see the next patch. arch_uprobe_skip_sstep() logic has the same problem, change it to check X86_EFLAGS_TF and send SIGTRAP as well. We will cleanup this all after we fold enable/disable_step into pre/post_hol hooks. Note: send_sig(SIGTRAP) is not actually right, we need send_sigtrap(). But this needs more changes, handle_swbp() does the same and this is equally wrong. 
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/include/asm/uprobes.h | 3 +-- arch/x86/kernel/uprobes.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index cee58624cb3..d561ff5a3d4 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -46,8 +46,7 @@ struct arch_uprobe_task { #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif -#define UPROBE_CLEAR_TF (1 << 0) - unsigned int restore_flags; + unsigned int saved_tf; }; extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3b4aae68efe..7e993d1f199 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -653,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * Skip these instructions as per the currently known x86 ISA. * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } */ -bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { int i; @@ -681,16 +681,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) return false; } +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + bool ret = __skip_sstep(auprobe, regs); + if (ret && (regs->flags & X86_EFLAGS_TF)) + send_sig(SIGTRAP, current, 0); + return ret; +} + void arch_uprobe_enable_step(struct arch_uprobe *auprobe) { struct task_struct *task = current; struct arch_uprobe_task *autask = &task->utask->autask; struct pt_regs *regs = task_pt_regs(task); - autask->restore_flags = 0; - if (!(regs->flags & X86_EFLAGS_TF) && - !(auprobe->fixups & UPROBE_FIX_SETF)) - autask->restore_flags |= UPROBE_CLEAR_TF; + autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); regs->flags |= X86_EFLAGS_TF; if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) @@ -707,6 +712,8 @@ void arch_uprobe_disable_step(struct arch_uprobe *auprobe) * SIGTRAP if we do not clear TF. We need to examine the opcode to * make it right. */ - if (autask->restore_flags & UPROBE_CLEAR_TF) + if (autask->saved_tf) + send_sig(SIGTRAP, task, 0); + else if (!(auprobe->fixups & UPROBE_FIX_SETF)) regs->flags &= ~X86_EFLAGS_TF; } From d6a00b35e411519d774d978cdf80e4406d01b36b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 8 Sep 2012 18:38:15 +0200 Subject: [PATCH 217/282] uprobes/x86: Fix arch_uprobe_disable_step() && UTASK_SSTEP_TRAPPED interaction arch_uprobe_disable_step() should also take UTASK_SSTEP_TRAPPED into account. In this case the probed insn was not executed, we need to clear X86_EFLAGS_TF if it was set by us and that is all. Again, this code will look more clean when we move it into arch_uprobe_post_xol() and arch_uprobe_abort_xol(). 
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 7e993d1f199..9538f00827a 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -706,14 +706,20 @@ void arch_uprobe_disable_step(struct arch_uprobe *auprobe) { struct task_struct *task = current; struct arch_uprobe_task *autask = &task->utask->autask; + bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED); struct pt_regs *regs = task_pt_regs(task); /* * The state of TIF_BLOCKSTEP was not saved so we can get an extra * SIGTRAP if we do not clear TF. We need to examine the opcode to * make it right. */ - if (autask->saved_tf) - send_sig(SIGTRAP, task, 0); - else if (!(auprobe->fixups & UPROBE_FIX_SETF)) - regs->flags &= ~X86_EFLAGS_TF; + if (unlikely(trapped)) { + if (!autask->saved_tf) + regs->flags &= ~X86_EFLAGS_TF; + } else { + if (autask->saved_tf) + send_sig(SIGTRAP, task, 0); + else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + regs->flags &= ~X86_EFLAGS_TF; + } } From baedbf02b1912225d60dd7403acb4b4e003088b5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 3 Sep 2012 17:02:16 +0200 Subject: [PATCH 218/282] uprobes: Make arch_uprobe_task->saved_trap_nr "unsigned int" Make arch_uprobe_task->saved_trap_nr "unsigned int" and move it down after ->saved_scratch_register, this changes sizeof() from 24 to 16. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/include/asm/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index d561ff5a3d4..8ff8be7835a 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -42,10 +42,10 @@ struct arch_uprobe { }; struct arch_uprobe_task { - unsigned long saved_trap_nr; #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif + unsigned int saved_trap_nr; unsigned int saved_tf; }; From 1af556406670f2076ea235ba8ba16da13d227e99 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 14 Sep 2012 17:35:27 +0900 Subject: [PATCH 219/282] perf tools: Add sort__has_sym The sort__has_sym variable is for checking whether the sort_list includes 'symbol' as a sort key. It will be used for later patch. 
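For illustration, the next patch in this series uses it to gate the TUI integrated annotation setup, roughly:

	if (use_browser == 1 && sort__has_sym) {
		symbol_conf.priv_size = sizeof(struct annotation);
		report.annotate_init  = symbol__annotate_init;
	}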
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347611729-16994-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 5 +++++ tools/perf/util/sort.h | 1 + 2 files changed, 6 insertions(+) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0981bc7a291..b5b1b921196 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; const char *sort_order = default_sort_order; int sort__need_collapse = 0; int sort__has_parent = 0; +int sort__has_sym = 0; int sort__branch_mode = -1; /* -1 = means not set */ enum sort_type sort__first_dimension; @@ -511,6 +512,10 @@ int sort_dimension__add(const char *tok) return -EINVAL; } sort__has_parent = 1; + } else if (sd->entry == &sort_sym || + sd->entry == &sort_sym_from || + sd->entry == &sort_sym_to) { + sort__has_sym = 1; } if (sd->taken) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e459c981b03..12d634792de 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,6 +31,7 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern int sort__need_collapse; extern int sort__has_parent; +extern int sort__has_sym; extern int sort__branch_mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; From 034a9265c289d5298bac7bfd824d3d5b9ec892b4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 14 Sep 2012 17:35:28 +0900 Subject: [PATCH 220/282] perf report: Enable integrated annotation only if possible The integrated annotation feature is supported only in TUI mode. Also it should be enabled with 'symbol' sort key otherwise resulting hist entry doesn't need to have same symbol as of a sample so that it can fail on hist_entry__inc_addr_samples with -ERANGE. You can easily see it when start perf report TUI without symbol* sort key. This patch fixes the problem. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347611729-16994-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 97b2e6300f4..b6696dd51cb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -93,7 +93,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, struct annotation *notes; err = -ENOMEM; bx = he->branch_info; - if (bx->from.sym && use_browser > 0) { + if (bx->from.sym && use_browser == 1 && sort__has_sym) { notes = symbol__annotation(bx->from.sym); if (!notes->src && symbol__alloc_hist(bx->from.sym) < 0) @@ -107,7 +107,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, goto out; } - if (bx->to.sym && use_browser > 0) { + if (bx->to.sym && use_browser == 1 && sort__has_sym) { notes = symbol__annotation(bx->to.sym); if (!notes->src && symbol__alloc_hist(bx->to.sym) < 0) @@ -162,7 +162,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, * so we don't allocated the extra space needed because the stdio * code will not use it. 
*/ - if (he->ms.sym != NULL && use_browser > 0) { + if (he->ms.sym != NULL && use_browser == 1 && sort__has_sym) { struct annotation *notes = symbol__annotation(he->ms.sym); assert(evsel != NULL); @@ -692,12 +692,14 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) else use_browser = 0; + setup_sorting(report_usage, options); + /* * Only in the newt browser we are doing integrated annotation, * so don't allocate extra space that won't be used in the stdio * implementation. */ - if (use_browser > 0) { + if (use_browser == 1 && sort__has_sym) { symbol_conf.priv_size = sizeof(struct annotation); report.annotate_init = symbol__annotate_init; /* @@ -720,8 +722,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) if (symbol__init() < 0) goto error; - setup_sorting(report_usage, options); - if (parent_pattern != default_parent_pattern) { if (sort_dimension__add("parent") < 0) goto error; From 0007eceaceb11520071d053acfe06ee3326b1d13 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 17 Sep 2012 16:31:14 +0800 Subject: [PATCH 221/282] perf stat: Move stats related code to util/stat.c Then, the code can be shared between kvm events and perf stat. Signed-off-by: Xiao Guangrong [ Dong Hao : rebase it on acme's git tree ] Signed-off-by: Dong Hao Cc: Avi Kivity Cc: David Ahern Cc: Ingo Molnar Cc: kvm@vger.kernel.org Cc: Marcelo Tosatti Cc: Runzhen Wang Cc: Xiao Guangrong --- tools/perf/Makefile | 1 + tools/perf/builtin-stat.c | 56 ++------------------------------------ tools/perf/util/stat.c | 57 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 16 +++++++++++ 4 files changed, 76 insertions(+), 54 deletions(-) create mode 100644 tools/perf/util/stat.c create mode 100644 tools/perf/util/stat.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 209774bcee2..5077f8e2ef7 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -406,6 +406,7 @@ LIB_OBJS += $(OUTPUT)util/target.o LIB_OBJS += $(OUTPUT)util/rblist.o LIB_OBJS += $(OUTPUT)util/intlist.o LIB_OBJS += $(OUTPUT)util/vdso.o +LIB_OBJS += $(OUTPUT)util/stat.o LIB_OBJS += $(OUTPUT)ui/helpline.o LIB_OBJS += $(OUTPUT)ui/hist.o diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index dab347d7b01..3c43a3578f3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -51,13 +51,13 @@ #include "util/evsel.h" #include "util/debug.h" #include "util/color.h" +#include "util/stat.h" #include "util/header.h" #include "util/cpumap.h" #include "util/thread.h" #include "util/thread_map.h" #include -#include #include #define DEFAULT_SEPARATOR " " @@ -199,11 +199,6 @@ static int output_fd; static volatile int done = 0; -struct stats -{ - double n, mean, M2; -}; - struct perf_stat { struct stats res_stats[3]; }; @@ -220,50 +215,6 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) evsel->priv = NULL; } -static void update_stats(struct stats *stats, u64 val) -{ - double delta; - - stats->n++; - delta = val - stats->mean; - stats->mean += delta / stats->n; - stats->M2 += delta*(val - stats->mean); -} - -static double avg_stats(struct stats *stats) -{ - return stats->mean; -} - -/* - * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - * - * (\Sum n_i^2) - ((\Sum n_i)^2)/n - * s^2 = ------------------------------- - * n - 1 - * - * http://en.wikipedia.org/wiki/Stddev - * - * The std dev of the mean is related to the std dev by: - * - * s - * s_mean = ------- - * sqrt(n) - * - */ -static double stddev_stats(struct 
stats *stats) -{ - double variance, variance_mean; - - if (!stats->n) - return 0.0; - - variance = stats->M2 / (stats->n - 1); - variance_mean = variance / stats->n; - - return sqrt(variance_mean); -} - static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; static struct stats runtime_cycles_stats[MAX_NR_CPUS]; static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; @@ -559,10 +510,7 @@ static int run_perf_stat(int argc __maybe_unused, const char **argv) static void print_noise_pct(double total, double avg) { - double pct = 0.0; - - if (avg) - pct = 100.0*total/avg; + double pct = rel_stddev_stats(total, avg); if (csv_output) fprintf(output, "%s%.2f%%", csv_sep, pct); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c new file mode 100644 index 00000000000..23742126f47 --- /dev/null +++ b/tools/perf/util/stat.c @@ -0,0 +1,57 @@ +#include + +#include "stat.h" + +void update_stats(struct stats *stats, u64 val) +{ + double delta; + + stats->n++; + delta = val - stats->mean; + stats->mean += delta / stats->n; + stats->M2 += delta*(val - stats->mean); +} + +double avg_stats(struct stats *stats) +{ + return stats->mean; +} + +/* + * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + * + * (\Sum n_i^2) - ((\Sum n_i)^2)/n + * s^2 = ------------------------------- + * n - 1 + * + * http://en.wikipedia.org/wiki/Stddev + * + * The std dev of the mean is related to the std dev by: + * + * s + * s_mean = ------- + * sqrt(n) + * + */ +double stddev_stats(struct stats *stats) +{ + double variance, variance_mean; + + if (!stats->n) + return 0.0; + + variance = stats->M2 / (stats->n - 1); + variance_mean = variance / stats->n; + + return sqrt(variance_mean); +} + +double rel_stddev_stats(double stddev, double avg) +{ + double pct = 0.0; + + if (avg) + pct = 100.0 * stddev/avg; + + return pct; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h new file mode 100644 index 00000000000..588367c3c76 --- /dev/null +++ b/tools/perf/util/stat.h @@ -0,0 +1,16 @@ +#ifndef __PERF_STATS_H +#define __PERF_STATS_H + +#include "types.h" + +struct stats +{ + double n, mean, M2; +}; + +void update_stats(struct stats *stats, u64 val); +double avg_stats(struct stats *stats); +double stddev_stats(struct stats *stats); +double rel_stddev_stats(double stddev, double avg); + +#endif From 73eb422c10aa2a4afc1100d1bd54974ed72ad373 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Fri, 14 Sep 2012 01:07:42 +0300 Subject: [PATCH 222/282] perf archive: Remove -f from the rm command In Android, rm does not support the -f parameter. Remove -f from rm and make sure rm does not fail even if the files to be removed are not found. Signed-off-by: Irina Tirdea Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347574063-22521-4-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-archive.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index 95b6f8b6177..da94179f9ab 100644 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -24,7 +24,7 @@ NOBUILDID=0000000000000000000000000000000000000000 perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS if [ ! 
-s $BUILDIDS ] ; then echo "perf archive: no build-ids found" - rm -f $BUILDIDS + rm $BUILDIDS || true exit 1 fi @@ -40,7 +40,7 @@ while read build_id ; do done tar cfj $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST -rm -f $MANIFEST $BUILDIDS +rm $MANIFEST $BUILDIDS || true echo -e "Now please run:\n" echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" echo "wherever you need to run 'perf report' on." From 87ff50a3192152944d8e65b485bbf3d03214d0b6 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Fri, 14 Sep 2012 01:07:43 +0300 Subject: [PATCH 223/282] perf archive: Make 'f' the last parameter for tar On some systems, tar needs to specify the name of the archive immediately after the -f parameter. Change the order of the parameters so tar can run properly. Signed-off-by: Irina Tirdea Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1347574063-22521-5-git-send-email-irina.tirdea@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-archive.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index da94179f9ab..e9193062026 100644 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -39,7 +39,7 @@ while read build_id ; do echo ${filename#$PERF_BUILDID_LINKDIR} >> $MANIFEST done -tar cfj $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST +tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST rm $MANIFEST $BUILDIDS || true echo -e "Now please run:\n" echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" From 1500b93b61fc70a1176871b64f1c8ae3bd4da9dd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 7 Sep 2012 16:42:23 +0800 Subject: [PATCH 224/282] perf symbols: Filter samples with unresolved symbol when "--symbols" option is used Report/top commands support to only handle specific symbols with "--symbols" option, but current code will keep those samples whose symbol can't be resolved, which should actually be filtered. 
If we run following commands: $perf record -a tree $perf report --symbols intel_idle -n the output will be: Without the patch: ================== 46.27% 156 sshd [unknown] 26.05% 48 swapper [kernel.kallsyms] 17.26% 38 tree libc-2.12.1.so 7.69% 17 tree tree 2.73% 6 tree ld-2.12.1.so With the patch: =============== 100.00% 48 swapper [kernel.kallsyms] Signed-off-by: Feng Tang Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347007349-3102-2-git-send-email-feng.tang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 8202f5ca048..6715b193872 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -904,8 +904,9 @@ int perf_event__preprocess_sample(const union perf_event *event, al->sym = map__find_symbol(al->map, al->addr, filter); } - if (symbol_conf.sym_list && al->sym && - !strlist__has_entry(symbol_conf.sym_list, al->sym->name)) + if (symbol_conf.sym_list && + (!al->sym || !strlist__has_entry(symbol_conf.sym_list, + al->sym->name))) goto out_filtered; return 0; From 36385be55da10b3271407c45c3a62d9af3db666e Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 7 Sep 2012 16:42:24 +0800 Subject: [PATCH 225/282] perf scripts: Add --symbols option to handle specific symbols Since perf script no longer only handle the trace points, we can add the symbol filter option so that scripts can handle specified samples. Signed-off-by: Feng Tang Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347007349-3102-3-git-send-email-feng.tang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6d98a83d5a6..76577e67c3b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -14,6 +14,7 @@ #include "util/util.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/sort.h" #include static char const *script_name; @@ -1143,6 +1144,8 @@ static const struct option options[] = { parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), + OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", + "only consider these symbols"), OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", "only display events for these comms"), From 59cbea229473350168930941986ebe5bf685cc23 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 7 Sep 2012 16:42:25 +0800 Subject: [PATCH 226/282] perf scripts: Add event_analyzing_sample-record/report So that event_analyzing_sample.py can be shown by "perf script -l" Signed-off-by: Feng Tang Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347007349-3102-4-git-send-email-feng.tang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/scripts/python/bin/event_analyzing_sample-record | 8 ++++++++ .../perf/scripts/python/bin/event_analyzing_sample-report | 3 +++ 2 files changed, 11 insertions(+) create mode 100644 tools/perf/scripts/python/bin/event_analyzing_sample-record create mode 100644 tools/perf/scripts/python/bin/event_analyzing_sample-report diff --git 
a/tools/perf/scripts/python/bin/event_analyzing_sample-record b/tools/perf/scripts/python/bin/event_analyzing_sample-record new file mode 100644 index 00000000000..5ce652dabd0 --- /dev/null +++ b/tools/perf/scripts/python/bin/event_analyzing_sample-record @@ -0,0 +1,8 @@ +#!/bin/bash + +# +# event_analyzing_sample.py can cover all type of perf samples including +# the tracepoints, so no special record requirements, just record what +# you want to analyze. +# +perf record $@ diff --git a/tools/perf/scripts/python/bin/event_analyzing_sample-report b/tools/perf/scripts/python/bin/event_analyzing_sample-report new file mode 100644 index 00000000000..0941fc94e15 --- /dev/null +++ b/tools/perf/scripts/python/bin/event_analyzing_sample-report @@ -0,0 +1,3 @@ +#!/bin/bash +# description: analyze all perf samples +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/event_analyzing_sample.py From e5f3705e62b03251797a5173024184bfc223599d Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 7 Sep 2012 16:42:26 +0800 Subject: [PATCH 227/282] perf scripts: Export a find_scripts() function So that other perf commands/browser has a way to dig out the available scripts info in system, this is a preparation for the script browser. Signed-off-by: Feng Tang Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1347007349-3102-5-git-send-email-feng.tang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 55 +++++++++++++++++++++++++++++++++++++ tools/perf/builtin.h | 1 + 2 files changed, 56 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 76577e67c3b..1be843aa154 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1032,6 +1032,61 @@ static int list_available_scripts(const struct option *opt __maybe_unused, exit(0); } +/* + * Return -1 if none is found, otherwise the actual scripts number. + * + * Currently the only user of this function is the script browser, which + * will list all statically runnable scripts, select one, execute it and + * show the output in a perf browser. 
+ */ +int find_scripts(char **scripts_array, char **scripts_path_array) +{ + struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + char scripts_path[MAXPATHLEN]; + DIR *scripts_dir, *lang_dir; + char lang_path[MAXPATHLEN]; + char *temp; + int i = 0; + + snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); + + scripts_dir = opendir(scripts_path); + if (!scripts_dir) + return -1; + + for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, + lang_dirent.d_name); +#ifdef NO_LIBPERL + if (strstr(lang_path, "perl")) + continue; +#endif +#ifdef NO_LIBPYTHON + if (strstr(lang_path, "python")) + continue; +#endif + + lang_dir = opendir(lang_path); + if (!lang_dir) + continue; + + for_each_script(lang_path, lang_dir, script_dirent, script_next) { + /* Skip those real time scripts: xxxtop.p[yl] */ + if (strstr(script_dirent.d_name, "top.")) + continue; + sprintf(scripts_path_array[i], "%s/%s", lang_path, + script_dirent.d_name); + temp = strchr(script_dirent.d_name, '.'); + snprintf(scripts_array[i], + (temp - script_dirent.d_name) + 1, + "%s", script_dirent.d_name); + i++; + } + } + + return i; +} + static char *get_script_path(const char *script_root, const char *suffix) { struct dirent *script_next, *lang_next, script_dirent, lang_dirent; diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index b382bd551aa..3ea74ed1b26 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -36,4 +36,5 @@ extern int cmd_kvm(int argc, const char **argv, const char *prefix); extern int cmd_test(int argc, const char **argv, const char *prefix); extern int cmd_inject(int argc, const char **argv, const char *prefix); +extern int find_scripts(char **scripts_array, char **scripts_path_array); #endif From 60e5c706b3ea56f87afc2a4a3096118d28f9cc24 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 13 Sep 2012 13:14:30 +0900 Subject: [PATCH 228/282] perf report: Add missing perf_hpp__init for pipe-mode The perf_hpp__init() function was only called from setup_browser() so that the pipe-mode missed the initialization thus didn't respond to related options. Fix it. Reported-by: Robert Richter Tested-by: Robert Richter Signed-off-by: Namhyung Kim Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Thomas Gleixner Cc: linux-tip-commits@vger.kernel.org Link: http://lkml.kernel.org/r/87txv28spl.fsf_-_@sejong.aot.lge.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b6696dd51cb..1da243dfbc3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -689,8 +689,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) if (strcmp(report.input_name, "-") != 0) setup_browser(true); - else + else { use_browser = 0; + perf_hpp__init(false, false); + } setup_sorting(report_usage, options); From 314d9f63f385096580e9e2a06eaa0745d92fe4ac Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 10 Sep 2012 15:53:49 +0800 Subject: [PATCH 229/282] perf/x86: Add cpumask for uncore pmu This patch adds a cpumask file to the uncore pmu sysfs directory. The cpumask file contains one active cpu for every socket. 
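As a hedged usage sketch (the PMU name below is only an example, and the path format is the one used by the pmu cpumask support added to perf later in this series), a tool can simply read the new file:

	#include <stdio.h>

	int main(void)
	{
		char buf[64];
		/* one representative CPU per socket, e.g. "0,8" on a 2-socket box */
		FILE *f = fopen("/sys/bus/event_source/devices/uncore_cbox_0/cpumask", "r");

		if (f && fgets(buf, sizeof(buf), f))
			printf("uncore events are counted on CPUs %s", buf);
		if (f)
			fclose(f);
		return 0;
	}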
Signed-off-by: "Yan, Zheng" Acked-by: Peter Zijlstra Acked-by: Ingo Molnar Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: "Yan, Zheng" Link: http://lkml.kernel.org/r/1347263631-23175-2-git-send-email-zheng.z.yan@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 28 +++++++++++++++++-- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 6 ++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 0a5571080e7..62ec3e6af7e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2341,6 +2341,27 @@ int uncore_pmu_event_init(struct perf_event *event) return ret; } +static ssize_t uncore_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask); + + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); + +static struct attribute *uncore_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group uncore_pmu_attr_group = { + .attrs = uncore_pmu_attrs, +}; + static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -2378,8 +2399,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) free_percpu(type->pmus[i].box); kfree(type->pmus); type->pmus = NULL; - kfree(type->attr_groups[1]); - type->attr_groups[1] = NULL; + kfree(type->events_group); + type->events_group = NULL; } static void __init uncore_types_exit(struct intel_uncore_type **types) @@ -2431,9 +2452,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type) for (j = 0; j < i; j++) attrs[j] = &type->event_descs[j].attr.attr; - type->attr_groups[1] = events_group; + type->events_group = events_group; } + type->pmu_group = &uncore_pmu_attr_group; type->pmus = pmus; return 0; fail: diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 5b81c1856aa..e68a4550e95 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -369,10 +369,12 @@ struct intel_uncore_type { struct intel_uncore_pmu *pmus; struct intel_uncore_ops *ops; struct uncore_event_desc *event_descs; - const struct attribute_group *attr_groups[3]; + const struct attribute_group *attr_groups[4]; }; -#define format_group attr_groups[0] +#define pmu_group attr_groups[0] +#define format_group attr_groups[1] +#define events_group attr_groups[2] struct intel_uncore_ops { void (*init_box)(struct intel_uncore_box *); From 7ae92e744e3fb389afb1e24920ecda331d360c61 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 10 Sep 2012 15:53:50 +0800 Subject: [PATCH 230/282] perf stat: Check PMU cpumask file If user doesn't explicitly specify CPU list, perf-stat only collects events on CPUs listed in the PMU cpumask file. 
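In outline, the flow added by this patch is (a simplified summary of the diff below, which is spread over pmu.c, parse-events.c and builtin-stat.c):

	/* pmu_lookup():        pmu->cpus = pmu_cpumask(name);   read the sysfs cpumask file   */
	/* __add_event():       evsel->cpus = pmu->cpus;         attach it to the event        */
	/* perf_evsel__cpus():  use evsel->cpus unless the user passed an explicit CPU list    */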
Signed-off-by: "Yah, Zheng" Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1347263631-23175-3-git-send-email-zheng.z.yan@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 30 ++++++++++++++++++++---------- tools/perf/util/cpumap.c | 22 +++++++++++++++------- tools/perf/util/cpumap.h | 2 +- tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 18 ++++++++++++++---- tools/perf/util/pmu.c | 30 ++++++++++++++++++++++++++++++ tools/perf/util/pmu.h | 1 + 7 files changed, 82 insertions(+), 22 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3c43a3578f3..e0f65fe6594 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -215,6 +215,16 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) evsel->priv = NULL; } +static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) +{ + return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; +} + +static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) +{ + return perf_evsel__cpus(evsel)->nr; +} + static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; static struct stats runtime_cycles_stats[MAX_NR_CPUS]; static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; @@ -246,7 +256,7 @@ retry: evsel->attr.exclude_guest = evsel->attr.exclude_host = 0; if (perf_target__has_cpu(&target)) { - ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus); + ret = perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); if (ret) goto check_ret; return 0; @@ -327,7 +337,7 @@ static int read_counter_aggr(struct perf_evsel *counter) u64 *count = counter->counts->aggr.values; int i; - if (__perf_evsel__read(counter, evsel_list->cpus->nr, + if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter), evsel_list->threads->nr, scale) < 0) return -1; @@ -356,7 +366,7 @@ static int read_counter(struct perf_evsel *counter) u64 *count; int cpu; - for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) { + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) return -1; @@ -495,12 +505,12 @@ static int run_perf_stat(int argc __maybe_unused, const char **argv) if (no_aggr) { list_for_each_entry(counter, &evsel_list->entries, node) { read_counter(counter); - perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1); + perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); } } else { list_for_each_entry(counter, &evsel_list->entries, node) { read_counter_aggr(counter); - perf_evsel__close_fd(counter, evsel_list->cpus->nr, + perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), evsel_list->threads->nr); } } @@ -538,7 +548,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) if (no_aggr) sprintf(cpustr, "CPU%*d%s", csv_output ? 0 : -4, - evsel_list->cpus->map[cpu], csv_sep); + perf_evsel__cpus(evsel)->map[cpu], csv_sep); fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel)); @@ -750,7 +760,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (no_aggr) sprintf(cpustr, "CPU%*d%s", csv_output ? 
0 : -4, - evsel_list->cpus->map[cpu], csv_sep); + perf_evsel__cpus(evsel)->map[cpu], csv_sep); else cpu = 0; @@ -911,14 +921,14 @@ static void print_counter(struct perf_evsel *counter) u64 ena, run, val; int cpu; - for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) { + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { val = counter->counts->cpu[cpu].val; ena = counter->counts->cpu[cpu].ena; run = counter->counts->cpu[cpu].run; if (run == 0 || ena == 0) { fprintf(output, "CPU%*d%s%*s%s%*s", csv_output ? 0 : -4, - evsel_list->cpus->map[cpu], csv_sep, + perf_evsel__cpus(counter)->map[cpu], csv_sep, csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep, @@ -1217,7 +1227,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) list_for_each_entry(pos, &evsel_list->entries, node) { if (perf_evsel__alloc_stat_priv(pos) < 0 || - perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0) + perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0) goto out_free_fd; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index adc72f09914..2b32ffa9ebd 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -38,24 +38,19 @@ static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) return cpus; } -static struct cpu_map *cpu_map__read_all_cpu_map(void) +struct cpu_map *cpu_map__read(FILE *file) { struct cpu_map *cpus = NULL; - FILE *onlnf; int nr_cpus = 0; int *tmp_cpus = NULL, *tmp; int max_entries = 0; int n, cpu, prev; char sep; - onlnf = fopen("/sys/devices/system/cpu/online", "r"); - if (!onlnf) - return cpu_map__default_new(); - sep = 0; prev = -1; for (;;) { - n = fscanf(onlnf, "%u%c", &cpu, &sep); + n = fscanf(file, "%u%c", &cpu, &sep); if (n <= 0) break; if (prev >= 0) { @@ -95,6 +90,19 @@ static struct cpu_map *cpu_map__read_all_cpu_map(void) cpus = cpu_map__default_new(); out_free_tmp: free(tmp_cpus); + return cpus; +} + +static struct cpu_map *cpu_map__read_all_cpu_map(void) +{ + struct cpu_map *cpus = NULL; + FILE *onlnf; + + onlnf = fopen("/sys/devices/system/cpu/online", "r"); + if (!onlnf) + return cpu_map__default_new(); + + cpus = cpu_map__read(onlnf); fclose(onlnf); return cpus; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index c41518573c6..17b5264f643 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -11,7 +11,7 @@ struct cpu_map { struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__dummy_new(void); void cpu_map__delete(struct cpu_map *map); - +struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index dc40fe32210..93876bad2e5 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -66,6 +66,7 @@ struct perf_evsel { void *func; void *data; } handler; + struct cpu_map *cpus; unsigned int sample_size; bool supported; /* parse modifier helper */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 44afcf40f79..bf5d033ee1b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -239,8 +239,11 @@ const char *event_type(int type) return "unknown"; } -static int add_event(struct list_head **_list, int *idx, - struct perf_event_attr *attr, char *name) + + +static int __add_event(struct list_head **_list, int *idx, + struct perf_event_attr *attr, + char *name, struct cpu_map *cpus) { struct perf_evsel *evsel; 
struct list_head *list = *_list; @@ -260,6 +263,7 @@ static int add_event(struct list_head **_list, int *idx, return -ENOMEM; } + evsel->cpus = cpus; if (name) evsel->name = strdup(name); list_add_tail(&evsel->node, list); @@ -267,6 +271,12 @@ static int add_event(struct list_head **_list, int *idx, return 0; } +static int add_event(struct list_head **_list, int *idx, + struct perf_event_attr *attr, char *name) +{ + return __add_event(_list, idx, attr, name, NULL); +} + static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) { int i, j; @@ -607,8 +617,8 @@ int parse_events_add_pmu(struct list_head **list, int *idx, if (perf_pmu__config(pmu, &attr, head_config)) return -EINVAL; - return add_event(list, idx, &attr, - pmu_event_name(head_config)); + return __add_event(list, idx, &attr, pmu_event_name(head_config), + pmu->cpus); } int parse_events__modifier_group(struct list_head *list, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6631d828db3..8a2229da594 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -9,6 +9,7 @@ #include "util.h" #include "pmu.h" #include "parse-events.h" +#include "cpumap.h" #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" @@ -253,6 +254,33 @@ static void pmu_read_sysfs(void) closedir(dir); } +static struct cpu_map *pmu_cpumask(char *name) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs; + FILE *file; + struct cpu_map *cpus; + + sysfs = sysfs_find_mountpoint(); + if (!sysfs) + return NULL; + + snprintf(path, PATH_MAX, + "%s/bus/event_source/devices/%s/cpumask", sysfs, name); + + if (stat(path, &st) < 0) + return NULL; + + file = fopen(path, "r"); + if (!file) + return NULL; + + cpus = cpu_map__read(file); + fclose(file); + return cpus; +} + static struct perf_pmu *pmu_lookup(char *name) { struct perf_pmu *pmu; @@ -275,6 +303,8 @@ static struct perf_pmu *pmu_lookup(char *name) if (!pmu) return NULL; + pmu->cpus = pmu_cpumask(name); + pmu_aliases(name, &aliases); INIT_LIST_HEAD(&pmu->format); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 47f68d3cc5d..53c7794fc4b 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -28,6 +28,7 @@ struct perf_pmu__alias { struct perf_pmu { char *name; __u32 type; + struct cpu_map *cpus; struct list_head format; struct list_head aliases; struct list_head list; From 1e6dd8adc78d4a153db253d051fd4ef6c49c9019 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 5 Sep 2012 15:31:26 +0300 Subject: [PATCH 231/282] perf: Fix off by one test in perf_reg_value() The test should be >= ARRAY_SIZE() instead of > ARRAY_SIZE(). 
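That is, valid indexes run from 0 to ARRAY_SIZE(pt_regs_offset) - 1, so the boundary value itself has to be rejected as well:

	/* idx == ARRAY_SIZE() would read one element past the array */
	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
		return 0;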
Signed-off-by: Dan Carpenter Acked-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20120905123126.GC6128@elgon.mountain Signed-off-by: Ingo Molnar --- arch/x86/kernel/perf_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index c5a3e5cfe07..e309cc5c276 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -57,7 +57,7 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { u64 perf_reg_value(struct pt_regs *regs, int idx) { - if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset))) + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) return 0; return regs_get_register(regs, pt_regs_offset[idx]); From b1ab1bd1921536c2a97adb888effeff4370a3246 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 20 Sep 2012 08:30:21 -0300 Subject: [PATCH 232/282] perf tools: Fix a compiling error in trace-event-perl.c for 32 bits machine On my x86_32 mahcine, there is a compile error: CC util/scripting-engines/trace-event-perl.o cc1: warnings being treated as errors util/scripting-engines/trace-event-perl.c: In function perl_process_tracepoint: util/scripting-engines/trace-event-perl.c:285: error: format expects type 'int', but argument 2 has type '__u64' make: *** [util/scripting-engines/trace-event-perl.o] Error 1 Fix it by using the "%PRIu64" for __u64. v2: use PRIu64 as suggested by Arnaldo. Signed-off-by: Feng Tang Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120828101730.6b2fd97e@feng-i7 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-perl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index ffde3e4e34a..f80605eb185 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -282,7 +282,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %d", evsel->attr.config); + die("ug! 
no event found for type %" PRIu64, evsel->attr.config); pid = raw_field_value(event, "common_pid", data); From 8f28f19a87cb48d13570ba774a3e85776eb36bb4 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Mon, 27 Aug 2012 15:38:26 +0800 Subject: [PATCH 233/282] perf tools: Fix a compiling error in util/map.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fix a compile warning taken as error: CC util/map.o cc1: warnings being treated as errors util/map.c: In function ‘map__fprintf_dsoname’: util/map.c:240: error: ‘dsoname’ may be used uninitialized in this function make: *** [util/map.o] Error 1 Signed-off-by: Feng Tang Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1346053107-11946-3-git-send-email-feng.tang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index b442ee49452..ead5316b3f8 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -243,15 +243,14 @@ size_t map__fprintf(struct map *self, FILE *fp) size_t map__fprintf_dsoname(struct map *map, FILE *fp) { - const char *dsoname; + const char *dsoname = "[unknown]"; if (map && map->dso && (map->dso->name || map->dso->long_name)) { if (symbol_conf.show_kernel_path && map->dso->long_name) dsoname = map->dso->long_name; else if (map->dso->name) dsoname = map->dso->name; - } else - dsoname = "[unknown]"; + } return fprintf(fp, "%s", dsoname); } From 50a011f6409e888d5f41343024d24885281f048c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 20 Sep 2012 14:43:54 +0200 Subject: [PATCH 234/282] kprobes/x86: Move skip_singlestep up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I get this warning: arch/x86/kernel/kprobes.c:544:23: warning: ‘skip_singlestep’ declared ‘static’ but never defined on tip/auto-latest. Put the skip_singlestep function declaration up, in KPROBES_CAN_USE_FTRACE and drop the superfluous forward declaration. Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1348145034-16603-1-git-send-email-bp@amd64.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b7c2a85d192..57916c0d3cf 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -541,8 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +#ifdef KPROBES_CAN_USE_FTRACE static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb); + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); +} +#endif + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. 
@@ -1061,21 +1076,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } #ifdef KPROBES_CAN_USE_FTRACE -static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - /* - * Emulate singlestep (and also recover regs->ip) - * as if there is a 5byte nop - */ - regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - __this_cpu_write(current_kprobe, NULL); -} - /* Ftrace callback handler for kprobes */ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) From 1863fbbb781b1129da0cfdad46ef875370a34117 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 20 Sep 2012 18:19:45 +0200 Subject: [PATCH 235/282] perf record: Print event causing perf_event_open() to fail Got tired of not getting the event that caused the perf_event_open() syscall to fail. So I fixed the error message. This is very useful when monitoring lots of events in a single run. Signed-off-by: Stephane Eranian Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120920161945.GA7064@quad Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index c643ed669ef..2cb74343de3 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -297,8 +297,10 @@ try_again: } printf("\n"); - error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", - err, strerror(err)); + error("sys_perf_event_open() syscall returned with %d " + "(%s) for event %s. /bin/dmesg may provide " + "additional information.\n", + err, strerror(err), perf_evsel__name(pos)); #if defined(__i386__) || defined(__x86_64__) if (attr->type == PERF_TYPE_HARDWARE && From e6048fb8602648eabee96476b2703807d5e89409 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 20 Sep 2012 21:31:44 -0500 Subject: [PATCH 236/282] perf tools: Fix parallel build Parallel builds of perf were failing for me on a 32p box, with: * new build flags or prefix util/pmu.l:7:23: error: pmu-bison.h: No such file or directory ... make: *** [util/pmu-flex.o] Error 1 make: *** Waiting for unfinished jobs.... This can pretty quickly be seen by adding a sleep in front of the bison calls in tools/perf/Makefile and running make -j4 on a smaller box i.e.: sleep 10; $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c Adding the following dependencies fixes it for me. 
Signed-off-by: Eric Sandeen Reviewed-by: Namhyung Kim Cc: Namhyung Kim Link: http://lkml.kernel.org/r/505BD190.40707@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 5077f8e2ef7..3ae6a59635c 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -233,13 +233,13 @@ export PERL_PATH FLEX = flex BISON= bison -$(OUTPUT)util/parse-events-flex.c: util/parse-events.l +$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c: util/parse-events.y $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -$(OUTPUT)util/pmu-flex.c: util/pmu.l +$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c: util/pmu.y From 26bf264e871a4b9a8ac09c21a2b518e7f23830d5 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 17 Sep 2012 16:31:13 +0800 Subject: [PATCH 237/282] KVM: x86: Export svm/vmx exit code and vector code to userspace Exporting KVM exit information to userspace to be consumed by perf. Signed-off-by: Dong Hao [ Dong Hao : rebase it on acme's git tree ] Signed-off-by: Xiao Guangrong Acked-by: Marcelo Tosatti Cc: Avi Kivity Cc: David Ahern Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: kvm@vger.kernel.org Cc: Runzhen Wang Link: http://lkml.kernel.org/r/1347870675-31495-2-git-send-email-haodong@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/asm/kvm.h | 16 +++ arch/x86/include/asm/kvm_host.h | 16 --- arch/x86/include/asm/svm.h | 205 ++++++++++++++++++++------------ arch/x86/include/asm/vmx.h | 127 +++++++++++++------- arch/x86/kvm/trace.h | 89 -------------- 5 files changed, 230 insertions(+), 223 deletions(-) diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 246617efd67..41e08cb6a09 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,22 @@ #include #include +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + /* Select x86 specific features in */ #define __KVM_HAVE_PIT #define __KVM_HAVE_IOAPIC diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09155d64cf7..1eaa6b05667 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -75,22 +75,6 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define DE_VECTOR 0 -#define DB_VECTOR 1 -#define BP_VECTOR 3 -#define OF_VECTOR 4 -#define BR_VECTOR 5 -#define UD_VECTOR 6 -#define NM_VECTOR 7 -#define DF_VECTOR 8 -#define TS_VECTOR 10 -#define NP_VECTOR 11 -#define SS_VECTOR 12 -#define GP_VECTOR 13 -#define PF_VECTOR 14 -#define MF_VECTOR 16 -#define MC_VECTOR 18 - #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f2b83bc7d78..cdf5674dd23 100644 --- 
a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -1,6 +1,135 @@ #ifndef __SVM_H #define __SVM_H +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_EXIT_REASONS \ + { SVM_EXIT_READ_CR0, "read_cr0" }, \ + { SVM_EXIT_READ_CR3, "read_cr3" }, \ + { SVM_EXIT_READ_CR4, "read_cr4" }, \ + { SVM_EXIT_READ_CR8, "read_cr8" }, \ + { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ + { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ + { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ + { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ + { SVM_EXIT_READ_DR0, "read_dr0" }, \ + { SVM_EXIT_READ_DR1, "read_dr1" }, \ + { SVM_EXIT_READ_DR2, "read_dr2" }, \ + { SVM_EXIT_READ_DR3, "read_dr3" }, \ + { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ + { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ + { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ + { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ + { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ + { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ + { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ + { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ + { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ + { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ + { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ + { SVM_EXIT_INTR, "interrupt" }, \ + { SVM_EXIT_NMI, 
"nmi" }, \ + { SVM_EXIT_SMI, "smi" }, \ + { SVM_EXIT_INIT, "init" }, \ + { SVM_EXIT_VINTR, "vintr" }, \ + { SVM_EXIT_CPUID, "cpuid" }, \ + { SVM_EXIT_INVD, "invd" }, \ + { SVM_EXIT_HLT, "hlt" }, \ + { SVM_EXIT_INVLPG, "invlpg" }, \ + { SVM_EXIT_INVLPGA, "invlpga" }, \ + { SVM_EXIT_IOIO, "io" }, \ + { SVM_EXIT_MSR, "msr" }, \ + { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ + { SVM_EXIT_SHUTDOWN, "shutdown" }, \ + { SVM_EXIT_VMRUN, "vmrun" }, \ + { SVM_EXIT_VMMCALL, "hypercall" }, \ + { SVM_EXIT_VMLOAD, "vmload" }, \ + { SVM_EXIT_VMSAVE, "vmsave" }, \ + { SVM_EXIT_STGI, "stgi" }, \ + { SVM_EXIT_CLGI, "clgi" }, \ + { SVM_EXIT_SKINIT, "skinit" }, \ + { SVM_EXIT_WBINVD, "wbinvd" }, \ + { SVM_EXIT_MONITOR, "monitor" }, \ + { SVM_EXIT_MWAIT, "mwait" }, \ + { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_NPF, "npf" } + +#ifdef __KERNEL__ + enum { INTERCEPT_INTR, INTERCEPT_NMI, @@ -264,81 +393,6 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXITINFO_REG_MASK 0x0F -#define SVM_EXIT_READ_CR0 0x000 -#define SVM_EXIT_READ_CR3 0x003 -#define SVM_EXIT_READ_CR4 0x004 -#define SVM_EXIT_READ_CR8 0x008 -#define SVM_EXIT_WRITE_CR0 0x010 -#define SVM_EXIT_WRITE_CR3 0x013 -#define SVM_EXIT_WRITE_CR4 0x014 -#define SVM_EXIT_WRITE_CR8 0x018 -#define SVM_EXIT_READ_DR0 0x020 -#define SVM_EXIT_READ_DR1 0x021 -#define SVM_EXIT_READ_DR2 0x022 -#define SVM_EXIT_READ_DR3 0x023 -#define SVM_EXIT_READ_DR4 0x024 -#define SVM_EXIT_READ_DR5 0x025 -#define SVM_EXIT_READ_DR6 0x026 -#define SVM_EXIT_READ_DR7 0x027 -#define SVM_EXIT_WRITE_DR0 0x030 -#define SVM_EXIT_WRITE_DR1 0x031 -#define SVM_EXIT_WRITE_DR2 0x032 -#define SVM_EXIT_WRITE_DR3 0x033 -#define SVM_EXIT_WRITE_DR4 0x034 -#define SVM_EXIT_WRITE_DR5 0x035 -#define SVM_EXIT_WRITE_DR6 0x036 -#define SVM_EXIT_WRITE_DR7 0x037 -#define SVM_EXIT_EXCP_BASE 0x040 -#define SVM_EXIT_INTR 0x060 -#define SVM_EXIT_NMI 0x061 -#define SVM_EXIT_SMI 0x062 -#define SVM_EXIT_INIT 0x063 -#define SVM_EXIT_VINTR 0x064 -#define SVM_EXIT_CR0_SEL_WRITE 0x065 -#define SVM_EXIT_IDTR_READ 0x066 -#define SVM_EXIT_GDTR_READ 0x067 -#define SVM_EXIT_LDTR_READ 0x068 -#define SVM_EXIT_TR_READ 0x069 -#define SVM_EXIT_IDTR_WRITE 0x06a -#define SVM_EXIT_GDTR_WRITE 0x06b -#define SVM_EXIT_LDTR_WRITE 0x06c -#define SVM_EXIT_TR_WRITE 0x06d -#define SVM_EXIT_RDTSC 0x06e -#define SVM_EXIT_RDPMC 0x06f -#define SVM_EXIT_PUSHF 0x070 -#define SVM_EXIT_POPF 0x071 -#define SVM_EXIT_CPUID 0x072 -#define SVM_EXIT_RSM 0x073 -#define SVM_EXIT_IRET 0x074 -#define SVM_EXIT_SWINT 0x075 -#define SVM_EXIT_INVD 0x076 -#define SVM_EXIT_PAUSE 0x077 -#define SVM_EXIT_HLT 0x078 -#define SVM_EXIT_INVLPG 0x079 -#define SVM_EXIT_INVLPGA 0x07a -#define SVM_EXIT_IOIO 0x07b -#define SVM_EXIT_MSR 0x07c -#define SVM_EXIT_TASK_SWITCH 0x07d -#define SVM_EXIT_FERR_FREEZE 0x07e -#define SVM_EXIT_SHUTDOWN 0x07f -#define SVM_EXIT_VMRUN 0x080 -#define SVM_EXIT_VMMCALL 0x081 -#define SVM_EXIT_VMLOAD 0x082 -#define SVM_EXIT_VMSAVE 0x083 -#define SVM_EXIT_STGI 0x084 -#define SVM_EXIT_CLGI 0x085 -#define SVM_EXIT_SKINIT 0x086 -#define SVM_EXIT_RDTSCP 0x087 -#define SVM_EXIT_ICEBP 0x088 -#define SVM_EXIT_WBINVD 0x089 -#define SVM_EXIT_MONITOR 0x08a -#define SVM_EXIT_MWAIT 0x08b -#define SVM_EXIT_MWAIT_COND 0x08c -#define SVM_EXIT_XSETBV 0x08d -#define SVM_EXIT_NPF 0x400 - -#define SVM_EXIT_ERR -1 - #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" @@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb { #endif +#endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h 
index 74fcb963595..36ec21c36d6 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -25,6 +25,88 @@ * */ +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 + +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 + +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_INVPCID 58 + +#define VMX_EXIT_REASONS \ + { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ + { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ + { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ + { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ + { EXIT_REASON_CPUID, "CPUID" }, \ + { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVLPG, "INVLPG" }, \ + { EXIT_REASON_RDPMC, "RDPMC" }, \ + { EXIT_REASON_RDTSC, "RDTSC" }, \ + { EXIT_REASON_VMCALL, "VMCALL" }, \ + { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ + { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ + { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ + { EXIT_REASON_VMPTRST, "VMPTRST" }, \ + { EXIT_REASON_VMREAD, "VMREAD" }, \ + { EXIT_REASON_VMRESUME, "VMRESUME" }, \ + { EXIT_REASON_VMWRITE, "VMWRITE" }, \ + { EXIT_REASON_VMOFF, "VMOFF" }, \ + { EXIT_REASON_VMON, "VMON" }, \ + { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ + { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ + { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ + { EXIT_REASON_MSR_READ, "MSR_READ" }, \ + { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ + { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ + { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ + { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ + { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ + { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ + { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ + { EXIT_REASON_WBINVD, "WBINVD" } + +#ifdef __KERNEL__ + #include /* @@ -241,49 +323,6 @@ enum vmcs_field { HOST_RIP = 0x00006c16, }; -#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 - -#define EXIT_REASON_EXCEPTION_NMI 0 -#define EXIT_REASON_EXTERNAL_INTERRUPT 1 -#define EXIT_REASON_TRIPLE_FAULT 2 - -#define EXIT_REASON_PENDING_INTERRUPT 7 -#define EXIT_REASON_NMI_WINDOW 8 -#define EXIT_REASON_TASK_SWITCH 
9 -#define EXIT_REASON_CPUID 10 -#define EXIT_REASON_HLT 12 -#define EXIT_REASON_INVD 13 -#define EXIT_REASON_INVLPG 14 -#define EXIT_REASON_RDPMC 15 -#define EXIT_REASON_RDTSC 16 -#define EXIT_REASON_VMCALL 18 -#define EXIT_REASON_VMCLEAR 19 -#define EXIT_REASON_VMLAUNCH 20 -#define EXIT_REASON_VMPTRLD 21 -#define EXIT_REASON_VMPTRST 22 -#define EXIT_REASON_VMREAD 23 -#define EXIT_REASON_VMRESUME 24 -#define EXIT_REASON_VMWRITE 25 -#define EXIT_REASON_VMOFF 26 -#define EXIT_REASON_VMON 27 -#define EXIT_REASON_CR_ACCESS 28 -#define EXIT_REASON_DR_ACCESS 29 -#define EXIT_REASON_IO_INSTRUCTION 30 -#define EXIT_REASON_MSR_READ 31 -#define EXIT_REASON_MSR_WRITE 32 -#define EXIT_REASON_INVALID_STATE 33 -#define EXIT_REASON_MWAIT_INSTRUCTION 36 -#define EXIT_REASON_MONITOR_INSTRUCTION 39 -#define EXIT_REASON_PAUSE_INSTRUCTION 40 -#define EXIT_REASON_MCE_DURING_VMENTRY 41 -#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 -#define EXIT_REASON_APIC_ACCESS 44 -#define EXIT_REASON_EPT_VIOLATION 48 -#define EXIT_REASON_EPT_MISCONFIG 49 -#define EXIT_REASON_WBINVD 54 -#define EXIT_REASON_XSETBV 55 -#define EXIT_REASON_INVPCID 58 - /* * Interruption-information format */ @@ -488,3 +527,5 @@ enum vm_instruction_error_number { }; #endif + +#endif diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a71faf727ff..bca63f04dcc 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic, #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 -#define VMX_EXIT_REASONS \ - { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ - { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ - { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ - { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ - { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ - { EXIT_REASON_CPUID, "CPUID" }, \ - { EXIT_REASON_HLT, "HLT" }, \ - { EXIT_REASON_INVLPG, "INVLPG" }, \ - { EXIT_REASON_RDPMC, "RDPMC" }, \ - { EXIT_REASON_RDTSC, "RDTSC" }, \ - { EXIT_REASON_VMCALL, "VMCALL" }, \ - { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ - { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ - { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ - { EXIT_REASON_VMPTRST, "VMPTRST" }, \ - { EXIT_REASON_VMREAD, "VMREAD" }, \ - { EXIT_REASON_VMRESUME, "VMRESUME" }, \ - { EXIT_REASON_VMWRITE, "VMWRITE" }, \ - { EXIT_REASON_VMOFF, "VMOFF" }, \ - { EXIT_REASON_VMON, "VMON" }, \ - { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ - { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ - { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ - { EXIT_REASON_MSR_READ, "MSR_READ" }, \ - { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ - { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ - { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ - { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ - { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ - { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ - { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ - { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ - { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ - { EXIT_REASON_WBINVD, "WBINVD" } - -#define SVM_EXIT_REASONS \ - { SVM_EXIT_READ_CR0, "read_cr0" }, \ - { SVM_EXIT_READ_CR3, "read_cr3" }, \ - { SVM_EXIT_READ_CR4, "read_cr4" }, \ - { SVM_EXIT_READ_CR8, "read_cr8" }, \ - { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ - { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ - { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ - { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ - { SVM_EXIT_READ_DR0, "read_dr0" }, \ - { SVM_EXIT_READ_DR1, "read_dr1" }, \ - { 
SVM_EXIT_READ_DR2, "read_dr2" }, \ - { SVM_EXIT_READ_DR3, "read_dr3" }, \ - { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ - { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ - { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ - { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ - { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ - { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ - { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ - { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ - { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ - { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ - { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ - { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ - { SVM_EXIT_INTR, "interrupt" }, \ - { SVM_EXIT_NMI, "nmi" }, \ - { SVM_EXIT_SMI, "smi" }, \ - { SVM_EXIT_INIT, "init" }, \ - { SVM_EXIT_VINTR, "vintr" }, \ - { SVM_EXIT_CPUID, "cpuid" }, \ - { SVM_EXIT_INVD, "invd" }, \ - { SVM_EXIT_HLT, "hlt" }, \ - { SVM_EXIT_INVLPG, "invlpg" }, \ - { SVM_EXIT_INVLPGA, "invlpga" }, \ - { SVM_EXIT_IOIO, "io" }, \ - { SVM_EXIT_MSR, "msr" }, \ - { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ - { SVM_EXIT_SHUTDOWN, "shutdown" }, \ - { SVM_EXIT_VMRUN, "vmrun" }, \ - { SVM_EXIT_VMMCALL, "hypercall" }, \ - { SVM_EXIT_VMLOAD, "vmload" }, \ - { SVM_EXIT_VMSAVE, "vmsave" }, \ - { SVM_EXIT_STGI, "stgi" }, \ - { SVM_EXIT_CLGI, "clgi" }, \ - { SVM_EXIT_SKINIT, "skinit" }, \ - { SVM_EXIT_WBINVD, "wbinvd" }, \ - { SVM_EXIT_MONITOR, "monitor" }, \ - { SVM_EXIT_MWAIT, "mwait" }, \ - { SVM_EXIT_XSETBV, "xsetbv" }, \ - { SVM_EXIT_NPF, "npf" } - /* * Tracepoint for kvm guest exit: */ From bcf6edcd6fdb8965290f0b635a530fa3c6c212e1 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 17 Sep 2012 16:31:15 +0800 Subject: [PATCH 238/282] perf kvm: Events analysis tool Add 'perf kvm stat' support to analyze kvm vmexit/mmio/ioport smartly Usage: - kvm stat run a command and gather performance counter statistics, it is the alias of perf stat - trace kvm events: perf kvm stat record, or, if other tracepoints are interesting as well, we can append the events like this: perf kvm stat record -e timer:* -a If many guests are running, we can track the specified guest by using -p or --pid, -a is used to track events generated by all guests. - show the result: perf kvm stat report The output example is following: 13005 13059 total 2 guests are running on the host Then, track the guest whose pid is 13059: ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.253 MB perf.data.guest (~11065 samples) ] See the vmexit events: Analyze events for all VCPUs: VM-EXIT Samples Samples% Time% Avg time APIC_ACCESS 460 70.55% 0.01% 22.44us ( +- 1.75% ) HLT 93 14.26% 99.98% 832077.26us ( +- 10.42% ) EXTERNAL_INTERRUPT 64 9.82% 0.00% 35.35us ( +- 14.21% ) PENDING_INTERRUPT 24 3.68% 0.00% 9.29us ( +- 31.39% ) CR_ACCESS 7 1.07% 0.00% 8.12us ( +- 5.76% ) IO_INSTRUCTION 3 0.46% 0.00% 18.00us ( +- 11.79% ) EXCEPTION_NMI 1 0.15% 0.00% 5.83us ( +- -nan% ) Total Samples:652, Total events handled time:77396109.80us. See the mmio events: Analyze events for all VCPUs: MMIO Access Samples Samples% Time% Avg time 0xfee00380:W 387 84.31% 79.28% 8.29us ( +- 3.32% ) 0xfee00300:W 24 5.23% 9.96% 16.79us ( +- 1.97% ) 0xfee00300:R 24 5.23% 7.83% 13.20us ( +- 3.00% ) 0xfee00310:W 24 5.23% 2.93% 4.94us ( +- 3.84% ) Total Samples:459, Total events handled time:4044.59us. See the ioport event: Analyze events for all VCPUs: IO Port Access Samples Samples% Time% Avg time 0xc050:POUT 3 100.00% 100.00% 13.75us ( +- 10.83% ) Total Samples:3, Total events handled time:41.26us. 
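All of the tables above come from the same simple accounting model: a begin event (kvm_exit, or kvm_mmio/kvm_pio for the other modes) opens a period keyed by the event data (exit reason, gpa, port), and the matching end event (kvm_entry) closes it; the elapsed time is accumulated per key. The snippet below is a minimal, self-contained sketch of that idea for the vmexit case only. It is illustrative, not the patch code itself, which additionally tracks state per VCPU (via thread->priv) and uses the stats helpers from tools/perf/util/stat.c:

#include <stdint.h>
#include <stdio.h>

#define NR_REASONS 64

struct reason_stat {
	uint64_t samples;
	uint64_t time_ns;
};

static struct reason_stat stats[NR_REASONS];
static uint64_t begin_ts;		/* timestamp of the pending kvm_exit */
static int begin_reason = -1;		/* exit_reason of the pending kvm_exit */

static void on_kvm_exit(int exit_reason, uint64_t ts)
{
	begin_reason = exit_reason;
	begin_ts = ts;
}

static void on_kvm_entry(uint64_t ts)
{
	if (begin_reason < 0)
		return;			/* the begin event was not captured */
	stats[begin_reason].samples++;
	stats[begin_reason].time_ns += ts - begin_ts;
	begin_reason = -1;
}

int main(void)
{
	/* two synthetic HLT exits (VMX exit reason 12), 1us and 3us long */
	on_kvm_exit(12, 1000);
	on_kvm_entry(2000);
	on_kvm_exit(12, 5000);
	on_kvm_entry(8000);

	printf("HLT: %llu samples, avg %.2fus\n",
	       (unsigned long long)stats[12].samples,
	       stats[12].time_ns / (double)stats[12].samples / 1e3);
	return 0;
}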
Additionally, --vcpu is used to track the specified vcpu and --key is used to sort the result: Analyze events for VCPU 0: VM-EXIT Samples Samples% Time% Avg time HLT 27 13.85% 99.97% 405790.24us ( +- 12.70% ) EXTERNAL_INTERRUPT 13 6.67% 0.00% 27.94us ( +- 22.26% ) APIC_ACCESS 146 74.87% 0.03% 21.69us ( +- 2.91% ) IO_INSTRUCTION 2 1.03% 0.00% 17.77us ( +- 20.56% ) CR_ACCESS 2 1.03% 0.00% 8.55us ( +- 6.47% ) PENDING_INTERRUPT 5 2.56% 0.00% 6.27us ( +- 3.94% ) Total Samples:195, Total events handled time:10959950.90us. Signed-off-by: Dong Hao Signed-off-by: Runzhen Wang [ Dong Hao Runzhen Wang : - rebase it on current acme's tree - fix the compiling-error on i386 ] Signed-off-by: Xiao Guangrong Acked-by: David Ahern Cc: Avi Kivity Cc: David Ahern Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: kvm@vger.kernel.org Cc: Runzhen Wang Link: http://lkml.kernel.org/r/1347870675-31495-4-git-send-email-haodong@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-kvm.txt | 30 +- tools/perf/MANIFEST | 3 + tools/perf/builtin-kvm.c | 840 +++++++++++++++++++++++++- tools/perf/util/header.c | 59 +- tools/perf/util/header.h | 1 + tools/perf/util/thread.h | 2 + 6 files changed, 929 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt index dd84cb2f0a8..326f2cb333c 100644 --- a/tools/perf/Documentation/perf-kvm.txt +++ b/tools/perf/Documentation/perf-kvm.txt @@ -12,7 +12,7 @@ SYNOPSIS [--guestkallsyms= --guestmodules= | --guestvmlinux=]] {top|record|report|diff|buildid-list} 'perf kvm' [--host] [--guest] [--guestkallsyms= --guestmodules= - | --guestvmlinux=] {top|record|report|diff|buildid-list} + | --guestvmlinux=] {top|record|report|diff|buildid-list|stat} DESCRIPTION ----------- @@ -38,6 +38,18 @@ There are a couple of variants of perf kvm: so that other tools can be used to fetch packages with matching symbol tables for use by perf report. + 'perf kvm stat ' to run a command and gather performance counter + statistics. + In particular, 'perf kvm stat record/report' generates a statistical analysis + of KVM events. Currently, vmexit, mmio and ioport events are supported. + 'perf kvm stat record ' records kvm events and the events between + start and end . + This command produces a file which contains tracing results of kvm + events. + + 'perf kvm stat report' reports statistical data which includes events + handled time, samples, and so on. + OPTIONS ------- -i:: @@ -68,7 +80,21 @@ OPTIONS --guestvmlinux=:: Guest os kernel vmlinux. +STAT REPORT OPTIONS +------------------- +--vcpu=:: + analyze events which occur on this vcpu. (default: all vcpus) + +--events=:: + events to be analyzed. Possible values: vmexit, mmio, ioport. + (default: vmexit) +-k:: +--key=:: + Sorting key. Possible values: sample (default, sort by samples + number), time (sort by average time). 
+ SEE ALSO -------- linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], -linkperf:perf-diff[1], linkperf:perf-buildid-list[1] +linkperf:perf-diff[1], linkperf:perf-buildid-list[1], +linkperf:perf-stat[1] diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 05180799093..80db3f4bcf7 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -16,3 +16,6 @@ arch/*/lib/memset*.S include/linux/poison.h include/linux/magic.h include/linux/hw_breakpoint.h +arch/x86/include/asm/svm.h +arch/x86/include/asm/vmx.h +arch/x86/include/asm/kvm_host.h diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 4d2aa2cbeca..1878947df5c 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "perf.h" +#include "util/evsel.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" @@ -10,8 +11,10 @@ #include "util/parse-options.h" #include "util/trace-event.h" - #include "util/debug.h" +#include "util/debugfs.h" +#include "util/tool.h" +#include "util/stat.h" #include @@ -19,11 +22,840 @@ #include #include -static const char *file_name; +#include "../../arch/x86/include/asm/svm.h" +#include "../../arch/x86/include/asm/vmx.h" +#include "../../arch/x86/include/asm/kvm.h" + +struct event_key { + #define INVALID_KEY (~0ULL) + u64 key; + int info; +}; + +struct kvm_events_ops { + bool (*is_begin_event)(struct event_format *event, void *data, + struct event_key *key); + bool (*is_end_event)(struct event_format *event, void *data, + struct event_key *key); + void (*decode_key)(struct event_key *key, char decode[20]); + const char *name; +}; + +static void exit_event_get_key(struct event_format *event, void *data, + struct event_key *key) +{ + key->info = 0; + key->key = raw_field_value(event, "exit_reason", data); +} + +static bool kvm_exit_event(struct event_format *event) +{ + return !strcmp(event->name, "kvm_exit"); +} + +static bool exit_event_begin(struct event_format *event, void *data, + struct event_key *key) +{ + if (kvm_exit_event(event)) { + exit_event_get_key(event, data, key); + return true; + } + + return false; +} + +static bool kvm_entry_event(struct event_format *event) +{ + return !strcmp(event->name, "kvm_entry"); +} + +static bool exit_event_end(struct event_format *event, void *data __maybe_unused, + struct event_key *key __maybe_unused) +{ + return kvm_entry_event(event); +} + +struct exit_reasons_table { + unsigned long exit_code; + const char *reason; +}; + +struct exit_reasons_table vmx_exit_reasons[] = { + VMX_EXIT_REASONS +}; + +struct exit_reasons_table svm_exit_reasons[] = { + SVM_EXIT_REASONS +}; + +static int cpu_isa; + +static const char *get_exit_reason(u64 exit_code) +{ + int table_size = ARRAY_SIZE(svm_exit_reasons); + struct exit_reasons_table *table = svm_exit_reasons; + + if (cpu_isa == 1) { + table = vmx_exit_reasons; + table_size = ARRAY_SIZE(vmx_exit_reasons); + } + + while (table_size--) { + if (table->exit_code == exit_code) + return table->reason; + table++; + } + + pr_err("unknown kvm exit code:%lld on %s\n", + (unsigned long long)exit_code, cpu_isa ? 
"VMX" : "SVM"); + return "UNKNOWN"; +} + +static void exit_event_decode_key(struct event_key *key, char decode[20]) +{ + const char *exit_reason = get_exit_reason(key->key); + + scnprintf(decode, 20, "%s", exit_reason); +} + +static struct kvm_events_ops exit_events = { + .is_begin_event = exit_event_begin, + .is_end_event = exit_event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + + /* + * For the mmio events, we treat: + * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry + * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...). + */ +static void mmio_event_get_key(struct event_format *event, void *data, + struct event_key *key) +{ + key->key = raw_field_value(event, "gpa", data); + key->info = raw_field_value(event, "type", data); +} + +#define KVM_TRACE_MMIO_READ_UNSATISFIED 0 +#define KVM_TRACE_MMIO_READ 1 +#define KVM_TRACE_MMIO_WRITE 2 + +static bool mmio_event_begin(struct event_format *event, void *data, + struct event_key *key) +{ + /* MMIO read begin event in kernel. */ + if (kvm_exit_event(event)) + return true; + + /* MMIO write begin event in kernel. */ + if (!strcmp(event->name, "kvm_mmio") && + raw_field_value(event, "type", data) == KVM_TRACE_MMIO_WRITE) { + mmio_event_get_key(event, data, key); + return true; + } + + return false; +} + +static bool mmio_event_end(struct event_format *event, void *data, + struct event_key *key) +{ + /* MMIO write end event in kernel. */ + if (kvm_entry_event(event)) + return true; + + /* MMIO read end event in kernel.*/ + if (!strcmp(event->name, "kvm_mmio") && + raw_field_value(event, "type", data) == KVM_TRACE_MMIO_READ) { + mmio_event_get_key(event, data, key); + return true; + } + + return false; +} + +static void mmio_event_decode_key(struct event_key *key, char decode[20]) +{ + scnprintf(decode, 20, "%#lx:%s", (unsigned long)key->key, + key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R"); +} + +static struct kvm_events_ops mmio_events = { + .is_begin_event = mmio_event_begin, + .is_end_event = mmio_event_end, + .decode_key = mmio_event_decode_key, + .name = "MMIO Access" +}; + + /* The time of emulation pio access is from kvm_pio to kvm_entry. */ +static void ioport_event_get_key(struct event_format *event, void *data, + struct event_key *key) +{ + key->key = raw_field_value(event, "port", data); + key->info = raw_field_value(event, "rw", data); +} + +static bool ioport_event_begin(struct event_format *event, void *data, + struct event_key *key) +{ + if (!strcmp(event->name, "kvm_pio")) { + ioport_event_get_key(event, data, key); + return true; + } + + return false; +} + +static bool ioport_event_end(struct event_format *event, void *data __maybe_unused, + struct event_key *key __maybe_unused) +{ + if (kvm_entry_event(event)) + return true; + + return false; +} + +static void ioport_event_decode_key(struct event_key *key, char decode[20]) +{ + scnprintf(decode, 20, "%#llx:%s", (unsigned long long)key->key, + key->info ? 
"POUT" : "PIN"); +} + +static struct kvm_events_ops ioport_events = { + .is_begin_event = ioport_event_begin, + .is_end_event = ioport_event_end, + .decode_key = ioport_event_decode_key, + .name = "IO Port Access" +}; + +static const char *report_event = "vmexit"; +struct kvm_events_ops *events_ops; + +static bool register_kvm_events_ops(void) +{ + bool ret = true; + + if (!strcmp(report_event, "vmexit")) + events_ops = &exit_events; + else if (!strcmp(report_event, "mmio")) + events_ops = &mmio_events; + else if (!strcmp(report_event, "ioport")) + events_ops = &ioport_events; + else { + pr_err("Unknown report event:%s\n", report_event); + ret = false; + } + + return ret; +} + +struct kvm_event_stats { + u64 time; + struct stats stats; +}; + +struct kvm_event { + struct list_head hash_entry; + struct rb_node rb; + + struct event_key key; + + struct kvm_event_stats total; + + #define DEFAULT_VCPU_NUM 8 + int max_vcpu; + struct kvm_event_stats *vcpu; +}; + +struct vcpu_event_record { + int vcpu_id; + u64 start_time; + struct kvm_event *last_event; +}; + +#define EVENTS_BITS 12 +#define EVENTS_CACHE_SIZE (1UL << EVENTS_BITS) + +static u64 total_time; +static u64 total_count; +static struct list_head kvm_events_cache[EVENTS_CACHE_SIZE]; + +static void init_kvm_event_record(void) +{ + int i; + + for (i = 0; i < (int)EVENTS_CACHE_SIZE; i++) + INIT_LIST_HEAD(&kvm_events_cache[i]); +} + +static int kvm_events_hash_fn(u64 key) +{ + return key & (EVENTS_CACHE_SIZE - 1); +} + +static bool kvm_event_expand(struct kvm_event *event, int vcpu_id) +{ + int old_max_vcpu = event->max_vcpu; + + if (vcpu_id < event->max_vcpu) + return true; + + while (event->max_vcpu <= vcpu_id) + event->max_vcpu += DEFAULT_VCPU_NUM; + + event->vcpu = realloc(event->vcpu, + event->max_vcpu * sizeof(*event->vcpu)); + if (!event->vcpu) { + pr_err("Not enough memory\n"); + return false; + } + + memset(event->vcpu + old_max_vcpu, 0, + (event->max_vcpu - old_max_vcpu) * sizeof(*event->vcpu)); + return true; +} + +static struct kvm_event *kvm_alloc_init_event(struct event_key *key) +{ + struct kvm_event *event; + + event = zalloc(sizeof(*event)); + if (!event) { + pr_err("Not enough memory\n"); + return NULL; + } + + event->key = *key; + return event; +} + +static struct kvm_event *find_create_kvm_event(struct event_key *key) +{ + struct kvm_event *event; + struct list_head *head; + + BUG_ON(key->key == INVALID_KEY); + + head = &kvm_events_cache[kvm_events_hash_fn(key->key)]; + list_for_each_entry(event, head, hash_entry) + if (event->key.key == key->key && event->key.info == key->info) + return event; + + event = kvm_alloc_init_event(key); + if (!event) + return NULL; + + list_add(&event->hash_entry, head); + return event; +} + +static bool handle_begin_event(struct vcpu_event_record *vcpu_record, + struct event_key *key, u64 timestamp) +{ + struct kvm_event *event = NULL; + + if (key->key != INVALID_KEY) + event = find_create_kvm_event(key); + + vcpu_record->last_event = event; + vcpu_record->start_time = timestamp; + return true; +} + +static void +kvm_update_event_stats(struct kvm_event_stats *kvm_stats, u64 time_diff) +{ + kvm_stats->time += time_diff; + update_stats(&kvm_stats->stats, time_diff); +} + +static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event) +{ + struct kvm_event_stats *kvm_stats = &event->total; + + if (vcpu_id != -1) + kvm_stats = &event->vcpu[vcpu_id]; + + return rel_stddev_stats(stddev_stats(&kvm_stats->stats), + avg_stats(&kvm_stats->stats)); +} + +static bool update_kvm_event(struct 
kvm_event *event, int vcpu_id, + u64 time_diff) +{ + kvm_update_event_stats(&event->total, time_diff); + + if (!kvm_event_expand(event, vcpu_id)) + return false; + + kvm_update_event_stats(&event->vcpu[vcpu_id], time_diff); + return true; +} + +static bool handle_end_event(struct vcpu_event_record *vcpu_record, + struct event_key *key, u64 timestamp) +{ + struct kvm_event *event; + u64 time_begin, time_diff; + + event = vcpu_record->last_event; + time_begin = vcpu_record->start_time; + + /* The begin event is not caught. */ + if (!time_begin) + return true; + + /* + * In some case, the 'begin event' only records the start timestamp, + * the actual event is recognized in the 'end event' (e.g. mmio-event). + */ + + /* Both begin and end events did not get the key. */ + if (!event && key->key == INVALID_KEY) + return true; + + if (!event) + event = find_create_kvm_event(key); + + if (!event) + return false; + + vcpu_record->last_event = NULL; + vcpu_record->start_time = 0; + + BUG_ON(timestamp < time_begin); + + time_diff = timestamp - time_begin; + return update_kvm_event(event, vcpu_record->vcpu_id, time_diff); +} + +static struct vcpu_event_record +*per_vcpu_record(struct thread *thread, struct event_format *event, void *data) +{ + /* Only kvm_entry records vcpu id. */ + if (!thread->priv && kvm_entry_event(event)) { + struct vcpu_event_record *vcpu_record; + + vcpu_record = zalloc(sizeof(struct vcpu_event_record)); + if (!vcpu_record) { + pr_err("Not enough memory\n"); + return NULL; + } + + vcpu_record->vcpu_id = raw_field_value(event, "vcpu_id", data); + thread->priv = vcpu_record; + } + + return (struct vcpu_event_record *)thread->priv; +} + +static bool handle_kvm_event(struct thread *thread, struct event_format *event, + void *data, u64 timestamp) +{ + struct vcpu_event_record *vcpu_record; + struct event_key key = {.key = INVALID_KEY}; + + vcpu_record = per_vcpu_record(thread, event, data); + if (!vcpu_record) + return true; + + if (events_ops->is_begin_event(event, data, &key)) + return handle_begin_event(vcpu_record, &key, timestamp); + + if (events_ops->is_end_event(event, data, &key)) + return handle_end_event(vcpu_record, &key, timestamp); + + return true; +} + +typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int); +struct kvm_event_key { + const char *name; + key_cmp_fun key; +}; + +static int trace_vcpu = -1; +#define GET_EVENT_KEY(func, field) \ +static u64 get_event_ ##func(struct kvm_event *event, int vcpu) \ +{ \ + if (vcpu == -1) \ + return event->total.field; \ + \ + if (vcpu >= event->max_vcpu) \ + return 0; \ + \ + return event->vcpu[vcpu].field; \ +} + +#define COMPARE_EVENT_KEY(func, field) \ +GET_EVENT_KEY(func, field) \ +static int compare_kvm_event_ ## func(struct kvm_event *one, \ + struct kvm_event *two, int vcpu)\ +{ \ + return get_event_ ##func(one, vcpu) > \ + get_event_ ##func(two, vcpu); \ +} + +GET_EVENT_KEY(time, time); +COMPARE_EVENT_KEY(count, stats.n); +COMPARE_EVENT_KEY(mean, stats.mean); + +#define DEF_SORT_NAME_KEY(name, compare_key) \ + { #name, compare_kvm_event_ ## compare_key } + +static struct kvm_event_key keys[] = { + DEF_SORT_NAME_KEY(sample, count), + DEF_SORT_NAME_KEY(time, mean), + { NULL, NULL } +}; + +static const char *sort_key = "sample"; +static key_cmp_fun compare; + +static bool select_key(void) +{ + int i; + + for (i = 0; keys[i].name; i++) { + if (!strcmp(keys[i].name, sort_key)) { + compare = keys[i].key; + return true; + } + } + + pr_err("Unknown compare key:%s\n", sort_key); + return false; +} + +static 
struct rb_root result; +static void insert_to_result(struct kvm_event *event, key_cmp_fun bigger, + int vcpu) +{ + struct rb_node **rb = &result.rb_node; + struct rb_node *parent = NULL; + struct kvm_event *p; + + while (*rb) { + p = container_of(*rb, struct kvm_event, rb); + parent = *rb; + + if (bigger(event, p, vcpu)) + rb = &(*rb)->rb_left; + else + rb = &(*rb)->rb_right; + } + + rb_link_node(&event->rb, parent, rb); + rb_insert_color(&event->rb, &result); +} + +static void update_total_count(struct kvm_event *event, int vcpu) +{ + total_count += get_event_count(event, vcpu); + total_time += get_event_time(event, vcpu); +} + +static bool event_is_valid(struct kvm_event *event, int vcpu) +{ + return !!get_event_count(event, vcpu); +} + +static void sort_result(int vcpu) +{ + unsigned int i; + struct kvm_event *event; + + for (i = 0; i < EVENTS_CACHE_SIZE; i++) + list_for_each_entry(event, &kvm_events_cache[i], hash_entry) + if (event_is_valid(event, vcpu)) { + update_total_count(event, vcpu); + insert_to_result(event, compare, vcpu); + } +} + +/* returns left most element of result, and erase it */ +static struct kvm_event *pop_from_result(void) +{ + struct rb_node *node = rb_first(&result); + + if (!node) + return NULL; + + rb_erase(node, &result); + return container_of(node, struct kvm_event, rb); +} + +static void print_vcpu_info(int vcpu) +{ + pr_info("Analyze events for "); + + if (vcpu == -1) + pr_info("all VCPUs:\n\n"); + else + pr_info("VCPU %d:\n\n", vcpu); +} + +static void print_result(int vcpu) +{ + char decode[20]; + struct kvm_event *event; + + pr_info("\n\n"); + print_vcpu_info(vcpu); + pr_info("%20s ", events_ops->name); + pr_info("%10s ", "Samples"); + pr_info("%9s ", "Samples%"); + + pr_info("%9s ", "Time%"); + pr_info("%16s ", "Avg time"); + pr_info("\n\n"); + + while ((event = pop_from_result())) { + u64 ecount, etime; + + ecount = get_event_count(event, vcpu); + etime = get_event_time(event, vcpu); + + events_ops->decode_key(&event->key, decode); + pr_info("%20s ", decode); + pr_info("%10llu ", (unsigned long long)ecount); + pr_info("%8.2f%% ", (double)ecount / total_count * 100); + pr_info("%8.2f%% ", (double)etime / total_time * 100); + pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3, + kvm_event_rel_stddev(vcpu, event)); + pr_info("\n"); + } + + pr_info("\nTotal Samples:%lld, Total events handled time:%.2fus.\n\n", + (unsigned long long)total_count, total_time / 1e3); +} + +static int process_sample_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct thread *thread = machine__findnew_thread(machine, sample->tid); + + if (thread == NULL) { + pr_debug("problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + if (!handle_kvm_event(thread, evsel->tp_format, sample->raw_data, + sample->time)) + return -1; + + return 0; +} + +static struct perf_tool eops = { + .sample = process_sample_event, + .comm = perf_event__process_comm, + .ordered_samples = true, +}; + +static int get_cpu_isa(struct perf_session *session) +{ + char *cpuid; + int isa; + + cpuid = perf_header__read_feature(session, HEADER_CPUID); + + if (!cpuid) { + pr_err("read HEADER_CPUID failed.\n"); + return -ENOTSUP; + } + + if (strstr(cpuid, "Intel")) + isa = 1; + else if (strstr(cpuid, "AMD")) + isa = 0; + else { + pr_err("CPU %s is not supported.\n", cpuid); + isa = -ENOTSUP; + } + + free(cpuid); + return isa; +} + +static const char *file_name; 
+ +static int read_events(void) +{ + struct perf_session *kvm_session; + int ret; + + kvm_session = perf_session__new(file_name, O_RDONLY, 0, false, &eops); + if (!kvm_session) { + pr_err("Initializing perf session failed\n"); + return -EINVAL; + } + + if (!perf_session__has_traces(kvm_session, "kvm record")) + return -EINVAL; + + /* + * Do not use 'isa' recorded in kvm_exit tracepoint since it is not + * traced in the old kernel. + */ + ret = get_cpu_isa(kvm_session); + + if (ret < 0) + return ret; + + cpu_isa = ret; + + return perf_session__process_events(kvm_session, &eops); +} + +static bool verify_vcpu(int vcpu) +{ + if (vcpu != -1 && vcpu < 0) { + pr_err("Invalid vcpu:%d.\n", vcpu); + return false; + } + + return true; +} + +static int kvm_events_report_vcpu(int vcpu) +{ + int ret = -EINVAL; + + if (!verify_vcpu(vcpu)) + goto exit; + + if (!select_key()) + goto exit; + + if (!register_kvm_events_ops()) + goto exit; + + init_kvm_event_record(); + setup_pager(); + + ret = read_events(); + if (ret) + goto exit; + + sort_result(vcpu); + print_result(vcpu); +exit: + return ret; +} + +static const char * const record_args[] = { + "record", + "-R", + "-f", + "-m", "1024", + "-c", "1", + "-e", "kvm:kvm_entry", + "-e", "kvm:kvm_exit", + "-e", "kvm:kvm_mmio", + "-e", "kvm:kvm_pio", +}; + +#define STRDUP_FAIL_EXIT(s) \ + ({ char *_p; \ + _p = strdup(s); \ + if (!_p) \ + return -ENOMEM; \ + _p; \ + }) + +static int kvm_events_record(int argc, const char **argv) +{ + unsigned int rec_argc, i, j; + const char **rec_argv; + + rec_argc = ARRAY_SIZE(record_args) + argc + 2; + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + + if (rec_argv == NULL) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(record_args); i++) + rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]); + + rec_argv[i++] = STRDUP_FAIL_EXIT("-o"); + rec_argv[i++] = STRDUP_FAIL_EXIT(file_name); + + for (j = 1; j < (unsigned int)argc; j++, i++) + rec_argv[i] = argv[j]; + + return cmd_record(i, rec_argv, NULL); +} + +static const char * const kvm_events_report_usage[] = { + "perf kvm stat report []", + NULL +}; + +static const struct option kvm_events_report_options[] = { + OPT_STRING(0, "event", &report_event, "report event", + "event for reporting: vmexit, mmio, ioport"), + OPT_INTEGER(0, "vcpu", &trace_vcpu, + "vcpu id to report"), + OPT_STRING('k', "key", &sort_key, "sort-key", + "key for sorting: sample(sort by samples number)" + " time (sort by avg time)"), + OPT_END() +}; + +static int kvm_events_report(int argc, const char **argv) +{ + symbol__init(); + + if (argc) { + argc = parse_options(argc, argv, + kvm_events_report_options, + kvm_events_report_usage, 0); + if (argc) + usage_with_options(kvm_events_report_usage, + kvm_events_report_options); + } + + return kvm_events_report_vcpu(trace_vcpu); +} + +static void print_kvm_stat_usage(void) +{ + printf("Usage: perf kvm stat \n\n"); + + printf("# Available commands:\n"); + printf("\trecord: record kvm events\n"); + printf("\treport: report statistical data of kvm events\n"); + + printf("\nOtherwise, it is the alias of 'perf stat':\n"); +} + +static int kvm_cmd_stat(int argc, const char **argv) +{ + if (argc == 1) { + print_kvm_stat_usage(); + goto perf_stat; + } + + if (!strncmp(argv[1], "rec", 3)) + return kvm_events_record(argc - 1, argv + 1); + + if (!strncmp(argv[1], "rep", 3)) + return kvm_events_report(argc - 1 , argv + 1); + +perf_stat: + return cmd_stat(argc, argv, NULL); +} + static char name_buffer[256]; static const char * const kvm_usage[] = { - "perf kvm [] 
{top|record|report|diff|buildid-list}", + "perf kvm [] {top|record|report|diff|buildid-list|stat}", NULL }; @@ -135,6 +967,8 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) return cmd_top(argc, argv, NULL); else if (!strncmp(argv[0], "buildid-list", 12)) return __cmd_buildid_list(argc, argv); + else if (!strncmp(argv[0], "stat", 4)) + return kvm_cmd_stat(argc, argv); else usage_with_options(kvm_usage, kvm_options); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index acbf6336199..ad72b2814ba 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1673,6 +1673,11 @@ static int process_build_id(struct perf_file_section *section, return 0; } +static char *read_cpuid(struct perf_header *ph, int fd) +{ + return do_read_string(fd, ph); +} + static struct perf_evsel * perf_evlist__find_by_index(struct perf_evlist *evlist, int idx) { @@ -1726,6 +1731,7 @@ process_event_desc(struct perf_file_section *section __maybe_unused, struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); + char *(*read)(struct perf_header *h, int fd); int (*process)(struct perf_file_section *section, struct perf_header *h, int feat, int fd, void *data); const char *name; @@ -1740,6 +1746,9 @@ struct feature_ops { #define FEAT_OPF(n, func) \ [n] = { .name = #n, .write = write_##func, .print = print_##func, \ .full_only = true } +#define FEAT_OPA_R(n, func) \ + [n] = { .name = #n, .write = write_##func, .print = print_##func, \ + .read = read_##func } /* feature_ops not implemented: */ #define print_tracing_data NULL @@ -1754,7 +1763,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPA(HEADER_ARCH, arch), FEAT_OPA(HEADER_NRCPUS, nrcpus), FEAT_OPA(HEADER_CPUDESC, cpudesc), - FEAT_OPA(HEADER_CPUID, cpuid), + FEAT_OPA_R(HEADER_CPUID, cpuid), FEAT_OPA(HEADER_TOTAL_MEM, total_mem), FEAT_OPP(HEADER_EVENT_DESC, event_desc), FEAT_OPA(HEADER_CMDLINE, cmdline), @@ -1809,6 +1818,54 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) return 0; } +struct header_read_data { + int feat; + char *result; +}; + +static int perf_file_section__read_feature(struct perf_file_section *section, + struct perf_header *ph, + int feat, int fd, void *data) +{ + struct header_read_data *hd = data; + + if (feat != hd->feat) + return 0; + + if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { + pr_debug("Failed to lseek to %" PRIu64 " offset for feature " + "%d, continuing...\n", section->offset, feat); + return 0; + } + + if (feat >= HEADER_LAST_FEATURE) { + pr_warning("unknown feature %d\n", feat); + return 0; + } + + if (!feat_ops[feat].read) { + pr_warning("read is not supported for feature %d\n", feat); + return 0; + } + + hd->result = feat_ops[feat].read(ph, fd); + return 0; +} + +char *perf_header__read_feature(struct perf_session *session, int feat) +{ + struct perf_header *header = &session->header; + struct header_read_data hd; + int fd = session->fd; + + hd.feat = feat; + hd.result = NULL; + + perf_header__process_sections(header, fd, &hd, + perf_file_section__read_feature); + return hd.result; +} + static int do_write_feat(int fd, struct perf_header *h, int type, struct perf_file_section **p, struct perf_evlist *evlist) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 209dad4fee2..58de08b21bc 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -94,6 +94,7 @@ int 
perf_header__process_sections(struct perf_header *header, int fd, int feat, int fd, void *data)); int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); +char *perf_header__read_feature(struct perf_session *session, int feat); int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, const char *name, bool is_kallsyms, bool is_vdso); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 70c2c13ff67..f66610b7bac 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -16,6 +16,8 @@ struct thread { bool comm_set; char *comm; int comm_len; + + void *priv; }; struct machine; From 14907e73830a256ad726a35a93789a9bc9595490 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 21 Sep 2012 16:10:26 -0300 Subject: [PATCH 239/282] perf kvm: Use perf_evsel__intval Using plain raw_field_value(evsel->tp_format) will look at the common fields as well, and since this tool doesn't need those, speed it up a bit by looking at just the event specific fields. Also in general use just evsel and sample, just like was done in 'perf sched'. v2: Fixed up test against evsel->name, that contains the subsys name too, by David Ahern. Cc: David Ahern Cc: Dong Hao Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Xiao Guangrong Cc: Runzhen Wang Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 130 ++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 63 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 1878947df5c..3eb53e33c02 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -33,46 +33,49 @@ struct event_key { }; struct kvm_events_ops { - bool (*is_begin_event)(struct event_format *event, void *data, - struct event_key *key); - bool (*is_end_event)(struct event_format *event, void *data, - struct event_key *key); + bool (*is_begin_event)(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key); + bool (*is_end_event)(struct perf_evsel *evsel, + struct perf_sample *sample, struct event_key *key); void (*decode_key)(struct event_key *key, char decode[20]); const char *name; }; -static void exit_event_get_key(struct event_format *event, void *data, - struct event_key *key) +static void exit_event_get_key(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key) { key->info = 0; - key->key = raw_field_value(event, "exit_reason", data); + key->key = perf_evsel__intval(evsel, sample, "exit_reason"); } -static bool kvm_exit_event(struct event_format *event) +static bool kvm_exit_event(struct perf_evsel *evsel) { - return !strcmp(event->name, "kvm_exit"); + return !strcmp(evsel->name, "kvm:kvm_exit"); } -static bool exit_event_begin(struct event_format *event, void *data, - struct event_key *key) +static bool exit_event_begin(struct perf_evsel *evsel, + struct perf_sample *sample, struct event_key *key) { - if (kvm_exit_event(event)) { - exit_event_get_key(event, data, key); + if (kvm_exit_event(evsel)) { + exit_event_get_key(evsel, sample, key); return true; } return false; } -static bool kvm_entry_event(struct event_format *event) +static bool kvm_entry_event(struct perf_evsel *evsel) { - return !strcmp(event->name, "kvm_entry"); + return !strcmp(evsel->name, "kvm:kvm_entry"); } -static bool exit_event_end(struct event_format *event, void *data 
__maybe_unused, - struct event_key *key __maybe_unused) +static bool exit_event_end(struct perf_evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) { - return kvm_entry_event(event); + return kvm_entry_event(evsel); } struct exit_reasons_table { @@ -130,45 +133,45 @@ static struct kvm_events_ops exit_events = { * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...). */ -static void mmio_event_get_key(struct event_format *event, void *data, - struct event_key *key) +static void mmio_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample, + struct event_key *key) { - key->key = raw_field_value(event, "gpa", data); - key->info = raw_field_value(event, "type", data); + key->key = perf_evsel__intval(evsel, sample, "gpa"); + key->info = perf_evsel__intval(evsel, sample, "type"); } #define KVM_TRACE_MMIO_READ_UNSATISFIED 0 #define KVM_TRACE_MMIO_READ 1 #define KVM_TRACE_MMIO_WRITE 2 -static bool mmio_event_begin(struct event_format *event, void *data, - struct event_key *key) +static bool mmio_event_begin(struct perf_evsel *evsel, + struct perf_sample *sample, struct event_key *key) { /* MMIO read begin event in kernel. */ - if (kvm_exit_event(event)) + if (kvm_exit_event(evsel)) return true; /* MMIO write begin event in kernel. */ - if (!strcmp(event->name, "kvm_mmio") && - raw_field_value(event, "type", data) == KVM_TRACE_MMIO_WRITE) { - mmio_event_get_key(event, data, key); + if (!strcmp(evsel->name, "kvm:kvm_mmio") && + perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) { + mmio_event_get_key(evsel, sample, key); return true; } return false; } -static bool mmio_event_end(struct event_format *event, void *data, - struct event_key *key) +static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample, + struct event_key *key) { /* MMIO write end event in kernel. */ - if (kvm_entry_event(event)) + if (kvm_entry_event(evsel)) return true; /* MMIO read end event in kernel.*/ - if (!strcmp(event->name, "kvm_mmio") && - raw_field_value(event, "type", data) == KVM_TRACE_MMIO_READ) { - mmio_event_get_key(event, data, key); + if (!strcmp(evsel->name, "kvm:kvm_mmio") && + perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) { + mmio_event_get_key(evsel, sample, key); return true; } @@ -189,31 +192,31 @@ static struct kvm_events_ops mmio_events = { }; /* The time of emulation pio access is from kvm_pio to kvm_entry. 
*/ -static void ioport_event_get_key(struct event_format *event, void *data, - struct event_key *key) +static void ioport_event_get_key(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key) { - key->key = raw_field_value(event, "port", data); - key->info = raw_field_value(event, "rw", data); + key->key = perf_evsel__intval(evsel, sample, "port"); + key->info = perf_evsel__intval(evsel, sample, "rw"); } -static bool ioport_event_begin(struct event_format *event, void *data, - struct event_key *key) +static bool ioport_event_begin(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key) { - if (!strcmp(event->name, "kvm_pio")) { - ioport_event_get_key(event, data, key); + if (!strcmp(evsel->name, "kvm:kvm_pio")) { + ioport_event_get_key(evsel, sample, key); return true; } return false; } -static bool ioport_event_end(struct event_format *event, void *data __maybe_unused, +static bool ioport_event_end(struct perf_evsel *evsel, + struct perf_sample *sample __maybe_unused, struct event_key *key __maybe_unused) { - if (kvm_entry_event(event)) - return true; - - return false; + return kvm_entry_event(evsel); } static void ioport_event_decode_key(struct event_key *key, char decode[20]) @@ -430,41 +433,43 @@ static bool handle_end_event(struct vcpu_event_record *vcpu_record, return update_kvm_event(event, vcpu_record->vcpu_id, time_diff); } -static struct vcpu_event_record -*per_vcpu_record(struct thread *thread, struct event_format *event, void *data) +static +struct vcpu_event_record *per_vcpu_record(struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample) { /* Only kvm_entry records vcpu id. */ - if (!thread->priv && kvm_entry_event(event)) { + if (!thread->priv && kvm_entry_event(evsel)) { struct vcpu_event_record *vcpu_record; - vcpu_record = zalloc(sizeof(struct vcpu_event_record)); + vcpu_record = zalloc(sizeof(*vcpu_record)); if (!vcpu_record) { - pr_err("Not enough memory\n"); + pr_err("%s: Not enough memory\n", __func__); return NULL; } - vcpu_record->vcpu_id = raw_field_value(event, "vcpu_id", data); + vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, "vcpu_id"); thread->priv = vcpu_record; } - return (struct vcpu_event_record *)thread->priv; + return thread->priv; } -static bool handle_kvm_event(struct thread *thread, struct event_format *event, - void *data, u64 timestamp) +static bool handle_kvm_event(struct thread *thread, struct perf_evsel *evsel, + struct perf_sample *sample) { struct vcpu_event_record *vcpu_record; struct event_key key = {.key = INVALID_KEY}; - vcpu_record = per_vcpu_record(thread, event, data); + vcpu_record = per_vcpu_record(thread, evsel, sample); if (!vcpu_record) return true; - if (events_ops->is_begin_event(event, data, &key)) - return handle_begin_event(vcpu_record, &key, timestamp); + if (events_ops->is_begin_event(evsel, sample, &key)) + return handle_begin_event(vcpu_record, &key, sample->time); - if (events_ops->is_end_event(event, data, &key)) - return handle_end_event(vcpu_record, &key, timestamp); + if (events_ops->is_end_event(evsel, sample, &key)) + return handle_end_event(vcpu_record, &key, sample->time); return true; } @@ -645,8 +650,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } - if (!handle_kvm_event(thread, evsel->tp_format, sample->raw_data, - sample->time)) + if (!handle_kvm_event(thread, evsel, sample)) return -1; return 0; From 0f7d2f1b65e3415854bbe842705f698a3350d7da Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho 
de Melo Date: Mon, 24 Sep 2012 10:46:54 -0300 Subject: [PATCH 240/282] perf kmem: Use perf_evsel__intval and perf_session__set_tracepoints_handlers Following the model of 'perf sched': . raw_field_value searches first on the common fields, that are unused in this tool . Using perf_session__set_tracepoints_handlers will save all those strcmp to find the right handler at sample processing time, do it just once and get the handler from evsel->handler.func. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 88 +++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index f5f8a6b745a..bc912c68f49 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "perf.h" +#include "util/evlist.h" #include "util/evsel.h" #include "util/util.h" #include "util/cache.h" @@ -212,36 +213,38 @@ static int insert_caller_stat(unsigned long call_site, } static int perf_evsel__process_alloc_event(struct perf_evsel *evsel, - struct perf_sample *sample, int node) + struct perf_sample *sample) { - struct event_format *event = evsel->tp_format; - void *data = sample->raw_data; - unsigned long call_site; - unsigned long ptr; - int bytes_req, cpu = sample->cpu; - int bytes_alloc; - int node1, node2; + unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"), + call_site = perf_evsel__intval(evsel, sample, "call_site"); + int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"), + bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc"); - ptr = raw_field_value(event, "ptr", data); - call_site = raw_field_value(event, "call_site", data); - bytes_req = raw_field_value(event, "bytes_req", data); - bytes_alloc = raw_field_value(event, "bytes_alloc", data); - - if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu) || + if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) || insert_caller_stat(call_site, bytes_req, bytes_alloc)) return -1; total_requested += bytes_req; total_allocated += bytes_alloc; - if (node) { - node1 = cpunode_map[cpu]; - node2 = raw_field_value(event, "node", data); + nr_allocs++; + return 0; +} + +static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel, + struct perf_sample *sample) +{ + int ret = perf_evsel__process_alloc_event(evsel, sample); + + if (!ret) { + int node1 = cpunode_map[sample->cpu], + node2 = perf_evsel__intval(evsel, sample, "node"); + if (node1 != node2) nr_cross_allocs++; } - nr_allocs++; - return 0; + + return ret; } static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); @@ -275,8 +278,7 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr, static int perf_evsel__process_free_event(struct perf_evsel *evsel, struct perf_sample *sample) { - unsigned long ptr = raw_field_value(evsel->tp_format, "ptr", - sample->raw_data); + unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"); struct alloc_stat *s_alloc, *s_caller; s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); @@ -297,28 +299,8 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel, return 0; } -static int perf_evsel__process_kmem_event(struct perf_evsel *evsel, - struct perf_sample *sample) -{ - struct 
event_format *event = evsel->tp_format; - - if (!strcmp(event->name, "kmalloc") || - !strcmp(event->name, "kmem_cache_alloc")) { - return perf_evsel__process_alloc_event(evsel, sample, 0); - } - - if (!strcmp(event->name, "kmalloc_node") || - !strcmp(event->name, "kmem_cache_alloc_node")) { - return perf_evsel__process_alloc_event(evsel, sample, 1); - } - - if (!strcmp(event->name, "kfree") || - !strcmp(event->name, "kmem_cache_free")) { - return perf_evsel__process_free_event(evsel, sample); - } - - return 0; -} +typedef int (*tracepoint_handler)(struct perf_evsel *evsel, + struct perf_sample *sample); static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, @@ -336,7 +318,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - return perf_evsel__process_kmem_event(evsel, sample); + if (evsel->handler.func != NULL) { + tracepoint_handler f = evsel->handler.func; + return f(evsel, sample); + } + + return 0; } static struct perf_tool perf_kmem = { @@ -498,6 +485,14 @@ static int __cmd_kmem(void) { int err = -EINVAL; struct perf_session *session; + const struct perf_evsel_str_handler kmem_tracepoints[] = { + { "kmem:kmalloc", perf_evsel__process_alloc_event, }, + { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, + { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, + { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, + { "kmem:kfree", perf_evsel__process_free_event, }, + { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, + }; session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem); if (session == NULL) @@ -509,6 +504,11 @@ static int __cmd_kmem(void) if (!perf_session__has_traces(session, "kmem record")) goto out_delete; + if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + return -1; + } + setup_pager(); err = perf_session__process_events(session, &perf_kmem); if (err != 0) From 746f16ec6ae370d58ecf4370c9955bd6f35d44a3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 24 Sep 2012 10:52:12 -0300 Subject: [PATCH 241/282] perf lock: Use perf_evsel__intval and perf_session__set_tracepoints_handlers Following the model of 'perf sched': . raw_field_value searches first on the common fields, that are unused in this tool . Leave using perf_evsel__intval to the actual handlers, some may not need to incur some of the cost because they may not need all the fields values. . Using perf_session__set_tracepoints_handlers will save all those strcmp to find the right handler at sample processing time, do it just once and get the handler from evsel->handler.func. 
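In sketch form the resulting dispatch looks like the snippet below. This is a simplified, self-contained illustration with stand-in types, not the perf code itself (there the pointer lives in evsel->handler.func and registration is done by perf_session__set_tracepoints_handlers()):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct evsel;
typedef int (*tracepoint_handler)(struct evsel *evsel);

struct evsel {
	const char *name;
	tracepoint_handler handler;	/* stand-in for evsel->handler.func */
};

static int handle_acquire(struct evsel *e) { printf("acquire (%s)\n", e->name); return 0; }
static int handle_release(struct evsel *e) { printf("release (%s)\n", e->name); return 0; }

static const struct { const char *name; tracepoint_handler fn; } table[] = {
	{ "lock:lock_acquire", handle_acquire },
	{ "lock:lock_release", handle_release },
};

/* done once, when the session is set up */
static void set_handlers(struct evsel *evsels, size_t n)
{
	for (size_t i = 0; i < n; i++)
		for (size_t j = 0; j < sizeof(table) / sizeof(table[0]); j++)
			if (!strcmp(evsels[i].name, table[j].name))
				evsels[i].handler = table[j].fn;
}

/* done for every sample: a single indirect call, no strcmp() chain */
static int process_sample(struct evsel *evsel)
{
	return evsel->handler ? evsel->handler(evsel) : 0;
}

int main(void)
{
	struct evsel evsels[] = {
		{ "lock:lock_acquire", NULL },
		{ "lock:lock_release", NULL },
	};

	set_handlers(evsels, 2);
	for (size_t i = 0; i < 2; i++)
		process_sample(&evsels[i]);
	return 0;
}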
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 233 ++++++++++++++------------------------ 1 file changed, 87 insertions(+), 146 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index a8035207a3d..7d6e0994988 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "perf.h" +#include "util/evlist.h" #include "util/evsel.h" #include "util/util.h" #include "util/cache.h" @@ -41,7 +42,7 @@ struct lock_stat { struct rb_node rb; /* used for sorting */ /* - * FIXME: raw_field_value() returns unsigned long long, + * FIXME: perf_evsel__intval() returns u64, * so address of lockdep_map should be dealed as 64bit. * Is there more better solution? */ @@ -336,44 +337,18 @@ alloc_failed: static const char *input_name; -struct raw_event_sample { - u32 size; - char data[0]; -}; - -struct trace_acquire_event { - void *addr; - const char *name; - int flag; -}; - -struct trace_acquired_event { - void *addr; - const char *name; -}; - -struct trace_contended_event { - void *addr; - const char *name; -}; - -struct trace_release_event { - void *addr; - const char *name; -}; - struct trace_lock_handler { - int (*acquire_event)(struct trace_acquire_event *, - const struct perf_sample *sample); + int (*acquire_event)(struct perf_evsel *evsel, + struct perf_sample *sample); - int (*acquired_event)(struct trace_acquired_event *, - const struct perf_sample *sample); + int (*acquired_event)(struct perf_evsel *evsel, + struct perf_sample *sample); - int (*contended_event)(struct trace_contended_event *, - const struct perf_sample *sample); + int (*contended_event)(struct perf_evsel *evsel, + struct perf_sample *sample); - int (*release_event)(struct trace_release_event *, - const struct perf_sample *sample); + int (*release_event)(struct perf_evsel *evsel, + struct perf_sample *sample); }; static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr) @@ -412,15 +387,20 @@ enum acquire_flags { READ_LOCK = 2, }; -static int -report_lock_acquire_event(struct trace_acquire_event *acquire_event, - const struct perf_sample *sample) +static int report_lock_acquire_event(struct perf_evsel *evsel, + struct perf_sample *sample) { + void *addr; struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; + const char *name = perf_evsel__strval(evsel, sample, "name"); + u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); + int flag = perf_evsel__intval(evsel, sample, "flag"); - ls = lock_stat_findnew(acquire_event->addr, acquire_event->name); + memcpy(&addr, &tmp, sizeof(void *)); + + ls = lock_stat_findnew(addr, name); if (!ls) return -1; if (ls->discard) @@ -430,19 +410,19 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event, if (!ts) return -1; - seq = get_seq(ts, acquire_event->addr); + seq = get_seq(ts, addr); if (!seq) return -1; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: case SEQ_STATE_RELEASED: - if (!acquire_event->flag) { + if (!flag) { seq->state = SEQ_STATE_ACQUIRING; } else { - if (acquire_event->flag & TRY_LOCK) + if (flag & TRY_LOCK) ls->nr_trylock++; - if (acquire_event->flag & READ_LOCK) + if (flag & READ_LOCK) ls->nr_readlock++; seq->state = SEQ_STATE_READ_ACQUIRED; seq->read_count = 1; @@ -450,7 +430,7 @@ 
report_lock_acquire_event(struct trace_acquire_event *acquire_event, } break; case SEQ_STATE_READ_ACQUIRED: - if (acquire_event->flag & READ_LOCK) { + if (flag & READ_LOCK) { seq->read_count++; ls->nr_acquired++; goto end; @@ -480,17 +460,20 @@ end: return 0; } -static int -report_lock_acquired_event(struct trace_acquired_event *acquired_event, - const struct perf_sample *sample) +static int report_lock_acquired_event(struct perf_evsel *evsel, + struct perf_sample *sample) { - u64 timestamp = sample->time; + void *addr; struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; u64 contended_term; + const char *name = perf_evsel__strval(evsel, sample, "name"); + u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); - ls = lock_stat_findnew(acquired_event->addr, acquired_event->name); + memcpy(&addr, &tmp, sizeof(void *)); + + ls = lock_stat_findnew(addr, name); if (!ls) return -1; if (ls->discard) @@ -500,7 +483,7 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event, if (!ts) return -1; - seq = get_seq(ts, acquired_event->addr); + seq = get_seq(ts, addr); if (!seq) return -1; @@ -511,7 +494,7 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event, case SEQ_STATE_ACQUIRING: break; case SEQ_STATE_CONTENDED: - contended_term = timestamp - seq->prev_event_time; + contended_term = sample->time - seq->prev_event_time; ls->wait_time_total += contended_term; if (contended_term < ls->wait_time_min) ls->wait_time_min = contended_term; @@ -536,20 +519,24 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event, seq->state = SEQ_STATE_ACQUIRED; ls->nr_acquired++; - seq->prev_event_time = timestamp; + seq->prev_event_time = sample->time; end: return 0; } -static int -report_lock_contended_event(struct trace_contended_event *contended_event, - const struct perf_sample *sample) +static int report_lock_contended_event(struct perf_evsel *evsel, + struct perf_sample *sample) { + void *addr; struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; + const char *name = perf_evsel__strval(evsel, sample, "name"); + u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); - ls = lock_stat_findnew(contended_event->addr, contended_event->name); + memcpy(&addr, &tmp, sizeof(void *)); + + ls = lock_stat_findnew(addr, name); if (!ls) return -1; if (ls->discard) @@ -559,7 +546,7 @@ report_lock_contended_event(struct trace_contended_event *contended_event, if (!ts) return -1; - seq = get_seq(ts, contended_event->addr); + seq = get_seq(ts, addr); if (!seq) return -1; @@ -592,15 +579,19 @@ end: return 0; } -static int -report_lock_release_event(struct trace_release_event *release_event, - const struct perf_sample *sample) +static int report_lock_release_event(struct perf_evsel *evsel, + struct perf_sample *sample) { + void *addr; struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; + const char *name = perf_evsel__strval(evsel, sample, "name"); + u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); - ls = lock_stat_findnew(release_event->addr, release_event->name); + memcpy(&addr, &tmp, sizeof(void *)); + + ls = lock_stat_findnew(addr, name); if (!ls) return -1; if (ls->discard) @@ -610,7 +601,7 @@ report_lock_release_event(struct trace_release_event *release_event, if (!ts) return -1; - seq = get_seq(ts, release_event->addr); + seq = get_seq(ts, addr); if (!seq) return -1; @@ -663,96 +654,33 @@ static struct trace_lock_handler *trace_handler; static int perf_evsel__process_lock_acquire(struct 
perf_evsel *evsel, struct perf_sample *sample) { - struct trace_acquire_event acquire_event; - struct event_format *event = evsel->tp_format; - void *data = sample->raw_data; - u64 tmp; /* this is required for casting... */ - int rc = 0; - - tmp = raw_field_value(event, "lockdep_addr", data); - memcpy(&acquire_event.addr, &tmp, sizeof(void *)); - acquire_event.name = (char *)raw_field_ptr(event, "name", data); - acquire_event.flag = (int)raw_field_value(event, "flag", data); - if (trace_handler->acquire_event) - rc = trace_handler->acquire_event(&acquire_event, sample); - - return rc; + return trace_handler->acquire_event(evsel, sample); + return 0; } static int perf_evsel__process_lock_acquired(struct perf_evsel *evsel, struct perf_sample *sample) { - struct trace_acquired_event acquired_event; - struct event_format *event = evsel->tp_format; - void *data = sample->raw_data; - u64 tmp; /* this is required for casting... */ - int rc = 0; - - tmp = raw_field_value(event, "lockdep_addr", data); - memcpy(&acquired_event.addr, &tmp, sizeof(void *)); - acquired_event.name = (char *)raw_field_ptr(event, "name", data); - if (trace_handler->acquired_event) - rc = trace_handler->acquired_event(&acquired_event, sample); - - return rc; + return trace_handler->acquired_event(evsel, sample); + return 0; } static int perf_evsel__process_lock_contended(struct perf_evsel *evsel, - struct perf_sample *sample) + struct perf_sample *sample) { - struct trace_contended_event contended_event; - struct event_format *event = evsel->tp_format; - void *data = sample->raw_data; - u64 tmp; /* this is required for casting... */ - int rc = 0; - - tmp = raw_field_value(event, "lockdep_addr", data); - memcpy(&contended_event.addr, &tmp, sizeof(void *)); - contended_event.name = (char *)raw_field_ptr(event, "name", data); - if (trace_handler->contended_event) - rc = trace_handler->contended_event(&contended_event, sample); - - return rc; + return trace_handler->contended_event(evsel, sample); + return 0; } static int perf_evsel__process_lock_release(struct perf_evsel *evsel, - struct perf_sample *sample) + struct perf_sample *sample) { - struct trace_release_event release_event; - struct event_format *event = evsel->tp_format; - void *data = sample->raw_data; - u64 tmp; /* this is required for casting... 
*/ - int rc = 0; - - tmp = raw_field_value(event, "lockdep_addr", data); - memcpy(&release_event.addr, &tmp, sizeof(void *)); - release_event.name = (char *)raw_field_ptr(event, "name", data); - if (trace_handler->release_event) - rc = trace_handler->release_event(&release_event, sample); - - return rc; -} - -static int perf_evsel__process_lock_event(struct perf_evsel *evsel, - struct perf_sample *sample) -{ - struct event_format *event = evsel->tp_format; - int rc = 0; - - if (!strcmp(event->name, "lock_acquire")) - rc = perf_evsel__process_lock_acquire(evsel, sample); - if (!strcmp(event->name, "lock_acquired")) - rc = perf_evsel__process_lock_acquired(evsel, sample); - if (!strcmp(event->name, "lock_contended")) - rc = perf_evsel__process_lock_contended(evsel, sample); - if (!strcmp(event->name, "lock_release")) - rc = perf_evsel__process_lock_release(evsel, sample); - - return rc; + return trace_handler->release_event(evsel, sample); + return 0; } static void print_bad_events(int bad, int total) @@ -870,6 +798,9 @@ static int dump_info(void) return rc; } +typedef int (*tracepoint_handler)(struct perf_evsel *evsel, + struct perf_sample *sample); + static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -884,7 +815,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } - return perf_evsel__process_lock_event(evsel, sample); + if (evsel->handler.func != NULL) { + tracepoint_handler f = evsel->handler.func; + return f(evsel, sample); + } + + return 0; } static struct perf_tool eops = { @@ -893,6 +829,13 @@ static struct perf_tool eops = { .ordered_samples = true, }; +static const struct perf_evsel_str_handler lock_tracepoints[] = { + { "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ + { "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_contended", perf_evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ +}; + static int read_events(void) { session = perf_session__new(input_name, O_RDONLY, 0, false, &eops); @@ -901,6 +844,11 @@ static int read_events(void) return -1; } + if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + return -1; + } + return perf_session__process_events(session, &eops); } @@ -967,13 +915,6 @@ static const struct option lock_options[] = { OPT_END() }; -static const char * const lock_tracepoints[] = { - "lock:lock_acquire", /* CONFIG_LOCKDEP */ - "lock:lock_acquired", /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - "lock:lock_contended", /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - "lock:lock_release", /* CONFIG_LOCKDEP */ -}; - static const char *record_args[] = { "record", "-R", @@ -988,10 +929,10 @@ static int __cmd_record(int argc, const char **argv) const char **rec_argv; for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) { - if (!is_valid_tracepoint(lock_tracepoints[i])) { + if (!is_valid_tracepoint(lock_tracepoints[i].name)) { pr_err("tracepoint %s is not enabled. 
" "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n", - lock_tracepoints[i]); + lock_tracepoints[i].name); return 1; } } @@ -1009,7 +950,7 @@ static int __cmd_record(int argc, const char **argv) for (j = 0; j < ARRAY_SIZE(lock_tracepoints); j++) { rec_argv[i++] = "-e"; - rec_argv[i++] = strdup(lock_tracepoints[j]); + rec_argv[i++] = strdup(lock_tracepoints[j].name); } for (j = 1; j < (unsigned int)argc; j++, i++) From e0dcd6fb2564c20e53ee8b7d2884a7e375fe9e75 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 24 Sep 2012 11:16:40 -0300 Subject: [PATCH 242/282] perf timechart: Use zalloc and fix a couple leaks Use zalloc for the malloc+memset open coded sequence. Fix leak on the #ifdef'ed C state handling and when detecting invalid data in p_state_change(). Cc: Arjan van de Ven Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-timechart.c | 42 ++++++++++++---------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 55a3a6c6b9e..b1a8a3b841c 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -168,9 +168,8 @@ static struct per_pid *find_create_pid(int pid) return cursor; cursor = cursor->next; } - cursor = malloc(sizeof(struct per_pid)); + cursor = zalloc(sizeof(*cursor)); assert(cursor != NULL); - memset(cursor, 0, sizeof(struct per_pid)); cursor->pid = pid; cursor->next = all_data; all_data = cursor; @@ -195,9 +194,8 @@ static void pid_set_comm(int pid, char *comm) } c = c->next; } - c = malloc(sizeof(struct per_pidcomm)); + c = zalloc(sizeof(*c)); assert(c != NULL); - memset(c, 0, sizeof(struct per_pidcomm)); c->comm = strdup(comm); p->current = c; c->next = p->all; @@ -239,17 +237,15 @@ pid_put_sample(int pid, int type, unsigned int cpu, u64 start, u64 end) p = find_create_pid(pid); c = p->current; if (!c) { - c = malloc(sizeof(struct per_pidcomm)); + c = zalloc(sizeof(*c)); assert(c != NULL); - memset(c, 0, sizeof(struct per_pidcomm)); p->current = c; c->next = p->all; p->all = c; } - sample = malloc(sizeof(struct cpu_sample)); + sample = zalloc(sizeof(*sample)); assert(sample != NULL); - memset(sample, 0, sizeof(struct cpu_sample)); sample->start_time = start; sample->end_time = end; sample->type = type; @@ -373,11 +369,10 @@ static void c_state_start(int cpu, u64 timestamp, int state) static void c_state_end(int cpu, u64 timestamp) { - struct power_event *pwr; - pwr = malloc(sizeof(struct power_event)); + struct power_event *pwr = zalloc(sizeof(*pwr)); + if (!pwr) return; - memset(pwr, 0, sizeof(struct power_event)); pwr->state = cpus_cstate_state[cpu]; pwr->start_time = cpus_cstate_start_times[cpu]; @@ -392,14 +387,13 @@ static void c_state_end(int cpu, u64 timestamp) static void p_state_change(int cpu, u64 timestamp, u64 new_freq) { struct power_event *pwr; - pwr = malloc(sizeof(struct power_event)); if (new_freq > 8000000) /* detect invalid data */ return; + pwr = zalloc(sizeof(*pwr)); if (!pwr) return; - memset(pwr, 0, sizeof(struct power_event)); pwr->state = cpus_pstate_state[cpu]; pwr->start_time = cpus_pstate_start_times[cpu]; @@ -429,15 +423,13 @@ static void p_state_change(int cpu, u64 timestamp, u64 new_freq) static void sched_wakeup(int cpu, u64 timestamp, int pid, struct trace_entry *te) { - struct 
wake_event *we; struct per_pid *p; struct wakeup_entry *wake = (void *)te; + struct wake_event *we = zalloc(sizeof(*we)); - we = malloc(sizeof(struct wake_event)); if (!we) return; - memset(we, 0, sizeof(struct wake_event)); we->time = timestamp; we->waker = pid; @@ -579,13 +571,12 @@ static void end_sample_processing(void) struct power_event *pwr; for (cpu = 0; cpu <= numcpus; cpu++) { - pwr = malloc(sizeof(struct power_event)); - if (!pwr) - return; - memset(pwr, 0, sizeof(struct power_event)); - /* C state */ #if 0 + pwr = zalloc(sizeof(*pwr)); + if (!pwr) + return; + pwr->state = cpus_cstate_state[cpu]; pwr->start_time = cpus_cstate_start_times[cpu]; pwr->end_time = last_time; @@ -597,10 +588,9 @@ static void end_sample_processing(void) #endif /* P state */ - pwr = malloc(sizeof(struct power_event)); + pwr = zalloc(sizeof(*pwr)); if (!pwr) return; - memset(pwr, 0, sizeof(struct power_event)); pwr->state = cpus_pstate_state[cpu]; pwr->start_time = cpus_pstate_start_times[cpu]; @@ -830,11 +820,9 @@ static void draw_process_bars(void) static void add_process_filter(const char *string) { - struct process_filter *filt; - int pid; + int pid = strtoull(string, NULL, 10); + struct process_filter *filt = malloc(sizeof(*filt)); - pid = strtoull(string, NULL, 10); - filt = malloc(sizeof(struct process_filter)); if (!filt) return; From e93699b3c7aeb13eee473b1dc36cbe3a8a0ca397 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Sep 2012 17:14:58 +0900 Subject: [PATCH 243/282] perf header: Add struct perf_session_env The struct perf_session_env will preserve environment information at the time of perf record. It can be accessed anytime after parsing a perf.data file if needed. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1348474503-15070-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 58de08b21bc..5867c7d74f9 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -58,6 +58,29 @@ struct perf_header; int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); +struct perf_session_env { + char *hostname; + char *os_release; + char *version; + char *arch; + int nr_cpus_online; + int nr_cpus_avail; + char *cpu_desc; + char *cpuid; + unsigned long long total_mem; + + int nr_cmdline; + char *cmdline; + int nr_sibling_cores; + char *sibling_cores; + int nr_sibling_threads; + char *sibling_threads; + int nr_numa_nodes; + char *numa_nodes; + int nr_pmu_mappings; + char *pmu_mappings; +}; + struct perf_header { int frozen; bool needs_swap; @@ -67,6 +90,7 @@ struct perf_header { u64 event_offset; u64 event_size; DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); + struct perf_session_env env; }; struct perf_evlist; From a1ae565528757f26d315897929b6fe58c5647915 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Sep 2012 17:14:59 +0900 Subject: [PATCH 244/282] perf header: Add ->process callbacks to most of features From now on each feature information is processed and saved in perf header so that it can be used wherever needed. The BRANCH_STACK feature is an exception since it needs nothing to be done. 
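To see what this buys, once the header of a perf.data file has been read, any later code can look at the saved copies instead of seeking back into the file. A rough sketch of a consumer, assuming a fully set up struct perf_session; the report_env() helper is invented for illustration, only the env fields come from this series:

  /* Needs util/session.h (struct perf_session) and <stdio.h>. */
  static void report_env(struct perf_session *session)
  {
          struct perf_session_env *env = &session->header.env;

          /* All of these were filled in by the ->process callbacks. */
          printf("# hostname  : %s\n", env->hostname);
          printf("# arch      : %s\n", env->arch);
          printf("# nrcpus    : %d online, %d available\n",
                 env->nr_cpus_online, env->nr_cpus_avail);
          printf("# total mem : %llu kB\n", env->total_mem);
  }
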
Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1348474503-15070-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 319 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 308 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ad72b2814ba..d74b58d4105 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -22,6 +22,7 @@ #include "cpumap.h" #include "pmu.h" #include "vdso.h" +#include "strbuf.h" static bool no_buildid_cache = false; @@ -1673,6 +1674,99 @@ static int process_build_id(struct perf_file_section *section, return 0; } +static int process_hostname(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + ph->env.hostname = do_read_string(fd, ph); + return ph->env.hostname ? 0 : -ENOMEM; +} + +static int process_osrelease(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + ph->env.os_release = do_read_string(fd, ph); + return ph->env.os_release ? 0 : -ENOMEM; +} + +static int process_version(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + ph->env.version = do_read_string(fd, ph); + return ph->env.version ? 0 : -ENOMEM; +} + +static int process_arch(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + ph->env.arch = do_read_string(fd, ph); + return ph->env.arch ? 0 : -ENOMEM; +} + +static int process_nrcpus(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + size_t ret; + u32 nr; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + return -1; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_cpus_online = nr; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + return -1; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_cpus_avail = nr; + return 0; +} + +static int process_cpudesc(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + ph->env.cpu_desc = do_read_string(fd, ph); + return ph->env.cpu_desc ? 0 : -ENOMEM; +} + +static int process_cpuid(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + ph->env.cpuid = do_read_string(fd, ph); + return ph->env.cpuid ? 
0 : -ENOMEM; +} + +static int process_total_mem(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + uint64_t mem; + size_t ret; + + ret = read(fd, &mem, sizeof(mem)); + if (ret != sizeof(mem)) + return -1; + + if (ph->needs_swap) + mem = bswap_64(mem); + + ph->env.total_mem = mem; + return 0; +} + static char *read_cpuid(struct perf_header *ph, int fd) { return do_read_string(fd, ph); @@ -1728,6 +1822,208 @@ process_event_desc(struct perf_file_section *section __maybe_unused, return 0; } +static int process_cmdline(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + size_t ret; + char *str; + u32 nr, i; + struct strbuf sb; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + return -1; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_cmdline = nr; + strbuf_init(&sb, 128); + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + if (!str) + goto error; + + /* include a NULL character at the end */ + strbuf_add(&sb, str, strlen(str) + 1); + free(str); + } + ph->env.cmdline = strbuf_detach(&sb, NULL); + return 0; + +error: + strbuf_release(&sb); + return -1; +} + +static int process_cpu_topology(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + size_t ret; + u32 nr, i; + char *str; + struct strbuf sb; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + return -1; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_sibling_cores = nr; + strbuf_init(&sb, 128); + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + if (!str) + goto error; + + /* include a NULL character at the end */ + strbuf_add(&sb, str, strlen(str) + 1); + free(str); + } + ph->env.sibling_cores = strbuf_detach(&sb, NULL); + + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + return -1; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_sibling_threads = nr; + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + if (!str) + goto error; + + /* include a NULL character at the end */ + strbuf_add(&sb, str, strlen(str) + 1); + free(str); + } + ph->env.sibling_threads = strbuf_detach(&sb, NULL); + return 0; + +error: + strbuf_release(&sb); + return -1; +} + +static int process_numa_topology(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + size_t ret; + u32 nr, node, i; + char *str; + uint64_t mem_total, mem_free; + struct strbuf sb; + + /* nr nodes */ + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + goto error; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_numa_nodes = nr; + strbuf_init(&sb, 256); + + for (i = 0; i < nr; i++) { + /* node number */ + ret = read(fd, &node, sizeof(node)); + if (ret != sizeof(node)) + goto error; + + ret = read(fd, &mem_total, sizeof(u64)); + if (ret != sizeof(u64)) + goto error; + + ret = read(fd, &mem_free, sizeof(u64)); + if (ret != sizeof(u64)) + goto error; + + if (ph->needs_swap) { + node = bswap_32(node); + mem_total = bswap_64(mem_total); + mem_free = bswap_64(mem_free); + } + + strbuf_addf(&sb, "%u:%"PRIu64":%"PRIu64":", + node, mem_total, mem_free); + + str = do_read_string(fd, ph); + if (!str) + goto error; + + /* include a NULL character at the end */ + strbuf_add(&sb, str, strlen(str) + 1); + free(str); + } + 
ph->env.numa_nodes = strbuf_detach(&sb, NULL); + return 0; + +error: + strbuf_release(&sb); + return -1; +} + +static int process_pmu_mappings(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int feat __maybe_unused, + int fd, void *data __maybe_unused) +{ + size_t ret; + char *name; + u32 pmu_num; + u32 type; + struct strbuf sb; + + ret = read(fd, &pmu_num, sizeof(pmu_num)); + if (ret != sizeof(pmu_num)) + return -1; + + if (ph->needs_swap) + pmu_num = bswap_32(pmu_num); + + if (!pmu_num) { + pr_debug("pmu mappings not available\n"); + return 0; + } + + ph->env.nr_pmu_mappings = pmu_num; + strbuf_init(&sb, 128); + + while (pmu_num) { + if (read(fd, &type, sizeof(type)) != sizeof(type)) + goto error; + if (ph->needs_swap) + type = bswap_32(type); + + name = do_read_string(fd, ph); + if (!name) + goto error; + + strbuf_addf(&sb, "%u:%s", type, name); + /* include a NULL character at the end */ + strbuf_add(&sb, "", 1); + + free(name); + pmu_num--; + } + ph->env.pmu_mappings = strbuf_detach(&sb, NULL); + return 0; + +error: + strbuf_release(&sb); + return -1; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); @@ -1745,10 +2041,11 @@ struct feature_ops { .process = process_##func } #define FEAT_OPF(n, func) \ [n] = { .name = #n, .write = write_##func, .print = print_##func, \ - .full_only = true } + .process = process_##func, .full_only = true } #define FEAT_OPA_R(n, func) \ [n] = { .name = #n, .write = write_##func, .print = print_##func, \ - .read = read_##func } + .read = read_##func, .process = process_##func, \ + .full_only = true } /* feature_ops not implemented: */ #define print_tracing_data NULL @@ -1757,20 +2054,20 @@ struct feature_ops { static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPP(HEADER_TRACING_DATA, tracing_data), FEAT_OPP(HEADER_BUILD_ID, build_id), - FEAT_OPA(HEADER_HOSTNAME, hostname), - FEAT_OPA(HEADER_OSRELEASE, osrelease), - FEAT_OPA(HEADER_VERSION, version), - FEAT_OPA(HEADER_ARCH, arch), - FEAT_OPA(HEADER_NRCPUS, nrcpus), - FEAT_OPA(HEADER_CPUDESC, cpudesc), + FEAT_OPP(HEADER_HOSTNAME, hostname), + FEAT_OPP(HEADER_OSRELEASE, osrelease), + FEAT_OPP(HEADER_VERSION, version), + FEAT_OPP(HEADER_ARCH, arch), + FEAT_OPP(HEADER_NRCPUS, nrcpus), + FEAT_OPP(HEADER_CPUDESC, cpudesc), FEAT_OPA_R(HEADER_CPUID, cpuid), - FEAT_OPA(HEADER_TOTAL_MEM, total_mem), + FEAT_OPP(HEADER_TOTAL_MEM, total_mem), FEAT_OPP(HEADER_EVENT_DESC, event_desc), - FEAT_OPA(HEADER_CMDLINE, cmdline), + FEAT_OPP(HEADER_CMDLINE, cmdline), FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), - FEAT_OPA(HEADER_PMU_MAPPINGS, pmu_mappings), + FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings), }; struct header_print_data { From 7e94cfcc9d201984a7be00a4fb42050c69ec4c56 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Sep 2012 17:15:00 +0900 Subject: [PATCH 245/282] perf header: Use pre-processed session env when printing From now on each feature information is processed and saved in perf header so that it can be used for printing. The event desc and branch stack features are not touched since they're not saved. 
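One detail worth spelling out: the list-like env fields (cmdline, sibling_cores, sibling_threads, numa_nodes, pmu_mappings) were packed by the previous patch as back-to-back NUL-terminated strings, via strbuf_add(&sb, str, strlen(str) + 1). That is why the printers below walk them with str += strlen(str) + 1. A standalone illustration of that walk, with an invented sample buffer:

  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          /* Packed the same way strbuf_add(&sb, s, strlen(s) + 1) packs them:
           * every element keeps its terminating NUL inside the buffer. */
          const char buf[] = "perf\0record\0-a";
          const char *p = buf;
          int nr = 3, i;

          for (i = 0; i < nr; i++) {
                  printf("item[%d] = %s\n", i, p);
                  p += strlen(p) + 1;     /* hop over the embedded NUL */
          }
          return 0;
  }
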
Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1348474503-15070-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 209 +++++++++++++-------------------------- 1 file changed, 67 insertions(+), 142 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d74b58d4105..b2929d7a3d4 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1103,118 +1103,80 @@ static int write_branch_stack(int fd __maybe_unused, return 0; } -static void print_hostname(struct perf_header *ph, int fd, FILE *fp) +static void print_hostname(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# hostname : %s\n", str); - free(str); + fprintf(fp, "# hostname : %s\n", ph->env.hostname); } -static void print_osrelease(struct perf_header *ph, int fd, FILE *fp) +static void print_osrelease(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# os release : %s\n", str); - free(str); + fprintf(fp, "# os release : %s\n", ph->env.os_release); } -static void print_arch(struct perf_header *ph, int fd, FILE *fp) +static void print_arch(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# arch : %s\n", str); - free(str); + fprintf(fp, "# arch : %s\n", ph->env.arch); } -static void print_cpudesc(struct perf_header *ph, int fd, FILE *fp) +static void print_cpudesc(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# cpudesc : %s\n", str); - free(str); + fprintf(fp, "# cpudesc : %s\n", ph->env.cpu_desc); } -static void print_nrcpus(struct perf_header *ph, int fd, FILE *fp) +static void print_nrcpus(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - ssize_t ret; - u32 nr; - - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - nr = -1; /* interpreted as error */ - - if (ph->needs_swap) - nr = bswap_32(nr); - - fprintf(fp, "# nrcpus online : %u\n", nr); - - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - nr = -1; /* interpreted as error */ - - if (ph->needs_swap) - nr = bswap_32(nr); - - fprintf(fp, "# nrcpus avail : %u\n", nr); + fprintf(fp, "# nrcpus online : %u\n", ph->env.nr_cpus_online); + fprintf(fp, "# nrcpus avail : %u\n", ph->env.nr_cpus_avail); } -static void print_version(struct perf_header *ph, int fd, FILE *fp) +static void print_version(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# perf version : %s\n", str); - free(str); + fprintf(fp, "# perf version : %s\n", ph->env.version); } -static void print_cmdline(struct perf_header *ph, int fd, FILE *fp) +static void print_cmdline(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - ssize_t ret; + int nr, i; char *str; - u32 nr, i; - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - return; - - if (ph->needs_swap) - nr = bswap_32(nr); + nr = ph->env.nr_cmdline; + str = ph->env.cmdline; fprintf(fp, "# cmdline : "); for (i = 0; i < nr; i++) { - str = do_read_string(fd, ph); fprintf(fp, "%s ", str); - free(str); + str += strlen(str) + 1; } fputc('\n', fp); } -static void print_cpu_topology(struct perf_header *ph, int fd, FILE *fp) +static void print_cpu_topology(struct 
perf_header *ph, int fd __maybe_unused, + FILE *fp) { - ssize_t ret; - u32 nr, i; + int nr, i; char *str; - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - return; - - if (ph->needs_swap) - nr = bswap_32(nr); + nr = ph->env.nr_sibling_cores; + str = ph->env.sibling_cores; for (i = 0; i < nr; i++) { - str = do_read_string(fd, ph); fprintf(fp, "# sibling cores : %s\n", str); - free(str); + str += strlen(str) + 1; } - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - return; - - if (ph->needs_swap) - nr = bswap_32(nr); + nr = ph->env.nr_sibling_threads; + str = ph->env.sibling_threads; for (i = 0; i < nr; i++) { - str = do_read_string(fd, ph); fprintf(fp, "# sibling threads : %s\n", str); - free(str); + str += strlen(str) + 1; } } @@ -1375,126 +1337,89 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) free_event_desc(events); } -static void print_total_mem(struct perf_header *h __maybe_unused, int fd, +static void print_total_mem(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { - uint64_t mem; - ssize_t ret; - - ret = read(fd, &mem, sizeof(mem)); - if (ret != sizeof(mem)) - goto error; - - if (h->needs_swap) - mem = bswap_64(mem); - - fprintf(fp, "# total memory : %"PRIu64" kB\n", mem); - return; -error: - fprintf(fp, "# total memory : unknown\n"); + fprintf(fp, "# total memory : %Lu kB\n", ph->env.total_mem); } -static void print_numa_topology(struct perf_header *h __maybe_unused, int fd, +static void print_numa_topology(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { - ssize_t ret; u32 nr, c, i; - char *str; + char *str, *tmp; uint64_t mem_total, mem_free; /* nr nodes */ - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - goto error; - - if (h->needs_swap) - nr = bswap_32(nr); + nr = ph->env.nr_numa_nodes; + str = ph->env.numa_nodes; for (i = 0; i < nr; i++) { - /* node number */ - ret = read(fd, &c, sizeof(c)); - if (ret != (ssize_t)sizeof(c)) + c = strtoul(str, &tmp, 0); + if (*tmp != ':') goto error; - if (h->needs_swap) - c = bswap_32(c); - - ret = read(fd, &mem_total, sizeof(u64)); - if (ret != sizeof(u64)) + str = tmp + 1; + mem_total = strtoull(str, &tmp, 0); + if (*tmp != ':') goto error; - ret = read(fd, &mem_free, sizeof(u64)); - if (ret != sizeof(u64)) + str = tmp + 1; + mem_free = strtoull(str, &tmp, 0); + if (*tmp != ':') goto error; - if (h->needs_swap) { - mem_total = bswap_64(mem_total); - mem_free = bswap_64(mem_free); - } - fprintf(fp, "# node%u meminfo : total = %"PRIu64" kB," " free = %"PRIu64" kB\n", - c, - mem_total, - mem_free); + c, mem_total, mem_free); - str = do_read_string(fd, h); + str = tmp + 1; fprintf(fp, "# node%u cpu list : %s\n", c, str); - free(str); } return; error: fprintf(fp, "# numa topology : not available\n"); } -static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) +static void print_cpuid(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# cpuid : %s\n", str); - free(str); + fprintf(fp, "# cpuid : %s\n", ph->env.cpuid); } static void print_branch_stack(struct perf_header *ph __maybe_unused, - int fd __maybe_unused, - FILE *fp) + int fd __maybe_unused, FILE *fp) { fprintf(fp, "# contains samples with branch stack\n"); } -static void print_pmu_mappings(struct perf_header *ph, int fd, FILE *fp) +static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { const char *delimiter = "# pmu mappings: "; - char *name; - int ret; + char *str, *tmp; u32 
pmu_num; u32 type; - ret = read(fd, &pmu_num, sizeof(pmu_num)); - if (ret != sizeof(pmu_num)) - goto error; - - if (ph->needs_swap) - pmu_num = bswap_32(pmu_num); - + pmu_num = ph->env.nr_pmu_mappings; if (!pmu_num) { fprintf(fp, "# pmu mappings: not available\n"); return; } - while (pmu_num) { - if (read(fd, &type, sizeof(type)) != sizeof(type)) - break; - if (ph->needs_swap) - type = bswap_32(type); + str = ph->env.pmu_mappings; + + while (pmu_num) { + type = strtoul(str, &tmp, 0); + if (*tmp != ':') + goto error; + + str = tmp + 1; + fprintf(fp, "%s%s = %" PRIu32, delimiter, str, type); - name = do_read_string(fd, ph); - if (!name) - break; - pmu_num--; - fprintf(fp, "%s%s = %" PRIu32, delimiter, name, type); - free(name); delimiter = ", "; + str += strlen(str) + 1; + pmu_num--; } fprintf(fp, "\n"); From 3d7eb86b9d84e7493c8c835fbcd2a3fe4a1f5937 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Sep 2012 17:15:01 +0900 Subject: [PATCH 246/282] perf header: Remove unused @feat arg from ->process callback As the @feat arg is not used anywhere, get rid of it from the signature. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1348474503-15070-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 70 ++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b2929d7a3d4..4b028df83a4 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1580,18 +1580,16 @@ out: return err; } -static int process_tracing_data(struct perf_file_section *section - __maybe_unused, - struct perf_header *ph __maybe_unused, - int feat __maybe_unused, int fd, void *data) +static int process_tracing_data(struct perf_file_section *section __maybe_unused, + struct perf_header *ph __maybe_unused, + int fd, void *data) { trace_report(fd, data, false); return 0; } static int process_build_id(struct perf_file_section *section, - struct perf_header *ph, - int feat __maybe_unused, int fd, + struct perf_header *ph, int fd, void *data __maybe_unused) { if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) @@ -1600,40 +1598,40 @@ static int process_build_id(struct perf_file_section *section, } static int process_hostname(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { ph->env.hostname = do_read_string(fd, ph); return ph->env.hostname ? 0 : -ENOMEM; } static int process_osrelease(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { ph->env.os_release = do_read_string(fd, ph); return ph->env.os_release ? 0 : -ENOMEM; } static int process_version(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { ph->env.version = do_read_string(fd, ph); return ph->env.version ? 
0 : -ENOMEM; } static int process_arch(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { ph->env.arch = do_read_string(fd, ph); return ph->env.arch ? 0 : -ENOMEM; } static int process_nrcpus(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { size_t ret; u32 nr; @@ -1659,24 +1657,24 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, } static int process_cpudesc(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { ph->env.cpu_desc = do_read_string(fd, ph); return ph->env.cpu_desc ? 0 : -ENOMEM; } static int process_cpuid(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { ph->env.cpuid = do_read_string(fd, ph); return ph->env.cpuid ? 0 : -ENOMEM; } static int process_total_mem(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { uint64_t mem; size_t ret; @@ -1711,7 +1709,8 @@ perf_evlist__find_by_index(struct perf_evlist *evlist, int idx) } static void -perf_evlist__set_event_name(struct perf_evlist *evlist, struct perf_evsel *event) +perf_evlist__set_event_name(struct perf_evlist *evlist, + struct perf_evsel *event) { struct perf_evsel *evsel; @@ -1730,15 +1729,16 @@ perf_evlist__set_event_name(struct perf_evlist *evlist, struct perf_evsel *event static int process_event_desc(struct perf_file_section *section __maybe_unused, - struct perf_header *header, int feat __maybe_unused, int fd, + struct perf_header *header, int fd, void *data __maybe_unused) { - struct perf_session *session = container_of(header, struct perf_session, header); + struct perf_session *session; struct perf_evsel *evsel, *events = read_event_desc(header, fd); if (!events) return 0; + session = container_of(header, struct perf_session, header); for (evsel = events; evsel->attr.size; evsel++) perf_evlist__set_event_name(session->evlist, evsel); @@ -1748,8 +1748,8 @@ process_event_desc(struct perf_file_section *section __maybe_unused, } static int process_cmdline(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { size_t ret; char *str; @@ -1784,8 +1784,8 @@ error: } static int process_cpu_topology(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { size_t ret; u32 nr, i; @@ -1840,8 +1840,8 @@ error: } static int process_numa_topology(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { size_t ret; u32 nr, node, i; @@ -1900,8 +1900,8 @@ error: } static int process_pmu_mappings(struct perf_file_section *section 
__maybe_unused, - struct perf_header *ph, int feat __maybe_unused, - int fd, void *data __maybe_unused) + struct perf_header *ph, int fd, + void *data __maybe_unused) { size_t ret; char *name; @@ -1954,7 +1954,7 @@ struct feature_ops { void (*print)(struct perf_header *h, int fd, FILE *fp); char *(*read)(struct perf_header *h, int fd); int (*process)(struct perf_file_section *section, - struct perf_header *h, int feat, int fd, void *data); + struct perf_header *h, int fd, void *data); const char *name; bool full_only; }; @@ -2520,7 +2520,7 @@ static int perf_file_section__process(struct perf_file_section *section, if (!feat_ops[feat].process) return 0; - return feat_ops[feat].process(section, ph, feat, fd, data); + return feat_ops[feat].process(section, ph, fd, data); } static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, From 2f9e97aa8b4c6220c0770a966fb99d7366679813 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Sep 2012 17:15:02 +0900 Subject: [PATCH 247/282] perf kvm: Use perf_session_env for reading cpuid We have processed and saved cpuid information to perf_session_env so reuse it for get_cpu_isa(). Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Dong Hao Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Xiao Guangrong Link: http://lkml.kernel.org/r/1348474503-15070-6-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 3eb53e33c02..a28c9cad904 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -664,16 +664,9 @@ static struct perf_tool eops = { static int get_cpu_isa(struct perf_session *session) { - char *cpuid; + char *cpuid = session->header.env.cpuid; int isa; - cpuid = perf_header__read_feature(session, HEADER_CPUID); - - if (!cpuid) { - pr_err("read HEADER_CPUID failed.\n"); - return -ENOTSUP; - } - if (strstr(cpuid, "Intel")) isa = 1; else if (strstr(cpuid, "AMD")) @@ -683,7 +676,6 @@ static int get_cpu_isa(struct perf_session *session) isa = -ENOTSUP; } - free(cpuid); return isa; } From 37e9d750e672d6fa8c25463bd76240410bbbc786 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Sep 2012 17:15:03 +0900 Subject: [PATCH 248/282] perf header: Remove perf_header__read_feature Because its only user builtin-kvm::get_cpu_isa() has gone, It can be removed safely. In general, we have the feature information in perf_session_env already, no need to read it again. 
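Because the flattened hunks above are hard to follow, this is approximately what builtin-kvm's get_cpu_isa() ends up looking like after the previous patch. It is a reconstruction from the diff, and the unsupported-CPU branch (which also prints an error in the real code) is abbreviated here:

  static int get_cpu_isa(struct perf_session *session)
  {
          char *cpuid = session->header.env.cpuid; /* pre-parsed, nothing read from fd */
          int isa;

          if (strstr(cpuid, "Intel"))
                  isa = 1;
          else if (strstr(cpuid, "AMD"))
                  isa = 0;
          else
                  isa = -ENOTSUP;  /* the real code also reports the unknown cpuid */

          return isa;              /* and no free(cpuid) any more */
  }
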
Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Dong Hao Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Xiao Guangrong Link: http://lkml.kernel.org/r/1348474503-15070-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 60 +--------------------------------------- tools/perf/util/header.h | 1 - 2 files changed, 1 insertion(+), 60 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4b028df83a4..6aae3290358 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1690,11 +1690,6 @@ static int process_total_mem(struct perf_file_section *section __maybe_unused, return 0; } -static char *read_cpuid(struct perf_header *ph, int fd) -{ - return do_read_string(fd, ph); -} - static struct perf_evsel * perf_evlist__find_by_index(struct perf_evlist *evlist, int idx) { @@ -1952,7 +1947,6 @@ error: struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); - char *(*read)(struct perf_header *h, int fd); int (*process)(struct perf_file_section *section, struct perf_header *h, int fd, void *data); const char *name; @@ -1967,10 +1961,6 @@ struct feature_ops { #define FEAT_OPF(n, func) \ [n] = { .name = #n, .write = write_##func, .print = print_##func, \ .process = process_##func, .full_only = true } -#define FEAT_OPA_R(n, func) \ - [n] = { .name = #n, .write = write_##func, .print = print_##func, \ - .read = read_##func, .process = process_##func, \ - .full_only = true } /* feature_ops not implemented: */ #define print_tracing_data NULL @@ -1985,7 +1975,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPP(HEADER_ARCH, arch), FEAT_OPP(HEADER_NRCPUS, nrcpus), FEAT_OPP(HEADER_CPUDESC, cpudesc), - FEAT_OPA_R(HEADER_CPUID, cpuid), + FEAT_OPP(HEADER_CPUID, cpuid), FEAT_OPP(HEADER_TOTAL_MEM, total_mem), FEAT_OPP(HEADER_EVENT_DESC, event_desc), FEAT_OPP(HEADER_CMDLINE, cmdline), @@ -2040,54 +2030,6 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) return 0; } -struct header_read_data { - int feat; - char *result; -}; - -static int perf_file_section__read_feature(struct perf_file_section *section, - struct perf_header *ph, - int feat, int fd, void *data) -{ - struct header_read_data *hd = data; - - if (feat != hd->feat) - return 0; - - if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { - pr_debug("Failed to lseek to %" PRIu64 " offset for feature " - "%d, continuing...\n", section->offset, feat); - return 0; - } - - if (feat >= HEADER_LAST_FEATURE) { - pr_warning("unknown feature %d\n", feat); - return 0; - } - - if (!feat_ops[feat].read) { - pr_warning("read is not supported for feature %d\n", feat); - return 0; - } - - hd->result = feat_ops[feat].read(ph, fd); - return 0; -} - -char *perf_header__read_feature(struct perf_session *session, int feat) -{ - struct perf_header *header = &session->header; - struct header_read_data hd; - int fd = session->fd; - - hd.feat = feat; - hd.result = NULL; - - perf_header__process_sections(header, fd, &hd, - perf_file_section__read_feature); - return hd.result; -} - static int do_write_feat(int fd, struct perf_header *h, int type, struct perf_file_section **p, struct perf_evlist *evlist) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 5867c7d74f9..99bdd3abce5 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -118,7 +118,6 @@ int 
perf_header__process_sections(struct perf_header *header, int fd, int feat, int fd, void *data)); int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); -char *perf_header__read_feature(struct perf_session *session, int feat); int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, const char *name, bool is_kallsyms, bool is_vdso); From bcbd004020bf0d725722be75da35fd326ff63ef4 Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Thu, 20 Sep 2012 23:37:50 +0300 Subject: [PATCH 249/282] perf tools: remove sscanf extension %as perf uses sscanf extension %as to read and allocate a string in the same step. This is a non-standard extension only present in new versions of glibc. Replacing the use of sscanf and %as with strtok_r calls in order to parse a given string into its components. This is needed in Android since bionic does not support %as extension for sscanf. Reviewed-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Irina Tirdea Acked-by: Masami Hiramatsu Cc: David Ahern Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1348173470-4936-1-git-send-email-irina.tirdea@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 36 +++++++++++++++++++++++------ tools/perf/util/trace-event-parse.c | 18 +++++++-------- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 4ce04c2281d..49a256e6e0a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1100,6 +1100,7 @@ static int parse_probe_trace_command(const char *cmd, struct probe_trace_point *tp = &tev->point; char pr; char *p; + char *argv0_str = NULL, *fmt, *fmt1_str, *fmt2_str, *fmt3_str; int ret, i, argc; char **argv; @@ -1116,14 +1117,27 @@ static int parse_probe_trace_command(const char *cmd, } /* Scan event and group name. 
*/ - ret = sscanf(argv[0], "%c:%a[^/ \t]/%a[^ \t]", - &pr, (float *)(void *)&tev->group, - (float *)(void *)&tev->event); - if (ret != 3) { + argv0_str = strdup(argv[0]); + if (argv0_str == NULL) { + ret = -ENOMEM; + goto out; + } + fmt1_str = strtok_r(argv0_str, ":", &fmt); + fmt2_str = strtok_r(NULL, "/", &fmt); + fmt3_str = strtok_r(NULL, " \t", &fmt); + if (fmt1_str == NULL || strlen(fmt1_str) != 1 || fmt2_str == NULL + || fmt3_str == NULL) { semantic_error("Failed to parse event name: %s\n", argv[0]); ret = -EINVAL; goto out; } + pr = fmt1_str[0]; + tev->group = strdup(fmt2_str); + tev->event = strdup(fmt3_str); + if (tev->group == NULL || tev->event == NULL) { + ret = -ENOMEM; + goto out; + } pr_debug("Group:%s Event:%s probe:%c\n", tev->group, tev->event, pr); tp->retprobe = (pr == 'r'); @@ -1135,10 +1149,17 @@ static int parse_probe_trace_command(const char *cmd, p++; } else p = argv[1]; - ret = sscanf(p, "%a[^+]+%lu", (float *)(void *)&tp->symbol, - &tp->offset); - if (ret == 1) + fmt1_str = strtok_r(p, "+", &fmt); + tp->symbol = strdup(fmt1_str); + if (tp->symbol == NULL) { + ret = -ENOMEM; + goto out; + } + fmt2_str = strtok_r(NULL, "", &fmt); + if (fmt2_str == NULL) tp->offset = 0; + else + tp->offset = strtoul(fmt2_str, NULL, 10); tev->nargs = argc - 2; tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); @@ -1162,6 +1183,7 @@ static int parse_probe_trace_command(const char *cmd, } ret = 0; out: + free(argv0_str); argv_free(argv); return ret; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index aa4c860a21d..3aabcd687cd 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -229,24 +229,22 @@ void parse_proc_kallsyms(struct pevent *pevent, char *next = NULL; char *addr_str; char *mod; - char ch; + char *fmt; line = strtok_r(file, "\n", &next); while (line) { mod = NULL; - sscanf(line, "%as %c %as\t[%as", - (float *)(void *)&addr_str, /* workaround gcc warning */ - &ch, (float *)(void *)&func, (float *)(void *)&mod); + addr_str = strtok_r(line, " ", &fmt); addr = strtoull(addr_str, NULL, 16); - free(addr_str); - - /* truncate the extra ']' */ + /* skip character */ + strtok_r(NULL, " ", &fmt); + func = strtok_r(NULL, "\t", &fmt); + mod = strtok_r(NULL, "]", &fmt); + /* truncate the extra '[' */ if (mod) - mod[strlen(mod) - 1] = 0; + mod = mod + 1; pevent_register_function(pevent, func, addr, mod); - free(func); - free(mod); line = strtok_r(NULL, "\n", &next); } From 1bce6e0fec28434fd66de585773a14bf4d2c3715 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 19 Sep 2012 15:58:41 +0900 Subject: [PATCH 250/282] tools lib traceevent: Fix error path on process_array() free_token() under out_free should be called with 'token' and no need to set *tok to NULL since it's set already. 
Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1348037924-17568-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 77ebeb8266f..6270ee2e983 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1595,8 +1595,7 @@ process_array(struct event_format *event, struct print_arg *top, char **tok) return type; out_free: - free_token(*tok); - *tok = NULL; + free_token(token); free_arg(arg); return EVENT_ERROR; } From 41e51a289b3ca83e08395213f4488c9c7c6b2e29 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 19 Sep 2012 15:58:42 +0900 Subject: [PATCH 251/282] tools lib traceevent: Make sure that arg->op.right is set properly When process_op failed, @arg will be freed on a caller with type of PRINT_OP. Thus free_arg() will try to free ->op.right field which can have stale value if something bad happens in the middle. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1348037924-17568-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 6270ee2e983..27088c55af1 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1719,6 +1719,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) arg->op.left = left; arg->op.prio = 0; + /* it will set arg->op.right */ type = process_cond(event, arg, tok); } else if (strcmp(token, ">>") == 0 || @@ -1745,6 +1746,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) arg->type = PRINT_OP; arg->op.op = token; arg->op.left = left; + arg->op.right = NULL; if (set_op_prio(arg) == -1) { event->flags |= EVENT_FL_FAILED; @@ -1792,6 +1794,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) arg->op.prio = 0; + /* it will set arg->op.right */ type = process_array(event, arg, tok); } else { From f8c49d2645e5028e48ba15ec72728be121eddf95 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 19 Sep 2012 15:58:43 +0900 Subject: [PATCH 252/282] tools lib traceevent: Free field if an error occurs on process_fields The field should be freed on error paths. 
Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1348037924-17568-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 27088c55af1..a776ed5cd2b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2186,10 +2186,10 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** value = arg_eval(arg); if (value == NULL) - goto out_free; + goto out_free_field; field->value = strdup(value); if (field->value == NULL) - goto out_free; + goto out_free_field; free_arg(arg); arg = alloc_arg(); @@ -2197,14 +2197,14 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** free_token(token); type = process_arg(event, arg, &token); if (test_type_token(type, token, EVENT_OP, "}")) - goto out_free; + goto out_free_field; value = arg_eval(arg); if (value == NULL) - goto out_free; + goto out_free_field; field->str = strdup(value); if (field->str == NULL) - goto out_free; + goto out_free_field; free_arg(arg); arg = NULL; @@ -2218,6 +2218,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** *tok = token; return type; +out_free_field: + free_flag_sym(field); out_free: free_arg(arg); free_token(token); From 70d9304475730a63dd8da884abc7c76ee4772cd2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 19 Sep 2012 15:58:44 +0900 Subject: [PATCH 253/282] tools lib traceevent: Free field if an error occurs on process_flags/symbols The field should be freed on error paths. 
Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1348037924-17568-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index a776ed5cd2b..acf40381b48 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2247,7 +2247,7 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok) type = process_op(event, field, &token); if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; + goto out_free_field; free_token(token); arg->flags.field = field; @@ -2269,7 +2269,9 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok) type = read_token_item(tok); return type; - out_free: +out_free_field: + free_arg(field); +out_free: free_token(token); *tok = NULL; return EVENT_ERROR; @@ -2289,7 +2291,7 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok) type = process_arg(event, field, &token); if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; + goto out_free_field; arg->symbol.field = field; @@ -2301,7 +2303,9 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok) type = read_token_item(tok); return type; - out_free: +out_free_field: + free_arg(field); +out_free: free_token(token); *tok = NULL; return EVENT_ERROR; From 3ce711a6abc27abce1554e1d671a8762b7187690 Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Wed, 19 Sep 2012 09:29:02 +0200 Subject: [PATCH 254/282] perf tools: bfd.h/libbfd detection fails with recent binutils With recent binutils I get: perf % make Makefile:668: No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demanglin That happens because bfd.h now contains: I've reopened a bug in the hope that this check will be deleted: http://sourceware.org/bugzilla/show_bug.cgi?id=14243 But in the meantime, the following patch fixes the problem Signed-off-by: Markus Trippelsdorf Cc: Ingo Molnar Cc: Mike Frysinger Cc: Paul Mackerras Link: http://lkml.kernel.org/r/20120919072902.GA262@x4 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 +- tools/perf/util/symbol.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3ae6a59635c..251dcd7fb5a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -715,7 +715,7 @@ else EXTLIBS += -liberty BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE else - FLAGS_BFD=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd + FLAGS_BFD=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD)) ifeq ($(has_bfd),y) EXTLIBS += -lbfd diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 4ff45e30c72..b441b07172b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -34,6 +34,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, return NULL; } #else +#define PACKAGE 'perf' #include #endif #endif From 0dbca1e364aba20dba70d88c083239c5152440ac Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Sep 2012 15:39:59 -0300 Subject: [PATCH 255/282] tools lib traceevent: Use asprintf were applicable Replacing the equivalent open coded malloc + sprintf bits. 
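For reference, asprintf() computes the needed size, allocates the buffer and formats into it in one call, returning -1 when allocation fails, which is exactly what the open coded malloc_or_die() + sprintf() pairs were approximating with a guessed buffer size. A tiny standalone illustration (glibc is assumed, as in the tools build, since asprintf() is a GNU extension):

  #define _GNU_SOURCE
  #include <stdio.h>
  #include <stdlib.h>

  int main(void)
  {
          long long val = 42;
          char *atom;

          /* Old pattern:
           *      atom = malloc_or_die(32);
           *      sprintf(atom, "%lld", val);
           */
          if (asprintf(&atom, "%lld", val) < 0)
                  return EXIT_FAILURE;    /* allocation failed; atom is undefined */

          printf("%s\n", atom);
          free(atom);
          return 0;
  }
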
Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-ghokwtdw2hgmmmn7oa9s03r4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 58 ++++++++++++++++-------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index acf40381b48..2e05d567353 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -827,9 +827,9 @@ static enum event_type __read_token(char **tok) switch (type) { case EVENT_NEWLINE: case EVENT_DELIM: - *tok = malloc_or_die(2); - (*tok)[0] = ch; - (*tok)[1] = 0; + if (asprintf(tok, "%c", ch) < 0) + return EVENT_ERROR; + return type; case EVENT_OP: @@ -2777,10 +2777,8 @@ static int event_read_print(struct event_format *event) if (type == EVENT_DQUOTE) { char *cat; - cat = malloc_or_die(strlen(event->print_fmt.format) + - strlen(token) + 1); - strcpy(cat, event->print_fmt.format); - strcat(cat, token); + if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0) + goto fail; free_token(token); free_token(event->print_fmt.format); event->print_fmt.format = NULL; @@ -3524,6 +3522,18 @@ process_defined_func(struct trace_seq *s, void *data, int size, return ret; } +static void free_args(struct print_arg *args) +{ + struct print_arg *next; + + while (args) { + next = args->next; + + free_arg(args); + args = next; + } +} + static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struct event_format *event) { struct pevent *pevent = event->pevent; @@ -3559,8 +3569,9 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc next = &arg->next; arg->type = PRINT_ATOM; - arg->atom.atom = malloc_or_die(32); - sprintf(arg->atom.atom, "%lld", ip); + + if (asprintf(&arg->atom.atom, "%lld", ip) < 0) + goto out_free; /* skip the first "%pf : " */ for (ptr = fmt + 6, bptr = data + field->offset; @@ -3617,8 +3628,10 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc arg = alloc_arg(); arg->next = NULL; arg->type = PRINT_ATOM; - arg->atom.atom = malloc_or_die(32); - sprintf(arg->atom.atom, "%lld", val); + if (asprintf(&arg->atom.atom, "%lld", val) < 0) { + free(arg); + goto out_free; + } *next = arg; next = &arg->next; /* @@ -3646,18 +3659,10 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc } return args; -} -static void free_args(struct print_arg *args) -{ - struct print_arg *next; - - while (args) { - next = args->next; - - free_arg(args); - args = next; - } +out_free: + free_args(args); + return NULL; } static char * @@ -3684,9 +3689,8 @@ get_bprint_format(void *data, int size __maybe_unused, printk = find_printk(pevent, addr); if (!printk) { - format = malloc_or_die(45); - sprintf(format, "%%pf : (NO FORMAT FOUND at %llx)\n", - addr); + if (asprintf(&format, "%%pf : (NO FORMAT FOUND at %llx)\n", addr) < 0) + return NULL; return format; } @@ -3694,8 +3698,8 @@ get_bprint_format(void *data, int size __maybe_unused, /* Remove any quotes. 
*/ if (*p == '"') p++; - format = malloc_or_die(strlen(p) + 10); - sprintf(format, "%s : %s", "%pf", p); + if (asprintf(&format, "%s : %s", "%pf", p) < 0) + return NULL; /* remove ending quotes and new line since we will add one too */ p = format + strlen(format) - 1; if (*p == '"') From 87162d816f5f344d72e25249acd9b823b646a5c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Sep 2012 15:39:59 -0300 Subject: [PATCH 256/282] tools lib traceevent: Use calloc were applicable Replacing the equivalent open coded malloc + memset bits. Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-598fjtjbzal4wxh7fp0yv0q1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 44 ++++++++++-------------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 2e05d567353..2aeae5555a6 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -117,14 +117,7 @@ void breakpoint(void) struct print_arg *alloc_arg(void) { - struct print_arg *arg; - - arg = malloc_or_die(sizeof(*arg)); - if (!arg) - return NULL; - memset(arg, 0, sizeof(*arg)); - - return arg; + return calloc(1, sizeof(struct print_arg)); } struct cmdline { @@ -619,14 +612,7 @@ void pevent_print_printk(struct pevent *pevent) static struct event_format *alloc_event(void) { - struct event_format *event; - - event = malloc(sizeof(*event)); - if (!event) - return NULL; - memset(event, 0, sizeof(*event)); - - return event; + return calloc(1, sizeof(struct event_format)); } static void add_event(struct pevent *pevent, struct event_format *event) @@ -1240,8 +1226,10 @@ static int event_read_fields(struct event_format *event, struct format_field **f last_token = token; - field = malloc_or_die(sizeof(*field)); - memset(field, 0, sizeof(*field)); + field = calloc(1, sizeof(*field)); + if (!field) + goto fail; + field->event = event; /* read the rest of the type */ @@ -2181,8 +2169,9 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free; - field = malloc_or_die(sizeof(*field)); - memset(field, 0, sizeof(*field)); + field = calloc(1, sizeof(*field)); + if (!field) + goto out_free; value = arg_eval(arg); if (value == NULL) @@ -5106,12 +5095,11 @@ int pevent_register_print_function(struct pevent *pevent, remove_func_handler(pevent, name); } - func_handle = malloc(sizeof(*func_handle)); + func_handle = calloc(1, sizeof(*func_handle)); if (!func_handle) { do_warning("Failed to allocate function handler"); return PEVENT_ERRNO__MEM_ALLOC_FAILED; } - memset(func_handle, 0, sizeof(*func_handle)); func_handle->ret_type = ret_type; func_handle->name = strdup(name); @@ -5210,13 +5198,12 @@ int pevent_register_event_handler(struct pevent *pevent, not_found: /* Save for later use. 
*/ - handle = malloc(sizeof(*handle)); + handle = calloc(1, sizeof(*handle)); if (!handle) { do_warning("Failed to allocate event handler"); return PEVENT_ERRNO__MEM_ALLOC_FAILED; } - memset(handle, 0, sizeof(*handle)); handle->id = id; if (event_name) handle->event_name = strdup(event_name); @@ -5245,13 +5232,10 @@ int pevent_register_event_handler(struct pevent *pevent, */ struct pevent *pevent_alloc(void) { - struct pevent *pevent; + struct pevent *pevent = calloc(1, sizeof(*pevent)); - pevent = malloc(sizeof(*pevent)); - if (!pevent) - return NULL; - memset(pevent, 0, sizeof(*pevent)); - pevent->ref_count = 1; + if (pevent) + pevent->ref_count = 1; return pevent; } From b85119200dfaf51d361008d986d591156c7473d4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Sep 2012 15:39:59 -0300 Subject: [PATCH 257/282] tools lib traceevent: Fix afterlife gotos Instead of dying, just use do_warning and let the goto that is there to take place. Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-aoaus46ngnt9oc2pt7ckot5d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 2aeae5555a6..2091991691a 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1270,7 +1270,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f } if (!field->type) { - die("no type found"); + do_warning("%s: no type found", __func__); goto fail; } field->name = last_token; @@ -1317,7 +1317,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f free_token(token); type = read_token(&token); if (type == EVENT_NONE) { - die("failed to find token"); + do_warning("failed to find token"); goto fail; } } @@ -1669,7 +1669,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) if (arg->type == PRINT_OP && !arg->op.left) { /* handle single op */ if (token[1]) { - die("bad op token %s", token); + do_warning("bad op token %s", token); goto out_free; } switch (token[0]) { From a6d2a61ac653a85718aa61000d2648803f211ba3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Sep 2012 17:30:50 -0300 Subject: [PATCH 258/282] tools lib traceevent: Remove some die() calls Cleaned event-parse.c this time, just propagate the errors and in handle them the call sites. 
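The bulk of the change is one of two mechanical conversions, sketched here out of context (condensed from the hunks below; do_warning() is the library's existing warning helper):

  /* allocation failures: abort the tool -> hand -1 back to the caller */
  item = malloc(sizeof(*item));
  if (!item)
          return -1;

  /* parse-time sanity checks: die() -> do_warning() plus an error path */
  if (!field) {
          do_warning("%s: field %s not found", __func__, arg->field.name);
          return 0;
  }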
Reviewed-by: Namhyung Kim Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-9ebpr2vgfk2qs2841i99sa8y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 249 ++++++++++++++++++++--------- 1 file changed, 175 insertions(+), 74 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 2091991691a..b3bc13079c4 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "event-parse.h" #include "event-utils.h" @@ -151,7 +152,9 @@ static int cmdline_init(struct pevent *pevent) struct cmdline *cmdlines; int i; - cmdlines = malloc_or_die(sizeof(*cmdlines) * pevent->cmdline_count); + cmdlines = malloc(sizeof(*cmdlines) * pevent->cmdline_count); + if (!cmdlines) + return -1; i = 0; while (cmdlist) { @@ -179,8 +182,8 @@ static char *find_cmdline(struct pevent *pevent, int pid) if (!pid) return ""; - if (!pevent->cmdlines) - cmdline_init(pevent); + if (!pevent->cmdlines && cmdline_init(pevent)) + return ""; key.pid = pid; @@ -208,8 +211,8 @@ int pevent_pid_is_registered(struct pevent *pevent, int pid) if (!pid) return 1; - if (!pevent->cmdlines) - cmdline_init(pevent); + if (!pevent->cmdlines && cmdline_init(pevent)) + return 0; key.pid = pid; @@ -251,10 +254,14 @@ static int add_new_comm(struct pevent *pevent, const char *comm, int pid) return -1; } - cmdlines[pevent->cmdline_count].pid = pid; cmdlines[pevent->cmdline_count].comm = strdup(comm); - if (!cmdlines[pevent->cmdline_count].comm) - die("malloc comm"); + if (!cmdlines[pevent->cmdline_count].comm) { + free(cmdlines); + errno = ENOMEM; + return -1; + } + + cmdlines[pevent->cmdline_count].pid = pid; if (cmdlines[pevent->cmdline_count].comm) pevent->cmdline_count++; @@ -281,10 +288,15 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid) if (pevent->cmdlines) return add_new_comm(pevent, comm, pid); - item = malloc_or_die(sizeof(*item)); + item = malloc(sizeof(*item)); + if (!item) + return -1; + item->comm = strdup(comm); - if (!item->comm) - die("malloc comm"); + if (!item->comm) { + free(item); + return -1; + } item->pid = pid; item->next = pevent->cmdlist; @@ -348,7 +360,10 @@ static int func_map_init(struct pevent *pevent) struct func_map *func_map; int i; - func_map = malloc_or_die(sizeof(*func_map) * (pevent->func_count + 1)); + func_map = malloc(sizeof(*func_map) * (pevent->func_count + 1)); + if (!func_map) + return -1; + funclist = pevent->funclist; i = 0; @@ -448,25 +463,36 @@ pevent_find_function_address(struct pevent *pevent, unsigned long long addr) int pevent_register_function(struct pevent *pevent, char *func, unsigned long long addr, char *mod) { - struct func_list *item; + struct func_list *item = malloc(sizeof(*item)); - item = malloc_or_die(sizeof(*item)); + if (!item) + return -1; item->next = pevent->funclist; item->func = strdup(func); - if (mod) + if (!item->func) + goto out_free; + + if (mod) { item->mod = strdup(mod); - else + if (!item->mod) + goto out_free_func; + } else item->mod = NULL; item->addr = addr; - if (!item->func || (mod && !item->mod)) - die("malloc func"); - pevent->funclist = item; pevent->func_count++; return 0; + +out_free_func: + free(item->func); + item->func = NULL; +out_free: + free(item); + errno = ENOMEM; + return -1; } /** @@ -517,14 +543,16 
@@ static int printk_cmp(const void *a, const void *b) return 0; } -static void printk_map_init(struct pevent *pevent) +static int printk_map_init(struct pevent *pevent) { struct printk_list *printklist; struct printk_list *item; struct printk_map *printk_map; int i; - printk_map = malloc_or_die(sizeof(*printk_map) * (pevent->printk_count + 1)); + printk_map = malloc(sizeof(*printk_map) * (pevent->printk_count + 1)); + if (!printk_map) + return -1; printklist = pevent->printklist; @@ -542,6 +570,8 @@ static void printk_map_init(struct pevent *pevent) pevent->printk_map = printk_map; pevent->printklist = NULL; + + return 0; } static struct printk_map * @@ -550,8 +580,8 @@ find_printk(struct pevent *pevent, unsigned long long addr) struct printk_map *printk; struct printk_map key; - if (!pevent->printk_map) - printk_map_init(pevent); + if (!pevent->printk_map && printk_map_init(pevent)) + return NULL; key.addr = addr; @@ -573,21 +603,27 @@ find_printk(struct pevent *pevent, unsigned long long addr) int pevent_register_print_string(struct pevent *pevent, char *fmt, unsigned long long addr) { - struct printk_list *item; + struct printk_list *item = malloc(sizeof(*item)); - item = malloc_or_die(sizeof(*item)); + if (!item) + return -1; item->next = pevent->printklist; - item->printk = strdup(fmt); item->addr = addr; + item->printk = strdup(fmt); if (!item->printk) - die("malloc fmt"); + goto out_free; pevent->printklist = item; pevent->printk_count++; return 0; + +out_free: + free(item); + errno = ENOMEM; + return -1; } /** @@ -615,14 +651,15 @@ static struct event_format *alloc_event(void) return calloc(1, sizeof(struct event_format)); } -static void add_event(struct pevent *pevent, struct event_format *event) +static int add_event(struct pevent *pevent, struct event_format *event) { int i; + struct event_format **events = realloc(pevent->events, sizeof(event) * + (pevent->nr_events + 1)); + if (!events) + return -1; - pevent->events = realloc(pevent->events, sizeof(event) * - (pevent->nr_events + 1)); - if (!pevent->events) - die("Can not allocate events"); + pevent->events = events; for (i = 0; i < pevent->nr_events; i++) { if (pevent->events[i]->id > event->id) @@ -637,6 +674,8 @@ static void add_event(struct pevent *pevent, struct event_format *event) pevent->nr_events++; event->pevent = pevent; + + return 0; } static int event_item_type(enum event_type type) @@ -1751,8 +1790,10 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) type == EVENT_DELIM && (strcmp(token, ")") == 0)) { char *new_atom; - if (left->type != PRINT_ATOM) - die("bad pointer type"); + if (left->type != PRINT_ATOM) { + do_warning("bad pointer type"); + goto out_free; + } new_atom = realloc(left->atom.atom, strlen(left->atom.atom) + 3); if (!new_atom) @@ -1870,7 +1911,11 @@ eval_type_str(unsigned long long val, const char *type, int pointer) return val; } - ref = malloc_or_die(len); + ref = malloc(len); + if (!ref) { + do_warning("%s: not enough memory!", __func__); + return val; + } memcpy(ref, type, len); /* chop off the " *" */ @@ -1947,8 +1992,10 @@ eval_type_str(unsigned long long val, const char *type, int pointer) static unsigned long long eval_type(unsigned long long val, struct print_arg *arg, int pointer) { - if (arg->type != PRINT_TYPE) - die("expected type argument"); + if (arg->type != PRINT_TYPE) { + do_warning("expected type argument"); + return 0; + } return eval_type_str(val, arg->typecast.type, pointer); } @@ -2133,7 +2180,7 @@ static char *arg_eval (struct print_arg *arg) case 
PRINT_STRING: case PRINT_BSTRING: default: - die("invalid eval type %d", arg->type); + do_warning("invalid eval type %d", arg->type); break; } @@ -2431,8 +2478,10 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok) /* make this a typecast and contine */ /* prevous must be an atom */ - if (arg->type != PRINT_ATOM) - die("previous needed to be PRINT_ATOM"); + if (arg->type != PRINT_ATOM) { + do_warning("previous needed to be PRINT_ATOM"); + goto out_free; + } item_arg = alloc_arg(); @@ -2674,7 +2723,8 @@ process_arg_token(struct event_format *event, struct print_arg *arg, case EVENT_ERROR ... EVENT_NEWLINE: default: - die("unexpected type %d", type); + do_warning("unexpected type %d", type); + return EVENT_ERROR; } *tok = token; @@ -2921,8 +2971,10 @@ static int get_common_info(struct pevent *pevent, * All events should have the same common elements. * Pick any event to find where the type is; */ - if (!pevent->events) - die("no event_list!"); + if (!pevent->events) { + do_warning("no event_list!"); + return -1; + } event = pevent->events[0]; field = pevent_find_common_field(event, type); @@ -3080,7 +3132,8 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg if (!arg->field.field) { arg->field.field = pevent_find_any_field(event, arg->field.name); if (!arg->field.field) - die("field %s not found", arg->field.name); + goto out_warning_field; + } /* must be a number */ val = pevent_read_number(pevent, data + arg->field.field->offset, @@ -3141,8 +3194,10 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg if (!larg->field.field) { larg->field.field = pevent_find_any_field(event, larg->field.name); - if (!larg->field.field) - die("field %s not found", larg->field.name); + if (!larg->field.field) { + arg = larg; + goto out_warning_field; + } } field_size = larg->field.field->elementsize; offset = larg->field.field->offset + @@ -3178,7 +3233,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg val = left != right; break; default: - die("unknown op '%s'", arg->op.op); + goto out_warning_op; } break; case '~': @@ -3208,7 +3263,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg val = left <= right; break; default: - die("unknown op '%s'", arg->op.op); + goto out_warning_op; } break; case '>': @@ -3223,12 +3278,13 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg val = left >= right; break; default: - die("unknown op '%s'", arg->op.op); + goto out_warning_op; } break; case '=': if (arg->op.op[1] != '=') - die("unknown op '%s'", arg->op.op); + goto out_warning_op; + val = left == right; break; case '-': @@ -3244,13 +3300,21 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg val = left * right; break; default: - die("unknown op '%s'", arg->op.op); + goto out_warning_op; } break; default: /* not sure what to do there */ return 0; } return val; + +out_warning_op: + do_warning("%s: unknown op '%s'", __func__, arg->op.op); + return 0; + +out_warning_field: + do_warning("%s: field %s not found", __func__, arg->field.name); + return 0; } struct flag { @@ -3327,8 +3391,10 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, field = arg->field.field; if (!field) { field = pevent_find_any_field(event, arg->field.name); - if (!field) - die("field %s not found", arg->field.name); + if (!field) { + str = arg->field.name; + goto out_warning_field; + } arg->field.field = field; } 
/* Zero sized fields, mean the rest of the data */ @@ -3345,7 +3411,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, trace_seq_printf(s, "%lx", addr); break; } - str = malloc_or_die(len + 1); + str = malloc(len + 1); + if (!str) { + do_warning("%s: not enough memory!", __func__); + return; + } memcpy(str, data + field->offset, len); str[len] = 0; print_str_to_seq(s, format, len_arg, str); @@ -3385,7 +3455,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, str = arg->hex.field->field.name; field = pevent_find_any_field(event, str); if (!field) - die("field %s not found", str); + goto out_warning_field; arg->hex.field->field.field = field; } hex = data + field->offset; @@ -3437,6 +3507,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, /* well... */ break; } + + return; + +out_warning_field: + do_warning("%s: field %s not found", __func__, arg->field.name); } static unsigned long long @@ -3463,7 +3538,11 @@ process_defined_func(struct trace_seq *s, void *data, int size, farg = arg->func.args; param = func_handle->params; - args = malloc_or_die(sizeof(*args) * func_handle->nr_args); + ret = ULLONG_MAX; + args = malloc(sizeof(*args) * func_handle->nr_args); + if (!args) + goto out; + for (i = 0; i < func_handle->nr_args; i++) { switch (param->type) { case PEVENT_FUNC_ARG_INT: @@ -3475,12 +3554,18 @@ process_defined_func(struct trace_seq *s, void *data, int size, trace_seq_init(&str); print_str_arg(&str, data, size, event, "%s", -1, farg); trace_seq_terminate(&str); - string = malloc_or_die(sizeof(*string)); + string = malloc(sizeof(*string)); + if (!string) { + do_warning("%s(%d): malloc str", __func__, __LINE__); + goto out_free; + } string->next = strings; string->str = strdup(str.buffer); - if (!string->str) - die("malloc str"); - + if (!string->str) { + free(string); + do_warning("%s(%d): malloc str", __func__, __LINE__); + goto out_free; + } args[i] = (uintptr_t)string->str; strings = string; trace_seq_destroy(&str); @@ -3490,14 +3575,15 @@ process_defined_func(struct trace_seq *s, void *data, int size, * Something went totally wrong, this is not * an input error, something in this code broke. 
*/ - die("Unexpected end of arguments\n"); - break; + do_warning("Unexpected end of arguments\n"); + goto out_free; } farg = farg->next; param = param->next; } ret = (*func_handle->func)(s, args); +out_free: free(args); while (strings) { string = strings; @@ -3538,11 +3624,15 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc if (!field) { field = pevent_find_field(event, "buf"); - if (!field) - die("can't find buffer field for binary printk"); + if (!field) { + do_warning("can't find buffer field for binary printk"); + return NULL; + } ip_field = pevent_find_field(event, "ip"); - if (!ip_field) - die("can't find ip field for binary printk"); + if (!ip_field) { + do_warning("can't find ip field for binary printk"); + return NULL; + } pevent->bprint_buf_field = field; pevent->bprint_ip_field = ip_field; } @@ -3637,7 +3727,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc arg->type = PRINT_BSTRING; arg->string.string = strdup(bptr); if (!arg->string.string) - break; + goto out_free; bptr += strlen(bptr) + 1; *next = arg; next = &arg->next; @@ -3669,8 +3759,10 @@ get_bprint_format(void *data, int size __maybe_unused, if (!field) { field = pevent_find_field(event, "fmt"); - if (!field) - die("can't find format field for binary printk"); + if (!field) { + do_warning("can't find format field for binary printk"); + return NULL; + } pevent->bprint_fmt_field = field; } @@ -3723,8 +3815,11 @@ static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, if (!arg->field.field) { arg->field.field = pevent_find_any_field(event, arg->field.name); - if (!arg->field.field) - die("field %s not found", arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return; + } } if (arg->field.field->size != 6) { trace_seq_printf(s, "INVALIDMAC"); @@ -4380,7 +4475,10 @@ get_event_fields(const char *type, const char *name, struct format_field *field; int i = 0; - fields = malloc_or_die(sizeof(*fields) * (count + 1)); + fields = malloc(sizeof(*fields) * (count + 1)); + if (!fields) + return NULL; + for (field = list; field; field = field->next) { fields[i++] = field; if (i == count + 1) { @@ -4775,7 +4873,8 @@ enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, } show_warning = 1; - add_event(pevent, event); + if (add_event(pevent, event)) + goto event_alloc_failed; if (!ret && (event->flags & EVENT_FL_ISFTRACE)) { struct format_field *field; @@ -4808,7 +4907,9 @@ enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, event_parse_failed: event->flags |= EVENT_FL_FAILED; /* still add it even if it failed */ - add_event(pevent, event); + if (add_event(pevent, event)) + goto event_alloc_failed; + return ret; event_alloc_failed: From 2b29175d2b212d88f100c8819aaea097be61e062 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Sep 2012 11:13:15 -0300 Subject: [PATCH 259/282] tools lib traceevent: Carve out events format parsing routine The pevent_parse_event() routine will parse a events/sys/tp/format file and add an event_format instance to the pevent struct. This patch introduces a pevent_parse_format() routine with just the bits needed to parse the event/sys/tp/format file and just return the event_format instance, useful for when all we want is to parse the format file, without requiring the pevent struct. 
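A usage sketch of the new entry point (reading the format file into buf/size is elided; the tracepoint chosen is only an example):

  struct event_format *event = NULL;

  /* parse a copy of events/sched/sched_switch/format; no struct pevent
   * handle is needed, which is the point of this patch */
  if (pevent_parse_format(&event, buf, size, "sched") == 0) {
          /* ... use event->name, event->id, event->format.fields ... */
          pevent_free_format(event);
  }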
Acked-by: Steven Rostedt Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-lge0afl47arh86om0m6a5bqr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 98 ++++++++++++++++++++++-------- tools/lib/traceevent/event-parse.h | 3 + 2 files changed, 76 insertions(+), 25 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index b3bc13079c4..1fa71caf295 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4794,8 +4794,7 @@ static int find_event_handle(struct pevent *pevent, struct event_format *event) } /** - * pevent_parse_event - parse the event format - * @pevent: the handle to the pevent + * __pevent_parse_format - parse the event format * @buf: the buffer storing the event format string * @size: the size of @buf * @sys: the system the event belongs to @@ -4807,15 +4806,16 @@ static int find_event_handle(struct pevent *pevent, struct event_format *event) * * /sys/kernel/debug/tracing/events/.../.../format */ -enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, - unsigned long size, const char *sys) +enum pevent_errno __pevent_parse_format(struct event_format **eventp, + struct pevent *pevent, const char *buf, + unsigned long size, const char *sys) { struct event_format *event; int ret; init_input_buf(buf, size); - event = alloc_event(); + *eventp = event = alloc_event(); if (!event) return PEVENT_ERRNO__MEM_ALLOC_FAILED; @@ -4849,9 +4849,6 @@ enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, goto event_alloc_failed; } - /* Add pevent to event so that it can be referenced */ - event->pevent = pevent; - ret = event_read_format(event); if (ret < 0) { ret = PEVENT_ERRNO__READ_FORMAT_FAILED; @@ -4862,19 +4859,16 @@ enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, * If the event has an override, don't print warnings if the event * print format fails to parse. */ - if (find_event_handle(pevent, event)) + if (pevent && find_event_handle(pevent, event)) show_warning = 0; ret = event_read_print(event); + show_warning = 1; + if (ret < 0) { - show_warning = 1; ret = PEVENT_ERRNO__READ_PRINT_FAILED; goto event_parse_failed; } - show_warning = 1; - - if (add_event(pevent, event)) - goto event_alloc_failed; if (!ret && (event->flags & EVENT_FL_ISFTRACE)) { struct format_field *field; @@ -4898,21 +4892,75 @@ enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, return 0; } + return 0; + + event_parse_failed: + event->flags |= EVENT_FL_FAILED; + return ret; + + event_alloc_failed: + free(event->system); + free(event->name); + free(event); + *eventp = NULL; + return ret; +} + +/** + * pevent_parse_format - parse the event format + * @buf: the buffer storing the event format string + * @size: the size of @buf + * @sys: the system the event belongs to + * + * This parses the event format and creates an event structure + * to quickly parse raw data for a given event. 
+ * + * These files currently come from: + * + * /sys/kernel/debug/tracing/events/.../.../format + */ +enum pevent_errno pevent_parse_format(struct event_format **eventp, const char *buf, + unsigned long size, const char *sys) +{ + return __pevent_parse_format(eventp, NULL, buf, size, sys); +} + +/** + * pevent_parse_event - parse the event format + * @pevent: the handle to the pevent + * @buf: the buffer storing the event format string + * @size: the size of @buf + * @sys: the system the event belongs to + * + * This parses the event format and creates an event structure + * to quickly parse raw data for a given event. + * + * These files currently come from: + * + * /sys/kernel/debug/tracing/events/.../.../format + */ +enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, + unsigned long size, const char *sys) +{ + struct event_format *event = NULL; + int ret = __pevent_parse_format(&event, pevent, buf, size, sys); + + if (event == NULL) + return ret; + + /* Add pevent to event so that it can be referenced */ + event->pevent = pevent; + + if (add_event(pevent, event)) + goto event_add_failed; + #define PRINT_ARGS 0 if (PRINT_ARGS && event->print_fmt.args) print_args(event->print_fmt.args); return 0; - event_parse_failed: - event->flags |= EVENT_FL_FAILED; - /* still add it even if it failed */ - if (add_event(pevent, event)) - goto event_alloc_failed; - - return ret; - - event_alloc_failed: +event_add_failed: free(event->system); free(event->name); free(event); @@ -5365,7 +5413,7 @@ static void free_formats(struct format *format) free_format_fields(format->fields); } -static void free_event(struct event_format *event) +void pevent_free_format(struct event_format *event) { free(event->name); free(event->system); @@ -5451,7 +5499,7 @@ void pevent_free(struct pevent *pevent) } for (i = 0; i < pevent->nr_events; i++) - free_event(pevent->events[i]); + pevent_free_format(pevent->events[i]); while (pevent->handlers) { handle = pevent->handlers; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index a4bbe243792..24a4bbabc5d 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -540,6 +540,9 @@ int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long siz enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, unsigned long size, const char *sys); +enum pevent_errno pevent_parse_format(struct event_format **eventp, const char *buf, + unsigned long size, const char *sys); +void pevent_free_format(struct event_format *event); void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event, const char *name, struct pevent_record *record, From efd2b924d3af0efd360cb5b7f29ddc9bc74fce4b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Sep 2012 11:21:50 -0300 Subject: [PATCH 260/282] perf evsel: Provide a new constructor for tracepoints The existing constructor receives a perf_event_attr filled with the event type and the config. To reduce the boilerplate for tracepoints, provide a new constructor, perf_evsel__newtp() that receives the tracepoint name and will open the debugfs file, call into libtraceevent new pevent_parse_format file to fill its ->tp_format member, so that users can then just call perf_evsel__field() to access its fields. 
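A usage sketch of the new constructor (tracepoint and field names are only examples):

  struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch", 0);

  if (evsel != NULL) {
          /* ->tp_format was filled from the debugfs format file */
          struct format_field *field = perf_evsel__field(evsel, "prev_pid");

          if (field != NULL)
                  pr_debug("prev_pid: offset=%d size=%d\n",
                           field->offset, field->size);
          perf_evsel__delete(evsel);
  }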
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-6du8dl1hz0y5l4cybodye7hn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 88 ++++++++++++++++++++++++++++++++++++++--- tools/perf/util/evsel.h | 5 +++ 2 files changed, 87 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1506ba0453f..00936ad29ff 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -10,6 +10,7 @@ #include #include #include "asm/bug.h" +#include "debugfs.h" #include "event-parse.h" #include "evsel.h" #include "evlist.h" @@ -69,6 +70,72 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) return evsel; } +static struct event_format *event_format__new(const char *sys, const char *name) +{ + int fd, n; + char *filename; + void *bf = NULL, *nbf; + size_t size = 0, alloc_size = 0; + struct event_format *format = NULL; + + if (asprintf(&filename, "%s/%s/%s/format", tracing_events_path, sys, name) < 0) + goto out; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out_free_filename; + + do { + if (size == alloc_size) { + alloc_size += BUFSIZ; + nbf = realloc(bf, alloc_size); + if (nbf == NULL) + goto out_free_bf; + bf = nbf; + } + + n = read(fd, bf + size, BUFSIZ); + if (n < 0) + goto out_free_bf; + size += n; + } while (n > 0); + + pevent_parse_format(&format, bf, size, sys); + +out_free_bf: + free(bf); + close(fd); +out_free_filename: + free(filename); +out: + return format; +} + +struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx) +{ + struct perf_evsel *evsel = zalloc(sizeof(*evsel)); + + if (evsel != NULL) { + struct perf_event_attr attr = { + .type = PERF_TYPE_TRACEPOINT, + }; + + evsel->tp_format = event_format__new(sys, name); + if (evsel->tp_format == NULL) + goto out_free; + + attr.config = evsel->tp_format->id; + perf_evsel__init(evsel, &attr, idx); + evsel->name = evsel->tp_format->name; + } + + return evsel; + +out_free: + free(evsel); + return NULL; +} + const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { "cycles", "instructions", @@ -495,6 +562,10 @@ void perf_evsel__delete(struct perf_evsel *evsel) perf_evsel__exit(evsel); close_cgroup(evsel->cgrp); free(evsel->group_name); + if (evsel->tp_format && evsel->name == evsel->tp_format->name) { + evsel->name = NULL; + pevent_free_format(evsel->tp_format); + } free(evsel->name); free(evsel); } @@ -1002,14 +1073,19 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, return 0; } +struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name) +{ + return pevent_find_field(evsel->tp_format, name); +} + char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample, const char *name) { - struct format_field *field = pevent_find_field(evsel->tp_format, name); + struct format_field *field = perf_evsel__field(evsel, name); int offset; - if (!field) - return NULL; + if (!field) + return NULL; offset = field->offset; @@ -1024,11 +1100,11 @@ char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample, u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, const char *name) { - struct format_field *field = pevent_find_field(evsel->tp_format, name); + struct format_field *field = perf_evsel__field(evsel, name); u64 val; - if (!field) - return 0; + if (!field) + return 0; val = 
pevent_read_number(evsel->tp_format->pevent, sample->raw_data + field->offset, field->size); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 93876bad2e5..bb445d1cbc7 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -81,6 +81,7 @@ struct perf_evlist; struct perf_record_opts; struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); +struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx); void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx); void perf_evsel__exit(struct perf_evsel *evsel); @@ -128,6 +129,10 @@ char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample, u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, const char *name); +struct format_field; + +struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); + #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ evsel->attr.config == PERF_COUNT_##c) From 6a6cd11d4e5793ce1a2fb88dc7a09dbf43f9c618 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Sep 2012 11:56:28 -0300 Subject: [PATCH 261/282] perf test: Add test for the sched tracepoint format fields So that we make sure the routines that do event format parsing are working on at least two well know scheduler tracepoints. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-g3rm9b3wtim4djx3z8dkftrj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 86 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 4aed1553db5..32caf13cfe0 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -14,6 +14,7 @@ #include "util/symbol.h" #include "util/thread_map.h" #include "util/pmu.h" +#include "event-parse.h" #include "../../include/linux/hw_breakpoint.h" #include @@ -1207,6 +1208,87 @@ static int perf_evsel__roundtrip_name_test(void) return ret; } +static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, + int size, bool should_be_signed) +{ + struct format_field *field = perf_evsel__field(evsel, name); + int is_signed; + int ret = 0; + + if (field == NULL) { + pr_debug("%s: \"%s\" field not found!\n", evsel->name, name); + return -1; + } + + is_signed = !!(field->flags | FIELD_IS_SIGNED); + if (should_be_signed && !is_signed) { + pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n", + evsel->name, name, is_signed, should_be_signed); + ret = -1; + } + + if (field->size != size) { + pr_debug("%s: \"%s\" size (%d) should be %d!\n", + evsel->name, name, field->size, size); + ret = -1; + } + + return 0; +} + +static int perf_evsel__tp_sched_test(void) +{ + struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch", 0); + int ret = 0; + + if (evsel == NULL) { + pr_debug("perf_evsel__new\n"); + return -1; + } + + if (perf_evsel__test_field(evsel, "prev_comm", 16, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "prev_pid", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "prev_prio", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "prev_state", 8, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "next_comm", 16, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "next_pid", 4, true)) + ret = -1; + + if 
(perf_evsel__test_field(evsel, "next_prio", 4, true)) + ret = -1; + + perf_evsel__delete(evsel); + + evsel = perf_evsel__newtp("sched", "sched_wakeup", 0); + + if (perf_evsel__test_field(evsel, "comm", 16, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "pid", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "prio", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "success", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) + ret = -1; + + return 0; +} + static struct test { const char *desc; int (*func)(void); @@ -1253,6 +1335,10 @@ static struct test { .desc = "roundtrip evsel->name check", .func = perf_evsel__roundtrip_name_test, }, + { + .desc = "Check parsing of sched tracepoints fields", + .func = perf_evsel__tp_sched_test, + }, { .func = NULL, }, From b1ac754b67b5a875d63bee880f60ccb0c6bd8899 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Sep 2012 11:09:19 +0900 Subject: [PATCH 262/282] tools lib traceevent: Handle alloc_arg failure Now alloc_arg returns NULL if memory allocation failed, it should be handled on callsites properly. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/r/87k3vpzbqo.fsf_-_@sejong.aot.lge.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 97 +++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 1fa71caf295..17c922145e8 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1565,6 +1565,14 @@ process_cond(struct event_format *event, struct print_arg *top, char **tok) left = alloc_arg(); right = alloc_arg(); + if (!arg || !left || !right) { + do_warning("%s: not enough memory!", __func__); + /* arg will be freed at out_free */ + free_arg(left); + free_arg(right); + goto out_free; + } + arg->type = PRINT_OP; arg->op.left = left; arg->op.right = right; @@ -1607,6 +1615,12 @@ process_array(struct event_format *event, struct print_arg *top, char **tok) char *token = NULL; arg = alloc_arg(); + if (!arg) { + do_warning("%s: not enough memory!", __func__); + /* '*tok' is set to top->op.op. No need to free. 
*/ + *tok = NULL; + return EVENT_ERROR; + } *tok = NULL; type = process_arg(event, arg, &token); @@ -1725,10 +1739,16 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) /* make an empty left */ left = alloc_arg(); + if (!left) + goto out_warn_free; + left->type = PRINT_NULL; arg->op.left = left; right = alloc_arg(); + if (!right) + goto out_warn_free; + arg->op.right = right; /* do not free the token, it belongs to an op */ @@ -1738,6 +1758,9 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) } else if (strcmp(token, "?") == 0) { left = alloc_arg(); + if (!left) + goto out_warn_free; + /* copy the top arg to the left */ *left = *arg; @@ -1766,6 +1789,8 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) strcmp(token, "!=") == 0) { left = alloc_arg(); + if (!left) + goto out_warn_free; /* copy the top arg to the left */ *left = *arg; @@ -1797,7 +1822,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) new_atom = realloc(left->atom.atom, strlen(left->atom.atom) + 3); if (!new_atom) - goto out_free; + goto out_warn_free; left->atom.atom = new_atom; strcat(left->atom.atom, " *"); @@ -1809,12 +1834,18 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) } right = alloc_arg(); + if (!right) + goto out_warn_free; + type = process_arg_token(event, right, tok, type); arg->op.right = right; } else if (strcmp(token, "[") == 0) { left = alloc_arg(); + if (!left) + goto out_warn_free; + *left = *arg; arg->type = PRINT_OP; @@ -1847,7 +1878,9 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) return type; - out_free: +out_warn_free: + do_warning("%s: not enough memory!", __func__); +out_free: free_token(token); *tok = NULL; return EVENT_ERROR; @@ -2203,6 +2236,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** break; arg = alloc_arg(); + if (!arg) + goto out_free; free_token(token); type = process_arg(event, arg, &token); @@ -2229,6 +2264,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** free_arg(arg); arg = alloc_arg(); + if (!arg) + goto out_free; free_token(token); type = process_arg(event, arg, &token); @@ -2275,6 +2312,10 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok) arg->type = PRINT_FLAGS; field = alloc_arg(); + if (!field) { + do_warning("%s: not enough memory!", __func__); + goto out_free; + } type = process_arg(event, field, &token); @@ -2324,6 +2365,10 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok) arg->type = PRINT_SYMBOL; field = alloc_arg(); + if (!field) { + do_warning("%s: not enough memory!", __func__); + goto out_free; + } type = process_arg(event, field, &token); if (test_type_token(type, token, EVENT_DELIM, ",")) @@ -2358,6 +2403,11 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok) arg->type = PRINT_HEX; field = alloc_arg(); + if (!field) { + do_warning("%s: not enough memory!", __func__); + goto out_free; + } + type = process_arg(event, field, &token); if (test_type_token(type, token, EVENT_DELIM, ",")) @@ -2368,6 +2418,12 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok) free_token(token); field = alloc_arg(); + if (!field) { + do_warning("%s: not enough memory!", __func__); + *tok = NULL; + return EVENT_ERROR; + } + type = process_arg(event, field, &token); if (test_type_token(type, token, EVENT_DELIM, ")")) @@ -2425,6 +2481,12 @@ 
process_dynamic_array(struct event_format *event, struct print_arg *arg, char ** free_token(token); arg = alloc_arg(); + if (!field) { + do_warning("%s: not enough memory!", __func__); + *tok = NULL; + return EVENT_ERROR; + } + type = process_arg(event, arg, &token); if (type == EVENT_ERROR) goto out_free_arg; @@ -2484,6 +2546,10 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok) } item_arg = alloc_arg(); + if (!item_arg) { + do_warning("%s: not enough memory!", __func__); + goto out_free; + } arg->type = PRINT_TYPE; arg->typecast.type = arg->atom.atom; @@ -2579,6 +2645,11 @@ process_func_handler(struct event_format *event, struct pevent_function_handler next_arg = &(arg->func.args); for (i = 0; i < func->nr_args; i++) { farg = alloc_arg(); + if (!farg) { + do_warning("%s: not enough memory!", __func__); + return EVENT_ERROR; + } + type = process_arg(event, farg, &token); if (i < (func->nr_args - 1)) test = ","; @@ -2745,6 +2816,10 @@ static int event_read_print_args(struct event_format *event, struct print_arg ** } arg = alloc_arg(); + if (!arg) { + do_warning("%s: not enough memory!", __func__); + return -1; + } type = process_arg(event, arg, &token); @@ -3643,6 +3718,10 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc * The first arg is the IP pointer. */ args = alloc_arg(); + if (!args) { + do_warning("%s(%d): not enough memory!", __func__, __LINE__); + return NULL; + } arg = args; arg->next = NULL; next = &arg->next; @@ -3705,6 +3784,11 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc val = pevent_read_number(pevent, bptr, vsize); bptr += vsize; arg = alloc_arg(); + if (!arg) { + do_warning("%s(%d): not enough memory!", + __func__, __LINE__); + goto out_free; + } arg->next = NULL; arg->type = PRINT_ATOM; if (asprintf(&arg->atom.atom, "%lld", val) < 0) { @@ -3723,6 +3807,11 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc break; case 's': arg = alloc_arg(); + if (!arg) { + do_warning("%s(%d): not enough memory!", + __func__, __LINE__); + goto out_free; + } arg->next = NULL; arg->type = PRINT_BSTRING; arg->string.string = strdup(bptr); @@ -4878,6 +4967,10 @@ enum pevent_errno __pevent_parse_format(struct event_format **eventp, list = &event->print_fmt.args; for (field = event->format.fields; field; field = field->next) { arg = alloc_arg(); + if (!arg) { + event->flags |= EVENT_FL_FAILED; + return PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED; + } arg->type = PRINT_FIELD; arg->field.name = strdup(field->name); if (!arg->field.name) { From 5224c3a31549f1c056039545b289e1b01ed02f12 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Fri, 7 Sep 2012 18:12:19 -0700 Subject: [PATCH 263/282] tracing: Add an option for disabling markers In our application, we have trace markers spread through user-space. We have markers in GL, X, etc. These are super handy for Chrome's about:tracing feature (Chrome + system + kernel trace view), but can be very distracting when you're trying to debug a kernel issue. I normally, use "grep -v tracing_mark_write" but it would be nice if I could just temporarily disable markers all together. 
Link: http://lkml.kernel.org/r/1347066739-26285-1-git-send-email-msb@chromium.org CC: Frederic Weisbecker Signed-off-by: Mandeep Singh Baines Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 +++++- kernel/trace/trace.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 08acf42e325..1ec5c1dab62 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -328,7 +328,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | - TRACE_ITER_IRQ_INFO; + TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS; static int trace_stop_count; static DEFINE_RAW_SPINLOCK(tracing_start_lock); @@ -470,6 +470,7 @@ static const char *trace_options[] = { "overwrite", "disable_on_free", "irq-info", + "markers", NULL }; @@ -3886,6 +3887,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (tracing_disabled) return -EINVAL; + if (!(trace_flags & TRACE_ITER_MARKERS)) + return -EINVAL; + if (cnt > TRACE_BUF_SIZE) cnt = TRACE_BUF_SIZE; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 593debefc4e..63a2da0b9a6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -680,6 +680,7 @@ enum trace_iterator_flags { TRACE_ITER_OVERWRITE = 0x200000, TRACE_ITER_STOP_ON_FREE = 0x400000, TRACE_ITER_IRQ_INFO = 0x800000, + TRACE_ITER_MARKERS = 0x1000000, }; /* From 8781915ad2716adcd8cd5cc52cee791fc8b00fdf Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 12 Sep 2012 11:47:57 -0300 Subject: [PATCH 264/282] trace: Move trace event enable from fs_initcall to core_initcall This patch splits trace event initialization in two stages: * ftrace enable * sysfs event entry creation This allows to capture trace events from an earlier point by using 'trace_event' kernel parameter and is important to trace boot-up allocations. Note that, in order to enable events at core_initcall, it's necessary to move init_ftrace_syscalls() from core_initcall to early_initcall. 
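In code the split amounts to the two initcall levels below (both lines appear in the diff that follows); only the debugfs directory creation is deferred to the later stage:

  /* stage 1: parse "trace_event=" and enable the listed events early */
  core_initcall(event_trace_enable);

  /* stage 2: debugfs exists by now, create the events/ directories */
  fs_initcall(event_trace_init);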
Link: http://lkml.kernel.org/r/1347461277-25302-1-git-send-email-elezegarcia@gmail.com Signed-off-by: Ezequiel Garcia Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 108 ++++++++++++++++++++++------------ kernel/trace/trace_syscalls.c | 2 +- 2 files changed, 73 insertions(+), 37 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index bbb0e63d78e..d608d09d08c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1199,6 +1199,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, return 0; } +static void event_remove(struct ftrace_event_call *call) +{ + ftrace_event_enable_disable(call, 0); + if (call->event.funcs) + __unregister_ftrace_event(&call->event); + list_del(&call->list); +} + +static int event_init(struct ftrace_event_call *call) +{ + int ret = 0; + + if (WARN_ON(!call->name)) + return -EINVAL; + + if (call->class->raw_init) { + ret = call->class->raw_init(call); + if (ret < 0 && ret != -ENOSYS) + pr_warn("Could not initialize trace events/%s\n", + call->name); + } + + return ret; +} + static int __trace_add_event_call(struct ftrace_event_call *call, struct module *mod, const struct file_operations *id, @@ -1209,19 +1234,9 @@ __trace_add_event_call(struct ftrace_event_call *call, struct module *mod, struct dentry *d_events; int ret; - /* The linker may leave blanks */ - if (!call->name) - return -EINVAL; - - if (call->class->raw_init) { - ret = call->class->raw_init(call); - if (ret < 0) { - if (ret != -ENOSYS) - pr_warning("Could not initialize trace events/%s\n", - call->name); - return ret; - } - } + ret = event_init(call); + if (ret < 0) + return ret; d_events = event_trace_events_dir(); if (!d_events) @@ -1272,13 +1287,10 @@ static void remove_subsystem_dir(const char *name) */ static void __trace_remove_event_call(struct ftrace_event_call *call) { - ftrace_event_enable_disable(call, 0); - if (call->event.funcs) - __unregister_ftrace_event(&call->event); - debugfs_remove_recursive(call->dir); - list_del(&call->list); + event_remove(call); trace_destroy_fields(call); destroy_preds(call); + debugfs_remove_recursive(call->dir); remove_subsystem_dir(call->class->system); } @@ -1450,15 +1462,43 @@ static __init int setup_trace_event(char *str) } __setup("trace_event=", setup_trace_event); +static __init int event_trace_enable(void) +{ + struct ftrace_event_call **iter, *call; + char *buf = bootup_event_buf; + char *token; + int ret; + + for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) { + + call = *iter; + ret = event_init(call); + if (!ret) + list_add(&call->list, &ftrace_events); + } + + while (true) { + token = strsep(&buf, ","); + + if (!token) + break; + if (!*token) + continue; + + ret = ftrace_set_clr_event(token, 1); + if (ret) + pr_warn("Failed to enable trace event: %s\n", token); + } + return 0; +} + static __init int event_trace_init(void) { - struct ftrace_event_call **call; + struct ftrace_event_call *call; struct dentry *d_tracer; struct dentry *entry; struct dentry *d_events; int ret; - char *buf = bootup_event_buf; - char *token; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -1497,24 +1537,19 @@ static __init int event_trace_init(void) if (trace_define_common_fields()) pr_warning("tracing: Failed to allocate common fields"); - for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { - __trace_add_event_call(*call, NULL, &ftrace_event_id_fops, + /* + * Early initialization already enabled ftrace event. 
+ * Now it's only necessary to create the event directory. + */ + list_for_each_entry(call, &ftrace_events, list) { + + ret = event_create_dir(call, d_events, + &ftrace_event_id_fops, &ftrace_enable_fops, &ftrace_event_filter_fops, &ftrace_event_format_fops); - } - - while (true) { - token = strsep(&buf, ","); - - if (!token) - break; - if (!*token) - continue; - - ret = ftrace_set_clr_event(token, 1); - if (ret) - pr_warning("Failed to enable trace event: %s\n", token); + if (ret < 0) + event_remove(call); } ret = register_module_notifier(&trace_module_nb); @@ -1523,6 +1558,7 @@ static __init int event_trace_init(void) return 0; } +core_initcall(event_trace_enable); fs_initcall(event_trace_init); #ifdef CONFIG_FTRACE_STARTUP_TEST diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 6b245f64c8d..2485a7d09b1 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -487,7 +487,7 @@ int __init init_ftrace_syscalls(void) return 0; } -core_initcall(init_ftrace_syscalls); +early_initcall(init_ftrace_syscalls); #ifdef CONFIG_PERF_EVENTS From af9da88f14cbe6882b13492b59b3363682427b4d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 25 Sep 2012 11:20:28 +0900 Subject: [PATCH 265/282] perf test: Fix build failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 6a6cd11d4e57 ("perf test: Add test for the sched tracepoint format fields") added following build error: CC builtin-test.o builtin-test.c: In function ‘perf_evsel__test_field’: builtin-test.c:1216:6: error: variable ‘ret’ set but not used [-Werror=unused-but-set-variable] builtin-test.c: In function ‘perf_evsel__tp_sched_test’: builtin-test.c:1242:6: error: variable ‘ret’ set but not used [-Werror=unused-but-set-variable] cc1: all warnings being treated as errors make: *** [builtin-test.o] Error 1 Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1348539628-3821-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 32caf13cfe0..78b47a75a7c 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -1233,7 +1233,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, ret = -1; } - return 0; + return ret; } static int perf_evsel__tp_sched_test(void) @@ -1286,7 +1286,7 @@ static int perf_evsel__tp_sched_test(void) if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) ret = -1; - return 0; + return ret; } static struct test { From f1b2256d66fe8d613b9afcc1c16079362f9fc05c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 25 Sep 2012 21:25:19 +0900 Subject: [PATCH 266/282] tools lib traceevent: Fix error path on pevent_parse_event If __pevent_parse_format() succeeded but add_event() failed, 'ret' didn't have a proper error code. Set it to PEVENT_ERRNO__MEM_ALLOC_FAILED. In addition, at that point 'event' also has fields and format information and they all need to be freed. Call pevent_free_format() to handle it. 
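The fixed tail of pevent_parse_event() then reads roughly as follows (condensed from the diff below):

  if (add_event(pevent, event)) {
          ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
          goto event_add_failed;
  }

  return 0;

  event_add_failed:
          /* the event already carries fields and a parsed format, free it all */
          pevent_free_format(event);
          return ret;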
Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1348575919-4954-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 17c922145e8..47264b4652b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5044,8 +5044,10 @@ enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, /* Add pevent to event so that it can be referenced */ event->pevent = pevent; - if (add_event(pevent, event)) + if (add_event(pevent, event)) { + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; goto event_add_failed; + } #define PRINT_ARGS 0 if (PRINT_ARGS && event->print_fmt.args) @@ -5054,9 +5056,7 @@ enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, return 0; event_add_failed: - free(event->system); - free(event->name); - free(event); + pevent_free_format(event); return ret; } From 0b80f8b32f31f80e001fb6355fd86769555c4a3c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 12:28:26 -0300 Subject: [PATCH 267/282] perf evsel: Improve tracepoint constructor setup It needs to properly set the sample_type, sample_period and the KVM related perf_event_attr fields. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 00936ad29ff..2467eaf2968 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -117,14 +117,18 @@ struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx) if (evsel != NULL) { struct perf_event_attr attr = { - .type = PERF_TYPE_TRACEPOINT, + .type = PERF_TYPE_TRACEPOINT, + .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), }; evsel->tp_format = event_format__new(sys, name); if (evsel->tp_format == NULL) goto out_free; + event_attr_init(&attr); attr.config = evsel->tp_format->id; + attr.sample_period = 1; perf_evsel__init(evsel, &attr, idx); evsel->name = evsel->tp_format->name; } From a14bb7a6fd4a3d563ca971daf462ba4dc294a7a6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 12:41:14 -0300 Subject: [PATCH 268/282] perf tools: Allow handling a NULL cpu_map as meaning "all cpus" Or one with cpu_map->map[0] == -1. Reducing the boilerplate in setting up an evlist by nor requiring a cpu_map to be created at all. 
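Call sites can then be written as in this sketch (condensed from the evlist.c changes below), without ever allocating a dummy map:

  /* a NULL map, or one with map[0] == -1, now means "all cpus" */
  for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) {
          /* ... per-cpu work; exactly one pass when no map was set up ... */
  }

  if (cpu_map__all(evlist->cpus))
          return perf_evlist__mmap_per_thread(evlist, prot, mask);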
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-rnaqn3dtnsfo1wlbbf3fhx00@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.h | 11 +++++++++++ tools/perf/util/evlist.c | 12 ++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 17b5264f643..2f68a3b8c28 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -2,6 +2,7 @@ #define __PERF_CPUMAP_H #include +#include struct cpu_map { int nr; @@ -14,4 +15,14 @@ void cpu_map__delete(struct cpu_map *map); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); +static inline int cpu_map__nr(const struct cpu_map *map) +{ + return map ? map->nr : 1; +} + +static inline bool cpu_map__all(const struct cpu_map *map) +{ + return map ? map->map[0] == -1 : true; +} + #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 892353729c7..27f6de3aab1 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -304,7 +304,7 @@ void perf_evlist__enable(struct perf_evlist *evlist) int cpu, thread; struct perf_evsel *pos; - for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { + for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) ioctl(FD(pos, cpu, thread), @@ -315,7 +315,7 @@ void perf_evlist__enable(struct perf_evlist *evlist) static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries; + int nfds = cpu_map__nr(evlist->cpus) * evlist->threads->nr * evlist->nr_entries; evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); return evlist->pollfd != NULL ? 0 : -ENOMEM; } @@ -475,8 +475,8 @@ void perf_evlist__munmap(struct perf_evlist *evlist) static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) { - evlist->nr_mmaps = evlist->cpus->nr; - if (evlist->cpus->map[0] == -1) + evlist->nr_mmaps = cpu_map__nr(evlist->cpus); + if (cpu_map__all(evlist->cpus)) evlist->nr_mmaps = evlist->threads->nr; evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); return evlist->mmap != NULL ? 0 : -ENOMEM; @@ -622,11 +622,11 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, list_for_each_entry(evsel, &evlist->entries, node) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0) + perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) return -ENOMEM; } - if (evlist->cpus->map[0] == -1) + if (cpu_map__all(cpus)) return perf_evlist__mmap_per_thread(evlist, prot, mask); return perf_evlist__mmap_per_cpu(evlist, prot, mask); From 0807d2d8a381f4fc600ad481c3e77e5cdb624eed Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 12:48:18 -0300 Subject: [PATCH 269/282] perf evsel: Know if byte swap is needed Instead of passing it around for parsing as an explicit parameter, will help with reading tracepoint fields when not using a perf session or pevent structure, i.e. for non perf.data centered workflows. 
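At the call sites the explicit flag simply disappears (taken from the builtin-test.c hunks below); the evsel itself now remembers whether its raw data needs swapping:

  /* before: every caller had to pass the swap flag along */
  err = perf_evlist__parse_sample(evlist, event, &sample, false);

  /* after: that state lives in the evsel, callers just parse */
  err = perf_evlist__parse_sample(evlist, event, &sample);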
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-qa67ikv2sm49cwa7dyjhhp6g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 4 ++-- tools/perf/builtin-top.c | 2 +- tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evlist.h | 2 +- tools/perf/util/evsel.c | 13 ++++++++----- tools/perf/util/evsel.h | 3 ++- tools/perf/util/header.c | 6 +++++- tools/perf/util/python.c | 2 +- tools/perf/util/session.c | 6 ++---- 9 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 78b47a75a7c..2fd2c031f62 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -564,7 +564,7 @@ static int test__basic_mmap(void) goto out_munmap; } - err = perf_evlist__parse_sample(evlist, event, &sample, false); + err = perf_evlist__parse_sample(evlist, event, &sample); if (err) { pr_err("Can't parse sample, err = %d\n", err); goto out_munmap; @@ -781,7 +781,7 @@ static int test__PERF_RECORD(void) if (type < PERF_RECORD_MAX) nr_events[type]++; - err = perf_evlist__parse_sample(evlist, event, &sample, false); + err = perf_evlist__parse_sample(evlist, event, &sample); if (err < 0) { if (verbose) perf_event__fprintf(event, stderr); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5550754c05f..e434a16bb5a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -823,7 +823,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) int ret; while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { - ret = perf_evlist__parse_sample(top->evlist, event, &sample, false); + ret = perf_evlist__parse_sample(top->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); continue; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 27f6de3aab1..704bd91ce05 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -884,10 +884,10 @@ int perf_evlist__start_workload(struct perf_evlist *evlist) } int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, - struct perf_sample *sample, bool swapped) + struct perf_sample *sample) { struct perf_evsel *evsel = perf_evlist__first(evlist); - return perf_evsel__parse_sample(evsel, event, sample, swapped); + return perf_evsel__parse_sample(evsel, event, sample); } size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 3f2e1e4ccdd..008a95702bf 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -125,7 +125,7 @@ bool perf_evlist__sample_id_all(struct perf_evlist *evlist); u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, - struct perf_sample *sample, bool swapped); + struct perf_sample *sample); bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2467eaf2968..fe9581b0323 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -767,11 +767,13 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, return __perf_evsel__open(evsel, &empty_cpu_map.map, threads); } -static int perf_event__parse_id_sample(const union perf_event *event, u64 type, - struct perf_sample 
*sample, - bool swapped) +static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel, + const union perf_event *event, + struct perf_sample *sample) { + u64 type = evsel->attr.sample_type; const u64 *array = event->sample.array; + bool swapped = evsel->needs_swap; union u64_swap u; array += ((event->header.size - @@ -832,10 +834,11 @@ static bool sample_overlap(const union perf_event *event, } int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, - struct perf_sample *data, bool swapped) + struct perf_sample *data) { u64 type = evsel->attr.sample_type; u64 regs_user = evsel->attr.sample_regs_user; + bool swapped = evsel->needs_swap; const u64 *array; /* @@ -852,7 +855,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) return 0; - return perf_event__parse_id_sample(event, type, data, swapped); + return perf_evsel__parse_id_sample(evsel, event, data); } array = event->sample.array; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index bb445d1cbc7..60d28853748 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -69,6 +69,7 @@ struct perf_evsel { struct cpu_map *cpus; unsigned int sample_size; bool supported; + bool needs_swap; /* parse modifier helper */ int exclude_GH; struct perf_evsel *leader; @@ -205,7 +206,7 @@ static inline int perf_evsel__read_scaled(struct perf_evsel *evsel, void hists__init(struct hists *hists); int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, - struct perf_sample *sample, bool swapped); + struct perf_sample *sample); static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 6aae3290358..7daad237dea 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1256,8 +1256,10 @@ read_event_desc(struct perf_header *ph, int fd) if (ret != (ssize_t)sizeof(nr)) goto error; - if (ph->needs_swap) + if (ph->needs_swap) { nr = bswap_32(nr); + evsel->needs_swap = true; + } evsel->name = do_read_string(fd, ph); @@ -2626,6 +2628,8 @@ int perf_session__read_header(struct perf_session *session, int fd) if (evsel == NULL) goto out_delete_evlist; + + evsel->needs_swap = header->needs_swap; /* * Do it before so that if perf_evsel__alloc_id fails, this * entry gets purged too at perf_evlist__delete(). 
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index ca85444bcfb..9181bf212fb 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -805,7 +805,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (pyevent == NULL) return PyErr_NoMemory(); - err = perf_evlist__parse_sample(evlist, event, &pevent->sample, false); + err = perf_evlist__parse_sample(evlist, event, &pevent->sample); if (err) return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3049b0ae700..8cdd23239c9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -722,8 +722,7 @@ static int flush_sample_queue(struct perf_session *s, if (iter->timestamp > limit) break; - ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample, - s->header.needs_swap); + ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample); if (ret) pr_err("Can't parse sample, err = %d\n", ret); else { @@ -1174,8 +1173,7 @@ static int perf_session__process_event(struct perf_session *session, /* * For all kernel events we get the sample data */ - ret = perf_evlist__parse_sample(session->evlist, event, &sample, - session->header.needs_swap); + ret = perf_evlist__parse_sample(session->evlist, event, &sample); if (ret) return ret; From e6b6f6795265ec19ff35572f527bb74c07ff9399 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 13:13:04 -0300 Subject: [PATCH 270/282] perf evsel: Handle endianity in intval method We were relying on the info in pevent, but since we have it in perf_evsel, set up by the perf_session routine if read from a perf.data file or by whoever creates the evsels, use it. New 'perf test' entries will use it to parse locally generated events, in a non perf.data centered workflow. As well as use byteswap.h to get per arch optimized swap routines, like other parts of perf (header, perf_evsel__parse_sample, symbol, etc) already do. 
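The approach described above reduces to a per-field-size byte swap. A hedged stand-alone sketch of that logic, using the bswap_*() routines from byteswap.h as the patch does; the sample value and field layout here are made up for illustration.

#include <byteswap.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Read an integer field of 'size' bytes out of raw tracepoint data,
 * swapping it when the data was recorded on a machine of the opposite
 * endianness, in the spirit of perf_evsel__intval() and evsel->needs_swap. */
static uint64_t read_field(const void *ptr, int size, int needs_swap)
{
	uint16_t v16;
	uint32_t v32;
	uint64_t v64;

	switch (size) {
	case 1:
		return *(const uint8_t *)ptr;
	case 2:
		memcpy(&v16, ptr, sizeof(v16));
		return needs_swap ? bswap_16(v16) : v16;
	case 4:
		memcpy(&v32, ptr, sizeof(v32));
		return needs_swap ? bswap_32(v32) : v32;
	case 8:
		memcpy(&v64, ptr, sizeof(v64));
		return needs_swap ? bswap_64(v64) : v64;
	default:
		return 0;
	}
}

int main(void)
{
	uint32_t raw = 0x12345678;      /* pretend this came from the raw payload */

	printf("native : %#llx\n", (unsigned long long)read_field(&raw, 4, 0));
	printf("swapped: %#llx\n", (unsigned long long)read_field(&raw, 4, 1));
	return 0;
}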
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-8tjuxk09mlsfmh7macgkxsip@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index fe9581b0323..c78e42ab977 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1108,13 +1108,43 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, const char *name) { struct format_field *field = perf_evsel__field(evsel, name); - u64 val; + void *ptr; + u64 value; if (!field) return 0; - val = pevent_read_number(evsel->tp_format->pevent, - sample->raw_data + field->offset, field->size); - return val; + ptr = sample->raw_data + field->offset; + switch (field->size) { + case 1: + return *(u8 *)ptr; + case 2: + value = *(u16 *)ptr; + break; + case 4: + value = *(u32 *)ptr; + break; + case 8: + value = *(u64 *)ptr; + break; + default: + return 0; + } + + if (!evsel->needs_swap) + return value; + + switch (field->size) { + case 2: + return bswap_16(value); + case 4: + return bswap_32(value); + case 8: + return bswap_64(value); + default: + return 0; + } + + return 0; } From eb2f270338461a4de659a21946739748160c0816 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 13:23:10 -0300 Subject: [PATCH 271/282] perf test: Add test to check we correctly parse and match syscall open parms It will set up a syscall open tracepoint event, generate an open with invalid flags, then check those flags were the ones reported in the tracepoint fired. For the filename we need vfs:getname, but that will go thru some more iterations as the vfs getname codebase is going thru changes lately. When that is in I'll just check that the perf_evsel__newtp constructor is not bailing out and then add it to the evlist, catch the event and check the filename against the one used in the 'open' call used to trigger the event. 
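The trick the new test relies on fits in a few lines: opening a regular file with O_DIRECTORY is guaranteed to fail with no side effects, yet it still enters the kernel with a flags value the sys_enter_open tracepoint reports verbatim, so the test can match it. A stand-alone illustration; the use of /etc/passwd is just the convention borrowed from the test.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int flags = O_RDONLY | O_DIRECTORY;
	int fd = open("/etc/passwd", flags);

	/* /etc/passwd is not a directory, so open() fails (ENOTDIR), but the
	 * syscall was entered carrying exactly these flags for the test to see. */
	if (fd < 0)
		printf("open failed as expected: %s (flags=%#x)\n",
		       strerror(errno), flags);
	else
		close(fd);
	return 0;
}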
Cc: David Ahern Cc: Eric Paris Cc: Frederic Weisbecker Cc: Jeff Layton Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-p5w9aq0jcbb91ghzqomowm16@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 116 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 2fd2c031f62..484f26cc0c0 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -1289,6 +1289,118 @@ static int perf_evsel__tp_sched_test(void) return ret; } +static int test__syscall_open_tp_fields(void) +{ + struct perf_record_opts opts = { + .target = { + .uid = UINT_MAX, + .uses_mmap = true, + }, + .no_delay = true, + .freq = 1, + .mmap_pages = 256, + .raw_samples = true, + }; + const char *filename = "/etc/passwd"; + int flags = O_RDONLY | O_DIRECTORY; + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + struct perf_evsel *evsel; + int err = -1, i, nr_events = 0, nr_polls = 0; + + if (evlist == NULL) { + pr_debug("%s: perf_evlist__new\n", __func__); + goto out; + } + + evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0); + if (evsel == NULL) { + pr_debug("%s: perf_evsel__newtp\n", __func__); + goto out_delete_evlist; + } + + perf_evlist__add(evlist, evsel); + + err = perf_evlist__create_maps(evlist, &opts.target); + if (err < 0) { + pr_debug("%s: perf_evlist__create_maps\n", __func__); + goto out_delete_evlist; + } + + perf_evsel__config(evsel, &opts, evsel); + + evlist->threads->map[0] = getpid(); + + err = perf_evlist__open(evlist); + if (err < 0) { + pr_debug("perf_evlist__open: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + err = perf_evlist__mmap(evlist, UINT_MAX, false); + if (err < 0) { + pr_debug("perf_evlist__mmap: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + perf_evlist__enable(evlist); + + /* + * Generate the event: + */ + open(filename, flags); + + while (1) { + int before = nr_events; + + for (i = 0; i < evlist->nr_mmaps; i++) { + union perf_event *event; + + while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { + const u32 type = event->header.type; + int tp_flags; + struct perf_sample sample; + + ++nr_events; + + if (type != PERF_RECORD_SAMPLE) + continue; + + err = perf_evsel__parse_sample(evsel, event, &sample); + if (err) { + pr_err("Can't parse sample, err = %d\n", err); + goto out_munmap; + } + + tp_flags = perf_evsel__intval(evsel, &sample, "flags"); + + if (flags != tp_flags) { + pr_debug("%s: Expected flags=%#x, got %#x\n", + __func__, flags, tp_flags); + goto out_munmap; + } + + goto out_ok; + } + } + + if (nr_events == before) + poll(evlist->pollfd, evlist->nr_fds, 10); + + if (++nr_polls > 5) { + pr_debug("%s: no events!\n", __func__); + goto out_munmap; + } + } +out_ok: + err = 0; +out_munmap: + perf_evlist__munmap(evlist); +out_delete_evlist: + perf_evlist__delete(evlist); +out: + return err; +} + static struct test { const char *desc; int (*func)(void); @@ -1339,6 +1451,10 @@ static struct test { .desc = "Check parsing of sched tracepoints fields", .func = perf_evsel__tp_sched_test, }, + { + .desc = "Generate and check syscalls:sys_enter_open event fields", + .func = test__syscall_open_tp_fields, + }, { .func = NULL, }, From 1491a63218d0fd764e7fab13aa6b82164bfc14d6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 14:43:13 -0300 Subject: [PATCH 272/282] perf evlist: Renane set_filters method 
to apply_filters Because that is what it really does, i.e. it applies the filters that were parsed from the command line and stashed into the evsels they refer to. We'll need the set_filter method name to actually apply a filter to all the evsels in an evlist, for instance, to ask that a syswide tracer doesn't trace itself. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/evlist.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2cb74343de3..f14cb5fdb91 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -320,7 +320,7 @@ try_again: } } - if (perf_evlist__set_filters(evlist)) { + if (perf_evlist__apply_filters(evlist)) { error("failed to set filter with %d (%s)\n", errno, strerror(errno)); rc = -1; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e0f65fe6594..e8cd4d81b06 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -478,7 +478,7 @@ static int run_perf_stat(int argc __maybe_unused, const char **argv) counter->supported = true; } - if (perf_evlist__set_filters(evsel_list)) { + if (perf_evlist__apply_filters(evsel_list)) { error("failed to set filter with %d (%s)\n", errno, strerror(errno)); return -1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 704bd91ce05..c3ec7878985 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -666,7 +666,7 @@ void perf_evlist__delete_maps(struct perf_evlist *evlist) evlist->threads = NULL; } -int perf_evlist__set_filters(struct perf_evlist *evlist) +int perf_evlist__apply_filters(struct perf_evlist *evlist) { const struct thread_map *threads = evlist->threads; const struct cpu_map *cpus = evlist->cpus; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 008a95702bf..25e49e02810 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -115,7 +115,7 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, int perf_evlist__create_maps(struct perf_evlist *evlist, struct perf_target *target); void perf_evlist__delete_maps(struct perf_evlist *evlist); -int perf_evlist__set_filters(struct perf_evlist *evlist); +int perf_evlist__apply_filters(struct perf_evlist *evlist); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct perf_evlist *evlist); From 745cefc5fba4350243bcb63cd6f75fb47ca77725 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 15:07:39 -0300 Subject: [PATCH 273/282] perf evlist: Introduce set_filter() method To apply a filter to all the evsels in an evlist. 
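Both methods boil down to issuing PERF_EVENT_IOC_SET_FILTER on every per-cpu/per-thread event fd. A hedged sketch of that loop outside the perf tree; the flat fd array, the helper name and the example filter string are assumptions for illustration only.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* Apply one filter string to every per-cpu/per-thread event fd, in the
 * spirit of perf_evsel__set_filter(); 'fds' is assumed to be an
 * ncpus x nthreads array of perf_event_open() file descriptors. */
static int apply_filter(int *fds, int ncpus, int nthreads, const char *filter)
{
	int cpu, thread;

	for (cpu = 0; cpu < ncpus; cpu++) {
		for (thread = 0; thread < nthreads; thread++) {
			int fd = fds[cpu * nthreads + thread];

			if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter))
				return -1;      /* errno has the details */
		}
	}
	return 0;
}

int main(void)
{
	int dummy_fds[1] = { -1 };      /* real code uses perf_event_open() fds */

	if (apply_filter(dummy_fds, 1, 1, "common_pid != 0"))
		perror("PERF_EVENT_IOC_SET_FILTER");
	return 0;
}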
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 43 +++++++++++++++++++++++----------------- tools/perf/util/evlist.h | 2 ++ tools/perf/util/evsel.c | 18 +++++++++++++++++ tools/perf/util/evsel.h | 3 +++ 4 files changed, 48 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c3ec7878985..ae89686102f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -668,30 +668,37 @@ void perf_evlist__delete_maps(struct perf_evlist *evlist) int perf_evlist__apply_filters(struct perf_evlist *evlist) { - const struct thread_map *threads = evlist->threads; - const struct cpu_map *cpus = evlist->cpus; struct perf_evsel *evsel; - char *filter; - int thread; - int cpu; - int err; - int fd; + int err = 0; + const int ncpus = cpu_map__nr(evlist->cpus), + nthreads = evlist->threads->nr; list_for_each_entry(evsel, &evlist->entries, node) { - filter = evsel->filter; - if (!filter) + if (evsel->filter == NULL) continue; - for (cpu = 0; cpu < cpus->nr; cpu++) { - for (thread = 0; thread < threads->nr; thread++) { - fd = FD(evsel, cpu, thread); - err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter); - if (err) - return err; - } - } + + err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter); + if (err) + break; } - return 0; + return err; +} + +int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) +{ + struct perf_evsel *evsel; + int err = 0; + const int ncpus = cpu_map__nr(evlist->cpus), + nthreads = evlist->threads->nr; + + list_for_each_entry(evsel, &evlist->entries, node) { + err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter); + if (err) + break; + } + + return err; } bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 25e49e02810..3f1fb66be02 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -72,6 +72,8 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, #define perf_evlist__set_tracepoints_handlers_array(evlist, array) \ perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) +int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); + struct perf_evsel * perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c78e42ab977..6022daaa741 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -505,6 +505,24 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) return evsel->fd != NULL ? 
0 : -ENOMEM; } +int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter) +{ + int cpu, thread; + + for (cpu = 0; cpu < ncpus; cpu++) { + for (thread = 0; thread < nthreads; thread++) { + int fd = FD(evsel, cpu, thread), + err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter); + + if (err) + return err; + } + } + + return 0; +} + int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 60d28853748..25043ff17fd 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -115,6 +115,9 @@ void perf_evsel__free_fd(struct perf_evsel *evsel); void perf_evsel__free_id(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); +int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter); + int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus); int perf_evsel__open_per_thread(struct perf_evsel *evsel, From e48ffe2bd49936314d367a8c6b5eaaa17d581d13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 17:11:38 -0300 Subject: [PATCH 274/282] perf evsel: The tracepoint constructor should store sys:name Not event_format->name, that doesn't contains the sys: part. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6022daaa741..6f2a8c30413 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -122,6 +122,9 @@ struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx) PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), }; + if (asprintf(&evsel->name, "%s:%s", sys, name) < 0) + goto out_free; + evsel->tp_format = event_format__new(sys, name); if (evsel->tp_format == NULL) goto out_free; @@ -130,12 +133,12 @@ struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx) attr.config = evsel->tp_format->id; attr.sample_period = 1; perf_evsel__init(evsel, &attr, idx); - evsel->name = evsel->tp_format->name; } return evsel; out_free: + free(evsel->name); free(evsel); return NULL; } @@ -584,10 +587,8 @@ void perf_evsel__delete(struct perf_evsel *evsel) perf_evsel__exit(evsel); close_cgroup(evsel->cgrp); free(evsel->group_name); - if (evsel->tp_format && evsel->name == evsel->tp_format->name) { - evsel->name = NULL; + if (evsel->tp_format) pevent_free_format(evsel->tp_format); - } free(evsel->name); free(evsel); } From 82fe1c290cd26a3d092beb1e9755647d284a4134 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 17:13:07 -0300 Subject: [PATCH 275/282] perf tools: Use perf_evsel__newtp in the event parser Elliminating code duplication. 
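The naming change in the tracepoint constructor above amounts to building the "sys:name" spelling up front in a string the evsel owns, so teardown can free the name and the event_format independently. A trivial stand-alone sketch of that construction:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *sys = "syscalls", *name = "sys_enter_open";
	char *evsel_name;

	/* Keep the user-visible "sys:name" spelling instead of borrowing
	 * event_format->name, which lacks the "sys:" part. */
	if (asprintf(&evsel_name, "%s:%s", sys, name) < 0)
		return 1;

	printf("%s\n", evsel_name);
	free(evsel_name);
	return 0;
}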
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-9v4zl7ldlp8v6azrpsu5lupk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 46 ++++++++++++---------------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index bf5d033ee1b..aed38e4b9df 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -356,42 +356,28 @@ int parse_events_add_cache(struct list_head **list, int *idx, return add_event(list, idx, &attr, name); } -static int add_tracepoint(struct list_head **list, int *idx, +static int add_tracepoint(struct list_head **listp, int *idx, char *sys_name, char *evt_name) { - struct perf_event_attr attr; - char name[MAX_NAME_LEN]; - char evt_path[MAXPATHLEN]; - char id_buf[4]; - u64 id; - int fd; + struct perf_evsel *evsel; + struct list_head *list = *listp; - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path, - sys_name, evt_name); - - fd = open(evt_path, O_RDONLY); - if (fd < 0) - return -1; - - if (read(fd, id_buf, sizeof(id_buf)) < 0) { - close(fd); - return -1; + if (!list) { + list = malloc(sizeof(*list)); + if (!list) + return -ENOMEM; + INIT_LIST_HEAD(list); } - close(fd); - id = atoll(id_buf); + evsel = perf_evsel__newtp(sys_name, evt_name, (*idx)++); + if (!evsel) { + free(list); + return -ENOMEM; + } - memset(&attr, 0, sizeof(attr)); - attr.config = id; - attr.type = PERF_TYPE_TRACEPOINT; - attr.sample_type |= PERF_SAMPLE_RAW; - attr.sample_type |= PERF_SAMPLE_TIME; - attr.sample_type |= PERF_SAMPLE_CPU; - attr.sample_type |= PERF_SAMPLE_PERIOD; - attr.sample_period = 1; - - snprintf(name, MAX_NAME_LEN, "%s:%s", sys_name, evt_name); - return add_event(list, idx, &attr, name); + list_add_tail(&evsel->node, list); + *listp = list; + return 0; } static int add_tracepoint_multi(struct list_head **list, int *idx, From 5d2074ea4fdb28b2199958f42edf914d27c926f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 20:22:00 -0300 Subject: [PATCH 276/282] perf evsel: Introduce rawptr() method Will be used for things like the args field in the raw_syscalls:sys_enter tracepoint. Implement strval with it, its basicaly strval returning void *. 
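A stand-alone sketch of the rawptr()/strval() split described above: rawptr() returns a pointer into the raw sample payload at a field's offset, and strval() is simply that pointer read as a string. The fake payload and fixed offset are illustrative; real offsets come from the tracepoint format description.

#include <stdio.h>

/* Hand back a pointer into the raw payload at a field's offset. */
static void *raw_field_ptr(void *raw_data, unsigned int offset)
{
	return (char *)raw_data + offset;
}

int main(void)
{
	/* Fake payload: a 4-byte id followed by a NUL-terminated filename. */
	char raw[] = { 2, 0, 0, 0,
		       '/', 'e', 't', 'c', '/', 'p', 'a', 's', 's', 'w', 'd', '\0' };

	char *filename = raw_field_ptr(raw, 4); /* what strval() would return */

	printf("filename field: %s\n", filename);
	return 0;
}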
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- tools/perf/util/evsel.h | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6f2a8c30413..e992b67e0f3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1104,7 +1104,7 @@ struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *nam return pevent_find_field(evsel->tp_format, name); } -char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample, +void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, const char *name) { struct format_field *field = perf_evsel__field(evsel, name); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 25043ff17fd..2c490502010 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -128,11 +128,18 @@ void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads); struct perf_sample; -char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample, +void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, const char *name); u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, const char *name); +static inline char *perf_evsel__strval(struct perf_evsel *evsel, + struct perf_sample *sample, + const char *name) +{ + return perf_evsel__rawptr(evsel, sample, name); +} + struct format_field; struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); From 201b7334dc4e977479eb22b106ee8ec506e5e55c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 20:24:19 -0300 Subject: [PATCH 277/282] perf evsel: Export the event_format constructor It'll be needed in the next patches, where it'll be not associated directly to an evsel. 
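event_format__new() builds its event_format from the tracepoint's "format" file, handing the text to the traceevent parser. A minimal sketch that only locates and dumps that file; the debugfs mount point and the raw_syscalls:sys_enter event are assumptions.

#include <stdio.h>

int main(void)
{
	char path[256], line[512];
	FILE *fp;

	snprintf(path, sizeof(path),
		 "/sys/kernel/debug/tracing/events/%s/%s/format",
		 "raw_syscalls", "sys_enter");

	fp = fopen(path, "r");
	if (fp == NULL) {
		perror(path);
		return 1;
	}

	/* This text (field names, offsets, sizes, print fmt) is what the
	 * exported constructor feeds to the traceevent parser. */
	while (fgets(line, sizeof(line), fp))
		fputs(line, stdout);

	fclose(fp);
	return 0;
}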
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- tools/perf/util/evsel.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e992b67e0f3..ffdd94e9c9c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -70,7 +70,7 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) return evsel; } -static struct event_format *event_format__new(const char *sys, const char *name) +struct event_format *event_format__new(const char *sys, const char *name) { int fd, n; char *filename; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 2c490502010..3ead0d59c03 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -83,6 +83,9 @@ struct perf_record_opts; struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx); + +struct event_format *event_format__new(const char *sys, const char *name); + void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx); void perf_evsel__exit(struct perf_evsel *evsel); From 514f1c67c2fdae7b334fdc5adee63a484781241a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 20:05:56 -0300 Subject: [PATCH 278/282] perf trace: New tool Initially should look loosely like the venerable 'strace' tool, but using the infrastructure in the perf tools to allow tracing extra targets: [acme@sandy linux]$ perf trace --hell Error: unknown option `hell' usage: perf trace -p, --pid trace events on existing process id --tid trace events on existing thread id --all-cpus system-wide collection from all CPUs --cpu list of cpus to monitor --no-inherit child tasks do not inherit counters --mmap-pages number of mmap data pages --uid user to profile [acme@sandy linux]$ Those should have the same semantics as when using with 'perf record'. It gets stuck sometimes, but hey, it works sometimes too! In time it should support perf.data based workloads, i.e. it should have a: -o filename Command line option that will produce a perf.data file that can then be used with 'perf trace' or any of the other perf tools (script, report, etc). It will also eventually have the set of functionalities described in the previous 'trace' prototype by Thomas Gleixner: "Announcing a new utility: 'trace'" http://lwn.net/Articles/415728/ Also planned is to have some of the features suggested in the comments of that LWN article. 
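The new tool leans on libaudit to turn the raw syscall id carried by raw_syscalls:sys_enter back into a name before loading the matching syscalls:sys_enter_* format. A hedged stand-alone sketch of that lookup; link with -laudit, and printing the first few ids is just for illustration.

#include <stdio.h>
#include <libaudit.h>

int main(void)
{
	int machine = audit_detect_machine();
	int id;

	if (machine < 0) {
		fprintf(stderr, "could not detect audit machine type\n");
		return 1;
	}

	/* The same id -> name lookup trace__read_syscall_info() does before
	 * opening the matching syscalls:sys_enter_* tracepoint. */
	for (id = 0; id < 5; id++) {
		const char *name = audit_syscall_to_name(id, machine);

		printf("%3d -> %s\n", id, name ? name : "?");
	}
	return 0;
}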
Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 53 +++++ tools/perf/Makefile | 3 +- tools/perf/builtin-trace.c | 300 ++++++++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/command-list.txt | 1 + tools/perf/perf.c | 1 + 6 files changed, 358 insertions(+), 1 deletion(-) create mode 100644 tools/perf/Documentation/perf-trace.txt create mode 100644 tools/perf/builtin-trace.c diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt new file mode 100644 index 00000000000..3a2ae37310a --- /dev/null +++ b/tools/perf/Documentation/perf-trace.txt @@ -0,0 +1,53 @@ +perf-trace(1) +============= + +NAME +---- +perf-trace - strace inspired tool + +SYNOPSIS +-------- +[verse] +'perf trace' + +DESCRIPTION +----------- +This command will show the events associated with the target, initially +syscalls, but other system events like pagefaults, task lifetime events, +scheduling events, etc. + +Initially this is a live mode only tool, but eventually will work with +perf.data files like the other tools, allowing a detached 'record' from +analysis phases. + +OPTIONS +------- + +--all-cpus:: + System-wide collection from all CPUs. + +-p:: +--pid=:: + Record events on existing process ID (comma separated list). + +--tid=:: + Record events on existing thread ID (comma separated list). + +--uid=:: + Record events in threads owned by uid. Name or number. + +--no-inherit:: + Child tasks do not inherit counters. + +--mmap-pages=:: + Number of mmap data pages. Must be a power of two. + +--cpu:: +Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a +comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +In per-thread mode with inheritance mode on (default), Events are captured only when +the thread executes on the designated CPUs. Default is to monitor all CPUs. 
+ +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 251dcd7fb5a..6958ba4f5dc 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -102,7 +102,7 @@ ifdef PARSER_DEBUG endif CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) -EXTLIBS = -lpthread -lrt -lelf -lm +EXTLIBS = -lpthread -lrt -lelf -lm -laudit ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -442,6 +442,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o BUILTIN_OBJS += $(OUTPUT)builtin-lock.o BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o BUILTIN_OBJS += $(OUTPUT)builtin-test.o +BUILTIN_OBJS += $(OUTPUT)builtin-trace.o BUILTIN_OBJS += $(OUTPUT)builtin-inject.o PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c new file mode 100644 index 00000000000..5fa1820cc96 --- /dev/null +++ b/tools/perf/builtin-trace.c @@ -0,0 +1,300 @@ +#include "builtin.h" +#include "util/evlist.h" +#include "util/parse-options.h" +#include "util/thread_map.h" +#include "event-parse.h" + +#include +#include + +static struct syscall_fmt { + const char *name; + bool errmsg; + bool timeout; +} syscall_fmts[] = { + { .name = "futex", .errmsg = true, }, + { .name = "poll", .errmsg = true, .timeout = true, }, + { .name = "ppoll", .errmsg = true, .timeout = true, }, + { .name = "read", .errmsg = true, }, + { .name = "recvfrom", .errmsg = true, }, + { .name = "select", .errmsg = true, .timeout = true, }, +}; + +static int syscall_fmt__cmp(const void *name, const void *fmtp) +{ + const struct syscall_fmt *fmt = fmtp; + return strcmp(name, fmt->name); +} + +static struct syscall_fmt *syscall_fmt__find(const char *name) +{ + const int nmemb = ARRAY_SIZE(syscall_fmts); + return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); +} + +struct syscall { + struct event_format *tp_format; + const char *name; + struct syscall_fmt *fmt; +}; + +struct trace { + int audit_machine; + struct { + int max; + struct syscall *table; + } syscalls; + struct perf_record_opts opts; +}; + +static int trace__read_syscall_info(struct trace *trace, int id) +{ + char tp_name[128]; + struct syscall *sc; + + if (id > trace->syscalls.max) { + struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); + + if (nsyscalls == NULL) + return -1; + + if (trace->syscalls.max != -1) { + memset(nsyscalls + trace->syscalls.max + 1, 0, + (id - trace->syscalls.max) * sizeof(*sc)); + } else { + memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); + } + + trace->syscalls.table = nsyscalls; + trace->syscalls.max = id; + } + + sc = trace->syscalls.table + id; + sc->name = audit_syscall_to_name(id, trace->audit_machine); + if (sc->name == NULL) + return -1; + + snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); + + sc->tp_format = event_format__new("syscalls", tp_name); + sc->fmt = syscall_fmt__find(sc->name); + + return sc->tp_format != NULL ? 0 : -1; +} + +static size_t syscall__fprintf_args(struct syscall *sc, unsigned long *args, FILE *fp) +{ + int i = 0; + size_t printed = 0; + + if (sc->tp_format != NULL) { + struct format_field *field; + + for (field = sc->tp_format->format.fields->next; field; field = field->next) { + printed += fprintf(fp, "%s%s: %ld", printed ? 
", " : "", + field->name, args[i++]); + } + } else { + while (i < 6) { + printed += fprintf(fp, "%sarg%d: %ld", printed ? ", " : "", i, args[i]); + ++i; + } + } + + return printed; +} + +static int trace__run(struct trace *trace) +{ + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + struct perf_evsel *evsel, *evsel_enter, *evsel_exit; + int err = -1, i, nr_events = 0, before; + + if (evlist == NULL) { + printf("Not enough memory to run!\n"); + goto out; + } + + evsel_enter = perf_evsel__newtp("raw_syscalls", "sys_enter", 0); + if (evsel_enter == NULL) { + printf("Couldn't read the raw_syscalls:sys_enter tracepoint information!\n"); + goto out_delete_evlist; + } + + perf_evlist__add(evlist, evsel_enter); + + evsel_exit = perf_evsel__newtp("raw_syscalls", "sys_exit", 1); + if (evsel_exit == NULL) { + printf("Couldn't read the raw_syscalls:sys_exit tracepoint information!\n"); + goto out_delete_evlist; + } + + perf_evlist__add(evlist, evsel_exit); + + err = perf_evlist__create_maps(evlist, &trace->opts.target); + if (err < 0) { + printf("Problems parsing the target to trace, check your options!\n"); + goto out_delete_evlist; + } + + perf_evlist__config_attrs(evlist, &trace->opts); + + err = perf_evlist__open(evlist); + if (err < 0) { + printf("Couldn't create the events: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + err = perf_evlist__mmap(evlist, UINT_MAX, false); + if (err < 0) { + printf("Couldn't mmap the events: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + perf_evlist__enable(evlist); +again: + before = nr_events; + + for (i = 0; i < evlist->nr_mmaps; i++) { + union perf_event *event; + + while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { + const u32 type = event->header.type; + struct syscall *sc; + struct perf_sample sample; + int id; + + ++nr_events; + + switch (type) { + case PERF_RECORD_SAMPLE: + break; + case PERF_RECORD_LOST: + printf("LOST %" PRIu64 " events!\n", event->lost.lost); + continue; + default: + printf("Unexpected %s event, skipping...\n", + perf_event__name(type)); + continue; + } + + err = perf_evlist__parse_sample(evlist, event, &sample); + if (err) { + printf("Can't parse sample, err = %d, skipping...\n", err); + continue; + } + + evsel = perf_evlist__id2evsel(evlist, sample.id); + if (evsel == NULL) { + printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); + continue; + } + + id = perf_evsel__intval(evsel, &sample, "id"); + if (id < 0) { + printf("Invalid syscall %d id, skipping...\n", id); + continue; + } + + if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && + trace__read_syscall_info(trace, id)) + continue; + + if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) + continue; + + sc = &trace->syscalls.table[id]; + + if (evlist->threads->map[0] == -1 || evlist->threads->nr > 1) + printf("%d ", sample.tid); + + if (evsel == evsel_enter) { + void *args = perf_evsel__rawptr(evsel, &sample, "args"); + + printf("%s(", sc->name); + syscall__fprintf_args(sc, args, stdout); + } else if (evsel == evsel_exit) { + int ret = perf_evsel__intval(evsel, &sample, "ret"); + + if (ret < 0 && sc->fmt && sc->fmt->errmsg) { + char bf[256]; + const char *emsg = strerror_r(-ret, bf, sizeof(bf)), + *e = audit_errno_to_name(-ret); + + printf(") = -1 %s %s", e, emsg); + } else if (ret == 0 && sc->fmt && sc->fmt->timeout) + printf(") = 0 Timeout"); + else + printf(") = %d", ret); + + putchar('\n'); + } + } + } + + if (nr_events == before) + poll(evlist->pollfd, evlist->nr_fds, -1); + + 
goto again; + +out_delete_evlist: + perf_evlist__delete(evlist); +out: + return err; +} + +int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) +{ + const char * const trace_usage[] = { + "perf trace []", + NULL + }; + struct trace trace = { + .audit_machine = audit_detect_machine(), + .syscalls = { + . max = -1, + }, + .opts = { + .target = { + .uid = UINT_MAX, + .uses_mmap = true, + }, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .no_delay = true, + .mmap_pages = 1024, + }, + }; + const struct option trace_options[] = { + OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", + "trace events on existing process id"), + OPT_STRING(0, "tid", &trace.opts.target.tid, "tid", + "trace events on existing thread id"), + OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide, + "system-wide collection from all CPUs"), + OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu", + "list of cpus to monitor"), + OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, + "child tasks do not inherit counters"), + OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages, + "number of mmap data pages"), + OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user", + "user to profile"), + OPT_END() + }; + int err; + + argc = parse_options(argc, argv, trace_options, trace_usage, 0); + if (argc) + usage_with_options(trace_usage, trace_options); + + err = perf_target__parse_uid(&trace.opts.target); + if (err) { + char bf[BUFSIZ]; + perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); + printf("%s", bf); + return err; + } + + return trace__run(&trace); +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 3ea74ed1b26..08143bd854c 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -34,6 +34,7 @@ extern int cmd_kmem(int argc, const char **argv, const char *prefix); extern int cmd_lock(int argc, const char **argv, const char *prefix); extern int cmd_kvm(int argc, const char **argv, const char *prefix); extern int cmd_test(int argc, const char **argv, const char *prefix); +extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_inject(int argc, const char **argv, const char *prefix); extern int find_scripts(char **scripts_array, char **scripts_path_array); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 0303ec69227..3e86bbd8c2d 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -17,6 +17,7 @@ perf-report mainporcelain common perf-stat mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common +perf-trace mainporcelain common perf-script mainporcelain common perf-probe mainporcelain full perf-kmem mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index fb8578cfa03..3fb052c9a27 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -55,6 +55,7 @@ static struct cmd_struct commands[] = { { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, + { "trace", cmd_trace, 0 }, { "inject", cmd_inject, 0 }, }; From 9ec60972a38011ad8a5676f4cd5e51ac508c36b6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 26 Sep 2012 16:47:28 +0900 Subject: [PATCH 279/282] perf hists: Add missing period_* fields when collapsing a hist entry So that the perf report won't lost the cpu utilization information. For example, if there're two process that have same name. $ perf report --stdio --showcpuutilization -s pid [SNIP] # Overhead sys us Command: Pid # ........ ........ ........ ............. 
# 55.12% 0.01% 55.10% noploop:28781 44.88% 0.06% 44.83% noploop:28782 Before: $ perf report --stdio --showcpuutilization -s comm [SNIP] # Overhead sys us # ........ ........ ........ # 100.00% 0.06% 44.83% After: $ perf report --stdio --showcpuutilization -s comm [SNIP] # Overhead sys us # ........ ........ ........ # 100.00% 0.07% 99.93% Signed-off-by: Namhyung Kim Cc: Arun Sharma Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1348645663-25303-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6ec5398de89..236bc9d98ff 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -410,8 +410,13 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, cmp = hist_entry__collapse(iter, he); if (!cmp) { - iter->period += he->period; - iter->nr_events += he->nr_events; + iter->period += he->period; + iter->period_sys += he->period_sys; + iter->period_us += he->period_us; + iter->period_guest_sys += he->period_guest_sys; + iter->period_guest_us += he->period_guest_us; + iter->nr_events += he->nr_events; + if (symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); callchain_merge(&callchain_cursor, From 4d29089c2b70ffeb61656ffd1b9c9c52602ddd44 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 27 Sep 2012 20:23:38 +0900 Subject: [PATCH 280/282] perf tools: Check libaudit availability for perf-trace builtin The newly added trace command requires an external audit library. However it can cause a build error because it's not checked whether the libaudit is installed on system: CC builtin-trace.o builtin-trace.c:7:22: fatal error: libaudit.h: No such file or directory compilation terminated. 
make: *** [builtin-trace.o] Error 1 Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1348745018-21744-1-git-send-email-namhyung@kernel.org [ committer note: Added ", disables 'trace tool' to the feature warning msg ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 16 ++++++++++++++-- tools/perf/config/feature-tests.mak | 11 +++++++++++ tools/perf/perf.c | 2 ++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 6958ba4f5dc..e5e71e7d95a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -102,7 +102,7 @@ ifdef PARSER_DEBUG endif CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) -EXTLIBS = -lpthread -lrt -lelf -lm -laudit +EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -442,7 +442,6 @@ BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o BUILTIN_OBJS += $(OUTPUT)builtin-lock.o BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o BUILTIN_OBJS += $(OUTPUT)builtin-test.o -BUILTIN_OBJS += $(OUTPUT)builtin-trace.o BUILTIN_OBJS += $(OUTPUT)builtin-inject.o PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT) @@ -560,6 +559,19 @@ else LIB_OBJS += $(OUTPUT)util/unwind.o endif +ifdef NO_LIBAUDIT + BASIC_CFLAGS += -DNO_LIBAUDIT_SUPPORT +else + FLAGS_LIBAUDIT = $(ALL_CFLAGS) $(ALL_LDFLAGS) -laudit + ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT)),y) + msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); + BASIC_CFLAGS += -DNO_LIBAUDIT_SUPPORT + else + BUILTIN_OBJS += $(OUTPUT)builtin-trace.o + EXTLIBS += -laudit + endif +endif + ifdef NO_NEWT BASIC_CFLAGS += -DNO_NEWT_SUPPORT else diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index 116690a669d..4add41bb0c7 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -193,3 +193,14 @@ int main(void) } endef endif + +ifndef NO_LIBAUDIT +define SOURCE_LIBAUDIT +#include + +int main(void) +{ + return audit_open(); +} +endef +endif \ No newline at end of file diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 3fb052c9a27..fc2f770e302 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -55,7 +55,9 @@ static struct cmd_struct commands[] = { { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, +#ifndef NO_LIBAUDIT_SUPPORT { "trace", cmd_trace, 0 }, +#endif { "inject", cmd_inject, 0 }, }; From bb2d17a014914b628df42b0c76c85fa8a8c57e79 Mon Sep 17 00:00:00 2001 From: Hyeoncheol Lee Date: Thu, 27 Sep 2012 11:36:39 +0900 Subject: [PATCH 281/282] perf probe: Print an enum type variable in "enum variable-name" format when showing accessible variables When showing accessible variables, an enum type variable was printed in "variable-name" format. Change this format into "enum variable-name". 
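The change is a one-line addition to the tag-to-prefix mapping in die_get_typename(). A hedged stand-alone sketch of that mapping using the DW_TAG_* constants from dwarf.h; the variable name printed is made up.

#include <stdio.h>
#include <dwarf.h>

/* Prefix an aggregate type name with its keyword, as die_get_typename()
 * now also does for enumerations. */
static const char *tag_prefix(int tag)
{
	switch (tag) {
	case DW_TAG_union_type:		return "union ";
	case DW_TAG_structure_type:	return "struct ";
	case DW_TAG_enumeration_type:	return "enum ";
	default:			return "";
	}
}

int main(void)
{
	printf("%s%s\n", tag_prefix(DW_TAG_enumeration_type), "pid_type");
	return 0;
}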
Signed-off-by: Hyeoncheol Lee Acked-by: Masami Hiramatsu Cc: Masami Hiramatsu Link: http://lkml.kernel.org/r/1348713399-4541-1-git-send-email-hyc.lee@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index ee51e9b4dc0..3e5f5430a28 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -804,6 +804,8 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len) tmp = "union "; else if (tag == DW_TAG_structure_type) tmp = "struct "; + else if (tag == DW_TAG_enumeration_type) + tmp = "enum "; /* Write a base name */ ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type)); return (ret >= len) ? -E2BIG : ret; From aec1930b0f6f281a0ca038cd03aceace3fe0bb61 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Sep 2012 13:16:00 -0300 Subject: [PATCH 282/282] perf trace: Add aliases for some syscalls What we get from audit_syscall_to_name isn't what we find in the syscalls: tracepoint events, so add the alias that allows the tool to find prctl, fstat, fstatat and stat. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-3m9su7jhwnxvepnr3ne1du5k@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5fa1820cc96..8f113dab8bf 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -9,15 +9,20 @@ static struct syscall_fmt { const char *name; + const char *alias; bool errmsg; bool timeout; } syscall_fmts[] = { - { .name = "futex", .errmsg = true, }, - { .name = "poll", .errmsg = true, .timeout = true, }, - { .name = "ppoll", .errmsg = true, .timeout = true, }, - { .name = "read", .errmsg = true, }, - { .name = "recvfrom", .errmsg = true, }, - { .name = "select", .errmsg = true, .timeout = true, }, + { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, + { .name = "fstat", .errmsg = true, .alias = "newfstat", }, + { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, + { .name = "futex", .errmsg = true, }, + { .name = "poll", .errmsg = true, .timeout = true, }, + { .name = "ppoll", .errmsg = true, .timeout = true, }, + { .name = "read", .errmsg = true, }, + { .name = "recvfrom", .errmsg = true, }, + { .name = "select", .errmsg = true, .timeout = true, }, + { .name = "stat", .errmsg = true, .alias = "newstat", }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -74,11 +79,16 @@ static int trace__read_syscall_info(struct trace *trace, int id) if (sc->name == NULL) return -1; - snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); - - sc->tp_format = event_format__new("syscalls", tp_name); sc->fmt = syscall_fmt__find(sc->name); + snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); + sc->tp_format = event_format__new("syscalls", tp_name); + + if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { + snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); + sc->tp_format = event_format__new("syscalls", tp_name); + } + return sc->tp_format != NULL ? 0 : -1; }
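A stand-alone sketch of the alias fallback added above: the sorted syscall_fmt table is searched with bsearch(), and when audit's name for a syscall does not match a syscalls:sys_enter_* tracepoint, the alias supplies the spelling that does. The trimmed table below is illustrative, not the full one from the patch.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct syscall_fmt {
	const char *name;
	const char *alias;      /* tracepoint spelling, when it differs */
	bool	    errmsg;
	bool	    timeout;
};

/* Must stay sorted by name, it is searched with bsearch(). */
static struct syscall_fmt syscall_fmts[] = {
	{ .name = "arch_prctl", .errmsg = true, .alias = "prctl",    },
	{ .name = "fstat",      .errmsg = true, .alias = "newfstat", },
	{ .name = "poll",       .errmsg = true, .timeout = true,     },
	{ .name = "stat",       .errmsg = true, .alias = "newstat",  },
};

static int syscall_fmt__cmp(const void *name, const void *fmtp)
{
	const struct syscall_fmt *fmt = fmtp;

	return strcmp(name, fmt->name);
}

static struct syscall_fmt *syscall_fmt__find(const char *name)
{
	const int nmemb = sizeof(syscall_fmts) / sizeof(syscall_fmts[0]);

	return bsearch(name, syscall_fmts, nmemb,
		       sizeof(struct syscall_fmt), syscall_fmt__cmp);
}

int main(void)
{
	/* audit reports "stat", but the tracepoint is sys_enter_newstat:
	 * fall back to the alias when building the event name. */
	const char *name = "stat";
	struct syscall_fmt *fmt = syscall_fmt__find(name);
	const char *tp = (fmt && fmt->alias) ? fmt->alias : name;

	printf("syscalls:sys_enter_%s\n", tp);
	return 0;
}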