dect
/
linux-2.6
Archived
13
0
Fork 0
This repository has been archived on 2022-02-17. You can view files and clone it, but cannot push or open issues or pull requests.
linux-2.6/include/trace/ftrace.h

494 lines
13 KiB
C
Raw Normal View History

/*
* Stage 1 of the trace events.
*
* Override the macros in <trace/trace_events.h> to include the following:
*
* struct ftrace_raw_<call> {
* struct trace_entry ent;
* <type> <item>;
* <type2> <item2>[<len>];
* [...]
* };
*
* The <type> <item> is created by the __field(type, item) macro or
* the __array(type2, item2, len) macro.
* We simply do "type item;", and that will create the fields
* in the structure.
*/
#include <linux/ftrace_event.h>
#undef TRACE_FORMAT
#define TRACE_FORMAT(call, proto, args, fmt)
#undef __array
#define __array(type, item, len) type item[len];
#undef __field
#define __field(type, item) type item;
#undef TP_STRUCT__entry
#define TP_STRUCT__entry(args...) args
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
struct ftrace_raw_##name { \
struct trace_entry ent; \
tstruct \
}; \
static struct ftrace_event_call event_##name
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
/*
* Stage 2 of the trace events.
*
* Override the macros in <trace/trace_events.h> to include the following:
*
* enum print_line_t
* ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
* {
* struct trace_seq *s = &iter->seq;
* struct ftrace_raw_<call> *field; <-- defined in stage 1
* struct trace_entry *entry;
* int ret;
*
* entry = iter->ent;
*
* if (entry->type != event_<call>.id) {
* WARN_ON_ONCE(1);
* return TRACE_TYPE_UNHANDLED;
* }
*
* field = (typeof(field))entry;
*
* ret = trace_seq_printf(s, <TP_printk> "\n");
* if (!ret)
* return TRACE_TYPE_PARTIAL_LINE;
*
* return TRACE_TYPE_HANDLED;
* }
*
* This is the method used to print the raw event to the trace
* output format. Note, this is not needed if the data is read
* in binary.
*/
#undef __entry
#define __entry field
#undef TP_printk
#define TP_printk(fmt, args...) fmt "\n", args
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
enum print_line_t \
ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
{ \
struct trace_seq *s = &iter->seq; \
struct ftrace_raw_##call *field; \
struct trace_entry *entry; \
int ret; \
\
entry = iter->ent; \
\
if (entry->type != event_##call.id) { \
WARN_ON_ONCE(1); \
return TRACE_TYPE_UNHANDLED; \
} \
\
field = (typeof(field))entry; \
\
ret = trace_seq_printf(s, #call ": " print); \
if (!ret) \
return TRACE_TYPE_PARTIAL_LINE; \
\
return TRACE_TYPE_HANDLED; \
}
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
/*
* Setup the showing format of trace point.
*
* int
* ftrace_format_##call(struct trace_seq *s)
* {
* struct ftrace_raw_##call field;
* int ret;
*
* ret = trace_seq_printf(s, #type " " #item ";"
* " offset:%u; size:%u;\n",
* offsetof(struct ftrace_raw_##call, item),
* sizeof(field.type));
*
* }
*/
#undef TP_STRUCT__entry
#define TP_STRUCT__entry(args...) args
#undef __field
#define __field(type, item) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
"offset:%u;\tsize:%u;\n", \
(unsigned int)offsetof(typeof(field), item), \
(unsigned int)sizeof(field.item)); \
if (!ret) \
return 0;
#undef __array
#define __array(type, item, len) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
"offset:%u;\tsize:%u;\n", \
(unsigned int)offsetof(typeof(field), item), \
(unsigned int)sizeof(field.item)); \
if (!ret) \
return 0;
#undef __entry
#define __entry REC
#undef TP_printk
#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
#undef TP_fast_assign
#define TP_fast_assign(args...) args
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
static int \
ftrace_format_##call(struct trace_seq *s) \
{ \
struct ftrace_raw_##call field; \
int ret; \
\
tstruct; \
\
trace_seq_printf(s, "\nprint fmt: " print); \
\
return ret; \
}
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#undef __field
#define __field(type, item) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item)); \
if (ret) \
return ret;
#undef __array
#define __array(type, item, len) \
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
offsetof(typeof(field), item), \
sizeof(field.item)); \
if (ret) \
return ret;
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
int \
ftrace_define_fields_##call(void) \
{ \
struct ftrace_raw_##call field; \
struct ftrace_event_call *event_call = &event_##call; \
int ret; \
\
__common_field(unsigned char, type); \
__common_field(unsigned char, flags); \
__common_field(unsigned char, preempt_count); \
__common_field(int, pid); \
__common_field(int, tgid); \
\
tstruct; \
\
return ret; \
}
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
/*
* Stage 3 of the trace events.
*
* Override the macros in <trace/trace_events.h> to include the following:
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
*
* static void ftrace_event_<call>(proto)
* {
* event_trace_printk(_RET_IP_, "<call>: " <fmt>);
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
* }
*
* static int ftrace_reg_event_<call>(void)
* {
* int ret;
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
*
* ret = register_trace_<call>(ftrace_event_<call>);
* if (!ret)
* pr_info("event trace: Could not activate trace point "
* "probe to <call>");
* return ret;
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
* }
*
* static void ftrace_unreg_event_<call>(void)
* {
* unregister_trace_<call>(ftrace_event_<call>);
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
* }
*
* For those macros defined with TRACE_FORMAT:
*
* static struct ftrace_event_call __used
* __attribute__((__aligned__(4)))
* __attribute__((section("_ftrace_events"))) event_<call> = {
* .name = "<call>",
* .regfunc = ftrace_reg_event_<call>,
* .unregfunc = ftrace_unreg_event_<call>,
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
* }
*
*
* For those macros defined with TRACE_EVENT:
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
*
* static struct ftrace_event_call event_<call>;
*
* static void ftrace_raw_event_<call>(proto)
* {
* struct ring_buffer_event *event;
* struct ftrace_raw_<call> *entry; <-- defined in stage 1
* unsigned long irq_flags;
* int pc;
*
* local_save_flags(irq_flags);
* pc = preempt_count();
*
* event = trace_current_buffer_lock_reserve(event_<call>.id,
* sizeof(struct ftrace_raw_<call>),
* irq_flags, pc);
* if (!event)
* return;
* entry = ring_buffer_event_data(event);
*
* <assign>; <-- Here we assign the entries by the __field and
* __array macros.
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
*
* trace_current_buffer_unlock_commit(event, irq_flags, pc);
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
* }
*
* static int ftrace_raw_reg_event_<call>(void)
* {
* int ret;
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
*
* ret = register_trace_<call>(ftrace_raw_event_<call>);
* if (!ret)
* pr_info("event trace: Could not activate trace point "
* "probe to <call>");
* return ret;
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
* }
*
* static void ftrace_unreg_event_<call>(void)
* {
* unregister_trace_<call>(ftrace_raw_event_<call>);
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
* }
*
* static struct trace_event ftrace_event_type_<call> = {
* .trace = ftrace_raw_output_<call>, <-- stage 2
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
* };
*
* static int ftrace_raw_init_event_<call>(void)
* {
* int id;
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
*
* id = register_ftrace_event(&ftrace_event_type_<call>);
* if (!id)
* return -ENODEV;
* event_<call>.id = id;
* return 0;
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
* }
*
* static struct ftrace_event_call __used
* __attribute__((__aligned__(4)))
* __attribute__((section("_ftrace_events"))) event_<call> = {
* .name = "<call>",
* .system = "<system>",
* .raw_init = ftrace_raw_init_event_<call>,
* .regfunc = ftrace_reg_event_<call>,
* .unregfunc = ftrace_unreg_event_<call>,
* .show_format = ftrace_format_<call>,
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
* }
*
*/
#undef TP_FMT
#define TP_FMT(fmt, args...) fmt "\n", ##args
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
#ifdef CONFIG_EVENT_PROFILE
#define _TRACE_PROFILE(call, proto, args) \
static void ftrace_profile_##call(proto) \
{ \
extern void perf_tpcounter_event(int); \
perf_tpcounter_event(event_##call.id); \
} \
\
static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
{ \
int ret = 0; \
\
if (!atomic_inc_return(&call->profile_count)) \
ret = register_trace_##call(ftrace_profile_##call); \
\
return ret; \
} \
\
static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
{ \
if (atomic_add_negative(-1, &call->profile_count)) \
unregister_trace_##call(ftrace_profile_##call); \
}
#define _TRACE_PROFILE_INIT(call) \
.profile_count = ATOMIC_INIT(-1), \
.profile_enable = ftrace_profile_enable_##call, \
.profile_disable = ftrace_profile_disable_##call,
#else
#define _TRACE_PROFILE(call, proto, args)
#define _TRACE_PROFILE_INIT(call)
#endif
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
#define _TRACE_FORMAT(call, proto, args, fmt) \
static void ftrace_event_##call(proto) \
{ \
tracing: add lockdep tracepoints for lock acquire/release Augment the traces with lock names when lockdep is available: 1) | down_read_trylock() { 1) | _spin_lock_irqsave() { 1) | /* lock_acquire: &sem->wait_lock */ 1) 4.201 us | } 1) | _spin_unlock_irqrestore() { 1) | /* lock_release: &sem->wait_lock */ 1) 3.523 us | } 1) | /* lock_acquire: try read &mm->mmap_sem */ 1) + 13.386 us | } 1) 1.635 us | find_vma(); 1) | handle_mm_fault() { 1) | __do_fault() { 1) | filemap_fault() { 1) | find_lock_page() { 1) | find_get_page() { 1) | /* lock_acquire: read rcu_read_lock */ 1) | /* lock_release: rcu_read_lock */ 1) 5.697 us | } 1) 8.158 us | } 1) + 11.079 us | } 1) | _spin_lock() { 1) | /* lock_acquire: __pte_lockptr(page) */ 1) 3.949 us | } 1) 1.460 us | page_add_file_rmap(); 1) | _spin_unlock() { 1) | /* lock_release: __pte_lockptr(page) */ 1) 3.115 us | } 1) | unlock_page() { 1) 1.421 us | page_waitqueue(); 1) 1.220 us | __wake_up_bit(); 1) 6.519 us | } 1) + 34.328 us | } 1) + 37.452 us | } 1) | up_read() { 1) | /* lock_release: &mm->mmap_sem */ 1) | _spin_lock_irqsave() { 1) | /* lock_acquire: &sem->wait_lock */ 1) 3.865 us | } 1) | _spin_unlock_irqrestore() { 1) | /* lock_release: &sem->wait_lock */ 1) 8.562 us | } 1) + 17.370 us | } Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: =?ISO-8859-1?Q?T=F6r=F6k?= Edwin <edwintorok@gmail.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> LKML-Reference: <1236166375.5330.7209.camel@laptop> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-03-04 11:32:55 +00:00
event_trace_printk(_RET_IP_, #call ": " fmt); \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
} \
\
static int ftrace_reg_event_##call(void) \
{ \
int ret; \
\
ret = register_trace_##call(ftrace_event_##call); \
if (ret) \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
pr_info("event trace: Could not activate trace point " \
"probe to " #call "\n"); \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
return ret; \
} \
\
static void ftrace_unreg_event_##call(void) \
{ \
unregister_trace_##call(ftrace_event_##call); \
} \
\
static struct ftrace_event_call event_##call; \
\
static int ftrace_init_event_##call(void) \
{ \
int id; \
\
id = register_ftrace_event(NULL); \
if (!id) \
return -ENODEV; \
event_##call.id = id; \
return 0; \
}
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
#undef TRACE_FORMAT
#define TRACE_FORMAT(call, proto, args, fmt) \
_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
static struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) event_##call = { \
.name = #call, \
.system = __stringify(TRACE_SYSTEM), \
.raw_init = ftrace_init_event_##call, \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
.regfunc = ftrace_reg_event_##call, \
.unregfunc = ftrace_unreg_event_##call, \
_TRACE_PROFILE_INIT(call) \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
}
tracing: new format for specialized trace points Impact: clean up and enhancement The TRACE_EVENT_FORMAT macro looks quite ugly and is limited in its ability to save data as well as to print the record out. Working with Ingo Molnar, we came up with a new format that is much more pleasing to the eye of C developers. This new macro is more C style than the old macro, and is more obvious to what it does. Here's the example. The only updated macro in this patch is the sched_switch trace point. The old method looked like this: TRACE_EVENT_FORMAT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_FMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, TP_CMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") ); The above method is hard to read and requires two format fields. The new method: /* * Tracepoint for task switches, performed by the scheduler: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ TRACE_EVENT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_printk("task %s:%d [%d] ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->next_comm, __entry->next_pid, __entry->next_prio), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ) ); This macro is called TRACE_EVENT, it is broken up into 5 parts: TP_PROTO: the proto type of the trace point TP_ARGS: the arguments of the trace point TP_STRUCT_entry: the structure layout of the entry in the ring buffer TP_printk: the printk format TP_fast_assign: the method used to write the entry into the ring buffer The structure is the definition of how the event will be saved in the ring buffer. The printk is used by the internal tracing in case of an oops, and the kernel needs to print out the format of the record to the console. This the TP_printk gives a means to show the records in a human readable format. It is also used to print out the data from the trace file. The TP_fast_assign is executed directly. It is basically like a C function, where the __entry is the handle to the record. Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-03-09 21:14:30 +00:00
#undef __entry
#define __entry entry
tracing: add TRACE_FIELD_SPECIAL to record complex entries Tom Zanussi pointed out that the simple TRACE_FIELD was not enough to record trace data that required memcpy. This patch addresses this issue by adding a TRACE_FIELD_SPECIAL. The format is similar to TRACE_FIELD but looks like so: TRACE_FIELD_SPECIAL(type_item, item, cmd) What TRACE_FIELD gave was: TRACE_FIELD(type, item, assign) The TRACE_FIELD would be used in declaring a structure: struct { type item; }; And later assign it via: entry->item = assign; What TRACE_FIELD_SPECIAL gives us is: In the declaration of the structure: struct { type_item; }; And the assignment: cmd; This change log will explain the one example used in the patch: TRACE_EVENT_FORMAT(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TPARGS(rq, prev, next), TPFMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, TPCMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), TPRAWFMT("prev %d:%d ==> next %s:%d:%d") ); The struct will be create as: struct { pid_t prev_pid; int prev_prio; char next_comm[TASK_COMM_LEN]; pid_t next_pid; int next_prio; }; Note the TRACE_ENTRY in the cmd part of TRACE_SPECIAL. TRACE_ENTRY will be set by the tracer to point to the structure inside the trace buffer. entry->prev_pid = prev->pid; entry->prev_prio = prev->prio; memcpy(entry->next_comm, next->comm, TASK_COMM_LEN); entry->next_pid = next->pid; entry->next_prio = next->prio Reported-by: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-03-02 15:53:15 +00:00
tracing: new format for specialized trace points Impact: clean up and enhancement The TRACE_EVENT_FORMAT macro looks quite ugly and is limited in its ability to save data as well as to print the record out. Working with Ingo Molnar, we came up with a new format that is much more pleasing to the eye of C developers. This new macro is more C style than the old macro, and is more obvious to what it does. Here's the example. The only updated macro in this patch is the sched_switch trace point. The old method looked like this: TRACE_EVENT_FORMAT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_FMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, TP_CMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") ); The above method is hard to read and requires two format fields. The new method: /* * Tracepoint for task switches, performed by the scheduler: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ TRACE_EVENT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_printk("task %s:%d [%d] ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->next_comm, __entry->next_pid, __entry->next_prio), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ) ); This macro is called TRACE_EVENT, it is broken up into 5 parts: TP_PROTO: the proto type of the trace point TP_ARGS: the arguments of the trace point TP_STRUCT_entry: the structure layout of the entry in the ring buffer TP_printk: the printk format TP_fast_assign: the method used to write the entry into the ring buffer The structure is the definition of how the event will be saved in the ring buffer. The printk is used by the internal tracing in case of an oops, and the kernel needs to print out the format of the record to the console. This the TP_printk gives a means to show the records in a human readable format. It is also used to print out the data from the trace file. The TP_fast_assign is executed directly. It is basically like a C function, where the __entry is the handle to the record. Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-03-09 21:14:30 +00:00
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
\
static struct ftrace_event_call event_##call; \
\
static void ftrace_raw_event_##call(proto) \
{ \
tracing: add per-event filtering This patch adds per-event filtering to the event tracing subsystem. It adds a 'filter' debugfs file to each event directory. This file can be written to to set filters; reading from it will display the current set of filters set for that event. Basically, any field listed in the 'format' file for an event can be filtered on (including strings, but not yet other array types) using either matching ('==') or non-matching ('!=') 'predicates'. A 'predicate' can be either a single expression: # echo pid != 0 > filter # cat filter pid != 0 or a compound expression of up to 8 sub-expressions combined using '&&' or '||': # echo comm == Xorg > filter # echo "&& sig != 29" > filter # cat filter comm == Xorg && sig != 29 Only events having field values matching an expression will be available in the trace output; non-matching events are discarded. Note that a compound expression is built up by echoing each sub-expression separately - it's not the most efficient way to do things, but it keeps the parser simple and assumes that compound expressions will be relatively uncommon. In any case, a subsequent patch introducing a way to set filters for entire subsystems should mitigate any need to do this for lots of events. Setting a filter without an '&&' or '||' clears the previous filter completely and sets the filter to the new expression: # cat filter comm == Xorg && sig != 29 # echo comm != Xorg # cat filter comm != Xorg To clear a filter, echo 0 to the filter file: # echo 0 > filter # cat filter none The limit of 8 predicates for a compound expression is arbitrary - for efficiency, it's implemented as an array of pointers to predicates, and 8 seemed more than enough for any filter... Signed-off-by: Tom Zanussi <tzanussi@gmail.com> Acked-by: Frederic Weisbecker <fweisbec@gmail.com> LKML-Reference: <1237710665.7703.48.camel@charm-linux> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-03-22 08:31:04 +00:00
struct ftrace_event_call *call = &event_##call; \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
struct ring_buffer_event *event; \
struct ftrace_raw_##call *entry; \
unsigned long irq_flags; \
int pc; \
\
local_save_flags(irq_flags); \
pc = preempt_count(); \
\
event = trace_current_buffer_lock_reserve(event_##call.id, \
sizeof(struct ftrace_raw_##call), \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
irq_flags, pc); \
if (!event) \
return; \
entry = ring_buffer_event_data(event); \
\
tracing: new format for specialized trace points Impact: clean up and enhancement The TRACE_EVENT_FORMAT macro looks quite ugly and is limited in its ability to save data as well as to print the record out. Working with Ingo Molnar, we came up with a new format that is much more pleasing to the eye of C developers. This new macro is more C style than the old macro, and is more obvious to what it does. Here's the example. The only updated macro in this patch is the sched_switch trace point. The old method looked like this: TRACE_EVENT_FORMAT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_FMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, TP_CMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") ); The above method is hard to read and requires two format fields. The new method: /* * Tracepoint for task switches, performed by the scheduler: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ TRACE_EVENT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_printk("task %s:%d [%d] ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->next_comm, __entry->next_pid, __entry->next_prio), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ) ); This macro is called TRACE_EVENT, it is broken up into 5 parts: TP_PROTO: the proto type of the trace point TP_ARGS: the arguments of the trace point TP_STRUCT_entry: the structure layout of the entry in the ring buffer TP_printk: the printk format TP_fast_assign: the method used to write the entry into the ring buffer The structure is the definition of how the event will be saved in the ring buffer. The printk is used by the internal tracing in case of an oops, and the kernel needs to print out the format of the record to the console. This the TP_printk gives a means to show the records in a human readable format. It is also used to print out the data from the trace file. The TP_fast_assign is executed directly. It is basically like a C function, where the __entry is the handle to the record. Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-03-09 21:14:30 +00:00
assign; \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
\
if (!filter_current_check_discard(call, entry, event)) \
trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
} \
\
static int ftrace_raw_reg_event_##call(void) \
{ \
int ret; \
\
ret = register_trace_##call(ftrace_raw_event_##call); \
if (ret) \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
pr_info("event trace: Could not activate trace point " \
"probe to " #call "\n"); \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
return ret; \
} \
\
static void ftrace_raw_unreg_event_##call(void) \
{ \
unregister_trace_##call(ftrace_raw_event_##call); \
} \
\
static struct trace_event ftrace_event_type_##call = { \
.trace = ftrace_raw_output_##call, \
}; \
\
static int ftrace_raw_init_event_##call(void) \
{ \
int id; \
\
id = register_ftrace_event(&ftrace_event_type_##call); \
if (!id) \
return -ENODEV; \
event_##call.id = id; \
INIT_LIST_HEAD(&event_##call.fields); \
tracing/filters: allow on-the-fly filter switching This patch allows event filters to be safely removed or switched on-the-fly while avoiding the use of rcu or the suspension of tracing of previous versions. It does it by adding a new filter_pred_none() predicate function which does nothing and by never deallocating either the predicates or any of the filter_pred members used in matching; the predicate lists are allocated and initialized during ftrace_event_calls initialization. Whenever a filter is removed or replaced, the filter_pred_* functions currently in use by the affected ftrace_event_call are immediately switched over to to the filter_pred_none() function, while the rest of the filter_pred members are left intact, allowing any currently executing filter_pred_* functions to finish up, using the values they're currently using. In the case of filter replacement, the new predicate values are copied into the old predicates after the above step, and the filter_pred_none() functions are replaced by the filter_pred_* functions for the new filter. In this case, it is possible though very unlikely that a previous filter_pred_* is still running even after the filter_pred_none() switch and the switch to the new filter_pred_*. In that case, however, because nothing has been deallocated in the filter_pred, the worst that can happen is that the old filter_pred_* function sees the new values and as a result produces either a false positive or a false negative, depending on the values it finds. So one downside to this method is that rarely, it can produce a bad match during the filter switch, but it should be possible to live with that, IMHO. The other downside is that at least in this patch the predicate lists are always pre-allocated, taking up memory from the start. They could probably be allocated on first-use, and de-allocated when tracing is completely stopped - if this patch makes sense, I could create another one to do that later on. Oh, and it also places a restriction on the size of __arrays in events, currently set to 128, since they can't be larger than the now embedded str_val arrays in the filter_pred struct. Signed-off-by: Tom Zanussi <tzanussi@gmail.com> Acked-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: paulmck@linux.vnet.ibm.com LKML-Reference: <1239610670.6660.49.camel@tropicana> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-04-13 08:17:50 +00:00
init_preds(&event_##call); \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
return 0; \
} \
\
static struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) event_##call = { \
.name = #call, \
.system = __stringify(TRACE_SYSTEM), \
.event = &ftrace_event_type_##call, \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
.raw_init = ftrace_raw_init_event_##call, \
tracing: new format for specialized trace points Impact: clean up and enhancement The TRACE_EVENT_FORMAT macro looks quite ugly and is limited in its ability to save data as well as to print the record out. Working with Ingo Molnar, we came up with a new format that is much more pleasing to the eye of C developers. This new macro is more C style than the old macro, and is more obvious to what it does. Here's the example. The only updated macro in this patch is the sched_switch trace point. The old method looked like this: TRACE_EVENT_FORMAT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_FMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, TP_CMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") ); The above method is hard to read and requires two format fields. The new method: /* * Tracepoint for task switches, performed by the scheduler: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ TRACE_EVENT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_printk("task %s:%d [%d] ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->next_comm, __entry->next_pid, __entry->next_prio), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ) ); This macro is called TRACE_EVENT, it is broken up into 5 parts: TP_PROTO: the proto type of the trace point TP_ARGS: the arguments of the trace point TP_STRUCT_entry: the structure layout of the entry in the ring buffer TP_printk: the printk format TP_fast_assign: the method used to write the entry into the ring buffer The structure is the definition of how the event will be saved in the ring buffer. The printk is used by the internal tracing in case of an oops, and the kernel needs to print out the format of the record to the console. This the TP_printk gives a means to show the records in a human readable format. It is also used to print out the data from the trace file. The TP_fast_assign is executed directly. It is basically like a C function, where the __entry is the handle to the record. Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-03-09 21:14:30 +00:00
.regfunc = ftrace_raw_reg_event_##call, \
.unregfunc = ftrace_raw_unreg_event_##call, \
.show_format = ftrace_format_##call, \
.define_fields = ftrace_define_fields_##call, \
_TRACE_PROFILE_INIT(call) \
tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
2009-02-28 00:12:30 +00:00
}
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#undef _TRACE_PROFILE
#undef _TRACE_PROFILE_INIT