Update to 3.4-final.
[linux-flexiantxendom0-3.2.10.git] / kernel / trace / trace_events.c
index f85b0f1..29111da 100644 (file)
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
+#include <linux/slab.h>
 #include <linux/delay.h>
 
 #include <asm/setup.h>
 
 #include "trace_output.h"
 
+#undef TRACE_SYSTEM
 #define TRACE_SYSTEM "TRACE_SYSTEM"
 
 DEFINE_MUTEX(event_mutex);
 
+DEFINE_MUTEX(event_storage_mutex);
+EXPORT_SYMBOL_GPL(event_storage_mutex);
+
+char event_storage[EVENT_STORAGE_SIZE];
+EXPORT_SYMBOL_GPL(event_storage);
+
 LIST_HEAD(ftrace_events);
+LIST_HEAD(ftrace_common_fields);
 
-int trace_define_field(struct ftrace_event_call *call, const char *type,
-                      const char *name, int offset, int size, int is_signed,
-                      int filter_type)
+struct list_head *
+trace_get_fields(struct ftrace_event_call *event_call)
+{
+       if (!event_call->class->get_fields)
+               return &event_call->class->fields;
+       return event_call->class->get_fields(event_call);
+}
+
+static int __trace_define_field(struct list_head *head, const char *type,
+                               const char *name, int offset, int size,
+                               int is_signed, int filter_type)
 {
        struct ftrace_event_field *field;
 
@@ -54,30 +71,43 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
        field->size = size;
        field->is_signed = is_signed;
 
-       list_add(&field->link, &call->fields);
+       list_add(&field->link, head);
 
        return 0;
 
 err:
-       if (field) {
+       if (field)
                kfree(field->name);
-               kfree(field->type);
-       }
        kfree(field);
 
        return -ENOMEM;
 }
+
+int trace_define_field(struct ftrace_event_call *call, const char *type,
+                      const char *name, int offset, int size, int is_signed,
+                      int filter_type)
+{
+       struct list_head *head;
+
+       if (WARN_ON(!call->class))
+               return 0;
+
+       head = trace_get_fields(call);
+       return __trace_define_field(head, type, name, offset, size,
+                                   is_signed, filter_type);
+}
 EXPORT_SYMBOL_GPL(trace_define_field);
 
 #define __common_field(type, item)                                     \
-       ret = trace_define_field(call, #type, "common_" #item,          \
-                                offsetof(typeof(ent), item),           \
-                                sizeof(ent.item),                      \
-                                is_signed_type(type), FILTER_OTHER);   \
+       ret = __trace_define_field(&ftrace_common_fields, #type,        \
+                                  "common_" #item,                     \
+                                  offsetof(typeof(ent), item),         \
+                                  sizeof(ent.item),                    \
+                                  is_signed_type(type), FILTER_OTHER); \
        if (ret)                                                        \
                return ret;
 
-int trace_define_common_fields(struct ftrace_event_call *call)
+static int trace_define_common_fields(void)
 {
        int ret;
        struct trace_entry ent;
@@ -86,17 +116,18 @@ int trace_define_common_fields(struct ftrace_event_call *call)
        __common_field(unsigned char, flags);
        __common_field(unsigned char, preempt_count);
        __common_field(int, pid);
-       __common_field(int, tgid);
+       __common_field(int, padding);
 
        return ret;
 }
-EXPORT_SYMBOL_GPL(trace_define_common_fields);
 
 void trace_destroy_fields(struct ftrace_event_call *call)
 {
        struct ftrace_event_field *field, *next;
+       struct list_head *head;
 
-       list_for_each_entry_safe(field, next, &call->fields, link) {
+       head = trace_get_fields(call);
+       list_for_each_entry_safe(field, next, head, link) {
                list_del(&field->link);
                kfree(field->type);
                kfree(field->name);
@@ -104,25 +135,108 @@ void trace_destroy_fields(struct ftrace_event_call *call)
        }
 }
 
-static void ftrace_event_enable_disable(struct ftrace_event_call *call,
+int trace_event_raw_init(struct ftrace_event_call *call)
+{
+       int id;
+
+       id = register_ftrace_event(&call->event);
+       if (!id)
+               return -ENODEV;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(trace_event_raw_init);
+
+int ftrace_event_reg(struct ftrace_event_call *call,
+                    enum trace_reg type, void *data)
+{
+       switch (type) {
+       case TRACE_REG_REGISTER:
+               return tracepoint_probe_register(call->name,
+                                                call->class->probe,
+                                                call);
+       case TRACE_REG_UNREGISTER:
+               tracepoint_probe_unregister(call->name,
+                                           call->class->probe,
+                                           call);
+               return 0;
+
+#ifdef CONFIG_PERF_EVENTS
+       case TRACE_REG_PERF_REGISTER:
+               return tracepoint_probe_register(call->name,
+                                                call->class->perf_probe,
+                                                call);
+       case TRACE_REG_PERF_UNREGISTER:
+               tracepoint_probe_unregister(call->name,
+                                           call->class->perf_probe,
+                                           call);
+               return 0;
+       case TRACE_REG_PERF_OPEN:
+       case TRACE_REG_PERF_CLOSE:
+       case TRACE_REG_PERF_ADD:
+       case TRACE_REG_PERF_DEL:
+               return 0;
+#endif
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ftrace_event_reg);
+
+void trace_event_enable_cmd_record(bool enable)
+{
+       struct ftrace_event_call *call;
+
+       mutex_lock(&event_mutex);
+       list_for_each_entry(call, &ftrace_events, list) {
+               if (!(call->flags & TRACE_EVENT_FL_ENABLED))
+                       continue;
+
+               if (enable) {
+                       tracing_start_cmdline_record();
+                       call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+               } else {
+                       tracing_stop_cmdline_record();
+                       call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
+               }
+       }
+       mutex_unlock(&event_mutex);
+}
+
+static int ftrace_event_enable_disable(struct ftrace_event_call *call,
                                        int enable)
 {
+       int ret = 0;
+
        switch (enable) {
        case 0:
-               if (call->enabled) {
-                       call->enabled = 0;
-                       tracing_stop_cmdline_record();
-                       call->unregfunc(call);
+               if (call->flags & TRACE_EVENT_FL_ENABLED) {
+                       call->flags &= ~TRACE_EVENT_FL_ENABLED;
+                       if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) {
+                               tracing_stop_cmdline_record();
+                               call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
+                       }
+                       call->class->reg(call, TRACE_REG_UNREGISTER, NULL);
                }
                break;
        case 1:
-               if (!call->enabled) {
-                       call->enabled = 1;
-                       tracing_start_cmdline_record();
-                       call->regfunc(call);
+               if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
+                       if (trace_flags & TRACE_ITER_RECORD_CMD) {
+                               tracing_start_cmdline_record();
+                               call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+                       }
+                       ret = call->class->reg(call, TRACE_REG_REGISTER, NULL);
+                       if (ret) {
+                               tracing_stop_cmdline_record();
+                               pr_info("event trace: Could not enable event "
+                                       "%s\n", call->name);
+                               break;
+                       }
+                       call->flags |= TRACE_EVENT_FL_ENABLED;
                }
                break;
        }
+
+       return ret;
 }
 
 static void ftrace_clear_events(void)
@@ -136,6 +250,35 @@ static void ftrace_clear_events(void)
        mutex_unlock(&event_mutex);
 }
 
+static void __put_system(struct event_subsystem *system)
+{
+       struct event_filter *filter = system->filter;
+
+       WARN_ON_ONCE(system->ref_count == 0);
+       if (--system->ref_count)
+               return;
+
+       if (filter) {
+               kfree(filter->filter_string);
+               kfree(filter);
+       }
+       kfree(system->name);
+       kfree(system);
+}
+
+static void __get_system(struct event_subsystem *system)
+{
+       WARN_ON_ONCE(system->ref_count == 0);
+       system->ref_count++;
+}
+
+static void put_system(struct event_subsystem *system)
+{
+       mutex_lock(&event_mutex);
+       __put_system(system);
+       mutex_unlock(&event_mutex);
+}
+
 /*
  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
  */
@@ -148,15 +291,18 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
        mutex_lock(&event_mutex);
        list_for_each_entry(call, &ftrace_events, list) {
 
-               if (!call->name || !call->regfunc)
+               if (!call->name || !call->class || !call->class->reg)
+                       continue;
+
+               if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
                        continue;
 
                if (match &&
                    strcmp(match, call->name) != 0 &&
-                   strcmp(match, call->system) != 0)
+                   strcmp(match, call->class->system) != 0)
                        continue;
 
-               if (sub && strcmp(sub, call->system) != 0)
+               if (sub && strcmp(sub, call->class->system) != 0)
                        continue;
 
                if (event && strcmp(event, call->name) != 0)
@@ -218,6 +364,7 @@ int trace_set_clr_event(const char *system, const char *event, int set)
 {
        return __ftrace_set_clr_event(NULL, system, event, set);
 }
+EXPORT_SYMBOL_GPL(trace_set_clr_event);
 
 /* 128 should be much more than enough */
 #define EVENT_BUF_SIZE         127
@@ -226,73 +373,38 @@ static ssize_t
 ftrace_event_write(struct file *file, const char __user *ubuf,
                   size_t cnt, loff_t *ppos)
 {
-       size_t read = 0;
-       int i, set = 1;
-       ssize_t ret;
-       char *buf;
-       char ch;
+       struct trace_parser parser;
+       ssize_t read, ret;
 
-       if (!cnt || cnt < 0)
+       if (!cnt)
                return 0;
 
        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;
 
-       ret = get_user(ch, ubuf++);
-       if (ret)
-               return ret;
-       read++;
-       cnt--;
-
-       /* skip white space */
-       while (cnt && isspace(ch)) {
-               ret = get_user(ch, ubuf++);
-               if (ret)
-                       return ret;
-               read++;
-               cnt--;
-       }
-
-       /* Only white space found? */
-       if (isspace(ch)) {
-               file->f_pos += read;
-               ret = read;
-               return ret;
-       }
-
-       buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
-       if (!buf)
+       if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
                return -ENOMEM;
 
-       if (cnt > EVENT_BUF_SIZE)
-               cnt = EVENT_BUF_SIZE;
+       read = trace_get_user(&parser, ubuf, cnt, ppos);
 
-       i = 0;
-       while (cnt && !isspace(ch)) {
-               if (!i && ch == '!')
+       if (read >= 0 && trace_parser_loaded((&parser))) {
+               int set = 1;
+
+               if (*parser.buffer == '!')
                        set = 0;
-               else
-                       buf[i++] = ch;
 
-               ret = get_user(ch, ubuf++);
+               parser.buffer[parser.idx] = 0;
+
+               ret = ftrace_set_clr_event(parser.buffer + !set, set);
                if (ret)
-                       goto out_free;
-               read++;
-               cnt--;
+                       goto out_put;
        }
-       buf[i] = 0;
-
-       file->f_pos += read;
-
-       ret = ftrace_set_clr_event(buf, set);
-       if (ret)
-               goto out_free;
 
        ret = read;
 
- out_free:
-       kfree(buf);
+ out_put:
+       trace_parser_put(&parser);
 
        return ret;
 }
@@ -300,42 +412,32 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-       struct list_head *list = m->private;
-       struct ftrace_event_call *call;
+       struct ftrace_event_call *call = v;
 
        (*pos)++;
 
-       for (;;) {
-               if (list == &ftrace_events)
-                       return NULL;
-
-               call = list_entry(list, struct ftrace_event_call, list);
-
+       list_for_each_entry_continue(call, &ftrace_events, list) {
                /*
                 * The ftrace subsystem is for showing formats only.
                 * They can not be enabled or disabled via the event files.
                 */
-               if (call->regfunc)
-                       break;
-
-               list = list->next;
+               if (call->class && call->class->reg)
+                       return call;
        }
 
-       m->private = list->next;
-
-       return call;
+       return NULL;
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-       struct ftrace_event_call *call = NULL;
+       struct ftrace_event_call *call;
        loff_t l;
 
        mutex_lock(&event_mutex);
 
-       m->private = ftrace_events.next;
+       call = list_entry(&ftrace_events, struct ftrace_event_call, list);
        for (l = 0; l <= *pos; ) {
-               call = t_next(m, NULL, &l);
+               call = t_next(m, call, &l);
                if (!call)
                        break;
        }
@@ -345,37 +447,28 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 static void *
 s_next(struct seq_file *m, void *v, loff_t *pos)
 {
-       struct list_head *list = m->private;
-       struct ftrace_event_call *call;
+       struct ftrace_event_call *call = v;
 
        (*pos)++;
 
- retry:
-       if (list == &ftrace_events)
-               return NULL;
-
-       call = list_entry(list, struct ftrace_event_call, list);
-
-       if (!call->enabled) {
-               list = list->next;
-               goto retry;
+       list_for_each_entry_continue(call, &ftrace_events, list) {
+               if (call->flags & TRACE_EVENT_FL_ENABLED)
+                       return call;
        }
 
-       m->private = list->next;
-
-       return call;
+       return NULL;
 }
 
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
-       struct ftrace_event_call *call = NULL;
+       struct ftrace_event_call *call;
        loff_t l;
 
        mutex_lock(&event_mutex);
 
-       m->private = ftrace_events.next;
+       call = list_entry(&ftrace_events, struct ftrace_event_call, list);
        for (l = 0; l <= *pos; ) {
-               call = s_next(m, NULL, &l);
+               call = s_next(m, call, &l);
                if (!call)
                        break;
        }
@@ -386,8 +479,8 @@ static int t_show(struct seq_file *m, void *v)
 {
        struct ftrace_event_call *call = v;
 
-       if (strcmp(call->system, TRACE_SYSTEM) != 0)
-               seq_printf(m, "%s:", call->system);
+       if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
+               seq_printf(m, "%s:", call->class->system);
        seq_printf(m, "%s\n", call->name);
 
        return 0;
@@ -418,7 +511,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
        struct ftrace_event_call *call = filp->private_data;
        char *buf;
 
-       if (call->enabled)
+       if (call->flags & TRACE_EVENT_FL_ENABLED)
                buf = "1\n";
        else
                buf = "0\n";
@@ -431,20 +524,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
                   loff_t *ppos)
 {
        struct ftrace_event_call *call = filp->private_data;
-       char buf[64];
        unsigned long val;
        int ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        ret = tracing_update_buffers();
@@ -455,7 +539,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
        case 0:
        case 1:
                mutex_lock(&event_mutex);
-               ftrace_event_enable_disable(call, val);
+               ret = ftrace_event_enable_disable(call, val);
                mutex_unlock(&event_mutex);
                break;
 
@@ -465,7 +549,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
        *ppos += cnt;
 
-       return cnt;
+       return ret ? ret : cnt;
 }
 
 static ssize_t
@@ -473,7 +557,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                   loff_t *ppos)
 {
        const char set_to_char[4] = { '?', '0', '1', 'X' };
-       const char *system = filp->private_data;
+       struct event_subsystem *system = filp->private_data;
        struct ftrace_event_call *call;
        char buf[2];
        int set = 0;
@@ -481,10 +565,10 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 
        mutex_lock(&event_mutex);
        list_for_each_entry(call, &ftrace_events, list) {
-               if (!call->name || !call->regfunc)
+               if (!call->name || !call->class || !call->class->reg)
                        continue;
 
-               if (system && strcmp(call->system, system) != 0)
+               if (system && strcmp(call->class->system, system->name) != 0)
                        continue;
 
                /*
@@ -492,7 +576,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                 * or if all events or cleared, or if we have
                 * a mixture.
                 */
-               set |= (1 << !!call->enabled);
+               set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED));
 
                /*
                 * If we have a mixture, no need to look further.
@@ -514,21 +598,13 @@ static ssize_t
 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
                    loff_t *ppos)
 {
-       const char *system = filp->private_data;
+       struct event_subsystem *system = filp->private_data;
+       const char *name = NULL;
        unsigned long val;
-       char buf[64];
        ssize_t ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        ret = tracing_update_buffers();
@@ -538,7 +614,14 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
        if (val != 0 && val != 1)
                return -EINVAL;
 
-       ret = __ftrace_set_clr_event(NULL, system, NULL, val);
+       /*
+        * Opening of "enable" adds a ref count to system,
+        * so the name is safe to use.
+        */
+       if (system)
+               name = system->name;
+
+       ret = __ftrace_set_clr_event(NULL, name, NULL, val);
        if (ret)
                goto out;
 
@@ -550,74 +633,146 @@ out:
        return ret;
 }
 
-extern char *__bad_type_size(void);
+enum {
+       FORMAT_HEADER           = 1,
+       FORMAT_FIELD_SEPERATOR  = 2,
+       FORMAT_PRINTFMT         = 3,
+};
+
+static void *f_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       struct ftrace_event_call *call = m->private;
+       struct ftrace_event_field *field;
+       struct list_head *common_head = &ftrace_common_fields;
+       struct list_head *head = trace_get_fields(call);
+
+       (*pos)++;
+
+       switch ((unsigned long)v) {
+       case FORMAT_HEADER:
+               if (unlikely(list_empty(common_head)))
+                       return NULL;
+
+               field = list_entry(common_head->prev,
+                                  struct ftrace_event_field, link);
+               return field;
+
+       case FORMAT_FIELD_SEPERATOR:
+               if (unlikely(list_empty(head)))
+                       return NULL;
 
-#undef FIELD
-#define FIELD(type, name)                                              \
-       sizeof(type) != sizeof(field.name) ? __bad_type_size() :        \
-       #type, "common_" #name, offsetof(typeof(field), name),          \
-               sizeof(field.name)
+               field = list_entry(head->prev, struct ftrace_event_field, link);
+               return field;
 
-static int trace_write_header(struct trace_seq *s)
+       case FORMAT_PRINTFMT:
+               /* all done */
+               return NULL;
+       }
+
+       field = v;
+       if (field->link.prev == common_head)
+               return (void *)FORMAT_FIELD_SEPERATOR;
+       else if (field->link.prev == head)
+               return (void *)FORMAT_PRINTFMT;
+
+       field = list_entry(field->link.prev, struct ftrace_event_field, link);
+
+       return field;
+}
+
+static void *f_start(struct seq_file *m, loff_t *pos)
 {
-       struct trace_entry field;
+       loff_t l = 0;
+       void *p;
+
+       /* Start by showing the header */
+       if (!*pos)
+               return (void *)FORMAT_HEADER;
 
-       /* struct trace_entry */
-       return trace_seq_printf(s,
-                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                               "\n",
-                               FIELD(unsigned short, type),
-                               FIELD(unsigned char, flags),
-                               FIELD(unsigned char, preempt_count),
-                               FIELD(int, pid),
-                               FIELD(int, tgid));
+       p = (void *)FORMAT_HEADER;
+       do {
+               p = f_next(m, p, &l);
+       } while (p && l < *pos);
+
+       return p;
 }
 
-static ssize_t
-event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
-                 loff_t *ppos)
+static int f_show(struct seq_file *m, void *v)
 {
-       struct ftrace_event_call *call = filp->private_data;
-       struct trace_seq *s;
-       char *buf;
-       int r;
+       struct ftrace_event_call *call = m->private;
+       struct ftrace_event_field *field;
+       const char *array_descriptor;
 
-       if (*ppos)
+       switch ((unsigned long)v) {
+       case FORMAT_HEADER:
+               seq_printf(m, "name: %s\n", call->name);
+               seq_printf(m, "ID: %d\n", call->event.type);
+               seq_printf(m, "format:\n");
                return 0;
 
-       s = kmalloc(sizeof(*s), GFP_KERNEL);
-       if (!s)
-               return -ENOMEM;
+       case FORMAT_FIELD_SEPERATOR:
+               seq_putc(m, '\n');
+               return 0;
 
-       trace_seq_init(s);
+       case FORMAT_PRINTFMT:
+               seq_printf(m, "\nprint fmt: %s\n",
+                          call->print_fmt);
+               return 0;
+       }
 
-       /* If any of the first writes fail, so will the show_format. */
+       field = v;
 
-       trace_seq_printf(s, "name: %s\n", call->name);
-       trace_seq_printf(s, "ID: %d\n", call->id);
-       trace_seq_printf(s, "format:\n");
-       trace_write_header(s);
+       /*
+        * Smartly shows the array type(except dynamic array).
+        * Normal:
+        *      field:TYPE VAR
+        * If TYPE := TYPE[LEN], it is shown:
+        *      field:TYPE VAR[LEN]
+        */
+       array_descriptor = strchr(field->type, '[');
 
-       r = call->show_format(call, s);
-       if (!r) {
-               /*
-                * ug!  The format output is bigger than a PAGE!!
-                */
-               buf = "FORMAT TOO BIG\n";
-               r = simple_read_from_buffer(ubuf, cnt, ppos,
-                                             buf, strlen(buf));
-               goto out;
-       }
+       if (!strncmp(field->type, "__data_loc", 10))
+               array_descriptor = NULL;
 
-       r = simple_read_from_buffer(ubuf, cnt, ppos,
-                                   s->buffer, s->len);
- out:
-       kfree(s);
-       return r;
+       if (!array_descriptor)
+               seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+                          field->type, field->name, field->offset,
+                          field->size, !!field->is_signed);
+       else
+               seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+                          (int)(array_descriptor - field->type),
+                          field->type, field->name,
+                          array_descriptor, field->offset,
+                          field->size, !!field->is_signed);
+
+       return 0;
+}
+
+static void f_stop(struct seq_file *m, void *p)
+{
+}
+
+static const struct seq_operations trace_format_seq_ops = {
+       .start          = f_start,
+       .next           = f_next,
+       .stop           = f_stop,
+       .show           = f_show,
+};
+
+static int trace_format_open(struct inode *inode, struct file *file)
+{
+       struct ftrace_event_call *call = inode->i_private;
+       struct seq_file *m;
+       int ret;
+
+       ret = seq_open(file, &trace_format_seq_ops);
+       if (ret < 0)
+               return ret;
+
+       m = file->private_data;
+       m->private = call;
+
+       return 0;
 }
 
 static ssize_t
@@ -635,7 +790,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
                return -ENOMEM;
 
        trace_seq_init(s);
-       trace_seq_printf(s, "%d\n", call->id);
+       trace_seq_printf(s, "%d\n", call->event.type);
 
        r = simple_read_from_buffer(ubuf, cnt, ppos,
                                    s->buffer, s->len);
@@ -699,6 +854,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
        return cnt;
 }
 
+static LIST_HEAD(event_subsystems);
+
+static int subsystem_open(struct inode *inode, struct file *filp)
+{
+       struct event_subsystem *system = NULL;
+       int ret;
+
+       if (!inode->i_private)
+               goto skip_search;
+
+       /* Make sure the system still exists */
+       mutex_lock(&event_mutex);
+       list_for_each_entry(system, &event_subsystems, list) {
+               if (system == inode->i_private) {
+                       /* Don't open systems with no events */
+                       if (!system->nr_events) {
+                               system = NULL;
+                               break;
+                       }
+                       __get_system(system);
+                       break;
+               }
+       }
+       mutex_unlock(&event_mutex);
+
+       if (system != inode->i_private)
+               return -ENODEV;
+
+ skip_search:
+       ret = tracing_open_generic(inode, filp);
+       if (ret < 0 && system)
+               put_system(system);
+
+       return ret;
+}
+
+static int subsystem_release(struct inode *inode, struct file *file)
+{
+       struct event_subsystem *system = inode->i_private;
+
+       if (system)
+               put_system(system);
+
+       return 0;
+}
+
 static ssize_t
 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
                      loff_t *ppos)
@@ -812,39 +1013,49 @@ static const struct file_operations ftrace_enable_fops = {
        .open = tracing_open_generic,
        .read = event_enable_read,
        .write = event_enable_write,
+       .llseek = default_llseek,
 };
 
 static const struct file_operations ftrace_event_format_fops = {
-       .open = tracing_open_generic,
-       .read = event_format_read,
+       .open = trace_format_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = seq_release,
 };
 
 static const struct file_operations ftrace_event_id_fops = {
        .open = tracing_open_generic,
        .read = event_id_read,
+       .llseek = default_llseek,
 };
 
 static const struct file_operations ftrace_event_filter_fops = {
        .open = tracing_open_generic,
        .read = event_filter_read,
        .write = event_filter_write,
+       .llseek = default_llseek,
 };
 
 static const struct file_operations ftrace_subsystem_filter_fops = {
-       .open = tracing_open_generic,
+       .open = subsystem_open,
        .read = subsystem_filter_read,
        .write = subsystem_filter_write,
+       .llseek = default_llseek,
+       .release = subsystem_release,
 };
 
 static const struct file_operations ftrace_system_enable_fops = {
-       .open = tracing_open_generic,
+       .open = subsystem_open,
        .read = system_enable_read,
        .write = system_enable_write,
+       .llseek = default_llseek,
+       .release = subsystem_release,
 };
 
 static const struct file_operations ftrace_show_header_fops = {
        .open = tracing_open_generic,
        .read = show_header,
+       .llseek = default_llseek,
 };
 
 static struct dentry *event_trace_events_dir(void)
@@ -867,8 +1078,6 @@ static struct dentry *event_trace_events_dir(void)
        return d_events;
 }
 
-static LIST_HEAD(event_subsystems);
-
 static struct dentry *
 event_subsystem_dir(const char *name, struct dentry *d_events)
 {
@@ -900,6 +1109,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
        }
 
        system->nr_events = 1;
+       system->ref_count = 1;
        system->name = kstrdup(name, GFP_KERNEL);
        if (!system->name) {
                debugfs_remove(system->entry);
@@ -927,9 +1137,8 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
                           "'%s/filter' entry\n", name);
        }
 
-       entry = trace_create_file("enable", 0644, system->entry,
-                                 (void *)system->name,
-                                 &ftrace_system_enable_fops);
+       trace_create_file("enable", 0644, system->entry, system,
+                         &ftrace_system_enable_fops);
 
        return system->entry;
 }
@@ -941,15 +1150,15 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
                 const struct file_operations *filter,
                 const struct file_operations *format)
 {
-       struct dentry *entry;
+       struct list_head *head;
        int ret;
 
        /*
         * If the trace point header did not define TRACE_SYSTEM
         * then the system would be called "TRACE_SYSTEM".
         */
-       if (strcmp(call->system, TRACE_SYSTEM) != 0)
-               d_events = event_subsystem_dir(call->system, d_events);
+       if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
+               d_events = event_subsystem_dir(call->class->system, d_events);
 
        call->dir = debugfs_create_dir(call->name, d_events);
        if (!call->dir) {
@@ -958,49 +1167,58 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
                return -1;
        }
 
-       if (call->regfunc)
-               entry = trace_create_file("enable", 0644, call->dir, call,
-                                         enable);
+       if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
+               trace_create_file("enable", 0644, call->dir, call,
+                                 enable);
 
-       if (call->id && call->profile_enable)
-               entry = trace_create_file("id", 0444, call->dir, call,
-                                         id);
+#ifdef CONFIG_PERF_EVENTS
+       if (call->event.type && call->class->reg)
+               trace_create_file("id", 0444, call->dir, call,
+                                 id);
+#endif
 
-       if (call->define_fields) {
-               ret = call->define_fields(call);
+       /*
+        * Other events may have the same class. Only update
+        * the fields if they are not already defined.
+        */
+       head = trace_get_fields(call);
+       if (list_empty(head)) {
+               ret = call->class->define_fields(call);
                if (ret < 0) {
                        pr_warning("Could not initialize trace point"
                                   " events/%s\n", call->name);
                        return ret;
                }
-               entry = trace_create_file("filter", 0644, call->dir, call,
-                                         filter);
        }
+       trace_create_file("filter", 0644, call->dir, call,
+                         filter);
 
-       /* A trace may not want to export its format */
-       if (!call->show_format)
-               return 0;
-
-       entry = trace_create_file("format", 0444, call->dir, call,
-                                 format);
+       trace_create_file("format", 0444, call->dir, call,
+                         format);
 
        return 0;
 }
 
-static int __trace_add_event_call(struct ftrace_event_call *call)
+static int
+__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
+                      const struct file_operations *id,
+                      const struct file_operations *enable,
+                      const struct file_operations *filter,
+                      const struct file_operations *format)
 {
        struct dentry *d_events;
        int ret;
 
+       /* The linker may leave blanks */
        if (!call->name)
                return -EINVAL;
 
-       if (call->raw_init) {
-               ret = call->raw_init(call);
+       if (call->class->raw_init) {
+               ret = call->class->raw_init(call);
                if (ret < 0) {
                        if (ret != -ENOSYS)
-                               pr_warning("Could not initialize trace "
-                               "events/%s\n", call->name);
+                               pr_warning("Could not initialize trace events/%s\n",
+                                          call->name);
                        return ret;
                }
        }
@@ -1009,12 +1227,11 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
        if (!d_events)
                return -ENOENT;
 
-       list_add(&call->list, &ftrace_events);
-       ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
-                               &ftrace_enable_fops, &ftrace_event_filter_fops,
-                               &ftrace_event_format_fops);
-       if (ret < 0)
-               list_del(&call->list);
+       ret = event_create_dir(call, d_events, id, enable, filter, format);
+       if (!ret)
+               list_add(&call->list, &ftrace_events);
+       call->mod = mod;
+
        return ret;
 }
 
@@ -1023,7 +1240,10 @@ int trace_add_event_call(struct ftrace_event_call *call)
 {
        int ret;
        mutex_lock(&event_mutex);
-       ret = __trace_add_event_call(call);
+       ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
+                                    &ftrace_enable_fops,
+                                    &ftrace_event_filter_fops,
+                                    &ftrace_event_format_fops);
        mutex_unlock(&event_mutex);
        return ret;
 }
@@ -1038,16 +1258,9 @@ static void remove_subsystem_dir(const char *name)
        list_for_each_entry(system, &event_subsystems, list) {
                if (strcmp(system->name, name) == 0) {
                        if (!--system->nr_events) {
-                               struct event_filter *filter = system->filter;
-
                                debugfs_remove_recursive(system->entry);
                                list_del(&system->list);
-                               if (filter) {
-                                       kfree(filter->filter_string);
-                                       kfree(filter);
-                               }
-                               kfree(system->name);
-                               kfree(system);
+                               __put_system(system);
                        }
                        break;
                }
@@ -1060,13 +1273,13 @@ static void remove_subsystem_dir(const char *name)
 static void __trace_remove_event_call(struct ftrace_event_call *call)
 {
        ftrace_event_enable_disable(call, 0);
-       if (call->event)
-               __unregister_ftrace_event(call->event);
+       if (call->event.funcs)
+               __unregister_ftrace_event(&call->event);
        debugfs_remove_recursive(call->dir);
        list_del(&call->list);
        trace_destroy_fields(call);
        destroy_preds(call);
-       remove_subsystem_dir(call->system);
+       remove_subsystem_dir(call->class->system);
 }
 
 /* Remove an event_call */
@@ -1139,9 +1352,7 @@ trace_create_file_ops(struct module *mod)
 static void trace_module_add_events(struct module *mod)
 {
        struct ftrace_module_file_ops *file_ops = NULL;
-       struct ftrace_event_call *call, *start, *end;
-       struct dentry *d_events;
-       int ret;
+       struct ftrace_event_call **call, **start, **end;
 
        start = mod->trace_events;
        end = mod->trace_events + mod->num_trace_events;
@@ -1149,37 +1360,14 @@ static void trace_module_add_events(struct module *mod)
        if (start == end)
                return;
 
-       d_events = event_trace_events_dir();
-       if (!d_events)
+       file_ops = trace_create_file_ops(mod);
+       if (!file_ops)
                return;
 
        for_each_event(call, start, end) {
-               /* The linker may leave blanks */
-               if (!call->name)
-                       continue;
-               if (call->raw_init) {
-                       ret = call->raw_init(call);
-                       if (ret < 0) {
-                               if (ret != -ENOSYS)
-                                       pr_warning("Could not initialize trace "
-                                       "point events/%s\n", call->name);
-                               continue;
-                       }
-               }
-               /*
-                * This module has events, create file ops for this module
-                * if not already done.
-                */
-               if (!file_ops) {
-                       file_ops = trace_create_file_ops(mod);
-                       if (!file_ops)
-                               return;
-               }
-               call->mod = mod;
-               list_add(&call->list, &ftrace_events);
-               event_create_dir(call, d_events,
-                                &file_ops->id, &file_ops->enable,
-                                &file_ops->filter, &file_ops->format);
+               __trace_add_event_call(*call, mod,
+                                      &file_ops->id, &file_ops->enable,
+                                      &file_ops->filter, &file_ops->format);
        }
 }
 
@@ -1242,13 +1430,13 @@ static int trace_module_notify(struct notifier_block *self,
 }
 #endif /* CONFIG_MODULES */
 
-struct notifier_block trace_module_nb = {
+static struct notifier_block trace_module_nb = {
        .notifier_call = trace_module_notify,
        .priority = 0,
 };
 
-extern struct ftrace_event_call __start_ftrace_events[];
-extern struct ftrace_event_call __stop_ftrace_events[];
+extern struct ftrace_event_call *__start_ftrace_events[];
+extern struct ftrace_event_call *__stop_ftrace_events[];
 
 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
 
@@ -1264,7 +1452,7 @@ __setup("trace_event=", setup_trace_event);
 
 static __init int event_trace_init(void)
 {
-       struct ftrace_event_call *call;
+       struct ftrace_event_call **call;
        struct dentry *d_tracer;
        struct dentry *entry;
        struct dentry *d_events;
@@ -1306,23 +1494,14 @@ static __init int event_trace_init(void)
        trace_create_file("enable", 0644, d_events,
                          NULL, &ftrace_system_enable_fops);
 
+       if (trace_define_common_fields())
+               pr_warning("tracing: Failed to allocate common fields");
+
        for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
-               /* The linker may leave blanks */
-               if (!call->name)
-                       continue;
-               if (call->raw_init) {
-                       ret = call->raw_init(call);
-                       if (ret < 0) {
-                               if (ret != -ENOSYS)
-                                       pr_warning("Could not initialize trace "
-                                       "point events/%s\n", call->name);
-                               continue;
-                       }
-               }
-               list_add(&call->list, &ftrace_events);
-               event_create_dir(call, d_events, &ftrace_event_id_fops,
-                                &ftrace_enable_fops, &ftrace_event_filter_fops,
-                                &ftrace_event_format_fops);
+               __trace_add_event_call(*call, NULL, &ftrace_event_id_fops,
+                                      &ftrace_enable_fops,
+                                      &ftrace_event_filter_fops,
+                                      &ftrace_event_format_fops);
        }
 
        while (true) {
@@ -1410,17 +1589,29 @@ static __init void event_trace_self_tests(void)
 
        list_for_each_entry(call, &ftrace_events, list) {
 
-               /* Only test those that have a regfunc */
-               if (!call->regfunc)
+               /* Only test those that have a probe */
+               if (!call->class || !call->class->probe)
                        continue;
 
+/*
+ * Testing syscall events here is pretty useless, but
+ * we still do it if configured. But this is time consuming.
+ * What we really need is a user thread to perform the
+ * syscalls as we test.
+ */
+#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
+               if (call->class->system &&
+                   strcmp(call->class->system, "syscalls") == 0)
+                       continue;
+#endif
+
                pr_info("Testing event %s: ", call->name);
 
                /*
                 * If an event is already enabled, someone is using
                 * it and the self test should not be on.
                 */
-               if (call->enabled) {
+               if (call->flags & TRACE_EVENT_FL_ENABLED) {
                        pr_warning("Enabled event during self test!\n");
                        WARN_ON_ONCE(1);
                        continue;
@@ -1487,7 +1678,7 @@ static __init void event_trace_self_tests(void)
 
 #ifdef CONFIG_FUNCTION_TRACER
 
-static DEFINE_PER_CPU(atomic_t, test_event_disable);
+static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
 
 static void
 function_test_events_call(unsigned long ip, unsigned long parent_ip)
@@ -1497,14 +1688,13 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
        struct ftrace_entry *entry;
        unsigned long flags;
        long disabled;
-       int resched;
        int cpu;
        int pc;
 
        pc = preempt_count();
-       resched = ftrace_preempt_disable();
+       preempt_disable_notrace();
        cpu = raw_smp_processor_id();
-       disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
+       disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
 
        if (disabled != 1)
                goto out;
@@ -1523,8 +1713,8 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
        trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
 
  out:
-       atomic_dec(&per_cpu(test_event_disable, cpu));
-       ftrace_preempt_enable(resched);
+       atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
+       preempt_enable_notrace();
 }
 
 static struct ftrace_ops trace_ops __initdata  =
@@ -1534,7 +1724,12 @@ static struct ftrace_ops trace_ops __initdata  =
 
 static __init void event_trace_self_test_with_function(void)
 {
-       register_ftrace_function(&trace_ops);
+       int ret;
+       ret = register_ftrace_function(&trace_ops);
+       if (WARN_ON(ret < 0)) {
+               pr_info("Failed to enable function tracer for event tests\n");
+               return;
+       }
        pr_info("Running tests again, along with the function tracer\n");
        event_trace_self_tests();
        unregister_ftrace_function(&trace_ops);