kernel/trace/trace_events.c

   1 /*
   2  * event tracer
   3  *
   4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
   5  *
   6  *  - Added format output of fields of the trace point.
   7  *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
   8  *
   9  */
  10
  11 #include <linux/workqueue.h>
  12 #include <linux/spinlock.h>
  13 #include <linux/kthread.h>
  14 #include <linux/debugfs.h>
  15 #include <linux/uaccess.h>
  16 #include <linux/module.h>
  17 #include <linux/ctype.h>
  18 #include <linux/delay.h>
  19
  20 #include <asm/setup.h>
  21
  22 #include "trace_output.h"
  23
/*
 * Events whose system name equals this placeholder string are treated
 * specially below: t_show() prints them without a "<system>:" prefix and
 * event_create_dir() does not put them under a subsystem directory.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM "TRACE_SYSTEM"

/* Taken around walks of ftrace_events and around event enable/disable. */
DEFINE_MUTEX(event_mutex);

/* List of all registered struct ftrace_event_call, linked through ->list. */
LIST_HEAD(ftrace_events);
  30
  31 int trace_define_field(struct ftrace_event_call *call, const char *type,
  32                        const char *name, int offset, int size, int is_signed,
  33                        int filter_type)
  34 {
  35         struct ftrace_event_field *field;
  36
  37         field = kzalloc(sizeof(*field), GFP_KERNEL);
  38         if (!field)
  39                 goto err;
  40
  41         field->name = kstrdup(name, GFP_KERNEL);
  42         if (!field->name)
  43                 goto err;
  44
  45         field->type = kstrdup(type, GFP_KERNEL);
  46         if (!field->type)
  47                 goto err;
  48
  49         if (filter_type == FILTER_OTHER)
  50                 field->filter_type = filter_assign_type(type);
  51         else
  52                 field->filter_type = filter_type;
  53
  54         field->offset = offset;
  55         field->size = size;
  56         field->is_signed = is_signed;
  57
  58         list_add(&field->link, &call->fields);
  59
  60         return 0;
  61
  62 err:
  63         if (field) {
  64                 kfree(field->name);
  65                 kfree(field->type);
  66         }
  67         kfree(field);
  68
  69         return -ENOMEM;
  70 }
  71 EXPORT_SYMBOL_GPL(trace_define_field);
  72
  73 #define __common_field(type, item)                                      \
  74         ret = trace_define_field(call, #type, "common_" #item,          \
  75                                  offsetof(typeof(ent), item),           \
  76                                  sizeof(ent.item),                      \
  77                                  is_signed_type(type), FILTER_OTHER);   \
  78         if (ret)                                                        \
  79                 return ret;
  80
  81 int trace_define_common_fields(struct ftrace_event_call *call)
  82 {
  83         int ret;
  84         struct trace_entry ent;
  85
  86         __common_field(unsigned short, type);
  87         __common_field(unsigned char, flags);
  88         __common_field(unsigned char, preempt_count);
  89         __common_field(int, pid);
  90         __common_field(int, lock_depth);
  91
  92         return ret;
  93 }
  94 EXPORT_SYMBOL_GPL(trace_define_common_fields);
  95
  96 #ifdef CONFIG_MODULES
  97
  98 static void trace_destroy_fields(struct ftrace_event_call *call)
  99 {
 100         struct ftrace_event_field *field, *next;
 101
 102         list_for_each_entry_safe(field, next, &call->fields, link) {
 103                 list_del(&field->link);
 104                 kfree(field->type);
 105                 kfree(field->name);
 106                 kfree(field);
 107         }
 108 }
 109
 110 #endif /* CONFIG_MODULES */
 111
 112 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 113                                         int enable)
 114 {
 115         switch (enable) {
 116         case 0:
 117                 if (call->enabled) {
 118                         call->enabled = 0;
 119                         tracing_stop_cmdline_record();
 120                         call->unregfunc(call->data);
 121                 }
 122                 break;
 123         case 1:
 124                 if (!call->enabled) {
 125                         call->enabled = 1;
 126                         tracing_start_cmdline_record();
 127                         call->regfunc(call->data);
 128                 }
 129                 break;
 130         }
 131 }
 132
 133 static void ftrace_clear_events(void)
 134 {
 135         struct ftrace_event_call *call;
 136
 137         mutex_lock(&event_mutex);
 138         list_for_each_entry(call, &ftrace_events, list) {
 139                 ftrace_event_enable_disable(call, 0);
 140         }
 141         mutex_unlock(&event_mutex);
 142 }
 143
 144 /*
 145  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
 146  */
 147 static int __ftrace_set_clr_event(const char *match, const char *sub,
 148                                   const char *event, int set)
 149 {
 150         struct ftrace_event_call *call;
 151         int ret = -EINVAL;
 152
 153         mutex_lock(&event_mutex);
 154         list_for_each_entry(call, &ftrace_events, list) {
 155
 156                 if (!call->name || !call->regfunc)
 157                         continue;
 158
 159                 if (match &&
 160                     strcmp(match, call->name) != 0 &&
 161                     strcmp(match, call->system) != 0)
 162                         continue;
 163
 164                 if (sub && strcmp(sub, call->system) != 0)
 165                         continue;
 166
 167                 if (event && strcmp(event, call->name) != 0)
 168                         continue;
 169
 170                 ftrace_event_enable_disable(call, set);
 171
 172                 ret = 0;
 173         }
 174         mutex_unlock(&event_mutex);
 175
 176         return ret;
 177 }
 178
 179 static int ftrace_set_clr_event(char *buf, int set)
 180 {
 181         char *event = NULL, *sub = NULL, *match;
 182
 183         /*
 184          * The buf format can be <subsystem>:<event-name>
 185          *  *:<event-name> means any event by that name.
 186          *  :<event-name> is the same.
 187          *
 188          *  <subsystem>:* means all events in that subsystem
 189          *  <subsystem>: means the same.
 190          *
 191          *  <name> (no ':') means all events in a subsystem with
 192          *  the name <name> or any event that matches <name>
 193          */
 194
 195         match = strsep(&buf, ":");
 196         if (buf) {
 197                 sub = match;
 198                 event = buf;
 199                 match = NULL;
 200
 201                 if (!strlen(sub) || strcmp(sub, "*") == 0)
 202                         sub = NULL;
 203                 if (!strlen(event) || strcmp(event, "*") == 0)
 204                         event = NULL;
 205         }
 206
 207         return __ftrace_set_clr_event(match, sub, event, set);
 208 }
 209
 210 /**
 211  * trace_set_clr_event - enable or disable an event
 212  * @system: system name to match (NULL for any system)
 213  * @event: event name to match (NULL for all events, within system)
 214  * @set: 1 to enable, 0 to disable
 215  *
 216  * This is a way for other parts of the kernel to enable or disable
 217  * event recording.
 218  *
 219  * Returns 0 on success, -EINVAL if the parameters do not match any
 220  * registered events.
 221  */
 222 int trace_set_clr_event(const char *system, const char *event, int set)
 223 {
 224         return __ftrace_set_clr_event(NULL, system, event, set);
 225 }
 226
/* 128 bytes (EVENT_BUF_SIZE + 1, as passed to the parser) should be much more than enough */
 228 #define EVENT_BUF_SIZE          127
 229
 230 static ssize_t
 231 ftrace_event_write(struct file *file, const char __user *ubuf,
 232                    size_t cnt, loff_t *ppos)
 233 {
 234         struct trace_parser parser;
 235         ssize_t read, ret;
 236
 237         if (!cnt)
 238                 return 0;
 239
 240         ret = tracing_update_buffers();
 241         if (ret < 0)
 242                 return ret;
 243
 244         if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
 245                 return -ENOMEM;
 246
 247         read = trace_get_user(&parser, ubuf, cnt, ppos);
 248
 249         if (read >= 0 && trace_parser_loaded((&parser))) {
 250                 int set = 1;
 251
 252                 if (*parser.buffer == '!')
 253                         set = 0;
 254
 255                 parser.buffer[parser.idx] = 0;
 256
 257                 ret = ftrace_set_clr_event(parser.buffer + !set, set);
 258                 if (ret)
 259                         goto out_put;
 260         }
 261
 262         ret = read;
 263
 264  out_put:
 265         trace_parser_put(&parser);
 266
 267         return ret;
 268 }
 269
 270 static void *
 271 t_next(struct seq_file *m, void *v, loff_t *pos)
 272 {
 273         struct ftrace_event_call *call = v;
 274
 275         (*pos)++;
 276
 277         list_for_each_entry_continue(call, &ftrace_events, list) {
 278                 /*
 279                  * The ftrace subsystem is for showing formats only.
 280                  * They can not be enabled or disabled via the event files.
 281                  */
 282                 if (call->regfunc)
 283                         return call;
 284         }
 285
 286         return NULL;
 287 }
 288
 289 static void *t_start(struct seq_file *m, loff_t *pos)
 290 {
 291         struct ftrace_event_call *call;
 292         loff_t l;
 293
 294         mutex_lock(&event_mutex);
 295
 296         call = list_entry(&ftrace_events, struct ftrace_event_call, list);
 297         for (l = 0; l <= *pos; ) {
 298                 call = t_next(m, call, &l);
 299                 if (!call)
 300                         break;
 301         }
 302         return call;
 303 }
 304
 305 static void *
 306 s_next(struct seq_file *m, void *v, loff_t *pos)
 307 {
 308         struct ftrace_event_call *call = v;
 309
 310         (*pos)++;
 311
 312         list_for_each_entry_continue(call, &ftrace_events, list) {
 313                 if (call->enabled)
 314                         return call;
 315         }
 316
 317         return NULL;
 318 }
 319
 320 static void *s_start(struct seq_file *m, loff_t *pos)
 321 {
 322         struct ftrace_event_call *call;
 323         loff_t l;
 324
 325         mutex_lock(&event_mutex);
 326
 327         call = list_entry(&ftrace_events, struct ftrace_event_call, list);
 328         for (l = 0; l <= *pos; ) {
 329                 call = s_next(m, call, &l);
 330                 if (!call)
 331                         break;
 332         }
 333         return call;
 334 }
 335
 336 static int t_show(struct seq_file *m, void *v)
 337 {
 338         struct ftrace_event_call *call = v;
 339
 340         if (strcmp(call->system, TRACE_SYSTEM) != 0)
 341                 seq_printf(m, "%s:", call->system);
 342         seq_printf(m, "%s\n", call->name);
 343
 344         return 0;
 345 }
 346
 347 static void t_stop(struct seq_file *m, void *p)
 348 {
 349         mutex_unlock(&event_mutex);
 350 }
 351
 352 static int
 353 ftrace_event_seq_open(struct inode *inode, struct file *file)
 354 {
 355         const struct seq_operations *seq_ops;
 356
 357         if ((file->f_mode & FMODE_WRITE) &&
 358             (file->f_flags & O_TRUNC))
 359                 ftrace_clear_events();
 360
 361         seq_ops = inode->i_private;
 362         return seq_open(file, seq_ops);
 363 }
 364
 365 static ssize_t
 366 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 367                   loff_t *ppos)
 368 {
 369         struct ftrace_event_call *call = filp->private_data;
 370         char *buf;
 371
 372         if (call->enabled)
 373                 buf = "1\n";
 374         else
 375                 buf = "0\n";
 376
 377         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
 378 }
 379
 380 static ssize_t
 381 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 382                    loff_t *ppos)
 383 {
 384         struct ftrace_event_call *call = filp->private_data;
 385         char buf[64];
 386         unsigned long val;
 387         int ret;
 388
 389         if (cnt >= sizeof(buf))
 390                 return -EINVAL;
 391
 392         if (copy_from_user(&buf, ubuf, cnt))
 393                 return -EFAULT;
 394
 395         buf[cnt] = 0;
 396
 397         ret = strict_strtoul(buf, 10, &val);
 398         if (ret < 0)
 399                 return ret;
 400
 401         ret = tracing_update_buffers();
 402         if (ret < 0)
 403                 return ret;
 404
 405         switch (val) {
 406         case 0:
 407         case 1:
 408                 mutex_lock(&event_mutex);
 409                 ftrace_event_enable_disable(call, val);
 410                 mutex_unlock(&event_mutex);
 411                 break;
 412
 413         default:
 414                 return -EINVAL;
 415         }
 416
 417         *ppos += cnt;
 418
 419         return cnt;
 420 }
 421
 422 static ssize_t
 423 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 424                    loff_t *ppos)
 425 {
 426         const char set_to_char[4] = { '?', '0', '1', 'X' };
 427         const char *system = filp->private_data;
 428         struct ftrace_event_call *call;
 429         char buf[2];
 430         int set = 0;
 431         int ret;
 432
 433         mutex_lock(&event_mutex);
 434         list_for_each_entry(call, &ftrace_events, list) {
 435                 if (!call->name || !call->regfunc)
 436                         continue;
 437
 438                 if (system && strcmp(call->system, system) != 0)
 439                         continue;
 440
 441                 /*
 442                  * We need to find out if all the events are set
 443                  * or if all events or cleared, or if we have
 444                  * a mixture.
 445                  */
 446                 set |= (1 << !!call->enabled);
 447
 448                 /*
 449                  * If we have a mixture, no need to look further.
 450                  */
 451                 if (set == 3)
 452                         break;
 453         }
 454         mutex_unlock(&event_mutex);
 455
 456         buf[0] = set_to_char[set];
 457         buf[1] = '\n';
 458
 459         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
 460
 461         return ret;
 462 }
 463
 464 static ssize_t
 465 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 466                     loff_t *ppos)
 467 {
 468         const char *system = filp->private_data;
 469         unsigned long val;
 470         char buf[64];
 471         ssize_t ret;
 472
 473         if (cnt >= sizeof(buf))
 474                 return -EINVAL;
 475
 476         if (copy_from_user(&buf, ubuf, cnt))
 477                 return -EFAULT;
 478
 479         buf[cnt] = 0;
 480
 481         ret = strict_strtoul(buf, 10, &val);
 482         if (ret < 0)
 483                 return ret;
 484
 485         ret = tracing_update_buffers();
 486         if (ret < 0)
 487                 return ret;
 488
 489         if (val != 0 && val != 1)
 490                 return -EINVAL;
 491
 492         ret = __ftrace_set_clr_event(NULL, system, NULL, val);
 493         if (ret)
 494                 goto out;
 495
 496         ret = cnt;
 497
 498 out:
 499         *ppos += cnt;
 500
 501         return ret;
 502 }
 503
 504 extern char *__bad_type_size(void);
 505
 506 #undef FIELD
 507 #define FIELD(type, name)                                               \
 508         sizeof(type) != sizeof(field.name) ? __bad_type_size() :        \
 509         #type, "common_" #name, offsetof(typeof(field), name),          \
 510                 sizeof(field.name)
 511
 512 static int trace_write_header(struct trace_seq *s)
 513 {
 514         struct trace_entry field;
 515
 516         /* struct trace_entry */
 517         return trace_seq_printf(s,
 518                                 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 519                                 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 520                                 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 521                                 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 522                                 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 523                                 "\n",
 524                                 FIELD(unsigned short, type),
 525                                 FIELD(unsigned char, flags),
 526                                 FIELD(unsigned char, preempt_count),
 527                                 FIELD(int, pid),
 528                                 FIELD(int, lock_depth));
 529 }
 530
 531 static ssize_t
 532 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
 533                   loff_t *ppos)
 534 {
 535         struct ftrace_event_call *call = filp->private_data;
 536         struct trace_seq *s;
 537         char *buf;
 538         int r;
 539
 540         if (*ppos)
 541                 return 0;
 542
 543         s = kmalloc(sizeof(*s), GFP_KERNEL);
 544         if (!s)
 545                 return -ENOMEM;
 546
 547         trace_seq_init(s);
 548
 549         /* If any of the first writes fail, so will the show_format. */
 550
 551         trace_seq_printf(s, "name: %s\n", call->name);
 552         trace_seq_printf(s, "ID: %d\n", call->id);
 553         trace_seq_printf(s, "format:\n");
 554         trace_write_header(s);
 555
 556         r = call->show_format(call, s);
 557         if (!r) {
 558                 /*
 559                  * ug!  The format output is bigger than a PAGE!!
 560                  */
 561                 buf = "FORMAT TOO BIG\n";
 562                 r = simple_read_from_buffer(ubuf, cnt, ppos,
 563                                               buf, strlen(buf));
 564                 goto out;
 565         }
 566
 567         r = simple_read_from_buffer(ubuf, cnt, ppos,
 568                                     s->buffer, s->len);
 569  out:
 570         kfree(s);
 571         return r;
 572 }
 573
 574 static ssize_t
 575 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 576 {
 577         struct ftrace_event_call *call = filp->private_data;
 578         struct trace_seq *s;
 579         int r;
 580
 581         if (*ppos)
 582                 return 0;
 583
 584         s = kmalloc(sizeof(*s), GFP_KERNEL);
 585         if (!s)
 586                 return -ENOMEM;
 587
 588         trace_seq_init(s);
 589         trace_seq_printf(s, "%d\n", call->id);
 590
 591         r = simple_read_from_buffer(ubuf, cnt, ppos,
 592                                     s->buffer, s->len);
 593         kfree(s);
 594         return r;
 595 }
 596
 597 static ssize_t
 598 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 599                   loff_t *ppos)
 600 {
 601         struct ftrace_event_call *call = filp->private_data;
 602         struct trace_seq *s;
 603         int r;
 604
 605         if (*ppos)
 606                 return 0;
 607
 608         s = kmalloc(sizeof(*s), GFP_KERNEL);
 609         if (!s)
 610                 return -ENOMEM;
 611
 612         trace_seq_init(s);
 613
 614         print_event_filter(call, s);
 615         r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 616
 617         kfree(s);
 618
 619         return r;
 620 }
 621
 622 static ssize_t
 623 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 624                    loff_t *ppos)
 625 {
 626         struct ftrace_event_call *call = filp->private_data;
 627         char *buf;
 628         int err;
 629
 630         if (cnt >= PAGE_SIZE)
 631                 return -EINVAL;
 632
 633         buf = (char *)__get_free_page(GFP_TEMPORARY);
 634         if (!buf)
 635                 return -ENOMEM;
 636
 637         if (copy_from_user(buf, ubuf, cnt)) {
 638                 free_page((unsigned long) buf);
 639                 return -EFAULT;
 640         }
 641         buf[cnt] = '\0';
 642
 643         err = apply_event_filter(call, buf);
 644         free_page((unsigned long) buf);
 645         if (err < 0)
 646                 return err;
 647
 648         *ppos += cnt;
 649
 650         return cnt;
 651 }
 652
 653 static ssize_t
 654 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 655                       loff_t *ppos)
 656 {
 657         struct event_subsystem *system = filp->private_data;
 658         struct trace_seq *s;
 659         int r;
 660
 661         if (*ppos)
 662                 return 0;
 663
 664         s = kmalloc(sizeof(*s), GFP_KERNEL);
 665         if (!s)
 666                 return -ENOMEM;
 667
 668         trace_seq_init(s);
 669
 670         print_subsystem_event_filter(system, s);
 671         r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 672
 673         kfree(s);
 674
 675         return r;
 676 }
 677
 678 static ssize_t
 679 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 680                        loff_t *ppos)
 681 {
 682         struct event_subsystem *system = filp->private_data;
 683         char *buf;
 684         int err;
 685
 686         if (cnt >= PAGE_SIZE)
 687                 return -EINVAL;
 688
 689         buf = (char *)__get_free_page(GFP_TEMPORARY);
 690         if (!buf)
 691                 return -ENOMEM;
 692
 693         if (copy_from_user(buf, ubuf, cnt)) {
 694                 free_page((unsigned long) buf);
 695                 return -EFAULT;
 696         }
 697         buf[cnt] = '\0';
 698
 699         err = apply_subsystem_event_filter(system, buf);
 700         free_page((unsigned long) buf);
 701         if (err < 0)
 702                 return err;
 703
 704         *ppos += cnt;
 705
 706         return cnt;
 707 }
 708
 709 static ssize_t
 710 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 711 {
 712         int (*func)(struct trace_seq *s) = filp->private_data;
 713         struct trace_seq *s;
 714         int r;
 715
 716         if (*ppos)
 717                 return 0;
 718
 719         s = kmalloc(sizeof(*s), GFP_KERNEL);
 720         if (!s)
 721                 return -ENOMEM;
 722
 723         trace_seq_init(s);
 724
 725         func(s);
 726         r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 727
 728         kfree(s);
 729
 730         return r;
 731 }
 732
 733 static const struct seq_operations show_event_seq_ops = {
 734         .start = t_start,
 735         .next = t_next,
 736         .show = t_show,
 737         .stop = t_stop,
 738 };
 739
 740 static const struct seq_operations show_set_event_seq_ops = {
 741         .start = s_start,
 742         .next = s_next,
 743         .show = t_show,
 744         .stop = t_stop,
 745 };
 746
 747 static const struct file_operations ftrace_avail_fops = {
 748         .open = ftrace_event_seq_open,
 749         .read = seq_read,
 750         .llseek = seq_lseek,
 751         .release = seq_release,
 752 };
 753
 754 static const struct file_operations ftrace_set_event_fops = {
 755         .open = ftrace_event_seq_open,
 756         .read = seq_read,
 757         .write = ftrace_event_write,
 758         .llseek = seq_lseek,
 759         .release = seq_release,
 760 };
 761
 762 static const struct file_operations ftrace_enable_fops = {
 763         .open = tracing_open_generic,
 764         .read = event_enable_read,
 765         .write = event_enable_write,
 766 };
 767
 768 static const struct file_operations ftrace_event_format_fops = {
 769         .open = tracing_open_generic,
 770         .read = event_format_read,
 771 };
 772
 773 static const struct file_operations ftrace_event_id_fops = {
 774         .open = tracing_open_generic,
 775         .read = event_id_read,
 776 };
 777
 778 static const struct file_operations ftrace_event_filter_fops = {
 779         .open = tracing_open_generic,
 780         .read = event_filter_read,
 781         .write = event_filter_write,
 782 };
 783
 784 static const struct file_operations ftrace_subsystem_filter_fops = {
 785         .open = tracing_open_generic,
 786         .read = subsystem_filter_read,
 787         .write = subsystem_filter_write,
 788 };
 789
 790 static const struct file_operations ftrace_system_enable_fops = {
 791         .open = tracing_open_generic,
 792         .read = system_enable_read,
 793         .write = system_enable_write,
 794 };
 795
 796 static const struct file_operations ftrace_show_header_fops = {
 797         .open = tracing_open_generic,
 798         .read = show_header,
 799 };
 800
 801 static struct dentry *event_trace_events_dir(void)
 802 {
 803         static struct dentry *d_tracer;
 804         static struct dentry *d_events;
 805
 806         if (d_events)
 807                 return d_events;
 808
 809         d_tracer = tracing_init_dentry();
 810         if (!d_tracer)
 811                 return NULL;
 812
 813         d_events = debugfs_create_dir("events", d_tracer);
 814         if (!d_events)
 815                 pr_warning("Could not create debugfs "
 816                            "'events' directory\n");
 817
 818         return d_events;
 819 }
 820
 821 static LIST_HEAD(event_subsystems);
 822
 823 static struct dentry *
 824 event_subsystem_dir(const char *name, struct dentry *d_events)
 825 {
 826         struct event_subsystem *system;
 827         struct dentry *entry;
 828
 829         /* First see if we did not already create this dir */
 830         list_for_each_entry(system, &event_subsystems, list) {
 831                 if (strcmp(system->name, name) == 0) {
 832                         system->nr_events++;
 833                         return system->entry;
 834                 }
 835         }
 836
 837         /* need to create new entry */
 838         system = kmalloc(sizeof(*system), GFP_KERNEL);
 839         if (!system) {
 840                 pr_warning("No memory to create event subsystem %s\n",
 841                            name);
 842                 return d_events;
 843         }
 844
 845         system->entry = debugfs_create_dir(name, d_events);
 846         if (!system->entry) {
 847                 pr_warning("Could not create event subsystem %s\n",
 848                            name);
 849                 kfree(system);
 850                 return d_events;
 851         }
 852
 853         system->nr_events = 1;
 854         system->name = kstrdup(name, GFP_KERNEL);
 855         if (!system->name) {
 856                 debugfs_remove(system->entry);
 857                 kfree(system);
 858                 return d_events;
 859         }
 860
 861         list_add(&system->list, &event_subsystems);
 862
 863         system->filter = NULL;
 864
 865         system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
 866         if (!system->filter) {
 867                 pr_warning("Could not allocate filter for subsystem "
 868                            "'%s'\n", name);
 869                 return system->entry;
 870         }
 871
 872         entry = debugfs_create_file("filter", 0644, system->entry, system,
 873                                     &ftrace_subsystem_filter_fops);
 874         if (!entry) {
 875                 kfree(system->filter);
 876                 system->filter = NULL;
 877                 pr_warning("Could not create debugfs "
 878                            "'%s/filter' entry\n", name);
 879         }
 880
 881         entry = trace_create_file("enable", 0644, system->entry,
 882                                   (void *)system->name,
 883                                   &ftrace_system_enable_fops);
 884
 885         return system->entry;
 886 }
 887
 888 static int
 889 event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
 890                  const struct file_operations *id,
 891                  const struct file_operations *enable,
 892                  const struct file_operations *filter,
 893                  const struct file_operations *format)
 894 {
 895         struct dentry *entry;
 896         int ret;
 897
 898         /*
 899          * If the trace point header did not define TRACE_SYSTEM
 900          * then the system would be called "TRACE_SYSTEM".
 901          */
 902         if (strcmp(call->system, TRACE_SYSTEM) != 0)
 903                 d_events = event_subsystem_dir(call->system, d_events);
 904
 905         call->dir = debugfs_create_dir(call->name, d_events);
 906         if (!call->dir) {
 907                 pr_warning("Could not create debugfs "
 908                            "'%s' directory\n", call->name);
 909                 return -1;
 910         }
 911
 912         if (call->regfunc)
 913                 entry = trace_create_file("enable", 0644, call->dir, call,
 914                                           enable);
 915
 916         if (call->id && call->profile_enable)
 917                 entry = trace_create_file("id", 0444, call->dir, call,
 918                                           id);
 919
 920         if (call->define_fields) {
 921                 ret = call->define_fields(call);
 922                 if (ret < 0) {
 923                         pr_warning("Could not initialize trace point"
 924                                    " events/%s\n", call->name);
 925                         return ret;
 926                 }
 927                 entry = trace_create_file("filter", 0644, call->dir, call,
 928                                           filter);
 929         }
 930
 931         /* A trace may not want to export its format */
 932         if (!call->show_format)
 933                 return 0;
 934
 935         entry = trace_create_file("format", 0444, call->dir, call,
 936                                   format);
 937
 938         return 0;
 939 }
 940
 941 #define for_each_event(event, start, end)                       \
 942         for (event = start;                                     \
 943              (unsigned long)event < (unsigned long)end;         \
 944              event++)
 945
 946 #ifdef CONFIG_MODULES
 947
 948 static LIST_HEAD(ftrace_module_file_list);
 949
 950 /*
 951  * Modules must own their file_operations to keep up with
 952  * reference counting.
 953  */
 954 struct ftrace_module_file_ops {
 955         struct list_head                list;
 956         struct module                   *mod;
 957         struct file_operations          id;
 958         struct file_operations          enable;
 959         struct file_operations          format;
 960         struct file_operations          filter;
 961 };
 962
 963 static void remove_subsystem_dir(const char *name)
 964 {
 965         struct event_subsystem *system;
 966
 967         if (strcmp(name, TRACE_SYSTEM) == 0)
 968                 return;
 969
 970         list_for_each_entry(system, &event_subsystems, list) {
 971                 if (strcmp(system->name, name) == 0) {
 972                         if (!--system->nr_events) {
 973                                 struct event_filter *filter = system->filter;
 974
 975                                 debugfs_remove_recursive(system->entry);
 976                                 list_del(&system->list);
 977                                 if (filter) {
 978                                         kfree(filter->filter_string);
 979                                         kfree(filter);
 980                                 }
 981                                 kfree(system->name);
 982                                 kfree(system);
 983                         }
 984                         break;
 985                 }
 986         }
 987 }
 988
 989 static struct ftrace_module_file_ops *
 990 trace_create_file_ops(struct module *mod)
 991 {
 992         struct ftrace_module_file_ops *file_ops;
 993
 994         /*
 995          * This is a bit of a PITA. To allow for correct reference
 996          * counting, modules must "own" their file_operations.
 997          * To do this, we allocate the file operations that will be
 998          * used in the event directory.
 999          */
1000
1001         file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
1002         if (!file_ops)
1003                 return NULL;
1004
1005         file_ops->mod = mod;
1006
1007         file_ops->id = ftrace_event_id_fops;
1008         file_ops->id.owner = mod;
1009
1010         file_ops->enable = ftrace_enable_fops;
1011         file_ops->enable.owner = mod;
1012
1013         file_ops->filter = ftrace_event_filter_fops;
1014         file_ops->filter.owner = mod;
1015
1016         file_ops->format = ftrace_event_format_fops;
1017         file_ops->format.owner = mod;
1018
1019         list_add(&file_ops->list, &ftrace_module_file_list);
1020
1021         return file_ops;
1022 }
1023
1024 static void trace_module_add_events(struct module *mod)
1025 {
1026         struct ftrace_module_file_ops *file_ops = NULL;
1027         struct ftrace_event_call *call, *start, *end;
1028         struct dentry *d_events;
1029         int ret;
1030
1031         start = mod->trace_events;
1032         end = mod->trace_events + mod->num_trace_events;
1033
1034         if (start == end)
1035                 return;
1036
1037         d_events = event_trace_events_dir();
1038         if (!d_events)
1039                 return;
1040
1041         for_each_event(call, start, end) {
1042                 /* The linker may leave blanks */
1043                 if (!call->name)
1044                         continue;
1045                 if (call->raw_init) {
1046                         ret = call->raw_init();
1047                         if (ret < 0) {
1048                                 if (ret != -ENOSYS)
1049                                         pr_warning("Could not initialize trace "
1050                                         "point events/%s\n", call->name);
1051                                 continue;
1052                         }
1053                 }
1054                 /*
1055                  * This module has events, create file ops for this module
1056                  * if not already done.
1057                  */
1058                 if (!file_ops) {
1059                         file_ops = trace_create_file_ops(mod);
1060                         if (!file_ops)
1061                                 return;
1062                 }
1063                 call->mod = mod;
1064                 list_add(&call->list, &ftrace_events);
1065                 event_create_dir(call, d_events,
1066                                  &file_ops->id, &file_ops->enable,
1067                                  &file_ops->filter, &file_ops->format);
1068         }
1069 }
1070
1071 static void trace_module_remove_events(struct module *mod)
1072 {
1073         struct ftrace_module_file_ops *file_ops;
1074         struct ftrace_event_call *call, *p;
1075         bool found = false;
1076
1077         down_write(&trace_event_mutex);
1078         list_for_each_entry_safe(call, p, &ftrace_events, list) {
1079                 if (call->mod == mod) {
1080                         found = true;
1081                         ftrace_event_enable_disable(call, 0);
1082                         if (call->event)
1083                                 __unregister_ftrace_event(call->event);
1084                         debugfs_remove_recursive(call->dir);
1085                         list_del(&call->list);
1086                         trace_destroy_fields(call);
1087                         destroy_preds(call);
1088                         remove_subsystem_dir(call->system);
1089                 }
1090         }
1091
1092         /* Now free the file_operations */
1093         list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1094                 if (file_ops->mod == mod)
1095                         break;
1096         }
1097         if (&file_ops->list != &ftrace_module_file_list) {
1098                 list_del(&file_ops->list);
1099                 kfree(file_ops);
1100         }
1101
1102         /*
1103          * It is safest to reset the ring buffer if the module being unloaded
1104          * registered any events.
1105          */
1106         if (found)
1107                 tracing_reset_current_online_cpus();
1108         up_write(&trace_event_mutex);
1109 }
1110
1111 static int trace_module_notify(struct notifier_block *self,
1112                                unsigned long val, void *data)
1113 {
1114         struct module *mod = data;
1115
1116         mutex_lock(&event_mutex);
1117         switch (val) {
1118         case MODULE_STATE_COMING:
1119                 trace_module_add_events(mod);
1120                 break;
1121         case MODULE_STATE_GOING:
1122                 trace_module_remove_events(mod);
1123                 break;
1124         }
1125         mutex_unlock(&event_mutex);
1126
1127         return 0;
1128 }
1129 #else
/*
 * Stub used when CONFIG_MODULES is not set: there are no module events
 * to add or remove, so the callback ignores its arguments and returns 0.
 */
static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	return 0;
}
1135 #endif /* CONFIG_MODULES */
1136
1137 static struct notifier_block trace_module_nb = {
1138         .notifier_call = trace_module_notify,
1139         .priority = 0,
1140 };
1141
1142 extern struct ftrace_event_call __start_ftrace_events[];
1143 extern struct ftrace_event_call __stop_ftrace_events[];
1144
1145 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1146
1147 static __init int setup_trace_event(char *str)
1148 {
1149         strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1150         ring_buffer_expanded = 1;
1151         tracing_selftest_disabled = 1;
1152
1153         return 1;
1154 }
1155 __setup("trace_event=", setup_trace_event);
1156
1157 static __init int event_trace_init(void)
1158 {
1159         struct ftrace_event_call *call;
1160         struct dentry *d_tracer;
1161         struct dentry *entry;
1162         struct dentry *d_events;
1163         int ret;
1164         char *buf = bootup_event_buf;
1165         char *token;
1166
1167         d_tracer = tracing_init_dentry();
1168         if (!d_tracer)
1169                 return 0;
1170
1171         entry = debugfs_create_file("available_events", 0444, d_tracer,
1172                                     (void *)&show_event_seq_ops,
1173                                     &ftrace_avail_fops);
1174         if (!entry)
1175                 pr_warning("Could not create debugfs "
1176                            "'available_events' entry\n");
1177
1178         entry = debugfs_create_file("set_event", 0644, d_tracer,
1179                                     (void *)&show_set_event_seq_ops,
1180                                     &ftrace_set_event_fops);
1181         if (!entry)
1182                 pr_warning("Could not create debugfs "
1183                            "'set_event' entry\n");
1184
1185         d_events = event_trace_events_dir();
1186         if (!d_events)
1187                 return 0;
1188
1189         /* ring buffer internal formats */
1190         trace_create_file("header_page", 0444, d_events,
1191                           ring_buffer_print_page_header,
1192                           &ftrace_show_header_fops);
1193
1194         trace_create_file("header_event", 0444, d_events,
1195                           ring_buffer_print_entry_header,
1196                           &ftrace_show_header_fops);
1197
1198         trace_create_file("enable", 0644, d_events,
1199                           NULL, &ftrace_system_enable_fops);
1200
1201         for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1202                 /* The linker may leave blanks */
1203                 if (!call->name)
1204                         continue;
1205                 if (call->raw_init) {
1206                         ret = call->raw_init();
1207                         if (ret < 0) {
1208                                 if (ret != -ENOSYS)
1209                                         pr_warning("Could not initialize trace "
1210                                         "point events/%s\n", call->name);
1211                                 continue;
1212                         }
1213                 }
1214                 list_add(&call->list, &ftrace_events);
1215                 event_create_dir(call, d_events, &ftrace_event_id_fops,
1216                                  &ftrace_enable_fops, &ftrace_event_filter_fops,
1217                                  &ftrace_event_format_fops);
1218         }
1219
1220         while (true) {
1221                 token = strsep(&buf, ",");
1222
1223                 if (!token)
1224                         break;
1225                 if (!*token)
1226                         continue;
1227
1228                 ret = ftrace_set_clr_event(token, 1);
1229                 if (ret)
1230                         pr_warning("Failed to enable trace event: %s\n", token);
1231         }
1232
1233         ret = register_module_notifier(&trace_module_nb);
1234         if (ret)
1235                 pr_warning("Failed to register trace events module notifier\n");
1236
1237         return 0;
1238 }
1239 fs_initcall(event_trace_init);
1240
1241 #ifdef CONFIG_FTRACE_STARTUP_TEST
1242
1243 static DEFINE_SPINLOCK(test_spinlock);
1244 static DEFINE_SPINLOCK(test_spinlock_irq);
1245 static DEFINE_MUTEX(test_mutex);
1246
1247 static __init void test_work(struct work_struct *dummy)
1248 {
1249         spin_lock(&test_spinlock);
1250         spin_lock_irq(&test_spinlock_irq);
1251         udelay(1);
1252         spin_unlock_irq(&test_spinlock_irq);
1253         spin_unlock(&test_spinlock);
1254
1255         mutex_lock(&test_mutex);
1256         msleep(1);
1257         mutex_unlock(&test_mutex);
1258 }
1259
1260 static __init int event_test_thread(void *unused)
1261 {
1262         void *test_malloc;
1263
1264         test_malloc = kmalloc(1234, GFP_KERNEL);
1265         if (!test_malloc)
1266                 pr_info("failed to kmalloc\n");
1267
1268         schedule_on_each_cpu(test_work);
1269
1270         kfree(test_malloc);
1271
1272         set_current_state(TASK_INTERRUPTIBLE);
1273         while (!kthread_should_stop())
1274                 schedule();
1275
1276         return 0;
1277 }
1278
1279 /*
1280  * Do various things that may trigger events.
1281  */
1282 static __init void event_test_stuff(void)
1283 {
1284         struct task_struct *test_thread;
1285
1286         test_thread = kthread_run(event_test_thread, NULL, "test-events");
1287         msleep(1);
1288         kthread_stop(test_thread);
1289 }
1290
1291 /*
1292  * For every trace event defined, we will test each trace point separately,
1293  * and then by groups, and finally all trace points.
1294  */
1295 static __init void event_trace_self_tests(void)
1296 {
1297         struct ftrace_event_call *call;
1298         struct event_subsystem *system;
1299         int ret;
1300
1301         pr_info("Running tests on trace events:\n");
1302
1303         list_for_each_entry(call, &ftrace_events, list) {
1304
1305                 /* Only test those that have a regfunc */
1306                 if (!call->regfunc)
1307                         continue;
1308
1309 /*
1310  * Testing syscall events here is pretty useless, but
1311  * we still do it if configured. But this is time consuming.
1312  * What we really need is a user thread to perform the
1313  * syscalls as we test.
1314  */
1315 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1316                 if (call->system &&
1317                     strcmp(call->system, "syscalls") == 0)
1318                         continue;
1319 #endif
1320
1321                 pr_info("Testing event %s: ", call->name);
1322
1323                 /*
1324                  * If an event is already enabled, someone is using
1325                  * it and the self test should not be on.
1326                  */
1327                 if (call->enabled) {
1328                         pr_warning("Enabled event during self test!\n");
1329                         WARN_ON_ONCE(1);
1330                         continue;
1331                 }
1332
1333                 ftrace_event_enable_disable(call, 1);
1334                 event_test_stuff();
1335                 ftrace_event_enable_disable(call, 0);
1336
1337                 pr_cont("OK\n");
1338         }
1339
1340         /* Now test at the sub system level */
1341
1342         pr_info("Running tests on trace event systems:\n");
1343
1344         list_for_each_entry(system, &event_subsystems, list) {
1345
1346                 /* the ftrace system is special, skip it */
1347                 if (strcmp(system->name, "ftrace") == 0)
1348                         continue;
1349
1350                 pr_info("Testing event system %s: ", system->name);
1351
1352                 ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1353                 if (WARN_ON_ONCE(ret)) {
1354                         pr_warning("error enabling system %s\n",
1355                                    system->name);
1356                         continue;
1357                 }
1358
1359                 event_test_stuff();
1360
1361                 ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1362                 if (WARN_ON_ONCE(ret))
1363                         pr_warning("error disabling system %s\n",
1364                                    system->name);
1365
1366                 pr_cont("OK\n");
1367         }
1368
1369         /* Test with all events enabled */
1370
1371         pr_info("Running tests on all trace events:\n");
1372         pr_info("Testing all events: ");
1373
1374         ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1375         if (WARN_ON_ONCE(ret)) {
1376                 pr_warning("error enabling all events\n");
1377                 return;
1378         }
1379
1380         event_test_stuff();
1381
1382         /* reset sysname */
1383         ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1384         if (WARN_ON_ONCE(ret)) {
1385                 pr_warning("error disabling all events\n");
1386                 return;
1387         }
1388
1389         pr_cont("OK\n");
1390 }
1391
1392 #ifdef CONFIG_FUNCTION_TRACER
1393
1394 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1395
1396 static void
1397 function_test_events_call(unsigned long ip, unsigned long parent_ip)
1398 {
1399         struct ring_buffer_event *event;
1400         struct ring_buffer *buffer;
1401         struct ftrace_entry *entry;
1402         unsigned long flags;
1403         long disabled;
1404         int resched;
1405         int cpu;
1406         int pc;
1407
1408         pc = preempt_count();
1409         resched = ftrace_preempt_disable();
1410         cpu = raw_smp_processor_id();
1411         disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1412
1413         if (disabled != 1)
1414                 goto out;
1415
1416         local_save_flags(flags);
1417
1418         event = trace_current_buffer_lock_reserve(&buffer,
1419                                                   TRACE_FN, sizeof(*entry),
1420                                                   flags, pc);
1421         if (!event)
1422                 goto out;
1423         entry   = ring_buffer_event_data(event);
1424         entry->ip                       = ip;
1425         entry->parent_ip                = parent_ip;
1426
1427         trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1428
1429  out:
1430         atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1431         ftrace_preempt_enable(resched);
1432 }
1433
1434 static struct ftrace_ops trace_ops __initdata  =
1435 {
1436         .func = function_test_events_call,
1437 };
1438
1439 static __init void event_trace_self_test_with_function(void)
1440 {
1441         register_ftrace_function(&trace_ops);
1442         pr_info("Running tests again, along with the function tracer\n");
1443         event_trace_self_tests();
1444         unregister_ftrace_function(&trace_ops);
1445 }
1446 #else
1447 static __init void event_trace_self_test_with_function(void)
1448 {
1449 }
1450 #endif
1451
1452 static __init int event_trace_self_tests_init(void)
1453 {
1454         if (!tracing_selftest_disabled) {
1455                 event_trace_self_tests();
1456                 event_trace_self_test_with_function();
1457         }
1458
1459         return 0;
1460 }
1461
1462 late_initcall(event_trace_self_tests_init);
1463
1464 #endif