Update to 3.4-rc5.
[linux-flexiantxendom0-3.2.10.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
/* How to treat a pre-existing output file: overwrite it or append to it. */
enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
};
38
/*
 * All state for one 'perf record' run.  Handed to callbacks via the
 * embedded perf_tool (recovered with container_of(), see
 * process_synthesized_event()) and to the on_exit() handlers as a
 * void pointer.
 */
struct perf_record {
        struct perf_tool        tool;           /* event-delivery callbacks */
        struct perf_record_opts opts;           /* command-line recording options */
        u64                     bytes_written;  /* event payload bytes written so far */
        const char              *output_name;   /* -o argument; NULL means pick default */
        struct perf_evlist      *evlist;        /* the events being recorded */
        struct perf_session     *session;
        const char              *progname;      /* argv[0], for psignal() output */
        const char              *uid_str;       /* -u/--uid option string */
        int                     output;         /* output file descriptor */
        unsigned int            page_size;      /* sysconf(_SC_PAGE_SIZE) */
        int                     realtime_prio;  /* nonzero: requested SCHED_FIFO priority */
        enum write_mode_t       write_mode;     /* overwrite vs. append */
        bool                    no_buildid;     /* -B: skip build-id collection */
        bool                    no_buildid_cache; /* -N: don't update the buildid cache */
        bool                    force;          /* -f: overwrite existing file (deprecated) */
        bool                    file_new;       /* false when appending to existing data */
        bool                    append_file;    /* -A: incremental profiling */
        long                    samples;        /* count of mmap reads that found data */
        off_t                   post_processing_offset; /* file offset where event data starts */
};
60
/*
 * Account for @size bytes that reached the output file without going
 * through write_output() (e.g. perf_event__synthesize_tracing_data()
 * writing to the fd directly), so bytes_written stays accurate for the
 * final header/data_size bookkeeping.
 */
static void advance_output(struct perf_record *rec, size_t size)
{
        rec->bytes_written += size;
}
65
66 static void write_output(struct perf_record *rec, void *buf, size_t size)
67 {
68         while (size) {
69                 int ret = write(rec->output, buf, size);
70
71                 if (ret < 0)
72                         die("failed to write");
73
74                 size -= ret;
75                 buf += ret;
76
77                 rec->bytes_written += ret;
78         }
79 }
80
/*
 * perf_tool callback: copy a synthesized event verbatim to the output
 * file.  @sample and @machine are unused; always returns 0.
 */
static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __used,
                                     struct machine *machine __used)
{
        struct perf_record *rec = container_of(tool, struct perf_record, tool);
        write_output(rec, event, event->header.size);
        return 0;
}
90
/*
 * Drain one mmap'ed ring buffer: write everything between our last
 * read position (md->prev) and the kernel's write head to the output
 * file, then advance the tail so the kernel can reuse the space.
 */
static void perf_record__mmap_read(struct perf_record *rec,
                                   struct perf_mmap *md)
{
        unsigned int head = perf_mmap__read_head(md);
        unsigned int old = md->prev;
        /* Data starts one page past the base; the first page holds the
         * head/tail control data read/written above and below. */
        unsigned char *data = md->base + rec->page_size;
        unsigned long size;
        void *buf;

        /* Nothing new since the last drain. */
        if (old == head)
                return;

        rec->samples++;

        size = head - old;

        /*
         * If the unread region wraps past the end of the ring buffer,
         * first write the chunk up to the end...
         */
        if ((old & md->mask) + size != (head & md->mask)) {
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                write_output(rec, buf, size);
        }

        /* ...then the (remaining) chunk starting at the buffer start. */
        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        write_output(rec, buf, size);

        md->prev = old;
        perf_mmap__write_tail(md, old);
}
124
/* Set by sig_handler(); polled by the main loop in __cmd_record(). */
static volatile int done = 0;
/* Last signal received (-1 if none); exit handlers re-raise it. */
static volatile int signr = -1;
/* Set on SIGCHLD so perf_record__sig_exit() knows the workload exited. */
static volatile int child_finished = 0;
128
/*
 * Shared handler for SIGINT/SIGUSR1/SIGCHLD: record which signal fired
 * and tell the main record loop to stop.
 */
static void sig_handler(int sig)
{
        /* SIGCHLD means the forked workload has exited. */
        if (sig == SIGCHLD)
                child_finished = 1;

        done = 1;
        signr = sig;
}
137
/*
 * on_exit() handler: reap (terminating if still running) the forked
 * workload, then re-raise the fatal signal with default disposition so
 * the parent shell sees the real termination status.
 */
static void perf_record__sig_exit(int exit_status __used, void *arg)
{
        struct perf_record *rec = arg;
        int status;

        if (rec->evlist->workload.pid > 0) {
                /* Workload still alive: ask it to stop before reaping. */
                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&status);
                if (WIFSIGNALED(status))
                        psignal(WTERMSIG(status), rec->progname);
        }

        /*
         * Normal exit, or SIGUSR1 (treated as a quiet stop — see the
         * end of __cmd_record()): nothing to re-raise.
         */
        if (signr == -1 || signr == SIGUSR1)
                return;

        signal(signr, SIG_DFL);
        kill(getpid(), signr);
}
158
159 static bool perf_evlist__equal(struct perf_evlist *evlist,
160                                struct perf_evlist *other)
161 {
162         struct perf_evsel *pos, *pair;
163
164         if (evlist->nr_entries != other->nr_entries)
165                 return false;
166
167         pair = list_entry(other->entries.next, struct perf_evsel, node);
168
169         list_for_each_entry(pos, &evlist->entries, node) {
170                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
171                         return false;
172                 pair = list_entry(pair->node.next, struct perf_evsel, node);
173         }
174
175         return true;
176 }
177
/*
 * Open a counter fd for every event in the evlist — stripping attr
 * features the running kernel rejects and retrying — then mmap the
 * ring buffers and attach the evlist to the session.  Unrecoverable
 * failures exit the process via die()/exit().
 */
static void perf_record__open(struct perf_record *rec)
{
        struct perf_evsel *pos, *first;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct perf_record_opts *opts = &rec->opts;

        first = list_entry(evlist->entries.next, struct perf_evsel, node);

        perf_evlist__config_attrs(evlist, opts);

        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
                struct xyarray *group_fd = NULL;
                /*
                 * Check if parse_single_tracepoint_event has already asked for
                 * PERF_SAMPLE_TIME.
                 *
                 * XXX this is kludgy but short term fix for problems introduced by
                 * eac23d1c that broke 'perf script' by having different sample_types
                 * when using multiple tracepoint events when we use a perf binary
                 * that tries to use sample_id_all on an older kernel.
                 *
                 * We need to move counter creation to perf_session, support
                 * different sample_types, etc.
                 */
                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

                /* Non-leader group members open against the leader's fds. */
                if (opts->group && pos != first)
                        group_fd = first->fd;
fallback_missing_features:
                if (opts->exclude_guest_missing)
                        attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
                attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
                                     opts->group, group_fd) < 0) {
                        int err = errno;

                        if (err == EPERM || err == EACCES) {
                                ui__error_paranoid();
                                exit(EXIT_FAILURE);
                        } else if (err ==  ENODEV && opts->cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
                        } else if (err == EINVAL) {
                                /*
                                 * EINVAL from an older kernel: drop the attr
                                 * features it doesn't know about, one at a
                                 * time, and retry the open.
                                 */
                                if (!opts->exclude_guest_missing &&
                                    (attr->exclude_guest || attr->exclude_host)) {
                                        pr_debug("Old kernel, cannot exclude "
                                                 "guest or host samples.\n");
                                        opts->exclude_guest_missing = true;
                                        goto fallback_missing_features;
                                } else if (!opts->sample_id_all_missing) {
                                        /*
                                         * Old kernel, no attr->sample_id_type_all field
                                         */
                                        opts->sample_id_all_missing = true;
                                        if (!opts->sample_time && !opts->raw_samples && !time_needed)
                                                attr->sample_type &= ~PERF_SAMPLE_TIME;

                                        goto retry_sample_id;
                                }
                        }

                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
                         * is always available even if no PMU support:
                         */
                        if (attr->type == PERF_TYPE_HARDWARE
                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                                if (verbose)
                                        ui__warning("The cycles event is not supported, "
                                                    "trying to fall back to cpu-clock-ticks\n");
                                attr->type = PERF_TYPE_SOFTWARE;
                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                                goto try_again;
                        }

                        if (err == ENOENT) {
                                ui__warning("The %s event is not supported.\n",
                                            event_name(pos));
                                exit(EXIT_FAILURE);
                        }

                        printf("\n");
                        error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                              err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
                                die("No hardware sampling interrupt available."
                                    " No APIC? If so then you can boot the kernel"
                                    " with the \"lapic\" boot parameter to"
                                    " force-enable it.\n");
#endif

                        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                }
        }

        if (perf_evlist__set_filters(evlist)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                exit(-1);
        }

        if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
                if (errno == EPERM)
                        die("Permission error mapping pages.\n"
                            "Consider increasing "
                            "/proc/sys/kernel/perf_event_mlock_kb,\n"
                            "or try again with a smaller value of -m/--mmap_pages.\n"
                            "(current value: %d)\n", opts->mmap_pages);
                else if (!is_power_of_2(opts->mmap_pages))
                        die("--mmap_pages/-m value must be a power of two.");

                die("failed to mmap with %d (%s)\n", errno, strerror(errno));
        }

        /* When appending, the on-disk evlist must match what we record. */
        if (rec->file_new)
                session->evlist = evlist;
        else {
                if (!perf_evlist__equal(session->evlist, evlist)) {
                        fprintf(stderr, "incompatible append\n");
                        exit(-1);
                }
        }

        perf_session__update_sample_type(session);
}
311
/*
 * Re-scan the event data just recorded (everything after
 * post_processing_offset up to the current file position) so the DSOs
 * that were actually hit get marked for build-id emission.
 */
static int process_buildids(struct perf_record *rec)
{
        /* NOTE(review): an lseek() failure (-1) would wrap to a huge u64
         * here rather than being reported — confirm this can't happen. */
        u64 size = lseek(rec->output, 0, SEEK_CUR);

        /* Empty file: nothing was recorded. */
        if (size == 0)
                return 0;

        rec->session->fd = rec->output;
        return __perf_session__process_events(rec->session, rec->post_processing_offset,
                                              size - rec->post_processing_offset,
                                              size, &build_id__mark_dso_hit_ops);
}
324
/*
 * on_exit() handler: finalize the output file — account the recorded
 * bytes, run build-id post-processing, rewrite the header — and tear
 * down the session/evlist.  Nothing to do for pipe output, where the
 * header was streamed up front (see __cmd_record()).
 */
static void perf_record__exit(int status __used, void *arg)
{
        struct perf_record *rec = arg;

        if (!rec->opts.pipe_output) {
                rec->session->header.data_size += rec->bytes_written;

                if (!rec->no_buildid)
                        process_buildids(rec);
                perf_session__write_header(rec->session, rec->evlist,
                                           rec->output, true);
                perf_session__delete(rec->session);
                perf_evlist__delete(rec->evlist);
                symbol__exit();
        }
}
341
/*
 * perf_session__process_machines() callback: synthesize module and
 * kernel mmap events for each guest machine (the host machine is
 * handled separately in __cmd_record(), so it is skipped here).
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;

        if (machine__is_host(machine))
                return;

        /*
         * As for guest kernel when processing subcommand record&report,
         * we arrange module mmap prior to guest kernel mmap and trigger
         * a preload dso because default guest module symbols are loaded
         * from guest kallsyms instead of /lib/modules/XXX/XXX. This
         * method is used to avoid symbol missing when the first addr is
         * in module instead of in guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
         * have no _text sometimes.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}
377
/*
 * Header-only marker event written after each full pass over the mmap
 * buffers when tracepoints are recorded (see perf_record__mmap_read_all()).
 */
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};
382
383 static void perf_record__mmap_read_all(struct perf_record *rec)
384 {
385         int i;
386
387         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
388                 if (rec->evlist->mmap[i].base)
389                         perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
390         }
391
392         if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
393                 write_output(rec, &finished_round_event, sizeof(finished_round_event));
394 }
395
/*
 * The guts of 'perf record': choose and open the output, create the
 * session, open/mmap the counters, synthesize the initial metadata
 * events, then loop draining the ring buffers until the workload exits
 * or the user interrupts.  Returns 0 on success, negative on error;
 * final cleanup happens in the on_exit() handlers registered here.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
        struct stat st;
        int flags;
        int err, output, feat;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct perf_record_opts *opts = &rec->opts;
        struct perf_evlist *evsel_list = rec->evlist;
        const char *output_name = rec->output_name;
        struct perf_session *session;

        rec->progname = argv[0];

        rec->page_size = sysconf(_SC_PAGE_SIZE);

        on_exit(perf_record__sig_exit, rec);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);

        /* Pick the output: explicit file, default "perf.data", or a pipe. */
        if (!output_name) {
                if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
                        opts->pipe_output = true;
                else
                        rec->output_name = output_name = "perf.data";
        }
        if (output_name) {
                if (!strcmp(output_name, "-"))
                        opts->pipe_output = true;
                else if (!stat(output_name, &st) && st.st_size) {
                        if (rec->write_mode == WRITE_FORCE) {
                                /* Preserve the old data as <name>.old. */
                                char oldname[PATH_MAX];
                                snprintf(oldname, sizeof(oldname), "%s.old",
                                         output_name);
                                unlink(oldname);
                                rename(output_name, oldname);
                        }
                } else if (rec->write_mode == WRITE_APPEND) {
                        /* Nothing to append to: record a fresh file. */
                        rec->write_mode = WRITE_FORCE;
                }
        }

        flags = O_CREAT|O_RDWR;
        if (rec->write_mode == WRITE_APPEND)
                rec->file_new = 0;
        else
                flags |= O_TRUNC;

        if (opts->pipe_output)
                output = STDOUT_FILENO;
        else
                output = open(output_name, flags, S_IRUSR | S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                exit(-1);
        }

        rec->output = output;

        session = perf_session__new(output_name, O_WRONLY,
                                    rec->write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        rec->session = session;

        /* Start with every header feature set, then clear what doesn't apply. */
        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&evsel_list->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        /* Appending: pick up the existing header before adding to it. */
        if (!rec->file_new) {
                err = perf_session__read_header(session, output);
                if (err < 0)
                        goto out_delete_session;
        }

        if (forks) {
                err = perf_evlist__prepare_workload(evsel_list, opts, argv);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        goto out_delete_session;
                }
        }

        perf_record__open(rec);

        /*
         * perf_session__delete(session) will be called at perf_record__exit()
         */
        on_exit(perf_record__exit, rec);

        if (opts->pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
                        return err;
        } else if (rec->file_new) {
                err = perf_session__write_header(session, evsel_list,
                                                 output, false);
                if (err < 0)
                        return err;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                return -1;
        }

        /* Event data begins here; process_buildids() scans from this offset. */
        rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("Couldn't find native kernel information.\n");
                return -1;
        }

        /*
         * A pipe reader has no file header to parse, so stream the
         * attrs, event types and tracing data as synthetic events.
         */
        if (opts->pipe_output) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        return err;
                }

                err = perf_event__synthesize_event_types(tool, process_synthesized_event,
                                                         machine);
                if (err < 0) {
                        pr_err("Couldn't synthesize event_types.\n");
                        return err;
                }

                if (have_tracepoints(&evsel_list->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so its not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                return err;
                        }
                        /* Tracing data went straight to the fd; account it. */
                        advance_output(rec, err);
                }
        }

        /* _text may be unreadable; fall back to _stext (see also the
         * guest-kernel comment in perf_event__synthesize_guest_os()). */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest)
                perf_session__process_machines(session, tool,
                                               perf_event__synthesize_guest_os);

        /* Pre-populate the thread/comm/mmap state of what we monitor. */
        if (!opts->system_wide)
                perf_event__synthesize_thread_map(tool, evsel_list->threads,
                                                  process_synthesized_event,
                                                  machine);
        else
                perf_event__synthesize_threads(tool, process_synthesized_event,
                                               machine);

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        exit(-1);
                }
        }

        perf_evlist__enable(evsel_list);

        /*
         * Let the child rip
         */
        if (forks)
                perf_evlist__start_workload(evsel_list);

        /* Main loop: drain buffers; sleep in poll() when nothing arrived. */
        for (;;) {
                int hits = rec->samples;

                perf_record__mmap_read_all(rec);

                if (hits == rec->samples) {
                        if (done)
                                break;
                        /* NOTE(review): the poll() return value is assigned
                         * but never checked here. */
                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
                        waking++;
                }

                /*
                 * Once told to stop, disable the events and loop once
                 * more to flush whatever remains in the buffers.
                 */
                if (done)
                        perf_evlist__disable(evsel_list);
        }

        /* SIGUSR1 is a request to stop without the summary output. */
        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)rec->bytes_written / 1024.0 / 1024.0,
                output_name,
                rec->bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}
643
/* Table-entry helpers for the branch-filter keyword table below. */
#define BRANCH_OPT(n, m) \
        { .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
        const char *name;       /* user-visible filter keyword */
        int mode;               /* corresponding PERF_SAMPLE_BRANCH_* bit */
};
653
/*
 * Keyword -> PERF_SAMPLE_BRANCH_* mapping consumed by
 * parse_branch_stack(); terminated by BRANCH_END.
 */
static const struct branch_mode branch_modes[] = {
        BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
        BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
        BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
        BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
        BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
        BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
        BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
        BRANCH_END
};
664
665 static int
666 parse_branch_stack(const struct option *opt, const char *str, int unset)
667 {
668 #define ONLY_PLM \
669         (PERF_SAMPLE_BRANCH_USER        |\
670          PERF_SAMPLE_BRANCH_KERNEL      |\
671          PERF_SAMPLE_BRANCH_HV)
672
673         uint64_t *mode = (uint64_t *)opt->value;
674         const struct branch_mode *br;
675         char *s, *os = NULL, *p;
676         int ret = -1;
677
678         if (unset)
679                 return 0;
680
681         /*
682          * cannot set it twice, -b + --branch-filter for instance
683          */
684         if (*mode)
685                 return -1;
686
687         /* str may be NULL in case no arg is passed to -b */
688         if (str) {
689                 /* because str is read-only */
690                 s = os = strdup(str);
691                 if (!s)
692                         return -1;
693
694                 for (;;) {
695                         p = strchr(s, ',');
696                         if (p)
697                                 *p = '\0';
698
699                         for (br = branch_modes; br->name; br++) {
700                                 if (!strcasecmp(s, br->name))
701                                         break;
702                         }
703                         if (!br->name) {
704                                 ui__warning("unknown branch filter %s,"
705                                             " check man page\n", s);
706                                 goto error;
707                         }
708
709                         *mode |= br->mode;
710
711                         if (!p)
712                                 break;
713
714                         s = p + 1;
715                 }
716         }
717         ret = 0;
718
719         /* default to any branch */
720         if ((*mode & ~ONLY_PLM) == 0) {
721                 *mode = PERF_SAMPLE_BRANCH_ANY;
722         }
723 error:
724         free(os);
725         return ret;
726 }
727
/* Usage strings shown by parse_options() for 'perf record'. */
static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
733
/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
        .opts = {
                /* UINT_MAX/ULLONG_MAX presumably act as "unset" sentinels,
                 * resolved elsewhere — TODO confirm against evlist config. */
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
                .freq                = 1000,
        },
        .write_mode = WRITE_FORCE,
        .file_new   = true,
};
754
755 /*
756  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
757  * with it and switch to use the library functions in perf_evlist that came
758  * from builtin-record.c, i.e. use perf_record_opts,
759  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
760  * using pipes, etc.
761  */
/*
 * Command line option table for 'perf record'.  Each entry wires a flag to a
 * field of the file-scope 'record' instance (struct perf_record) or to a
 * parser callback; parse_options() consumes it in cmd_record().
 */
const struct option record_options[] = {
	/* Event selection and filtering */
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	/* Target selection: existing pid/tid, all CPUs, a CPU list, or a uid */
	OPT_STRING('p', "pid", &record.opts.target_pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target_tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	/* Sampling period/frequency; -c and -F are mutually overriding, see cmd_record() */
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	/* Per-sample payload selection */
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	/* Build-id handling (see disable_buildid_cache() in cmd_record()) */
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),

	/* Branch-stack sampling: -b takes no argument, -j takes a filter mask */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};
827
828 int cmd_record(int argc, const char **argv, const char *prefix __used)
829 {
830         int err = -ENOMEM;
831         struct perf_evsel *pos;
832         struct perf_evlist *evsel_list;
833         struct perf_record *rec = &record;
834
835         perf_header__set_cmdline(argc, argv);
836
837         evsel_list = perf_evlist__new(NULL, NULL);
838         if (evsel_list == NULL)
839                 return -ENOMEM;
840
841         rec->evlist = evsel_list;
842
843         argc = parse_options(argc, argv, record_options, record_usage,
844                             PARSE_OPT_STOP_AT_NON_OPTION);
845         if (!argc && !rec->opts.target_pid && !rec->opts.target_tid &&
846                 !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
847                 usage_with_options(record_usage, record_options);
848
849         if (rec->force && rec->append_file) {
850                 fprintf(stderr, "Can't overwrite and append at the same time."
851                                 " You need to choose between -f and -A");
852                 usage_with_options(record_usage, record_options);
853         } else if (rec->append_file) {
854                 rec->write_mode = WRITE_APPEND;
855         } else {
856                 rec->write_mode = WRITE_FORCE;
857         }
858
859         if (nr_cgroups && !rec->opts.system_wide) {
860                 fprintf(stderr, "cgroup monitoring only available in"
861                         " system-wide mode\n");
862                 usage_with_options(record_usage, record_options);
863         }
864
865         symbol__init();
866
867         if (symbol_conf.kptr_restrict)
868                 pr_warning(
869 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
870 "check /proc/sys/kernel/kptr_restrict.\n\n"
871 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
872 "file is not found in the buildid cache or in the vmlinux path.\n\n"
873 "Samples in kernel modules won't be resolved at all.\n\n"
874 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
875 "even with a suitable vmlinux or kallsyms file.\n\n");
876
877         if (rec->no_buildid_cache || rec->no_buildid)
878                 disable_buildid_cache();
879
880         if (evsel_list->nr_entries == 0 &&
881             perf_evlist__add_default(evsel_list) < 0) {
882                 pr_err("Not enough memory for event selector list\n");
883                 goto out_symbol_exit;
884         }
885
886         rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
887                                          rec->opts.target_pid);
888         if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
889                 goto out_free_fd;
890
891         if (rec->opts.target_pid)
892                 rec->opts.target_tid = rec->opts.target_pid;
893
894         if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
895                                      rec->opts.target_tid, rec->opts.uid,
896                                      rec->opts.cpu_list) < 0)
897                 usage_with_options(record_usage, record_options);
898
899         list_for_each_entry(pos, &evsel_list->entries, node) {
900                 if (perf_header__push_event(pos->attr.config, event_name(pos)))
901                         goto out_free_fd;
902         }
903
904         if (rec->opts.user_interval != ULLONG_MAX)
905                 rec->opts.default_interval = rec->opts.user_interval;
906         if (rec->opts.user_freq != UINT_MAX)
907                 rec->opts.freq = rec->opts.user_freq;
908
909         /*
910          * User specified count overrides default frequency.
911          */
912         if (rec->opts.default_interval)
913                 rec->opts.freq = 0;
914         else if (rec->opts.freq) {
915                 rec->opts.default_interval = rec->opts.freq;
916         } else {
917                 fprintf(stderr, "frequency and count are zero, aborting\n");
918                 err = -EINVAL;
919                 goto out_free_fd;
920         }
921
922         err = __cmd_record(&record, argc, argv);
923 out_free_fd:
924         perf_evlist__delete_maps(evsel_list);
925 out_symbol_exit:
926         symbol__exit();
927         return err;
928 }