tools turbostat: harden against cpu online/offline
[linux-flexiantxendom0-3.2.10.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
/*
 * How to treat an already-existing output file: overwrite it
 * (WRITE_FORCE) or append new records to it (WRITE_APPEND, -A).
 */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};
38
/*
 * All state for a single 'perf record' run: the tool callbacks, the
 * recording options, the session/evlist being recorded and the output
 * file bookkeeping.
 */
struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts opts;
	u64			bytes_written;	/* event payload bytes written so far */
	const char		*output_name;	/* NULL means "perf.data" or stdout pipe */
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;		/* output file descriptor */
	unsigned int		page_size;
	int			realtime_prio;	/* nonzero: switch to SCHED_FIFO at this prio */
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;	/* false when appending to an existing file */
	bool			append_file;
	long			samples;	/* mmap buffers drained, not individual samples */
	off_t			post_processing_offset;	/* file offset where event data begins */
};
59
60 static void advance_output(struct perf_record *rec, size_t size)
61 {
62         rec->bytes_written += size;
63 }
64
65 static void write_output(struct perf_record *rec, void *buf, size_t size)
66 {
67         while (size) {
68                 int ret = write(rec->output, buf, size);
69
70                 if (ret < 0)
71                         die("failed to write");
72
73                 size -= ret;
74                 buf += ret;
75
76                 rec->bytes_written += ret;
77         }
78 }
79
80 static int process_synthesized_event(struct perf_tool *tool,
81                                      union perf_event *event,
82                                      struct perf_sample *sample __used,
83                                      struct machine *machine __used)
84 {
85         struct perf_record *rec = container_of(tool, struct perf_record, tool);
86         write_output(rec, event, event->header.size);
87         return 0;
88 }
89
/*
 * Drain one perf mmap ring buffer to the output file.
 *
 * The kernel advances 'head' as it produces events; we remember where
 * we last stopped in md->prev.  If the new data wraps past the end of
 * the ring it is written in two chunks.  Finally the tail is published
 * back so the kernel may reuse the space.
 */
static void perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	/* sample data starts one page past the control page */
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;		/* nothing new */

	rec->samples++;

	size = head - old;

	/* did the new data wrap around the end of the ring? */
	if ((old & md->mask) + size != (head & md->mask)) {
		/* first chunk: from 'old' up to the end of the buffer */
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(rec, buf, size);
	}

	/* remaining (or only) chunk */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(rec, buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}
123
/* Set from sig_handler(); polled by the main recording loop and exit path */
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;
127
128 static void sig_handler(int sig)
129 {
130         if (sig == SIGCHLD)
131                 child_finished = 1;
132
133         done = 1;
134         signr = sig;
135 }
136
137 static void perf_record__sig_exit(int exit_status __used, void *arg)
138 {
139         struct perf_record *rec = arg;
140         int status;
141
142         if (rec->evlist->workload.pid > 0) {
143                 if (!child_finished)
144                         kill(rec->evlist->workload.pid, SIGTERM);
145
146                 wait(&status);
147                 if (WIFSIGNALED(status))
148                         psignal(WTERMSIG(status), rec->progname);
149         }
150
151         if (signr == -1 || signr == SIGUSR1)
152                 return;
153
154         signal(signr, SIG_DFL);
155         kill(getpid(), signr);
156 }
157
158 static bool perf_evlist__equal(struct perf_evlist *evlist,
159                                struct perf_evlist *other)
160 {
161         struct perf_evsel *pos, *pair;
162
163         if (evlist->nr_entries != other->nr_entries)
164                 return false;
165
166         pair = list_entry(other->entries.next, struct perf_evsel, node);
167
168         list_for_each_entry(pos, &evlist->entries, node) {
169                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
170                         return false;
171                 pair = list_entry(pair->node.next, struct perf_evsel, node);
172         }
173
174         return true;
175 }
176
/*
 * Open a counter fd for every event in the evlist, working around
 * features the running kernel may lack (guest/host exclusion,
 * sample_id_all, hardware cycles), then mmap the resulting fds.
 * Exits the process on unrecoverable errors.
 */
static void perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos, *first;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;

	first = list_entry(evlist->entries.next, struct perf_evsel, node);

	perf_evlist__config_attrs(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		struct xyarray *group_fd = NULL;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		/* group members pass the leader's fds so the kernel ties them together */
		if (opts->group && pos != first)
			group_fd = first->fd;
fallback_missing_features:
		if (opts->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
				     opts->group, group_fd) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && opts->cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL) {
				if (!opts->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					/* old kernels reject these bits; clear and retry */
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					opts->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (opts->sample_id_all_avail) {
					/*
					 * Old kernel, no attr->sample_id_type_all field
					 */
					opts->sample_id_all_avail = false;
					if (!opts->sample_time && !opts->raw_samples && !time_needed)
						attr->sample_type &= ~PERF_SAMPLE_TIME;

					goto retry_sample_id;
				}
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM)
			die("Permission error mapping pages.\n"
			    "Consider increasing "
			    "/proc/sys/kernel/perf_event_mlock_kb,\n"
			    "or try again with a smaller value of -m/--mmap_pages.\n"
			    "(current value: %d)\n", opts->mmap_pages);
		else if (!is_power_of_2(opts->mmap_pages))
			die("--mmap_pages/-m value must be a power of two.");

		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
	}

	/* when appending, the on-disk evlist must match what we record now */
	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}
310
311 static int process_buildids(struct perf_record *rec)
312 {
313         u64 size = lseek(rec->output, 0, SEEK_CUR);
314
315         if (size == 0)
316                 return 0;
317
318         rec->session->fd = rec->output;
319         return __perf_session__process_events(rec->session, rec->post_processing_offset,
320                                               size - rec->post_processing_offset,
321                                               size, &build_id__mark_dso_hit_ops);
322 }
323
324 static void perf_record__exit(int status __used, void *arg)
325 {
326         struct perf_record *rec = arg;
327
328         if (!rec->opts.pipe_output) {
329                 rec->session->header.data_size += rec->bytes_written;
330
331                 if (!rec->no_buildid)
332                         process_buildids(rec);
333                 perf_session__write_header(rec->session, rec->evlist,
334                                            rec->output, true);
335                 perf_session__delete(rec->session);
336                 perf_evlist__delete(rec->evlist);
337                 symbol__exit();
338         }
339 }
340
341 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
342 {
343         int err;
344         struct perf_tool *tool = data;
345
346         if (machine__is_host(machine))
347                 return;
348
349         /*
350          *As for guest kernel when processing subcommand record&report,
351          *we arrange module mmap prior to guest kernel mmap and trigger
352          *a preload dso because default guest module symbols are loaded
353          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
354          *method is used to avoid symbol missing when the first addr is
355          *in module instead of in guest kernel.
356          */
357         err = perf_event__synthesize_modules(tool, process_synthesized_event,
358                                              machine);
359         if (err < 0)
360                 pr_err("Couldn't record guest kernel [%d]'s reference"
361                        " relocation symbol.\n", machine->pid);
362
363         /*
364          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
365          * have no _text sometimes.
366          */
367         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
368                                                  machine, "_text");
369         if (err < 0)
370                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
371                                                          machine, "_stext");
372         if (err < 0)
373                 pr_err("Couldn't record guest kernel [%d]'s reference"
374                        " relocation symbol.\n", machine->pid);
375 }
376
/*
 * Synthetic marker appended after each pass over the mmap buffers when
 * tracepoint data is recorded; lets report-time processing reorder
 * events in bounded "rounds".
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
381
382 static void perf_record__mmap_read_all(struct perf_record *rec)
383 {
384         int i;
385
386         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
387                 if (rec->evlist->mmap[i].base)
388                         perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
389         }
390
391         if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
392                 write_output(rec, &finished_round_event, sizeof(finished_round_event));
393 }
394
/*
 * The main recording sequence: resolve the output destination, create
 * the session, write/synthesize all metadata, then loop draining the
 * mmap buffers until the workload exits or a signal stops us.
 * Returns 0 on success or a negative error.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	/* default output: stdout when it is a pipe, else "perf.data" */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			/* overwriting: keep the previous data as <name>.old */
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			/* nothing to append to: fall back to overwrite */
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	if (!rec->no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	/* appending: read the existing header so it can be validated */
	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* record environment/metadata feature sections in the header */
	perf_header__set_feat(&session->header, HEADER_HOSTNAME);
	perf_header__set_feat(&session->header, HEADER_OSRELEASE);
	perf_header__set_feat(&session->header, HEADER_ARCH);
	perf_header__set_feat(&session->header, HEADER_CPUDESC);
	perf_header__set_feat(&session->header, HEADER_NRCPUS);
	perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
	perf_header__set_feat(&session->header, HEADER_CMDLINE);
	perf_header__set_feat(&session->header, HEADER_VERSION);
	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CPUID);

	/* fork the workload now (stopped) so its pid can be traced from exec */
	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	perf_record__open(rec);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		return -1;
	}

	/* everything past this offset is event data (used by process_buildids) */
	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	/* pipe mode: metadata must be synthesized inline as events */
	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(rec, err);
		}
	}

	/* kernel mmap: try _text, fall back to _stext (see guest variant above) */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->system_wide)
		perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else
		perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		perf_record__mmap_read_all(rec);

		/* no new data this pass: block until a buffer wakes us up */
		if (hits == rec->samples) {
			if (done)
				break;
			/* NOTE(review): poll() errors are ignored here */
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/* stop producing, but loop once more to drain the buffers */
		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
649
/* usage strings printed by usage_with_options() */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
655
656 /*
657  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
658  * because we need to have access to it in perf_record__exit, that is called
659  * after cmd_record() exits, but since record_options need to be accessible to
660  * builtin-script, leave it here.
661  *
662  * At least we don't ouch it in all the other functions here directly.
663  *
664  * Just say no to tons of global variables, sigh.
665  */
static struct perf_record record = {
	.opts = {
		.target_pid          = -1,		/* -1: no pid selected */
		.target_tid          = -1,		/* -1: no tid selected */
		.mmap_pages          = UINT_MAX,	/* sentinel: not set by user */
		.user_freq           = UINT_MAX,	/* sentinel: not set by user */
		.user_interval       = ULLONG_MAX,	/* sentinel: not set by user */
		.freq                = 1000,		/* default sampling frequency (Hz) */
		.sample_id_all_avail = true,
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};
679
680 /*
681  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
682  * with it and switch to use the library functions in perf_evlist that came
683  * from builtin-record.c, i.e. use perf_record_opts,
684  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
685  * using pipes, etc.
686  */
/* command-line option table for 'perf record' (shared with builtin-script) */
const struct option record_options[] = {
	/* event selection */
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	/* target selection */
	OPT_INTEGER('p', "pid", &record.opts.target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &record.opts.target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	/* sampling control */
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};
743
/*
 * Entry point for 'perf record': parse and validate options, build the
 * event list and cpu/thread maps, then hand off to __cmd_record().
 * Returns 0 on success or a negative errno-style value.
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;

	perf_header__set_cmdline(argc, argv);

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* something must be selected: a command, -p, -t, -a or -C */
	if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
		!rec->opts.system_wide && !rec->opts.cpu_list)
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* no -e given: fall back to the default event (cycles) */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/* -p PID implies tracing that process' main thread */
	if (rec->opts.target_pid != -1)
		rec->opts.target_tid = rec->opts.target_pid;

	if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
				     rec->opts.target_tid, rec->opts.cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	/* apply -c / -F overrides over the built-in defaults */
	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}