perf evsel: Steal the counter reading routines from stat
author Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 3 Jan 2011 19:45:52 +0000 (17:45 -0200)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 4 Jan 2011 02:22:55 +0000 (00:22 -0200)
Making them hopefully generic enough to be used in 'perf test',
we'll see.

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

tools/perf/builtin-stat.c
tools/perf/util/evsel.c
tools/perf/util/evsel.h

index 589ba3a..a8b00b4 100644 (file)
@@ -93,12 +93,6 @@ static const char            *cpu_list;
 static const char              *csv_sep                        = NULL;
 static bool                    csv_output                      = false;
 
-struct cpu_counts {
-       u64 val;
-       u64 ena;
-       u64 run;
-};
-
 static volatile int done = 0;
 
 struct stats
@@ -108,15 +102,11 @@ struct stats
 
 struct perf_stat {
        struct stats      res_stats[3];
-       int               scaled;
-       struct cpu_counts cpu_counts[];
 };
 
-static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel, int ncpus)
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
 {
-       size_t priv_size = (sizeof(struct perf_stat) +
-                           (ncpus * sizeof(struct cpu_counts)));
-       evsel->priv = zalloc(priv_size);
+       evsel->priv = zalloc(sizeof(struct perf_stat));
        return evsel->priv == NULL ? -ENOMEM : 0;
 }
 
@@ -238,52 +228,14 @@ static inline int nsec_counter(struct perf_evsel *evsel)
  * Read out the results of a single counter:
  * aggregate counts across CPUs in system-wide mode
  */
-static void read_counter_aggr(struct perf_evsel *counter)
+static int read_counter_aggr(struct perf_evsel *counter)
 {
        struct perf_stat *ps = counter->priv;
-       u64 count[3], single_count[3];
-       int cpu;
-       size_t res, nv;
-       int scaled;
-       int i, thread;
-
-       count[0] = count[1] = count[2] = 0;
-
-       nv = scale ? 3 : 1;
-       for (cpu = 0; cpu < nr_cpus; cpu++) {
-               for (thread = 0; thread < thread_num; thread++) {
-                       if (FD(counter, cpu, thread) < 0)
-                               continue;
-
-                       res = read(FD(counter, cpu, thread),
-                                       single_count, nv * sizeof(u64));
-                       assert(res == nv * sizeof(u64));
-
-                       close(FD(counter, cpu, thread));
-                       FD(counter, cpu, thread) = -1;
-
-                       count[0] += single_count[0];
-                       if (scale) {
-                               count[1] += single_count[1];
-                               count[2] += single_count[2];
-                       }
-               }
-       }
-
-       scaled = 0;
-       if (scale) {
-               if (count[2] == 0) {
-                       ps->scaled = -1;
-                       count[0] = 0;
-                       return;
-               }
+       u64 *count = counter->counts->aggr.values;
+       int i;
 
-               if (count[2] < count[1]) {
-                       ps->scaled = 1;
-                       count[0] = (unsigned long long)
-                               ((double)count[0] * count[1] / count[2] + 0.5);
-               }
-       }
+       if (__perf_evsel__read(counter, nr_cpus, thread_num, scale) < 0)
+               return -1;
 
        for (i = 0; i < 3; i++)
                update_stats(&ps->res_stats[i], count[i]);
@@ -302,46 +254,24 @@ static void read_counter_aggr(struct perf_evsel *counter)
                update_stats(&runtime_cycles_stats[0], count[0]);
        if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                update_stats(&runtime_branches_stats[0], count[0]);
+
+       return 0;
 }
 
 /*
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
  */
-static void read_counter(struct perf_evsel *counter)
+static int read_counter(struct perf_evsel *counter)
 {
-       struct cpu_counts *cpu_counts = counter->priv;
-       u64 count[3];
+       u64 *count;
        int cpu;
-       size_t res, nv;
-
-       count[0] = count[1] = count[2] = 0;
-
-       nv = scale ? 3 : 1;
 
        for (cpu = 0; cpu < nr_cpus; cpu++) {
+               if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
+                       return -1;
 
-               if (FD(counter, cpu, 0) < 0)
-                       continue;
-
-               res = read(FD(counter, cpu, 0), count, nv * sizeof(u64));
-
-               assert(res == nv * sizeof(u64));
-
-               close(FD(counter, cpu, 0));
-               FD(counter, cpu, 0) = -1;
-
-               if (scale) {
-                       if (count[2] == 0) {
-                               count[0] = 0;
-                       } else if (count[2] < count[1]) {
-                               count[0] = (unsigned long long)
-                               ((double)count[0] * count[1] / count[2] + 0.5);
-                       }
-               }
-               cpu_counts[cpu].val = count[0]; /* scaled count */
-               cpu_counts[cpu].ena = count[1];
-               cpu_counts[cpu].run = count[2];
+               count = counter->counts->cpu[cpu].values;
 
                if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
                        update_stats(&runtime_nsecs_stats[cpu], count[0]);
@@ -350,6 +280,8 @@ static void read_counter(struct perf_evsel *counter)
                if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                        update_stats(&runtime_branches_stats[cpu], count[0]);
        }
+
+       return 0;
 }
 
 static int run_perf_stat(int argc __used, const char **argv)
@@ -449,12 +381,17 @@ static int run_perf_stat(int argc __used, const char **argv)
        update_stats(&walltime_nsecs_stats, t1 - t0);
 
        if (no_aggr) {
-               list_for_each_entry(counter, &evsel_list, node)
+               list_for_each_entry(counter, &evsel_list, node) {
                        read_counter(counter);
+                       perf_evsel__close_fd(counter, nr_cpus, 1);
+               }
        } else {
-               list_for_each_entry(counter, &evsel_list, node)
+               list_for_each_entry(counter, &evsel_list, node) {
                        read_counter_aggr(counter);
+                       perf_evsel__close_fd(counter, nr_cpus, thread_num);
+               }
        }
+
        return WEXITSTATUS(status);
 }
 
@@ -550,7 +487,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
 {
        struct perf_stat *ps = counter->priv;
        double avg = avg_stats(&ps->res_stats[0]);
-       int scaled = ps->scaled;
+       int scaled = counter->counts->scaled;
 
        if (scaled == -1) {
                fprintf(stderr, "%*s%s%-24s\n",
@@ -590,14 +527,13 @@ static void print_counter_aggr(struct perf_evsel *counter)
  */
 static void print_counter(struct perf_evsel *counter)
 {
-       struct perf_stat *ps = counter->priv;
        u64 ena, run, val;
        int cpu;
 
        for (cpu = 0; cpu < nr_cpus; cpu++) {
-               val = ps->cpu_counts[cpu].val;
-               ena = ps->cpu_counts[cpu].ena;
-               run = ps->cpu_counts[cpu].run;
+               val = counter->counts->cpu[cpu].val;
+               ena = counter->counts->cpu[cpu].ena;
+               run = counter->counts->cpu[cpu].run;
                if (run == 0 || ena == 0) {
                        fprintf(stderr, "CPU%*d%s%*s%s%-24s",
                                csv_output ? 0 : -4,
@@ -818,7 +754,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
        }
 
        list_for_each_entry(pos, &evsel_list, node) {
-               if (perf_evsel__alloc_stat_priv(pos, nr_cpus) < 0 ||
+               if (perf_evsel__alloc_stat_priv(pos) < 0 ||
+                   perf_evsel__alloc_counts(pos, nr_cpus) < 0 ||
                    perf_evsel__alloc_fd(pos, nr_cpus, thread_num) < 0)
                        goto out_free_fd;
        }
index 6539ec9..3f5de51 100644 (file)
@@ -1,6 +1,8 @@
 #include "evsel.h"
 #include "util.h"
 
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
 struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
 {
        struct perf_evsel *evsel = zalloc(sizeof(*evsel));
@@ -21,15 +23,101 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
        return evsel->fd != NULL ? 0 : -ENOMEM;
 }
 
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
+{
+       evsel->counts = zalloc((sizeof(*evsel->counts) +
+                               (ncpus * sizeof(struct perf_counts_values))));
+       return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
 void perf_evsel__free_fd(struct perf_evsel *evsel)
 {
        xyarray__delete(evsel->fd);
        evsel->fd = NULL;
 }
 
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+       int cpu, thread;
+
+       for (cpu = 0; cpu < ncpus; cpu++)
+               for (thread = 0; thread < nthreads; ++thread) {
+                       close(FD(evsel, cpu, thread));
+                       FD(evsel, cpu, thread) = -1;
+               }
+}
+
 void perf_evsel__delete(struct perf_evsel *evsel)
 {
        assert(list_empty(&evsel->node));
        xyarray__delete(evsel->fd);
        free(evsel);
 }
+
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                             int cpu, int thread, bool scale)
+{
+       struct perf_counts_values count;
+       size_t nv = scale ? 3 : 1;
+
+       if (FD(evsel, cpu, thread) < 0)
+               return -EINVAL;
+
+       if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
+               return -errno;
+
+       if (scale) {
+               if (count.run == 0)
+                       count.val = 0;
+               else if (count.run < count.ena)
+                       count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
+       } else
+               count.ena = count.run = 0;
+
+       evsel->counts->cpu[cpu] = count;
+       return 0;
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel,
+                      int ncpus, int nthreads, bool scale)
+{
+       size_t nv = scale ? 3 : 1;
+       int cpu, thread;
+       struct perf_counts_values *aggr = &evsel->counts->aggr, count;
+
+       aggr->val = 0;
+
+       for (cpu = 0; cpu < ncpus; cpu++) {
+               for (thread = 0; thread < nthreads; thread++) {
+                       if (FD(evsel, cpu, thread) < 0)
+                               continue;
+
+                       if (readn(FD(evsel, cpu, thread),
+                                 &count, nv * sizeof(u64)) < 0)
+                               return -errno;
+
+                       aggr->val += count.val;
+                       if (scale) {
+                               aggr->ena += count.ena;
+                               aggr->run += count.run;
+                       }
+               }
+       }
+
+       evsel->counts->scaled = 0;
+       if (scale) {
+               if (aggr->run == 0) {
+                       evsel->counts->scaled = -1;
+                       aggr->val = 0;
+                       return 0;
+               }
+
+               if (aggr->run < aggr->ena) {
+                       evsel->counts->scaled = 1;
+                       aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
+               }
+       } else
+               aggr->ena = aggr->run = 0;
+
+       return 0;
+}
index 8a5cfb6..8b48ef1 100644 (file)
@@ -2,15 +2,34 @@
 #define __PERF_EVSEL_H 1
 
 #include <linux/list.h>
+#include <stdbool.h>
 #include <linux/perf_event.h>
 #include "types.h"
 #include "xyarray.h"
+struct perf_counts_values {
+       union {
+               struct {
+                       u64 val;
+                       u64 ena;
+                       u64 run;
+               };
+               u64 values[3];
+       };
+};
+
+struct perf_counts {
+       s8                        scaled;
+       struct perf_counts_values aggr;
+       struct perf_counts_values cpu[];
+};
 
 struct perf_evsel {
        struct list_head        node;
        struct perf_event_attr  attr;
        char                    *filter;
        struct xyarray          *fd;
+       struct perf_counts      *counts;
        int                     idx;
        void                    *priv;
 };
@@ -19,10 +38,70 @@ struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
 void perf_evsel__delete(struct perf_evsel *evsel);
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
 void perf_evsel__free_fd(struct perf_evsel *evsel);
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 #define perf_evsel__match(evsel, t, c)         \
        (evsel->attr.type == PERF_TYPE_##t &&   \
         evsel->attr.config == PERF_COUNT_##c)
 
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                             int cpu, int thread, bool scale);
+
+/**
+ * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                                         int cpu, int thread)
+{
+       return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+}
+
+/**
+ * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
+                                                int cpu, int thread)
+{
+       return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
+                      bool scale);
+
+/**
+ * perf_evsel__read - Read the aggregate results on all CPUs
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ */
+static inline int perf_evsel__read(struct perf_evsel *evsel,
+                                   int ncpus, int nthreads)
+{
+       return __perf_evsel__read(evsel, ncpus, nthreads, false);
+}
+
+/**
+ * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ */
+static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
+                                         int ncpus, int nthreads)
+{
+       return __perf_evsel__read(evsel, ncpus, nthreads, true);
+}
+
 #endif /* __PERF_EVSEL_H */