author    Jan Blunck <jblunck@suse.de>
          Wed, 24 Sep 2008 14:56:12 +0000 (14:56 +0000)
committer Jan Blunck <jblunck@suse.de>
          Wed, 24 Sep 2008 14:56:12 +0000 (14:56 +0000)

This adds some tracepoint instrumentation taken from the LTTng patch
series. Tracepoints are enabled for kernel-debug and kernel-trace only. I
disabled ftrace for all flavors except kernel-debug and kernel-trace as well.

- Update config files.
- rpm/kernel-source.spec.in, kernel-binary.spec.in, config.conf: Add trace
  flavor
- patches.fixes/ia64-sparse-fixes.diff: ia64-kvm: fix sparse
  warnings.
- patches.xen/xen3-fixup-common: rediff
- patches.xen/xen3-auto-common.diff: rediff
- patches.xen/xen3-patch-2.6.21: rediff
- patches.xen/xen3-patch-2.6.26: rediff
- patches.trace/rcu-read-sched.patch
- patches.trace/markers-use-rcu-read-lock-sched.patch
- patches.trace/tracepoints.patch
- patches.trace/tracepoints-use-table-size-macro.patch
- patches.trace/tracepoints-documentation.patch
- patches.trace/tracepoints-tracepoint-synchronize-unregister.patch
- patches.trace/tracepoints-documentation-fix-teardown.patch
- patches.trace/tracepoints-samples.patch
- patches.trace/tracepoints-samples-fix-teardown.patch
- patches.trace/lttng-instrumentation-irq.patch
- patches.trace/lttng-instrumentation-scheduler.patch
- patches.trace/lttng-instrumentation-timer.patch
- patches.trace/lttng-instrumentation-kernel.patch
- patches.trace/lttng-instrumentation-filemap.patch
- patches.trace/lttng-instrumentation-swap.patch
- patches.trace/lttng-instrumentation-memory.patch
- patches.trace/lttng-instrumentation-page_alloc.patch
- patches.trace/lttng-instrumentation-hugetlb.patch
- patches.trace/lttng-instrumentation-net.patch
- patches.trace/lttng-instrumentation-ipv4.patch
- patches.trace/lttng-instrumentation-ipv6.patch
- patches.trace/lttng-instrumentation-socket.patch
- patches.trace/lttng-instrumentation-fs.patch
- patches.trace/lttng-instrumentation-ipc.patch
- patches.trace/ftrace-port-to-tracepoints.patch
- patches.trace/ftrace-framepointer.diff

suse-commit: d20c2b23c8747f8a2549715ad3a902e5d068704a

64 files changed:
Documentation/tracepoints.txt [new file with mode: 0644]
fs/buffer.c
fs/compat.c
fs/exec.c
fs/ioctl.c
fs/open.c
fs/read_write.c
fs/select.c
include/asm-generic/vmlinux.lds.h
include/linux/module.h
include/linux/rcupdate.h
include/linux/tracepoint.h [new file with mode: 0644]
include/trace/filemap.h [new file with mode: 0644]
include/trace/fs.h [new file with mode: 0644]
include/trace/hugetlb.h [new file with mode: 0644]
include/trace/ipc.h [new file with mode: 0644]
include/trace/ipv4.h [new file with mode: 0644]
include/trace/ipv6.h [new file with mode: 0644]
include/trace/irq.h [new file with mode: 0644]
include/trace/kernel.h [new file with mode: 0644]
include/trace/memory.h [new file with mode: 0644]
include/trace/net.h [new file with mode: 0644]
include/trace/page_alloc.h [new file with mode: 0644]
include/trace/sched.h [new file with mode: 0644]
include/trace/socket.h [new file with mode: 0644]
include/trace/swap.h [new file with mode: 0644]
include/trace/timer.h [new file with mode: 0644]
init/Kconfig
ipc/msg.c
ipc/sem.c
ipc/shm.c
kernel/Makefile
kernel/exit.c
kernel/fork.c
kernel/irq/handle.c
kernel/itimer.c
kernel/kthread.c
kernel/marker.c
kernel/module.c
kernel/sched.c
kernel/signal.c
kernel/softirq.c
kernel/timer.c
kernel/trace/Kconfig
kernel/trace/trace_sched_switch.c
kernel/trace/trace_sched_wakeup.c
kernel/tracepoint.c [new file with mode: 0644]
mm/filemap.c
mm/hugetlb.c
mm/memory.c
mm/page_alloc.c
mm/page_io.c
mm/swapfile.c
net/core/dev.c
net/ipv4/devinet.c
net/ipv6/addrconf.c
net/socket.c
samples/Kconfig
samples/Makefile
samples/tracepoints/Makefile [new file with mode: 0644]
samples/tracepoints/tp-samples-trace.h [new file with mode: 0644]
samples/tracepoints/tracepoint-probe-sample.c [new file with mode: 0644]
samples/tracepoints/tracepoint-probe-sample2.c [new file with mode: 0644]
samples/tracepoints/tracepoint-sample.c [new file with mode: 0644]

diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
new file mode 100644 (file)
index 0000000..1b5f114
--- /dev/null
@@ -0,0 +1,103 @@
+                    Using the Linux Kernel Tracepoints
+
+                           Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Tracepoints and their use. It shows how
+to insert tracepoints in the kernel and how to connect probe functions to them,
+and gives some examples of probe functions.
+
+
+* Purpose of tracepoints
+
+A tracepoint placed in code provides a hook to call a function (probe) that you
+can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
+"off" (no probe is attached). When a tracepoint is "off" it has no effect,
+except for adding a tiny time penalty (checking a condition for a branch) and
+space penalty (adding a few bytes for the function call at the end of the
+instrumented function and adds a data structure in a separate section).  When a
+tracepoint is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the tracepoint
+site).
+
+You can put tracepoints at important locations in the code. They are
+lightweight hooks that can pass an arbitrary number of parameters,
+whose prototypes are described in a tracepoint declaration placed in a header
+file.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+Two elements are required for tracepoints :
+
+- A tracepoint definition, placed in a header file.
+- The tracepoint statement, in C code.
+
+In order to use tracepoints, you should include linux/tracepoint.h.
+
+In include/trace/subsys.h :
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_eventname,
+       TPPROTO(int firstarg, struct task_struct *p),
+       TPARGS(firstarg, p));
+
+In subsys/file.c (where the tracing statement must be added) :
+
+#include <trace/subsys.h>
+
+void somefct(void)
+{
+       ...
+       trace_subsys_eventname(arg, task);
+       ...
+}
+
+Where :
+- subsys_eventname is an identifier unique to your event
+    - subsys is the name of your subsystem.
+    - eventname is the name of the event to trace.
+- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the function
+  called by this tracepoint.
+- TPARGS(firstarg, p) are the parameter names, as found in the prototype.
+
+Connecting a function (probe) to a tracepoint is done by providing a probe
+(function to call) for the specific tracepoint through
+register_trace_subsys_eventname(). Removing a probe is done through
+unregister_trace_subsys_eventname().
+tracepoint_synchronize_unregister() must be called before the end of the
+module exit function to make sure there is no caller left using the probe.
+This, and the fact that preemption is disabled around the probe call, ensure
+that probe removal and module unload are safe. See the "Probe example"
+section below for a sample probe module.
+
+The tracepoint mechanism supports inserting multiple instances of the same
+tracepoint, but a given tracepoint name must have a single definition across the
+whole kernel so that no type conflict can occur. Name mangling of the
+tracepoints is done using the prototypes to make sure typing is correct.
+Verification of probe type correctness is done at the registration site by the
+compiler. Tracepoints can be put in inline functions, inlined static functions,
+and unrolled loops as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention intended
+to limit collisions. Tracepoint names are global to the kernel: they are
+considered as being the same whether they are in the core kernel image or in
+modules.
+
+
+* Probe / tracepoint example
+
+See the example provided in samples/tracepoints
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe tracepoint-sample (insmod order is not important)
+modprobe tracepoint-probe-sample
+cat /proc/tracepoint-example (returns an expected error)
+rmmod tracepoint-sample tracepoint-probe-sample
+dmesg
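
To complement the documentation just added, here is a minimal probe-module
sketch in the spirit of the samples shipped under samples/tracepoints/. It is
an editorial illustration, not part of the patch: subsys_eventname,
probe_subsys_eventname and <trace/subsys.h> are the hypothetical names used in
the documentation above, while the register_/unregister_trace_* functions and
tracepoint_synchronize_unregister() are the ones this patch generates via
DEFINE_TRACE().

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <trace/subsys.h>	/* hypothetical header with the DEFINE_TRACE() above */

/* The probe prototype must match the TPPROTO of the tracepoint. */
static void probe_subsys_eventname(int firstarg, struct task_struct *p)
{
	printk(KERN_INFO "subsys_eventname: arg=%d comm=%s\n", firstarg, p->comm);
}

static int __init tp_probe_init(void)
{
	/* register_trace_subsys_eventname() is generated by DEFINE_TRACE(). */
	return register_trace_subsys_eventname(probe_subsys_eventname);
}

static void __exit tp_probe_exit(void)
{
	unregister_trace_subsys_eventname(probe_subsys_eventname);
	/* Make sure no probe caller is left before the module text is freed. */
	tracepoint_synchronize_unregister();
}

module_init(tp_probe_init);
module_exit(tp_probe_exit);
MODULE_LICENSE("GPL");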
index a3a0879..7bb5d03 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,6 +41,7 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
+#include <trace/fs.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
@@ -89,7 +90,9 @@ void unlock_buffer(struct buffer_head *bh)
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
+       trace_fs_buffer_wait_start(bh);
        wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+       trace_fs_buffer_wait_end(bh);
 }
 
 static void
index 075d050..f40f47a 100644 (file)
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -51,6 +51,7 @@
 #include <linux/poll.h>
 #include <linux/mm.h>
 #include <linux/eventpoll.h>
+#include <trace/fs.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1406,6 +1407,7 @@ int compat_do_execve(char * filename,
 
        retval = search_binary_handler(bprm, regs);
        if (retval >= 0) {
+               trace_fs_exec(filename);
                /* execve success */
                security_bprm_free(bprm);
                acct_update_integrals(current);
index 5dd3af7..2be993c 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -50,6 +50,7 @@
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
 #include <linux/tracehook.h>
+#include <trace/fs.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1339,6 +1340,7 @@ int do_execve(char * filename,
        current->flags &= ~PF_KTHREAD;
        retval = search_binary_handler(bprm,regs);
        if (retval >= 0) {
+               trace_fs_exec(filename);
                /* execve success */
                security_bprm_free(bprm);
                acct_update_integrals(current);
index 7db32b3..22d747b 100644 (file)
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -13,6 +13,7 @@
 #include <linux/security.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
+#include <trace/fs.h>
 
 #include <asm/ioctls.h>
 
@@ -201,6 +202,8 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
        if (!filp)
                goto out;
 
+       trace_fs_ioctl(fd, cmd, arg);
+
        error = security_file_ioctl(filp, cmd, arg);
        if (error)
                goto out_fput;
index 42e4632..b4fdbd2 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -29,6 +29,7 @@
 #include <linux/rcupdate.h>
 #include <linux/audit.h>
 #include <linux/falloc.h>
+#include <trace/fs.h>
 
 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
@@ -1027,6 +1028,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
                                fsnotify_open(f->f_path.dentry);
                                fd_install(fd, f);
                        }
+                       trace_fs_open(fd, tmp);
                }
                putname(tmp);
        }
@@ -1116,6 +1118,7 @@ asmlinkage long sys_close(unsigned int fd)
        filp = fdt->fd[fd];
        if (!filp)
                goto out_unlock;
+       trace_fs_close(fd);
        rcu_assign_pointer(fdt->fd[fd], NULL);
        FD_CLR(fd, fdt->close_on_exec);
        __put_unused_fd(files, fd);
index 9ba495d..5779fb4 100644 (file)
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -16,6 +16,7 @@
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
 #include <linux/splice.h>
+#include <trace/fs.h>
 #include "read_write.h"
 
 #include <asm/uaccess.h>
@@ -130,6 +131,9 @@ asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin)
                if (res != (loff_t)retval)
                        retval = -EOVERFLOW;    /* LFS: should only happen on 32 bit platforms */
        }
+
+       trace_fs_lseek(fd, offset, origin);
+
        fput_light(file, fput_needed);
 bad:
        return retval;
@@ -157,6 +161,8 @@ asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high,
        offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
                        origin);
 
+       trace_fs_llseek(fd, offset, origin);
+
        retval = (int)offset;
        if (offset >= 0) {
                retval = -EFAULT;
@@ -344,6 +350,7 @@ asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
        if (file) {
                loff_t pos = file_pos_read(file);
                ret = vfs_read(file, buf, count, &pos);
+               trace_fs_read(fd, buf, count, ret);
                file_pos_write(file, pos);
                fput_light(file, fput_needed);
        }
@@ -361,6 +368,7 @@ asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t co
        if (file) {
                loff_t pos = file_pos_read(file);
                ret = vfs_write(file, buf, count, &pos);
+               trace_fs_write(fd, buf, count, ret);
                file_pos_write(file, pos);
                fput_light(file, fput_needed);
        }
@@ -381,8 +389,11 @@ asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf,
        file = fget_light(fd, &fput_needed);
        if (file) {
                ret = -ESPIPE;
-               if (file->f_mode & FMODE_PREAD)
+               if (file->f_mode & FMODE_PREAD) {
                        ret = vfs_read(file, buf, count, &pos);
+                       trace_fs_pread64(fd, buf, count, pos, ret);
+               }
+
                fput_light(file, fput_needed);
        }
 
@@ -402,8 +413,10 @@ asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf,
        file = fget_light(fd, &fput_needed);
        if (file) {
                ret = -ESPIPE;
-               if (file->f_mode & FMODE_PWRITE)  
+               if (file->f_mode & FMODE_PWRITE) {
                        ret = vfs_write(file, buf, count, &pos);
+                       trace_fs_pwrite64(fd, buf, count, pos, ret);
+               }
                fput_light(file, fput_needed);
        }
 
@@ -648,6 +661,7 @@ sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
        if (file) {
                loff_t pos = file_pos_read(file);
                ret = vfs_readv(file, vec, vlen, &pos);
+               trace_fs_readv(fd, vec, vlen, ret);
                file_pos_write(file, pos);
                fput_light(file, fput_needed);
        }
@@ -669,6 +683,7 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
        if (file) {
                loff_t pos = file_pos_read(file);
                ret = vfs_writev(file, vec, vlen, &pos);
+               trace_fs_writev(fd, vec, vlen, ret);
                file_pos_write(file, pos);
                fput_light(file, fput_needed);
        }
index da0e882..7f47668 100644 (file)
--- a/fs/select.c
+++ b/fs/select.c
@@ -24,6 +24,7 @@
 #include <linux/fdtable.h>
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
+#include <trace/fs.h>
 
 #include <asm/uaccess.h>
 
@@ -232,6 +233,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
                                file = fget_light(i, &fput_needed);
                                if (file) {
                                        f_op = file->f_op;
+                                       trace_fs_select(i, *timeout);
                                        mask = DEFAULT_POLLMASK;
                                        if (f_op && f_op->poll)
                                                mask = (*f_op->poll)(file, retval ? NULL : wait);
@@ -560,6 +562,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
                file = fget_light(fd, &fput_needed);
                mask = POLLNVAL;
                if (file != NULL) {
+                       trace_fs_poll(fd);
                        mask = DEFAULT_POLLMASK;
                        if (file->f_op && file->f_op->poll)
                                mask = file->f_op->poll(file, pwait);
index 2a29be1..5721e01 100644 (file)
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
        . = ALIGN(8);                                                   \
        VMLINUX_SYMBOL(__start___markers) = .;                          \
        *(__markers)                                                    \
-       VMLINUX_SYMBOL(__stop___markers) = .;
+       VMLINUX_SYMBOL(__stop___markers) = .;                           \
+       VMLINUX_SYMBOL(__start___tracepoints) = .;                      \
+       *(__tracepoints)                                                \
+       VMLINUX_SYMBOL(__stop___tracepoints) = .;
 
 #define RO_DATA(align)                                                 \
        . = ALIGN((align));                                             \
@@ -61,6 +64,7 @@
                *(.rodata) *(.rodata.*)                                 \
                *(__vermagic)           /* Kernel version magic */      \
                *(__markers_strings)    /* Markers: strings */          \
+               *(__tracepoints_strings)/* Tracepoints: strings */      \
        }                                                               \
                                                                        \
        .rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {          \
index 68e0955..c33fd92 100644 (file)
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/marker.h>
+#include <linux/tracepoint.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -332,6 +333,11 @@ struct module
        unsigned int num_markers;
 #endif
 
+#ifdef CONFIG_TRACEPOINTS
+       struct tracepoint *tracepoints;
+       unsigned int num_tracepoints;
+#endif
+
 #ifdef CONFIG_MODULE_UNLOAD
        /* What modules depend on me? */
        struct list_head modules_which_use_me;
@@ -454,6 +460,9 @@ extern void print_modules(void);
 
 extern void module_update_markers(void);
 
+extern void module_update_tracepoints(void);
+extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -558,6 +567,15 @@ static inline void module_update_markers(void)
 {
 }
 
+static inline void module_update_tracepoints(void)
+{
+}
+
+static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+       return 0;
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;
index e8b4039..86f1f5e 100644 (file)
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -133,6 +133,26 @@ struct rcu_head {
 #define rcu_read_unlock_bh() __rcu_read_unlock_bh()
 
 /**
+ * rcu_read_lock_sched - mark the beginning of an RCU-classic critical section
+ *
+ * Should be used with either
+ * - synchronize_sched()
+ * or
+ * - call_rcu_sched() and rcu_barrier_sched()
+ * on the write-side to ensure proper synchronization.
+ */
+#define rcu_read_lock_sched() preempt_disable()
+
+/*
+ * rcu_read_unlock_sched - marks the end of an RCU-classic critical section
+ *
+ * See rcu_read_lock_sched for more information.
+ */
+#define rcu_read_unlock_sched() preempt_enable()
+
+
+
+/**
  * rcu_dereference - fetch an RCU-protected pointer in an
  * RCU read-side critical section.  This pointer may later
  * be safely dereferenced.
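
The new rcu_read_lock_sched()/rcu_read_unlock_sched() helpers above pair with
synchronize_sched() on the update side; this is the synchronization the
tracepoint and marker code below relies on. A minimal sketch of the intended
pattern follows (editorial, not part of the patch; struct foo, gp and the
pr_info() consumer are hypothetical, and updaters are assumed to be serialized
by some lock):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo { int data; };	/* hypothetical RCU-protected object */
static struct foo *gp;		/* hypothetical globally visible pointer */

static void reader(void)
{
	struct foo *p;

	rcu_read_lock_sched();	/* disables preemption; the section must not block */
	p = rcu_dereference(gp);
	if (p)
		pr_info("foo data: %d\n", p->data);
	rcu_read_unlock_sched();
}

static void update(struct foo *newp)
{
	struct foo *oldp = gp;

	rcu_assign_pointer(gp, newp);
	synchronize_sched();	/* wait until every sched read-side section has ended */
	kfree(oldp);
}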
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
new file mode 100644 (file)
index 0000000..199f4c2
--- /dev/null
@@ -0,0 +1,134 @@
+#ifndef _LINUX_TRACEPOINT_H
+#define _LINUX_TRACEPOINT_H
+
+/*
+ * Kernel Tracepoint API.
+ *
+ * See Documentation/tracepoint.txt.
+ *
+ * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * Heavily inspired from the Linux Kernel Markers.
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+
+struct module;
+struct tracepoint;
+
+struct tracepoint {
+       const char *name;               /* Tracepoint name */
+       int state;                      /* State. */
+       void **funcs;
+} __attribute__((aligned(8)));
+
+
+#define TPPROTO(args...)       args
+#define TPARGS(args...)                args
+
+#ifdef CONFIG_TRACEPOINTS
+
+/*
+ * it_func[0] is never NULL because there is at least one element in the array
+ * when the array itself is non NULL.
+ */
+#define __DO_TRACE(tp, proto, args)                                    \
+       do {                                                            \
+               void **it_func;                                         \
+                                                                       \
+               rcu_read_lock_sched();                                  \
+               it_func = rcu_dereference((tp)->funcs);                 \
+               if (it_func) {                                          \
+                       do {                                            \
+                               ((void(*)(proto))(*it_func))(args);     \
+                       } while (*(++it_func));                         \
+               }                                                       \
+               rcu_read_unlock_sched();                                \
+       } while (0)
+
+/*
+ * Make sure the alignment of the structure in the __tracepoints section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define DEFINE_TRACE(name, proto, args)                                        \
+       static inline void trace_##name(proto)                          \
+       {                                                               \
+               static const char __tpstrtab_##name[]                   \
+               __attribute__((section("__tracepoints_strings")))       \
+               = #name ":" #proto;                                     \
+               static struct tracepoint __tracepoint_##name            \
+               __attribute__((section("__tracepoints"), aligned(8))) = \
+               { __tpstrtab_##name, 0, NULL };                         \
+               if (unlikely(__tracepoint_##name.state))                \
+                       __DO_TRACE(&__tracepoint_##name,                \
+                               TPPROTO(proto), TPARGS(args));          \
+       }                                                               \
+       static inline int register_trace_##name(void (*probe)(proto))   \
+       {                                                               \
+               return tracepoint_probe_register(#name ":" #proto,      \
+                       (void *)probe);                                 \
+       }                                                               \
+       static inline void unregister_trace_##name(void (*probe)(proto))\
+       {                                                               \
+               tracepoint_probe_unregister(#name ":" #proto,           \
+                       (void *)probe);                                 \
+       }
+
+extern void tracepoint_update_probe_range(struct tracepoint *begin,
+       struct tracepoint *end);
+
+#else /* !CONFIG_TRACEPOINTS */
+#define DEFINE_TRACE(name, proto, args)                        \
+       static inline void _do_trace_##name(struct tracepoint *tp, proto) \
+       { }                                                             \
+       static inline void trace_##name(proto)                          \
+       { }                                                             \
+       static inline int register_trace_##name(void (*probe)(proto))   \
+       {                                                               \
+               return -ENOSYS;                                         \
+       }                                                               \
+       static inline void unregister_trace_##name(void (*probe)(proto))\
+       { }
+
+static inline void tracepoint_update_probe_range(struct tracepoint *begin,
+       struct tracepoint *end)
+{ }
+#endif /* CONFIG_TRACEPOINTS */
+
+/*
+ * Connect a probe to a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_register(const char *name, void *probe);
+
+/*
+ * Disconnect a probe from a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_unregister(const char *name, void *probe);
+
+struct tracepoint_iter {
+       struct module *module;
+       struct tracepoint *tracepoint;
+};
+
+extern void tracepoint_iter_start(struct tracepoint_iter *iter);
+extern void tracepoint_iter_next(struct tracepoint_iter *iter);
+extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
+extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
+extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+       struct tracepoint *begin, struct tracepoint *end);
+
+/*
+ * tracepoint_synchronize_unregister must be called between the last tracepoint
+ * probe unregistration and the end of module exit to make sure there is no
+ * caller executing a probe when it is freed.
+ */
+#define tracepoint_synchronize_unregister() synchronize_sched()
+
+#endif
diff --git a/include/trace/filemap.h b/include/trace/filemap.h
new file mode 100644 (file)
index 0000000..0a2f65d
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef _TRACE_FILEMAP_H
+#define _TRACE_FILEMAP_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(wait_on_page_start,
+       TPPROTO(struct page *page, int bit_nr),
+       TPARGS(page, bit_nr));
+DEFINE_TRACE(wait_on_page_end,
+       TPPROTO(struct page *page, int bit_nr),
+       TPARGS(page, bit_nr));
+
+#endif
diff --git a/include/trace/fs.h b/include/trace/fs.h
new file mode 100644 (file)
index 0000000..5e30365
--- /dev/null
@@ -0,0 +1,65 @@
+#ifndef _TRACE_FS_H
+#define _TRACE_FS_H
+
+#include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(fs_buffer_wait_start,
+       TPPROTO(struct buffer_head *bh),
+       TPARGS(bh));
+DEFINE_TRACE(fs_buffer_wait_end,
+       TPPROTO(struct buffer_head *bh),
+       TPARGS(bh));
+DEFINE_TRACE(fs_exec,
+       TPPROTO(char *filename),
+       TPARGS(filename));
+DEFINE_TRACE(fs_ioctl,
+       TPPROTO(unsigned int fd, unsigned int cmd, unsigned long arg),
+       TPARGS(fd, cmd, arg));
+DEFINE_TRACE(fs_open,
+       TPPROTO(int fd, char *filename),
+       TPARGS(fd, filename));
+DEFINE_TRACE(fs_close,
+       TPPROTO(unsigned int fd),
+       TPARGS(fd));
+DEFINE_TRACE(fs_lseek,
+       TPPROTO(unsigned int fd, long offset, unsigned int origin),
+       TPARGS(fd, offset, origin));
+DEFINE_TRACE(fs_llseek,
+       TPPROTO(unsigned int fd, loff_t offset, unsigned int origin),
+       TPARGS(fd, offset, origin));
+
+/*
+ * Probes must be aware that __user * may be modified by concurrent userspace
+ * or kernel threads.
+ */
+DEFINE_TRACE(fs_read,
+       TPPROTO(unsigned int fd, char __user *buf, size_t count, ssize_t ret),
+       TPARGS(fd, buf, count, ret));
+DEFINE_TRACE(fs_write,
+       TPPROTO(unsigned int fd, const char __user *buf, size_t count,
+               ssize_t ret),
+       TPARGS(fd, buf, count, ret));
+DEFINE_TRACE(fs_pread64,
+       TPPROTO(unsigned int fd, char __user *buf, size_t count, loff_t pos,
+               ssize_t ret),
+       TPARGS(fd, buf, count, pos, ret));
+DEFINE_TRACE(fs_pwrite64,
+       TPPROTO(unsigned int fd, const char __user *buf, size_t count,
+               loff_t pos, ssize_t ret),
+       TPARGS(fd, buf, count, pos, ret));
+DEFINE_TRACE(fs_readv,
+       TPPROTO(unsigned long fd, const struct iovec __user *vec,
+               unsigned long vlen, ssize_t ret),
+       TPARGS(fd, vec, vlen, ret));
+DEFINE_TRACE(fs_writev,
+       TPPROTO(unsigned long fd, const struct iovec __user *vec,
+               unsigned long vlen, ssize_t ret),
+       TPARGS(fd, vec, vlen, ret));
+DEFINE_TRACE(fs_select,
+       TPPROTO(int fd, s64 timeout),
+       TPARGS(fd, timeout));
+DEFINE_TRACE(fs_poll,
+       TPPROTO(int fd),
+       TPARGS(fd));
+#endif
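
As a concrete tie-in with the fs call sites instrumented elsewhere in this
patch, a probe for the fs_open tracepoint declared above only has to match its
TPPROTO. A brief editorial sketch (not code from the patch; the probe and init
function names are hypothetical, and teardown follows the same
unregister/synchronize pattern shown after the documentation above):

#include <linux/init.h>
#include <linux/kernel.h>
#include <trace/fs.h>

/* Must match DEFINE_TRACE(fs_open, TPPROTO(int fd, char *filename), ...). */
static void probe_fs_open(int fd, char *filename)
{
	printk(KERN_DEBUG "fs_open: fd=%d file=%s\n", fd, filename);
}

static int __init fs_open_probe_init(void)
{
	/* register_trace_fs_open() is generated by the DEFINE_TRACE() above. */
	return register_trace_fs_open(probe_fs_open);
}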
diff --git a/include/trace/hugetlb.h b/include/trace/hugetlb.h
new file mode 100644 (file)
index 0000000..5fbfb94
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef _TRACE_HUGETLB_H
+#define _TRACE_HUGETLB_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(hugetlb_page_release,
+       TPPROTO(struct page *page),
+       TPARGS(page));
+DEFINE_TRACE(hugetlb_page_grab,
+       TPPROTO(struct page *page),
+       TPARGS(page));
+DEFINE_TRACE(hugetlb_buddy_pgalloc,
+       TPPROTO(struct page *page),
+       TPARGS(page));
+DEFINE_TRACE(hugetlb_page_alloc,
+       TPPROTO(struct page *page),
+       TPARGS(page));
+DEFINE_TRACE(hugetlb_page_free,
+       TPPROTO(struct page *page),
+       TPARGS(page));
+DEFINE_TRACE(hugetlb_pages_reserve,
+       TPPROTO(struct inode *inode, long from, long to, int ret),
+       TPARGS(inode, from, to, ret));
+DEFINE_TRACE(hugetlb_pages_unreserve,
+       TPPROTO(struct inode *inode, long offset, long freed),
+       TPARGS(inode, offset, freed));
+
+#endif
diff --git a/include/trace/ipc.h b/include/trace/ipc.h
new file mode 100644 (file)
index 0000000..19a5c43
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef _TRACE_IPC_H
+#define _TRACE_IPC_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(ipc_msg_create,
+       TPPROTO(long id, int flags),
+       TPARGS(id, flags));
+DEFINE_TRACE(ipc_sem_create,
+       TPPROTO(long id, int flags),
+       TPARGS(id, flags));
+DEFINE_TRACE(ipc_shm_create,
+       TPPROTO(long id, int flags),
+       TPARGS(id, flags));
+#endif
diff --git a/include/trace/ipv4.h b/include/trace/ipv4.h
new file mode 100644 (file)
index 0000000..38617ba
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _TRACE_IPV4_H
+#define _TRACE_IPV4_H
+
+#include <linux/inetdevice.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(ipv4_addr_add,
+       TPPROTO(struct in_ifaddr *ifa),
+       TPARGS(ifa));
+DEFINE_TRACE(ipv4_addr_del,
+       TPPROTO(struct in_ifaddr *ifa),
+       TPARGS(ifa));
+
+#endif
diff --git a/include/trace/ipv6.h b/include/trace/ipv6.h
new file mode 100644 (file)
index 0000000..f8055f6
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _TRACE_IPV6_H
+#define _TRACE_IPV6_H
+
+#include <net/if_inet6.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(ipv6_addr_add,
+       TPPROTO(struct inet6_ifaddr *ifa),
+       TPARGS(ifa));
+DEFINE_TRACE(ipv6_addr_del,
+       TPPROTO(struct inet6_ifaddr *ifa),
+       TPARGS(ifa));
+
+#endif
diff --git a/include/trace/irq.h b/include/trace/irq.h
new file mode 100644 (file)
index 0000000..48dc584
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef _TRACE_IRQ_H
+#define _TRACE_IRQ_H
+
+#include <linux/kdebug.h>
+#include <linux/interrupt.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(irq_entry,
+       TPPROTO(unsigned int id, struct pt_regs *regs),
+       TPARGS(id, regs));
+DEFINE_TRACE(irq_exit,
+       TPPROTO(irqreturn_t retval),
+       TPARGS(retval));
+DEFINE_TRACE(irq_softirq_entry,
+       TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec),
+       TPARGS(h, softirq_vec));
+DEFINE_TRACE(irq_softirq_exit,
+       TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec),
+       TPARGS(h, softirq_vec));
+DEFINE_TRACE(irq_softirq_raise,
+       TPPROTO(unsigned int nr),
+       TPARGS(nr));
+DEFINE_TRACE(irq_tasklet_low_entry,
+       TPPROTO(struct tasklet_struct *t),
+       TPARGS(t));
+DEFINE_TRACE(irq_tasklet_low_exit,
+       TPPROTO(struct tasklet_struct *t),
+       TPARGS(t));
+DEFINE_TRACE(irq_tasklet_high_entry,
+       TPPROTO(struct tasklet_struct *t),
+       TPARGS(t));
+DEFINE_TRACE(irq_tasklet_high_exit,
+       TPPROTO(struct tasklet_struct *t),
+       TPARGS(t));
+
+#endif
diff --git a/include/trace/kernel.h b/include/trace/kernel.h
new file mode 100644 (file)
index 0000000..68f5fd4
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef _TRACE_KERNEL_H
+#define _TRACE_KERNEL_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(kernel_module_free,
+       TPPROTO(struct module *mod),
+       TPARGS(mod));
+DEFINE_TRACE(kernel_module_load,
+       TPPROTO(struct module *mod),
+       TPARGS(mod));
+
+#endif
diff --git a/include/trace/memory.h b/include/trace/memory.h
new file mode 100644 (file)
index 0000000..fc9d7fe
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _TRACE_MEMORY_H
+#define _TRACE_MEMORY_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(memory_handle_fault_entry,
+       TPPROTO(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, int write_access),
+       TPARGS(mm, vma, address, write_access));
+DEFINE_TRACE(memory_handle_fault_exit,
+       TPPROTO(int res),
+       TPARGS(res));
+
+#endif
diff --git a/include/trace/net.h b/include/trace/net.h
new file mode 100644 (file)
index 0000000..f673382
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _TRACE_NET_H
+#define _TRACE_NET_H
+
+#include <net/sock.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(net_dev_xmit,
+       TPPROTO(struct sk_buff *skb),
+       TPARGS(skb));
+DEFINE_TRACE(net_dev_receive,
+       TPPROTO(struct sk_buff *skb),
+       TPARGS(skb));
+
+#endif
diff --git a/include/trace/page_alloc.h b/include/trace/page_alloc.h
new file mode 100644 (file)
index 0000000..9f525f2
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef _TRACE_PAGE_ALLOC_H
+#define _TRACE_PAGE_ALLOC_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * mm_page_alloc : page can be NULL.
+ */
+DEFINE_TRACE(page_alloc,
+       TPPROTO(struct page *page, unsigned int order),
+       TPARGS(page, order));
+DEFINE_TRACE(page_free,
+       TPPROTO(struct page *page, unsigned int order),
+       TPARGS(page, order));
+
+#endif
diff --git a/include/trace/sched.h b/include/trace/sched.h
new file mode 100644 (file)
index 0000000..506ae13
--- /dev/null
@@ -0,0 +1,45 @@
+#ifndef _TRACE_SCHED_H
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(sched_kthread_stop,
+       TPPROTO(struct task_struct *t),
+       TPARGS(t));
+DEFINE_TRACE(sched_kthread_stop_ret,
+       TPPROTO(int ret),
+       TPARGS(ret));
+DEFINE_TRACE(sched_wait_task,
+       TPPROTO(struct rq *rq, struct task_struct *p),
+       TPARGS(rq, p));
+DEFINE_TRACE(sched_wakeup,
+       TPPROTO(struct rq *rq, struct task_struct *p),
+       TPARGS(rq, p));
+DEFINE_TRACE(sched_wakeup_new,
+       TPPROTO(struct rq *rq, struct task_struct *p),
+       TPARGS(rq, p));
+DEFINE_TRACE(sched_switch,
+       TPPROTO(struct rq *rq, struct task_struct *prev,
+               struct task_struct *next),
+       TPARGS(rq, prev, next));
+DEFINE_TRACE(sched_migrate_task,
+       TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
+       TPARGS(rq, p, dest_cpu));
+DEFINE_TRACE(sched_process_free,
+       TPPROTO(struct task_struct *p),
+       TPARGS(p));
+DEFINE_TRACE(sched_process_exit,
+       TPPROTO(struct task_struct *p),
+       TPARGS(p));
+DEFINE_TRACE(sched_process_wait,
+       TPPROTO(struct pid *pid),
+       TPARGS(pid));
+DEFINE_TRACE(sched_process_fork,
+       TPPROTO(struct task_struct *parent, struct task_struct *child),
+       TPARGS(parent, child));
+DEFINE_TRACE(sched_signal_send,
+       TPPROTO(int sig, struct task_struct *p),
+       TPARGS(sig, p));
+
+#endif
diff --git a/include/trace/socket.h b/include/trace/socket.h
new file mode 100644 (file)
index 0000000..876bfd8
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef _TRACE_SOCKET_H
+#define _TRACE_SOCKET_H
+
+#include <net/sock.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(socket_sendmsg,
+       TPPROTO(struct socket *sock, struct msghdr *msg, size_t size, int ret),
+       TPARGS(sock, msg, size, ret));
+DEFINE_TRACE(socket_recvmsg,
+       TPPROTO(struct socket *sock, struct msghdr *msg, size_t size, int flags,
+               int ret),
+       TPARGS(sock, msg, size, flags, ret));
+DEFINE_TRACE(socket_create,
+       TPPROTO(struct socket *sock, int fd),
+       TPARGS(sock, fd));
+/*
+ * socket_call
+ *
+ * TODO : This tracepoint should be expanded to cover each element of the
+ * switch in sys_socketcall().
+ */
+DEFINE_TRACE(socket_call,
+       TPPROTO(int call, unsigned long a0),
+       TPARGS(call, a0));
+#endif
diff --git a/include/trace/swap.h b/include/trace/swap.h
new file mode 100644 (file)
index 0000000..f74a711
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef _TRACE_SWAP_H
+#define _TRACE_SWAP_H
+
+#include <linux/swap.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(swap_in,
+       TPPROTO(struct page *page, swp_entry_t entry),
+       TPARGS(page, entry));
+DEFINE_TRACE(swap_out,
+       TPPROTO(struct page *page),
+       TPARGS(page));
+DEFINE_TRACE(swap_file_open,
+       TPPROTO(struct file *file, char *filename),
+       TPARGS(file, filename));
+DEFINE_TRACE(swap_file_close,
+       TPPROTO(struct file *file),
+       TPARGS(file));
+
+#endif
diff --git a/include/trace/timer.h b/include/trace/timer.h
new file mode 100644 (file)
index 0000000..60da060
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef _TRACE_TIMER_H
+#define _TRACE_TIMER_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(timer_itimer_expired,
+       TPPROTO(struct signal_struct *sig),
+       TPARGS(sig));
+DEFINE_TRACE(timer_itimer_set,
+       TPPROTO(int which, struct itimerval *value),
+       TPARGS(which, value));
+DEFINE_TRACE(timer_set,
+       TPPROTO(struct timer_list *timer),
+       TPARGS(timer));
+/*
+ * xtime_lock is taken when kernel_timer_update_time tracepoint is reached.
+ */
+DEFINE_TRACE(timer_update_time,
+       TPPROTO(struct timespec *_xtime, struct timespec *_wall_to_monotonic),
+       TPARGS(_xtime, _wall_to_monotonic));
+DEFINE_TRACE(timer_timeout,
+       TPPROTO(struct task_struct *p),
+       TPARGS(p));
+#endif
index 924e4a4..ddfa033 100644 (file)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -775,6 +775,13 @@ config PROFILING
          Say Y here to enable the extended profiling support mechanisms used
          by profilers such as OProfile.
 
+config TRACEPOINTS
+       bool "Activate tracepoints"
+       default y
+       help
+         Place an empty function call at each tracepoint site. It can be
+         dynamically replaced with a call to a probe function.
+
 config MARKERS
        bool "Activate markers"
        help
index b4eee1c..5ff465b 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -38,6 +38,7 @@
 #include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include <trace/ipc.h>
 
 #include <asm/current.h>
 #include <asm/uaccess.h>
@@ -314,6 +315,7 @@ asmlinkage long sys_msgget(key_t key, int msgflg)
        struct ipc_namespace *ns;
        struct ipc_ops msg_ops;
        struct ipc_params msg_params;
+       long ret;
 
        ns = current->nsproxy->ipc_ns;
 
@@ -324,7 +326,9 @@ asmlinkage long sys_msgget(key_t key, int msgflg)
        msg_params.key = key;
        msg_params.flg = msgflg;
 
-       return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
+       ret = ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
+       trace_ipc_msg_create(ret, msgflg);
+       return ret;
 }
 
 static inline unsigned long
index bf1bc36..f4b28e7 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -83,6 +83,7 @@
 #include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include <trace/ipc.h>
 
 #include <asm/uaccess.h>
 #include "util.h"
@@ -313,6 +314,7 @@ asmlinkage long sys_semget(key_t key, int nsems, int semflg)
        struct ipc_namespace *ns;
        struct ipc_ops sem_ops;
        struct ipc_params sem_params;
+       long err;
 
        ns = current->nsproxy->ipc_ns;
 
@@ -327,7 +329,9 @@ asmlinkage long sys_semget(key_t key, int nsems, int semflg)
        sem_params.flg = semflg;
        sem_params.u.nsems = nsems;
 
-       return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
+       err = ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
+       trace_ipc_sem_create(err, semflg);
+       return err;
 }
 
 /*
index e77ec69..59a344b 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -39,6 +39,7 @@
 #include <linux/nsproxy.h>
 #include <linux/mount.h>
 #include <linux/ipc_namespace.h>
+#include <trace/ipc.h>
 
 #include <asm/uaccess.h>
 
@@ -445,6 +446,7 @@ asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
        struct ipc_namespace *ns;
        struct ipc_ops shm_ops;
        struct ipc_params shm_params;
+       long err;
 
        ns = current->nsproxy->ipc_ns;
 
@@ -456,7 +458,9 @@ asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
        shm_params.flg = shmflg;
        shm_params.u.size = size;
 
-       return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
+       err = ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
+       trace_ipc_shm_create(err, shmflg);
+       return err;
 }
 
 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
index 3e37469..3be2a59 100644 (file)
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FTRACE) += trace/
index 52f6f82..b8bd014 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -50,6 +50,7 @@
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <trace/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -152,7 +153,10 @@ static void __exit_signal(struct task_struct *tsk)
 
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
-       put_task_struct(container_of(rhp, struct task_struct, rcu));
+       struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+       trace_sched_process_free(tsk);
+       put_task_struct(tsk);
 }
 
 
@@ -1069,6 +1073,8 @@ NORET_TYPE void do_exit(long code)
 
        if (group_dead)
                acct_process();
+       trace_sched_process_exit(tsk);
+
        exit_sem(tsk);
        exit_files(tsk);
        exit_fs(tsk);
@@ -1670,6 +1676,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
        struct task_struct *tsk;
        int retval;
 
+       trace_sched_process_wait(pid);
+
        add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
        /*
index 7ce2ebe..5013b59 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -58,6 +58,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <trace/sched.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1361,6 +1362,8 @@ long do_fork(unsigned long clone_flags,
        if (!IS_ERR(p)) {
                struct completion vfork;
 
+               trace_sched_process_fork(current, p);
+
                nr = task_pid_vnr(p);
 
                if (clone_flags & CLONE_PARENT_SETTID)
index 5fa6198..ab30dc2 100644 (file)
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -15,6 +15,7 @@
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <trace/irq.h>
 
 #include "internals.h"
 
@@ -130,6 +131,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 {
        irqreturn_t ret, retval = IRQ_NONE;
        unsigned int status = 0;
+       struct pt_regs *regs = get_irq_regs();
+
+       trace_irq_entry(irq, regs);
 
        handle_dynamic_tick(action);
 
@@ -148,6 +152,8 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
                add_interrupt_randomness(irq);
        local_irq_disable();
 
+       trace_irq_exit(retval);
+
        return retval;
 }
 
index ab98274..54c1c6b 100644 (file)
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -12,6 +12,7 @@
 #include <linux/time.h>
 #include <linux/posix-timers.h>
 #include <linux/hrtimer.h>
+#include <trace/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -132,6 +133,8 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
        struct signal_struct *sig =
                container_of(timer, struct signal_struct, real_timer);
 
+       trace_timer_itimer_expired(sig);
+
        kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
 
        return HRTIMER_NORESTART;
@@ -157,6 +160,8 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
            !timeval_valid(&value->it_interval))
                return -EINVAL;
 
+       trace_timer_itimer_set(which, value);
+
        switch (which) {
        case ITIMER_REAL:
 again:
index 96cff2f..50598e2 100644 (file)
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,6 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <trace/sched.h>
 
 #define KTHREAD_NICE_LEVEL (-5)
 
@@ -206,6 +207,8 @@ int kthread_stop(struct task_struct *k)
        /* It could exit after stop_info.k set, but before wake_up_process. */
        get_task_struct(k);
 
+       trace_sched_kthread_stop(k);
+
        /* Must init completion *before* thread sees kthread_stop_info.k */
        init_completion(&kthread_stop_info.done);
        smp_wmb();
@@ -221,6 +224,8 @@ int kthread_stop(struct task_struct *k)
        ret = kthread_stop_info.err;
        mutex_unlock(&kthread_stop_lock);
 
+       trace_sched_kthread_stop_ret(ret);
+
        return ret;
 }
 EXPORT_SYMBOL(kthread_stop);
index 7d1faec..1d2c560 100644 (file)
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -103,11 +103,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
        char ptype;
 
        /*
-        * preempt_disable does two things : disabling preemption to make sure
-        * the teardown of the callbacks can be done correctly when they are in
-        * modules and they insure RCU read coherency.
+        * rcu_read_lock_sched does two things : disabling preemption to make
+        * sure the teardown of the callbacks can be done correctly when they
+        * are in modules and they insure RCU read coherency.
         */
-       preempt_disable();
+       rcu_read_lock_sched();
        ptype = mdata->ptype;
        if (likely(!ptype)) {
                marker_probe_func *func;
@@ -145,7 +145,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
                        va_end(args);
                }
        }
-       preempt_enable();
+       rcu_read_unlock_sched();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb);
 
@@ -162,7 +162,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
        va_list args;   /* not initialized */
        char ptype;
 
-       preempt_disable();
+       rcu_read_lock_sched();
        ptype = mdata->ptype;
        if (likely(!ptype)) {
                marker_probe_func *func;
@@ -195,7 +195,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
                        multi[i].func(multi[i].probe_private, call_private,
                                mdata->format, &args);
        }
-       preempt_enable();
+       rcu_read_unlock_sched();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
@@ -560,7 +560,7 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
  * Disable a marker and its probe callback.
  * Note: only waiting an RCU period after setting elem->call to the empty
  * function insures that the original callback is not used anymore. This insured
- * by preempt_disable around the call site.
+ * by rcu_read_lock_sched around the call site.
  */
 static void disable_marker(struct marker *elem)
 {
index 3fa97d1..98d04c7 100644 (file)
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -46,6 +46,8 @@
 #include <asm/cacheflush.h>
 #include <linux/license.h>
 #include <asm/sections.h>
+#include <linux/tracepoint.h>
+#include <trace/kernel.h>
 
 #if 0
 #define DEBUGP printk
@@ -1429,6 +1431,8 @@ static int __unlink_module(void *_mod)
 /* Free a module, remove from lists, etc (must hold module_mutex). */
 static void free_module(struct module *mod)
 {
+       trace_kernel_module_free(mod);
+
        /* Delete from various lists */
        stop_machine(__unlink_module, mod, NULL);
        remove_notes_attrs(mod);
@@ -1845,6 +1849,8 @@ static noinline struct module *load_module(void __user *umod,
 #endif
        unsigned int markersindex;
        unsigned int markersstringsindex;
+       unsigned int tracepointsindex;
+       unsigned int tracepointsstringsindex;
        struct module *mod;
        long err = 0;
        void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -2153,6 +2159,9 @@ static noinline struct module *load_module(void __user *umod,
        markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
        markersstringsindex = find_sec(hdr, sechdrs, secstrings,
                                        "__markers_strings");
+       tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
+       tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
+                                       "__tracepoints_strings");
 
        /* Now do relocations. */
        for (i = 1; i < hdr->e_shnum; i++) {
@@ -2180,6 +2189,12 @@ static noinline struct module *load_module(void __user *umod,
        mod->num_markers =
                sechdrs[markersindex].sh_size / sizeof(*mod->markers);
 #endif
+#ifdef CONFIG_TRACEPOINTS
+       mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
+       mod->num_tracepoints =
+               sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
+#endif
+
 
         /* Find duplicate symbols */
        err = verify_export_symbols(mod);
@@ -2198,11 +2213,16 @@ static noinline struct module *load_module(void __user *umod,
 
        add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
+       if (!mod->taints) {
 #ifdef CONFIG_MARKERS
-       if (!mod->taints)
                marker_update_probe_range(mod->markers,
                        mod->markers + mod->num_markers);
 #endif
+#ifdef CONFIG_TRACEPOINTS
+               tracepoint_update_probe_range(mod->tracepoints,
+                       mod->tracepoints + mod->num_tracepoints);
+#endif
+       }
        err = module_finalize(hdr, sechdrs, mod);
        if (err < 0)
                goto cleanup;
@@ -2263,6 +2283,8 @@ static noinline struct module *load_module(void __user *umod,
        /* Get rid of temporary copy */
        vfree(hdr);
 
+       trace_kernel_module_load(mod);
+
        /* Done! */
        return mod;
 
@@ -2766,3 +2788,50 @@ void module_update_markers(void)
        mutex_unlock(&module_mutex);
 }
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+void module_update_tracepoints(void)
+{
+       struct module *mod;
+
+       mutex_lock(&module_mutex);
+       list_for_each_entry(mod, &modules, list)
+               if (!mod->taints)
+                       tracepoint_update_probe_range(mod->tracepoints,
+                               mod->tracepoints + mod->num_tracepoints);
+       mutex_unlock(&module_mutex);
+}
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+       struct module *iter_mod;
+       int found = 0;
+
+       mutex_lock(&module_mutex);
+       list_for_each_entry(iter_mod, &modules, list) {
+               if (!iter_mod->taints) {
+                       /*
+                        * Sorted module list
+                        */
+                       if (iter_mod < iter->module)
+                               continue;
+                       else if (iter_mod > iter->module)
+                               iter->tracepoint = NULL;
+                       found = tracepoint_get_iter_range(&iter->tracepoint,
+                               iter_mod->tracepoints,
+                               iter_mod->tracepoints
+                                       + iter_mod->num_tracepoints);
+                       if (found) {
+                               iter->module = iter_mod;
+                               break;
+                       }
+               }
+       }
+       mutex_unlock(&module_mutex);
+       return found;
+}
+#endif
index f42db3d..11a6538 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
+#include <trace/sched.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -1918,6 +1919,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                 * just go back and repeat.
                 */
                rq = task_rq_lock(p, &flags);
+               trace_sched_wait_task(rq, p);
                running = task_running(rq, p);
                on_rq = p->se.on_rq;
                ncsw = 0;
@@ -2310,9 +2312,7 @@ out_activate:
        success = 1;
 
 out_running:
-       trace_mark(kernel_sched_wakeup,
-               "pid %d state %ld ## rq %p task %p rq->curr %p",
-               p->pid, p->state, rq, p, rq->curr);
+       trace_sched_wakeup(rq, p);
        check_preempt_curr(rq, p);
 
        p->state = TASK_RUNNING;
@@ -2445,9 +2445,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
                p->sched_class->task_new(rq, p);
                inc_nr_running(rq);
        }
-       trace_mark(kernel_sched_wakeup_new,
-               "pid %d state %ld ## rq %p task %p rq->curr %p",
-               p->pid, p->state, rq, p, rq->curr);
+       trace_sched_wakeup_new(rq, p);
        check_preempt_curr(rq, p);
 #ifdef CONFIG_SMP
        if (p->sched_class->task_wake_up)
@@ -2620,11 +2618,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
        struct mm_struct *mm, *oldmm;
 
        prepare_task_switch(rq, prev, next);
-       trace_mark(kernel_sched_schedule,
-               "prev_pid %d next_pid %d prev_state %ld "
-               "## rq %p prev %p next %p",
-               prev->pid, next->pid, prev->state,
-               rq, prev, next);
+       trace_sched_switch(rq, prev, next);
        mm = next->mm;
        oldmm = prev->active_mm;
        /*
@@ -2864,6 +2858,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
            || unlikely(!cpu_active(dest_cpu)))
                goto out;
 
+       trace_sched_migrate_task(rq, p, dest_cpu);
        /* force the process onto the specified CPU */
        if (migrate_task(p, dest_cpu, &req)) {
                /* Need to wait for migration thread (might exit: take ref). */
index e42da3d..ee16fe5 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,6 +27,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
+#include <trace/sched.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -803,6 +804,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
        struct sigpending *pending;
        struct sigqueue *q;
 
+       trace_sched_signal_send(sig, t);
+
        assert_spin_locked(&t->sighand->siglock);
        if (!prepare_signal(sig, t))
                return 0;
index c506f26..2db8020 100644 (file)
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -21,6 +21,7 @@
 #include <linux/rcupdate.h>
 #include <linux/smp.h>
 #include <linux/tick.h>
+#include <trace/irq.h>
 
 #include <asm/irq.h>
 /*
@@ -205,7 +206,9 @@ restart:
 
        do {
                if (pending & 1) {
+                       trace_irq_softirq_entry(h, softirq_vec);
                        h->action(h);
+                       trace_irq_softirq_exit(h, softirq_vec);
                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
@@ -297,6 +300,7 @@ void irq_exit(void)
  */
 inline void raise_softirq_irqoff(unsigned int nr)
 {
+       trace_irq_softirq_raise(nr);
        __raise_softirq_irqoff(nr);
 
        /*
@@ -383,7 +387,9 @@ static void tasklet_action(struct softirq_action *a)
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
+                               trace_irq_tasklet_low_entry(t);
                                t->func(t->data);
+                               trace_irq_tasklet_low_exit(t);
                                tasklet_unlock(t);
                                continue;
                        }
@@ -418,7 +424,9 @@ static void tasklet_hi_action(struct softirq_action *a)
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
+                               trace_irq_tasklet_high_entry(t);
                                t->func(t->data);
+                               trace_irq_tasklet_high_exit(t);
                                tasklet_unlock(t);
                                continue;
                        }
index 49e4e19..366c3a6 100644 (file)
--- a/kernel/timer.c
+++ b/kernel/timer.c
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <trace/timer.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/div64.h>
 #include <asm/timex.h>
 #include <asm/io.h>
+#include <asm/irq_regs.h>
 
 u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
 
@@ -288,6 +290,7 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
                i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
                vec = base->tv5.vec + i;
        }
+       trace_timer_set(timer);
        /*
         * Timers are FIFO:
         */
@@ -1066,6 +1069,7 @@ void do_timer(unsigned long ticks)
 {
        jiffies_64 += ticks;
        update_times(ticks);
+       trace_timer_update_time(&xtime, &wall_to_monotonic);
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
@@ -1147,7 +1151,9 @@ asmlinkage long sys_getegid(void)
 
 static void process_timeout(unsigned long __data)
 {
-       wake_up_process((struct task_struct *)__data);
+       struct task_struct *task = (struct task_struct *)__data;
+       trace_timer_timeout(task);
+       wake_up_process(task);
 }
 
 /**
index 263e9e6..3a8bd3f 100644 (file)
@@ -18,7 +18,6 @@ config TRACING
 config FTRACE
        bool "Kernel Function Tracer"
        depends on HAVE_FTRACE
-       select FRAME_POINTER
        select TRACING
        select CONTEXT_SWITCH_TRACER
        help
@@ -79,6 +78,7 @@ config SYSPROF_TRACER
        bool "Sysprof Tracer"
        depends on X86
        select TRACING
+       select FRAME_POINTER
        help
          This tracer provides the trace needed by the 'Sysprof' userspace
          tool.
index cb817a2..789e927 100644 (file)
@@ -9,8 +9,8 @@
 #include <linux/debugfs.h>
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
-#include <linux/marker.h>
 #include <linux/ftrace.h>
+#include <trace/sched.h>
 
 #include "trace.h"
 
@@ -19,16 +19,17 @@ static int __read_mostly    tracer_enabled;
 static atomic_t                        sched_ref;
 
 static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
                        struct task_struct *next)
 {
-       struct trace_array **ptr = private;
-       struct trace_array *tr = *ptr;
        struct trace_array_cpu *data;
        unsigned long flags;
        long disabled;
        int cpu;
 
+       if (!atomic_read(&sched_ref))
+               return;
+
        tracing_record_cmdline(prev);
        tracing_record_cmdline(next);
 
@@ -37,95 +38,42 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
 
        local_irq_save(flags);
        cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
+       data = ctx_trace->data[cpu];
        disabled = atomic_inc_return(&data->disabled);
 
        if (likely(disabled == 1))
-               tracing_sched_switch_trace(tr, data, prev, next, flags);
+               tracing_sched_switch_trace(ctx_trace, data, prev, next, flags);
 
        atomic_dec(&data->disabled);
        local_irq_restore(flags);
 }
 
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-                     const char *format, va_list *args)
-{
-       struct task_struct *prev;
-       struct task_struct *next;
-       struct rq *__rq;
-
-       if (!atomic_read(&sched_ref))
-               return;
-
-       /* skip prev_pid %d next_pid %d prev_state %ld */
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, long);
-       __rq = va_arg(*args, typeof(__rq));
-       prev = va_arg(*args, typeof(prev));
-       next = va_arg(*args, typeof(next));
-
-       /*
-        * If tracer_switch_func only points to the local
-        * switch func, it still needs the ptr passed to it.
-        */
-       sched_switch_func(probe_data, __rq, prev, next);
-}
-
 static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
-                       task_struct *curr)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 {
-       struct trace_array **ptr = private;
-       struct trace_array *tr = *ptr;
        struct trace_array_cpu *data;
        unsigned long flags;
        long disabled;
        int cpu;
 
-       if (!tracer_enabled)
+       if (!likely(tracer_enabled))
                return;
 
-       tracing_record_cmdline(curr);
+       tracing_record_cmdline(current);
 
        local_irq_save(flags);
        cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
+       data = ctx_trace->data[cpu];
        disabled = atomic_inc_return(&data->disabled);
 
        if (likely(disabled == 1))
-               tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+               tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+                       flags);
 
        atomic_dec(&data->disabled);
        local_irq_restore(flags);
 }
 
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-                const char *format, va_list *args)
-{
-       struct task_struct *curr;
-       struct task_struct *task;
-       struct rq *__rq;
-
-       if (likely(!tracer_enabled))
-               return;
-
-       /* Skip pid %d state %ld */
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, long);
-       /* now get the meat: "rq %p task %p rq->curr %p" */
-       __rq = va_arg(*args, typeof(__rq));
-       task = va_arg(*args, typeof(task));
-       curr = va_arg(*args, typeof(curr));
-
-       tracing_record_cmdline(task);
-       tracing_record_cmdline(curr);
-
-       wakeup_func(probe_data, __rq, task, curr);
-}
-
 static void sched_switch_reset(struct trace_array *tr)
 {
        int cpu;
@@ -140,60 +88,40 @@ static int tracing_sched_register(void)
 {
        int ret;
 
-       ret = marker_probe_register("kernel_sched_wakeup",
-                       "pid %d state %ld ## rq %p task %p rq->curr %p",
-                       wake_up_callback,
-                       &ctx_trace);
+       ret = register_trace_sched_wakeup(probe_sched_wakeup);
        if (ret) {
-               pr_info("wakeup trace: Couldn't add marker"
+               pr_info("wakeup trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_wakeup\n");
                return ret;
        }
 
-       ret = marker_probe_register("kernel_sched_wakeup_new",
-                       "pid %d state %ld ## rq %p task %p rq->curr %p",
-                       wake_up_callback,
-                       &ctx_trace);
+       ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
        if (ret) {
-               pr_info("wakeup trace: Couldn't add marker"
+               pr_info("wakeup trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_wakeup_new\n");
                goto fail_deprobe;
        }
 
-       ret = marker_probe_register("kernel_sched_schedule",
-               "prev_pid %d next_pid %d prev_state %ld "
-               "## rq %p prev %p next %p",
-               sched_switch_callback,
-               &ctx_trace);
+       ret = register_trace_sched_switch(probe_sched_switch);
        if (ret) {
-               pr_info("sched trace: Couldn't add marker"
+               pr_info("sched trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_schedule\n");
                goto fail_deprobe_wake_new;
        }
 
        return ret;
 fail_deprobe_wake_new:
-       marker_probe_unregister("kernel_sched_wakeup_new",
-                               wake_up_callback,
-                               &ctx_trace);
+       unregister_trace_sched_wakeup_new(probe_sched_wakeup);
 fail_deprobe:
-       marker_probe_unregister("kernel_sched_wakeup",
-                               wake_up_callback,
-                               &ctx_trace);
+       unregister_trace_sched_wakeup(probe_sched_wakeup);
        return ret;
 }
 
 static void tracing_sched_unregister(void)
 {
-       marker_probe_unregister("kernel_sched_schedule",
-                               sched_switch_callback,
-                               &ctx_trace);
-       marker_probe_unregister("kernel_sched_wakeup_new",
-                               wake_up_callback,
-                               &ctx_trace);
-       marker_probe_unregister("kernel_sched_wakeup",
-                               wake_up_callback,
-                               &ctx_trace);
+       unregister_trace_sched_switch(probe_sched_switch);
+       unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+       unregister_trace_sched_wakeup(probe_sched_wakeup);
 }
 
 static void tracing_start_sched_switch(void)
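
The register_trace_sched_*() and unregister_trace_sched_*() calls used above replace the marker format-string registration with typed wrappers. Assuming the usual DEFINE_TRACE() expansion (the include/trace/sched.h header is not part of this hunk), each wrapper is a thin shim over the generic API added in kernel/tracepoint.c below, roughly:

    /* sketch of the generated wrappers, not the real macro expansion */
    static inline int register_trace_sched_wakeup(
            void (*probe)(struct rq *rq, struct task_struct *p))
    {
            return tracepoint_probe_register("sched_wakeup", (void *)probe);
    }

    static inline int unregister_trace_sched_wakeup(
            void (*probe)(struct rq *rq, struct task_struct *p))
    {
            return tracepoint_probe_unregister("sched_wakeup", (void *)probe);
    }

Because the probe signature has to match the tracepoint's TPPROTO() exactly, probe_sched_switch() and probe_sched_wakeup() can take typed arguments directly and drop the va_list unmarshalling that the old marker callbacks needed.
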
index e303ccb..08206b4 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <linux/marker.h>
+#include <trace/sched.h>
 
 #include "trace.h"
 
@@ -112,18 +112,18 @@ static int report_latency(cycle_t delta)
 }
 
 static void notrace
-wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
        struct task_struct *next)
 {
        unsigned long latency = 0, t0 = 0, t1 = 0;
-       struct trace_array **ptr = private;
-       struct trace_array *tr = *ptr;
        struct trace_array_cpu *data;
        cycle_t T0, T1, delta;
        unsigned long flags;
        long disabled;
        int cpu;
 
+       tracing_record_cmdline(prev);
+
        if (unlikely(!tracer_enabled))
                return;
 
@@ -140,11 +140,11 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
                return;
 
        /* The task we are waiting for is waking up */
-       data = tr->data[wakeup_cpu];
+       data = wakeup_trace->data[wakeup_cpu];
 
        /* disable local data, not wakeup_cpu data */
        cpu = raw_smp_processor_id();
-       disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+       disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
        if (likely(disabled != 1))
                goto out;
 
@@ -155,7 +155,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
        if (unlikely(!tracer_enabled || next != wakeup_task))
                goto out_unlock;
 
-       trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+       trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags);
 
        /*
         * usecs conversion is slow so we try to delay the conversion
@@ -174,39 +174,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
        t0 = nsecs_to_usecs(T0);
        t1 = nsecs_to_usecs(T1);
 
-       update_max_tr(tr, wakeup_task, wakeup_cpu);
+       update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
 
 out_unlock:
-       __wakeup_reset(tr);
+       __wakeup_reset(wakeup_trace);
        __raw_spin_unlock(&wakeup_lock);
        local_irq_restore(flags);
 out:
-       atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-                     const char *format, va_list *args)
-{
-       struct task_struct *prev;
-       struct task_struct *next;
-       struct rq *__rq;
-
-       /* skip prev_pid %d next_pid %d prev_state %ld */
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, long);
-       __rq = va_arg(*args, typeof(__rq));
-       prev = va_arg(*args, typeof(prev));
-       next = va_arg(*args, typeof(next));
-
-       tracing_record_cmdline(prev);
-
-       /*
-        * If tracer_switch_func only points to the local
-        * switch func, it still needs the ptr passed to it.
-        */
-       wakeup_sched_switch(probe_data, __rq, prev, next);
+       atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
 
 static void __wakeup_reset(struct trace_array *tr)
@@ -240,19 +215,24 @@ static void wakeup_reset(struct trace_array *tr)
 }
 
 static void
-wakeup_check_start(struct trace_array *tr, struct task_struct *p,
-                  struct task_struct *curr)
+probe_wakeup(struct rq *rq, struct task_struct *p)
 {
        int cpu = smp_processor_id();
        unsigned long flags;
        long disabled;
 
+       if (likely(!tracer_enabled))
+               return;
+
+       tracing_record_cmdline(p);
+       tracing_record_cmdline(current);
+
        if (likely(!rt_task(p)) ||
                        p->prio >= wakeup_prio ||
-                       p->prio >= curr->prio)
+                       p->prio >= current->prio)
                return;
 
-       disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+       disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
        if (unlikely(disabled != 1))
                goto out;
 
@@ -264,7 +244,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
                goto out_locked;
 
        /* reset the trace */
-       __wakeup_reset(tr);
+       __wakeup_reset(wakeup_trace);
 
        wakeup_cpu = task_cpu(p);
        wakeup_prio = p->prio;
@@ -274,74 +254,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 
        local_save_flags(flags);
 
-       tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
-       trace_function(tr, tr->data[wakeup_cpu],
+       wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+       trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
                       CALLER_ADDR1, CALLER_ADDR2, flags);
 
 out_locked:
        __raw_spin_unlock(&wakeup_lock);
 out:
-       atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-                const char *format, va_list *args)
-{
-       struct trace_array **ptr = probe_data;
-       struct trace_array *tr = *ptr;
-       struct task_struct *curr;
-       struct task_struct *task;
-       struct rq *__rq;
-
-       if (likely(!tracer_enabled))
-               return;
-
-       /* Skip pid %d state %ld */
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, long);
-       /* now get the meat: "rq %p task %p rq->curr %p" */
-       __rq = va_arg(*args, typeof(__rq));
-       task = va_arg(*args, typeof(task));
-       curr = va_arg(*args, typeof(curr));
-
-       tracing_record_cmdline(task);
-       tracing_record_cmdline(curr);
-
-       wakeup_check_start(tr, task, curr);
+       atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
 
 static void start_wakeup_tracer(struct trace_array *tr)
 {
        int ret;
 
-       ret = marker_probe_register("kernel_sched_wakeup",
-                       "pid %d state %ld ## rq %p task %p rq->curr %p",
-                       wake_up_callback,
-                       &wakeup_trace);
+       ret = register_trace_sched_wakeup(probe_wakeup);
        if (ret) {
-               pr_info("wakeup trace: Couldn't add marker"
+               pr_info("wakeup trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_wakeup\n");
                return;
        }
 
-       ret = marker_probe_register("kernel_sched_wakeup_new",
-                       "pid %d state %ld ## rq %p task %p rq->curr %p",
-                       wake_up_callback,
-                       &wakeup_trace);
+       ret = register_trace_sched_wakeup_new(probe_wakeup);
        if (ret) {
-               pr_info("wakeup trace: Couldn't add marker"
+               pr_info("wakeup trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_wakeup_new\n");
                goto fail_deprobe;
        }
 
-       ret = marker_probe_register("kernel_sched_schedule",
-               "prev_pid %d next_pid %d prev_state %ld "
-               "## rq %p prev %p next %p",
-               sched_switch_callback,
-               &wakeup_trace);
+       ret = register_trace_sched_switch(probe_wakeup_sched_switch);
        if (ret) {
-               pr_info("sched trace: Couldn't add marker"
+               pr_info("sched trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_schedule\n");
                goto fail_deprobe_wake_new;
        }
@@ -363,28 +306,18 @@ static void start_wakeup_tracer(struct trace_array *tr)
 
        return;
 fail_deprobe_wake_new:
-       marker_probe_unregister("kernel_sched_wakeup_new",
-                               wake_up_callback,
-                               &wakeup_trace);
+       unregister_trace_sched_wakeup_new(probe_wakeup);
 fail_deprobe:
-       marker_probe_unregister("kernel_sched_wakeup",
-                               wake_up_callback,
-                               &wakeup_trace);
+       unregister_trace_sched_wakeup(probe_wakeup);
 }
 
 static void stop_wakeup_tracer(struct trace_array *tr)
 {
        tracer_enabled = 0;
        unregister_ftrace_function(&trace_ops);
-       marker_probe_unregister("kernel_sched_schedule",
-                               sched_switch_callback,
-                               &wakeup_trace);
-       marker_probe_unregister("kernel_sched_wakeup_new",
-                               wake_up_callback,
-                               &wakeup_trace);
-       marker_probe_unregister("kernel_sched_wakeup",
-                               wake_up_callback,
-                               &wakeup_trace);
+       unregister_trace_sched_switch(probe_wakeup_sched_switch);
+       unregister_trace_sched_wakeup_new(probe_wakeup);
+       unregister_trace_sched_wakeup(probe_wakeup);
 }
 
 static void wakeup_tracer_init(struct trace_array *tr)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
new file mode 100644 (file)
index 0000000..c7c62a4
--- /dev/null
@@ -0,0 +1,476 @@
+/*
+ * Copyright (C) 2008 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/tracepoint.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+extern struct tracepoint __start___tracepoints[];
+extern struct tracepoint __stop___tracepoints[];
+
+/* Set to 1 to enable tracepoint debug output */
+static const int tracepoint_debug;
+
+/*
+ * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
+ * builtin and module tracepoints and the hash table.
+ */
+static DEFINE_MUTEX(tracepoints_mutex);
+
+/*
+ * Tracepoint hash table, containing the active tracepoints.
+ * Protected by tracepoints_mutex.
+ */
+#define TRACEPOINT_HASH_BITS 6
+#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+
+/*
+ * Note about RCU :
+ * It is used to delay the freeing of old probe arrays until a quiescent
+ * state is reached.
+ * Tracepoint entries modifications are protected by the tracepoints_mutex.
+ */
+struct tracepoint_entry {
+       struct hlist_node hlist;
+       void **funcs;
+       int refcount;   /* Number of times armed. 0 if disarmed. */
+       struct rcu_head rcu;
+       void *oldptr;
+       unsigned char rcu_pending:1;
+       char name[0];
+};
+
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+
+static void free_old_closure(struct rcu_head *head)
+{
+       struct tracepoint_entry *entry = container_of(head,
+               struct tracepoint_entry, rcu);
+       kfree(entry->oldptr);
+       /* Make sure we free the data before setting the pending flag to 0 */
+       smp_wmb();
+       entry->rcu_pending = 0;
+}
+
+static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
+{
+       if (!old)
+               return;
+       entry->oldptr = old;
+       entry->rcu_pending = 1;
+       /* write rcu_pending before calling the RCU callback */
+       smp_wmb();
+#ifdef CONFIG_PREEMPT_RCU
+       synchronize_sched();    /* Until we have the call_rcu_sched() */
+#endif
+       call_rcu(&entry->rcu, free_old_closure);
+}
+
+static void debug_print_probes(struct tracepoint_entry *entry)
+{
+       int i;
+
+       if (!tracepoint_debug)
+               return;
+
+       for (i = 0; entry->funcs[i]; i++)
+               printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
+}
+
+static void *
+tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
+{
+       int nr_probes = 0;
+       void **old, **new;
+
+       WARN_ON(!probe);
+
+       debug_print_probes(entry);
+       old = entry->funcs;
+       if (old) {
+               /* (N -> N+1), (N != 0, 1) probes */
+               for (nr_probes = 0; old[nr_probes]; nr_probes++)
+                       if (old[nr_probes] == probe)
+                               return ERR_PTR(-EEXIST);
+       }
+       /* + 2 : one for new probe, one for NULL func */
+       new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+       if (new == NULL)
+               return ERR_PTR(-ENOMEM);
+       if (old)
+               memcpy(new, old, nr_probes * sizeof(void *));
+       new[nr_probes] = probe;
+       entry->refcount = nr_probes + 1;
+       entry->funcs = new;
+       debug_print_probes(entry);
+       return old;
+}
+
+static void *
+tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
+{
+       int nr_probes = 0, nr_del = 0, i;
+       void **old, **new;
+
+       old = entry->funcs;
+
+       debug_print_probes(entry);
+       /* (N -> M), (N > 1, M >= 0) probes */
+       for (nr_probes = 0; old[nr_probes]; nr_probes++) {
+               if ((!probe || old[nr_probes] == probe))
+                       nr_del++;
+       }
+
+       if (nr_probes - nr_del == 0) {
+               /* N -> 0, (N > 1) */
+               entry->funcs = NULL;
+               entry->refcount = 0;
+               debug_print_probes(entry);
+               return old;
+       } else {
+               int j = 0;
+               /* N -> M, (N > 1, M > 0) */
+               /* + 1 for NULL */
+               new = kzalloc((nr_probes - nr_del + 1)
+                       * sizeof(void *), GFP_KERNEL);
+               if (new == NULL)
+                       return ERR_PTR(-ENOMEM);
+               for (i = 0; old[i]; i++)
+                       if ((probe && old[i] != probe))
+                               new[j++] = old[i];
+               entry->refcount = nr_probes - nr_del;
+               entry->funcs = new;
+       }
+       debug_print_probes(entry);
+       return old;
+}
+
+/*
+ * Get tracepoint if the tracepoint is present in the tracepoint hash table.
+ * Must be called with tracepoints_mutex held.
+ * Returns NULL if not present.
+ */
+static struct tracepoint_entry *get_tracepoint(const char *name)
+{
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct tracepoint_entry *e;
+       u32 hash = jhash(name, strlen(name), 0);
+
+       head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+       hlist_for_each_entry(e, node, head, hlist) {
+               if (!strcmp(name, e->name))
+                       return e;
+       }
+       return NULL;
+}
+
+/*
+ * Add the tracepoint to the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static struct tracepoint_entry *add_tracepoint(const char *name)
+{
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct tracepoint_entry *e;
+       size_t name_len = strlen(name) + 1;
+       u32 hash = jhash(name, name_len-1, 0);
+
+       head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+       hlist_for_each_entry(e, node, head, hlist) {
+               if (!strcmp(name, e->name)) {
+                       printk(KERN_NOTICE
+                               "tracepoint %s busy\n", name);
+                       return ERR_PTR(-EEXIST);        /* Already there */
+               }
+       }
+       /*
+        * Using kmalloc here to allocate a variable length element. Could
+        * cause some memory fragmentation if overused.
+        */
+       e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
+       if (!e)
+               return ERR_PTR(-ENOMEM);
+       memcpy(&e->name[0], name, name_len);
+       e->funcs = NULL;
+       e->refcount = 0;
+       e->rcu_pending = 0;
+       hlist_add_head(&e->hlist, head);
+       return e;
+}
+
+/*
+ * Remove the tracepoint from the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static int remove_tracepoint(const char *name)
+{
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct tracepoint_entry *e;
+       int found = 0;
+       size_t len = strlen(name) + 1;
+       u32 hash = jhash(name, len-1, 0);
+
+       head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+       hlist_for_each_entry(e, node, head, hlist) {
+               if (!strcmp(name, e->name)) {
+                       found = 1;
+                       break;
+               }
+       }
+       if (!found)
+               return -ENOENT;
+       if (e->refcount)
+               return -EBUSY;
+       hlist_del(&e->hlist);
+       /* Make sure the call_rcu has been executed */
+       if (e->rcu_pending)
+               rcu_barrier();
+       kfree(e);
+       return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one tracepoint.
+ */
+static void set_tracepoint(struct tracepoint_entry **entry,
+       struct tracepoint *elem, int active)
+{
+       WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+       /*
+        * rcu_assign_pointer has a smp_wmb() which makes sure that the new
+        * probe callbacks array is consistent before setting a pointer to it.
+        * This array is referenced by __DO_TRACE from
+        * include/linux/tracepoint.h. A matching smp_read_barrier_depends()
+        * is used.
+        */
+       rcu_assign_pointer(elem->funcs, (*entry)->funcs);
+       elem->state = active;
+}
+
+/*
+ * Disable a tracepoint and its probe callback.
+ * Note: merely waiting an RCU grace period after setting elem->call to the empty
+ * function ensures that the original callback is not used anymore. This is
+ * ensured by the preempt_disable around the call site.
+ */
+static void disable_tracepoint(struct tracepoint *elem)
+{
+       elem->state = 0;
+}
+
+/**
+ * tracepoint_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Updates the probe callback corresponding to a range of tracepoints.
+ */
+void tracepoint_update_probe_range(struct tracepoint *begin,
+       struct tracepoint *end)
+{
+       struct tracepoint *iter;
+       struct tracepoint_entry *mark_entry;
+
+       mutex_lock(&tracepoints_mutex);
+       for (iter = begin; iter < end; iter++) {
+               mark_entry = get_tracepoint(iter->name);
+               if (mark_entry) {
+                       set_tracepoint(&mark_entry, iter,
+                                       !!mark_entry->refcount);
+               } else {
+                       disable_tracepoint(iter);
+               }
+       }
+       mutex_unlock(&tracepoints_mutex);
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ */
+static void tracepoint_update_probes(void)
+{
+       /* Core kernel tracepoints */
+       tracepoint_update_probe_range(__start___tracepoints,
+               __stop___tracepoints);
+       /* tracepoints in modules. */
+       module_update_tracepoints();
+}
+
+/**
+ * tracepoint_probe_register -  Connect a probe to a tracepoint
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
+ */
+int tracepoint_probe_register(const char *name, void *probe)
+{
+       struct tracepoint_entry *entry;
+       int ret = 0;
+       void *old;
+
+       mutex_lock(&tracepoints_mutex);
+       entry = get_tracepoint(name);
+       if (!entry) {
+               entry = add_tracepoint(name);
+               if (IS_ERR(entry)) {
+                       ret = PTR_ERR(entry);
+                       goto end;
+               }
+       }
+       /*
+        * If we detect that a call_rcu is pending for this tracepoint,
+        * make sure it's executed now.
+        */
+       if (entry->rcu_pending)
+               rcu_barrier();
+       old = tracepoint_entry_add_probe(entry, probe);
+       if (IS_ERR(old)) {
+               ret = PTR_ERR(old);
+               goto end;
+       }
+       mutex_unlock(&tracepoints_mutex);
+       tracepoint_update_probes();             /* may update entry */
+       mutex_lock(&tracepoints_mutex);
+       entry = get_tracepoint(name);
+       WARN_ON(!entry);
+       tracepoint_entry_free_old(entry, old);
+end:
+       mutex_unlock(&tracepoints_mutex);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+
+/**
+ * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which ensures that every preempt-disabled section
+ * has finished.
+ */
+int tracepoint_probe_unregister(const char *name, void *probe)
+{
+       struct tracepoint_entry *entry;
+       void *old;
+       int ret = -ENOENT;
+
+       mutex_lock(&tracepoints_mutex);
+       entry = get_tracepoint(name);
+       if (!entry)
+               goto end;
+       if (entry->rcu_pending)
+               rcu_barrier();
+       old = tracepoint_entry_remove_probe(entry, probe);
+       mutex_unlock(&tracepoints_mutex);
+       tracepoint_update_probes();             /* may update entry */
+       mutex_lock(&tracepoints_mutex);
+       entry = get_tracepoint(name);
+       if (!entry)
+               goto end;
+       tracepoint_entry_free_old(entry, old);
+       remove_tracepoint(name);        /* Ignore busy error message */
+       ret = 0;
+end:
+       mutex_unlock(&tracepoints_mutex);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+/**
+ * tracepoint_get_iter_range - Get the next tracepoint in a given range.
+ * @tracepoint: current tracepoint (in), next tracepoint (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next tracepoint has been found (1) or not (0).
+ * Will return the first tracepoint in the range if the input tracepoint is
+ * NULL.
+ */
+int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+       struct tracepoint *begin, struct tracepoint *end)
+{
+       if (!*tracepoint && begin != end) {
+               *tracepoint = begin;
+               return 1;
+       }
+       if (*tracepoint >= begin && *tracepoint < end)
+               return 1;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
+
+static void tracepoint_get_iter(struct tracepoint_iter *iter)
+{
+       int found = 0;
+
+       /* Core kernel tracepoints */
+       if (!iter->module) {
+               found = tracepoint_get_iter_range(&iter->tracepoint,
+                               __start___tracepoints, __stop___tracepoints);
+               if (found)
+                       goto end;
+       }
+       /* tracepoints in modules. */
+       found = module_get_iter_tracepoints(iter);
+end:
+       if (!found)
+               tracepoint_iter_reset(iter);
+}
+
+void tracepoint_iter_start(struct tracepoint_iter *iter)
+{
+       tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_start);
+
+void tracepoint_iter_next(struct tracepoint_iter *iter)
+{
+       iter->tracepoint++;
+       /*
+        * iter->tracepoint may be invalid because we blindly incremented it.
+        * Make sure it is valid by checking it against the tracepoint ranges,
+        * moving on to the following modules if necessary.
+        */
+       tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_next);
+
+void tracepoint_iter_stop(struct tracepoint_iter *iter)
+{
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
+
+void tracepoint_iter_reset(struct tracepoint_iter *iter)
+{
+       iter->module = NULL;
+       iter->tracepoint = NULL;
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
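
kernel/tracepoint.c only manages the name/state/funcs triple; the side that consumes it is the __DO_TRACE macro in the new include/linux/tracepoint.h mentioned in set_tracepoint() above, which this diff does not include. A simplified sketch of that consumer, assuming the conventions visible here (NULL-terminated probe array published with rcu_assign_pointer(), callbacks run with preemption disabled):

    #include <linux/rcupdate.h>
    #include <linux/preempt.h>

    /* Illustrative only. The real __DO_TRACE expands per tracepoint with the
     * typed TPPROTO() arguments instead of the single "unsigned long" used here,
     * and struct tracepoint is defined in include/linux/tracepoint.h. */
    struct tracepoint {
            const char *name;       /* e.g. "sched_switch" */
            int state;              /* non-zero once at least one probe is armed */
            void **funcs;           /* RCU-published, NULL-terminated probe array */
    };

    static inline void example_trace_call(struct tracepoint *tp, unsigned long data)
    {
            void **it_func;

            if (!tp->state)                         /* disarmed: cheap early exit */
                    return;

            preempt_disable();
            it_func = rcu_dereference(tp->funcs);
            if (it_func) {
                    do {
                            ((void (*)(unsigned long))(*it_func))(data);
                    } while (*(++it_func));
            }
            preempt_enable();
    }

This is also why tracepoint_entry_free_old() only needs an RCU(-sched) grace period before kfree()ing a replaced array: once the rcu_assign_pointer() in set_tracepoint() is visible, no caller can still be walking the old funcs array after that grace period has elapsed.
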
index 5e47388..c11ae76 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/memcontrol.h>
 #include <linux/page-states.h>
 #include "internal.h"
+#include <trace/filemap.h>
 
 /*
  * FIXME: remove all knowledge of the buffer layer from the core VM
@@ -628,9 +629,11 @@ void wait_on_page_bit(struct page *page, int bit_nr)
 {
        DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
 
+       trace_wait_on_page_start(page, bit_nr);
        if (test_bit(bit_nr, &page->flags))
                __wait_on_bit(page_waitqueue(page), &wait, sync_page,
                                                        TASK_UNINTERRUPTIBLE);
+       trace_wait_on_page_end(page, bit_nr);
 }
 EXPORT_SYMBOL(wait_on_page_bit);
 
index 45ec9dd..7f3482b 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/mutex.h>
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
+#include <trace/hugetlb.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -455,6 +456,7 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 {
        int i;
 
+       trace_hugetlb_page_release(page);
        h->nr_huge_pages--;
        h->nr_huge_pages_node[page_to_nid(page)]--;
        for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -489,6 +491,7 @@ static void free_huge_page(struct page *page)
        int nid = page_to_nid(page);
        struct address_space *mapping;
 
+       trace_hugetlb_page_free(page);
        mapping = (struct address_space *) page_private(page);
        set_page_private(page, 0);
        BUG_ON(page_count(page));
@@ -556,8 +559,10 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
        struct page *page;
 
-       if (h->order >= MAX_ORDER)
-               return NULL;
+       if (h->order >= MAX_ORDER) {
+               page = NULL;
+               goto end;
+       }
 
        page = alloc_pages_node(nid,
                htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
@@ -566,11 +571,13 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
        if (page) {
                if (arch_prepare_hugepage(page)) {
                        __free_pages(page, huge_page_order(h));
-                       return NULL;
+                       page = NULL;
+                       goto end;
                }
                prep_new_huge_page(h, page, nid);
        }
-
+end:
+       trace_hugetlb_page_grab(page);
        return page;
 }
 
@@ -654,7 +661,8 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
        spin_lock(&hugetlb_lock);
        if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
                spin_unlock(&hugetlb_lock);
-               return NULL;
+               page = NULL;
+               goto end;
        } else {
                h->nr_huge_pages++;
                h->surplus_huge_pages++;
@@ -692,7 +700,8 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
                __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
        }
        spin_unlock(&hugetlb_lock);
-
+end:
+       trace_hugetlb_buddy_pgalloc(page);
        return page;
 }
 
@@ -931,6 +940,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 
        vma_commit_reservation(h, vma, addr);
 
+       trace_hugetlb_page_alloc(page);
        return page;
 }
 
@@ -2187,11 +2197,12 @@ int hugetlb_reserve_pages(struct inode *inode,
                                        long from, long to,
                                        struct vm_area_struct *vma)
 {
-       long ret, chg;
+       int ret = 0;
+       long chg;
        struct hstate *h = hstate_inode(inode);
 
        if (vma && vma->vm_flags & VM_NORESERVE)
-               return 0;
+               goto end;
 
        /*
         * Shared mappings base their reservation on the number of pages that
@@ -2203,8 +2214,10 @@ int hugetlb_reserve_pages(struct inode *inode,
                chg = region_chg(&inode->i_mapping->private_list, from, to);
        else {
                struct resv_map *resv_map = resv_map_alloc();
-               if (!resv_map)
-                       return -ENOMEM;
+               if (!resv_map) {
+                       ret = -ENOMEM;
+                       goto end;
+               }
 
                chg = to - from;
 
@@ -2212,26 +2225,34 @@ int hugetlb_reserve_pages(struct inode *inode,
                set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
        }
 
-       if (chg < 0)
-               return chg;
+       if (chg < 0) {
+               ret = chg;
+               goto end;
+       }
 
-       if (hugetlb_get_quota(inode->i_mapping, chg))
-               return -ENOSPC;
+       if (hugetlb_get_quota(inode->i_mapping, chg)) {
+               ret = -ENOSPC;
+               goto end;
+       }
        ret = hugetlb_acct_memory(h, chg);
        if (ret < 0) {
                hugetlb_put_quota(inode->i_mapping, chg);
-               return ret;
+               goto end;
        }
        if (!vma || vma->vm_flags & VM_SHARED)
                region_add(&inode->i_mapping->private_list, from, to);
-       return 0;
+end:
+       trace_hugetlb_pages_reserve(inode, from, to, ret);
+       return ret;
 }
 
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 {
        struct hstate *h = hstate_inode(inode);
-       long chg = region_truncate(&inode->i_mapping->private_list, offset);
+       long chg;
 
+       trace_hugetlb_pages_unreserve(inode, offset, freed);
+       chg = region_truncate(&inode->i_mapping->private_list, offset);
        spin_lock(&inode->i_lock);
        inode->i_blocks -= blocks_per_huge_page(h);
        spin_unlock(&inode->i_lock);
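
Several of the mm hunks in this series (alloc_fresh_huge_page_node(), alloc_buddy_huge_page() and hugetlb_reserve_pages() above, handle_mm_fault() below) apply the same mechanical rewrite: early returns are funnelled through a single exit label so that one trace call observes every outcome, failure paths included. In generic form (purely illustrative, not kernel code):

    static int compute(int input)
    {
            int res;

            if (input < 0) {
                    res = -1;               /* was: return -1; */
                    goto end;
            }
            res = 2 * input;
    end:
            /* a single trace_*(..., res) call here sees success and failure alike */
            return res;
    }
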
index c5d7dbb..dc18d74 100644 (file)
 
 #include <linux/swapops.h>
 #include <linux/elf.h>
+#include <trace/memory.h>
 
 #include "internal.h"
 
+#include <trace/swap.h>
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -2390,6 +2393,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                /* Had to read the page from swap area: Major fault */
                ret = VM_FAULT_MAJOR;
                count_vm_event(PGMAJFAULT);
+               trace_swap_in(page, entry);
        }
 
        if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
@@ -2792,30 +2796,44 @@ unlock:
 int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, int write_access)
 {
+       int res;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
 
+       trace_memory_handle_fault_entry(mm, vma, address, write_access);
+
        __set_current_state(TASK_RUNNING);
 
        count_vm_event(PGFAULT);
 
-       if (unlikely(is_vm_hugetlb_page(vma)))
-               return hugetlb_fault(mm, vma, address, write_access);
+       if (unlikely(is_vm_hugetlb_page(vma))) {
+               res = hugetlb_fault(mm, vma, address, write_access);
+               goto end;
+       }
 
        pgd = pgd_offset(mm, address);
        pud = pud_alloc(mm, pgd, address);
-       if (!pud)
-               return VM_FAULT_OOM;
+       if (!pud) {
+               res = VM_FAULT_OOM;
+               goto end;
+       }
        pmd = pmd_alloc(mm, pud, address);
-       if (!pmd)
-               return VM_FAULT_OOM;
+       if (!pmd) {
+               res = VM_FAULT_OOM;
+               goto end;
+       }
        pte = pte_alloc_map(mm, pmd, address);
-       if (!pte)
-               return VM_FAULT_OOM;
+       if (!pte) {
+               res = VM_FAULT_OOM;
+               goto end;
+       }
 
-       return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+       res = handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+end:
+       trace_memory_handle_fault_exit(res);
+       return res;
 }
 EXPORT_SYMBOL_GPL(handle_mm_fault); /* For MoL */
 
index 755bc69..aa6fbcb 100644 (file)
@@ -52,6 +52,8 @@
 #include <asm/div64.h>
 #include "internal.h"
 
+#include <trace/page_alloc.h>
+
 /*
  * Array of node states.
  */
@@ -512,6 +514,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
        int i;
        int reserved = 0;
 
+       trace_page_free(page, order);
+
 #ifdef CONFIG_XEN
        if (PageForeign(page)) {
                PageForeignDestructor(page);
@@ -981,6 +985,8 @@ static void free_hot_cold_page(struct page *page, int cold)
        struct per_cpu_pages *pcp;
        unsigned long flags;
 
+       trace_page_free(page, 0);
+
        if (unlikely(PageDiscarded(page))) {
                if (page_free_discarded(page))
                        return;
@@ -1664,6 +1670,7 @@ nopage:
                show_mem();
        }
 got_pg:
+       trace_page_alloc(page, order);
        return page;
 }
 EXPORT_SYMBOL(__alloc_pages_internal);
index 065c448..5a252c1 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/bio.h>
 #include <linux/swapops.h>
 #include <linux/writeback.h>
+#include <trace/swap.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
@@ -114,6 +115,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
                rw |= (1 << BIO_RW_SYNC);
        count_vm_event(PSWPOUT);
        set_page_writeback(page);
+       trace_swap_out(page);
        unlock_page(page);
        submit_bio(rw, bio);
 out:
index f4aee37..db4bde6 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
+#include <trace/swap.h>
 
 static DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
@@ -1343,6 +1344,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
        swap_map = p->swap_map;
        p->swap_map = NULL;
        p->flags = 0;
+       trace_swap_file_close(swap_file);
        spin_unlock(&swap_lock);
        mutex_unlock(&swapon_mutex);
        vfree(swap_map);
@@ -1722,6 +1724,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
        } else {
                swap_info[prev].next = p - swap_info;
        }
+       trace_swap_file_open(swap_file, name);
        spin_unlock(&swap_lock);
        mutex_unlock(&swapon_mutex);
        error = 0;
index 65bf359..16acd9b 100644 (file)
 #include <linux/in.h>
 #include <linux/jhash.h>
 #include <linux/random.h>
+#include <trace/net.h>
 
 #include "net-sysfs.h"
 
@@ -1856,6 +1857,7 @@ int dev_queue_xmit(struct sk_buff *skb)
        }
 
 gso:
+       trace_net_dev_xmit(skb);
        /* Disable soft irqs for various locks below. Also
         * stops preemption for RCU.
         */
@@ -2274,6 +2276,7 @@ int netif_receive_skb(struct sk_buff *skb)
 
        __get_cpu_var(netdev_rx_stat).total++;
 
+       trace_net_dev_receive(skb);
        skb_reset_network_header(skb);
        skb_reset_transport_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;
index b12dae2..49d30b6 100644 (file)
@@ -61,6 +61,7 @@
 #include <net/ip_fib.h>
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
+#include <trace/ipv4.h>
 
 static struct ipv4_devconf ipv4_devconf = {
        .data = {
@@ -257,6 +258,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                struct in_ifaddr **ifap1 = &ifa1->ifa_next;
 
                while ((ifa = *ifap1) != NULL) {
+                       trace_ipv4_addr_del(ifa);
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
                            ifa1->ifa_scope <= ifa->ifa_scope)
                                last_prim = ifa;
@@ -363,6 +365,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
                        }
                        ifa->ifa_flags |= IFA_F_SECONDARY;
                }
+               trace_ipv4_addr_add(ifa);
        }
 
        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
index c246a83..a3cf038 100644 (file)
@@ -85,6 +85,7 @@
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <trace/ipv6.h>
 
 /* Set to 3 to get tracing... */
 #define ACONF_DEBUG 2
@@ -653,6 +654,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
        /* For caller */
        in6_ifa_hold(ifa);
 
+       trace_ipv6_addr_add(ifa);
+
        /* Add to big hash table */
        hash = ipv6_addr_hash(addr);
 
@@ -2167,6 +2170,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
                        in6_ifa_hold(ifp);
                        read_unlock_bh(&idev->lock);
 
+                       trace_ipv6_addr_del(ifp);
                        ipv6_del_addr(ifp);
 
                        /* If the last address is deleted administratively,
index 8ef8ba8..e96f8f0 100644 (file)
@@ -96,6 +96,7 @@
 
 #include <net/sock.h>
 #include <linux/netfilter.h>
+#include <trace/socket.h>
 
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
@@ -575,6 +576,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
        ret = __sock_sendmsg(&iocb, sock, msg, size);
        if (-EIOCBQUEUED == ret)
                ret = wait_on_sync_kiocb(&iocb);
+       trace_socket_sendmsg(sock, msg, size, ret);
        return ret;
 }
 
@@ -654,10 +656,12 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
        int ret;
 
        init_sync_kiocb(&iocb, NULL);
+
        iocb.private = &siocb;
        ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
        if (-EIOCBQUEUED == ret)
                ret = wait_on_sync_kiocb(&iocb);
+       trace_socket_recvmsg(sock, msg, size, flags, ret);
        return ret;
 }
 
@@ -1244,6 +1248,7 @@ asmlinkage long sys_socket(int family, int type, int protocol)
        if (retval < 0)
                goto out_release;
 
+       trace_socket_create(sock, retval);
 out:
        /* It may be already another descriptor 8) Not kernel problem. */
        return retval;
@@ -2128,6 +2133,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
        a0 = a[0];
        a1 = a[1];
 
+       trace_socket_call(call, a0);
+
        switch (call) {
        case SYS_SOCKET:
                err = sys_socket(a0, a1, a[2]);
index e1fb471..4b02f5a 100644 (file)
@@ -13,6 +13,12 @@ config SAMPLE_MARKERS
        help
          This build markers example modules.
 
+config SAMPLE_TRACEPOINTS
+       tristate "Build tracepoints examples -- loadable modules only"
+       depends on TRACEPOINTS && m
+       help
+         This builds the tracepoint example modules.
+
 config SAMPLE_KOBJECT
        tristate "Build kobject examples"
        help
index 2e02575..10eaca8 100644 (file)
@@ -1,3 +1,3 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)  += markers/ kobject/ kprobes/
+obj-$(CONFIG_SAMPLES)  += markers/ kobject/ kprobes/ tracepoints/
diff --git a/samples/tracepoints/Makefile b/samples/tracepoints/Makefile
new file mode 100644 (file)
index 0000000..36479ad
--- /dev/null
@@ -0,0 +1,6 @@
+# builds the tracepoint example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
new file mode 100644 (file)
index 0000000..0216b55
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef _TP_SAMPLES_TRACE_H
+#define _TP_SAMPLES_TRACE_H
+
+#include <linux/proc_fs.h>     /* for struct inode and struct file */
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_event,
+       TPPROTO(struct inode *inode, struct file *file),
+       TPARGS(inode, file));
+DEFINE_TRACE(subsys_eventb,
+       TPPROTO(void),
+       TPARGS());
+#endif
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
new file mode 100644 (file)
index 0000000..e3a9648
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * tracepoint-probe-sample.c
+ *
+ * sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/dcache.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+       path_get(&file->f_path);
+       dget(file->f_path.dentry);
+       printk(KERN_INFO "Event is encountered with filename %s\n",
+               file->f_path.dentry->d_name.name);
+       dput(file->f_path.dentry);
+       path_put(&file->f_path);
+}
+
+static void probe_subsys_eventb(void)
+{
+       printk(KERN_INFO "Event B is encountered\n");
+}
+
+int __init tp_sample_trace_init(void)
+{
+       int ret;
+
+       ret = register_trace_subsys_event(probe_subsys_event);
+       WARN_ON(ret);
+       ret = register_trace_subsys_eventb(probe_subsys_eventb);
+       WARN_ON(ret);
+
+       return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+       unregister_trace_subsys_eventb(probe_subsys_eventb);
+       unregister_trace_subsys_event(probe_subsys_event);
+       tracepoint_synchronize_unregister();
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
new file mode 100644 (file)
index 0000000..685a5ac
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * tracepoint-probe-sample2.c
+ *
+ * 2nd sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+       printk(KERN_INFO "Event is encountered with inode number %lu\n",
+               inode->i_ino);
+}
+
+int __init tp_sample_trace_init(void)
+{
+       int ret;
+
+       ret = register_trace_subsys_event(probe_subsys_event);
+       WARN_ON(ret);
+
+       return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+       unregister_trace_subsys_event(probe_subsys_event);
+       tracepoint_synchronize_unregister();
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
new file mode 100644 (file)
index 0000000..4ae4b7f
--- /dev/null
@@ -0,0 +1,53 @@
+/* tracepoint-sample.c
+ *
+ * Executes a tracepoint when /proc/tracepoint-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include "tp-samples-trace.h"
+
+struct proc_dir_entry *pentry_example;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+       int i;
+
+       trace_subsys_event(inode, file);
+       for (i = 0; i < 10; i++)
+               trace_subsys_eventb();
+       return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+       .open = my_open,
+};
+
+static int example_init(void)
+{
+       printk(KERN_ALERT "example init\n");
+       pentry_example = proc_create("tracepoint-example", 0444, NULL,
+               &mark_ops);
+       if (!pentry_example)
+               return -EPERM;
+       return 0;
+}
+
+static void example_exit(void)
+{
+       printk(KERN_ALERT "example exit\n");
+       remove_proc_entry("tracepoint-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint example");