--- /dev/null
+ Using the Linux Kernel Tracepoints
+
+ Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Tracepoints and their use. It provides
+examples of how to insert tracepoints in the kernel and connect probe functions
+to them and provides some examples of probe functions.
+
+
+* Purpose of tracepoints
+
+A tracepoint placed in code provides a hook to call a function (probe) that you
+can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
+"off" (no probe is attached). When a tracepoint is "off" it has no effect,
+except for adding a tiny time penalty (checking a condition for a branch) and
+space penalty (adding a few bytes for the function call at the end of the
+instrumented function and adds a data structure in a separate section). When a
+tracepoint is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the tracepoint
+site).
+
+You can put tracepoints at important locations in the code. They are
+lightweight hooks that can pass an arbitrary number of parameters,
+which prototypes are described in a tracepoint declaration placed in a header
+file.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+Two elements are required for tracepoints :
+
+- A tracepoint definition, placed in a header file.
+- The tracepoint statement, in C code.
+
+In order to use tracepoints, you should include linux/tracepoint.h.
+
+In include/trace/subsys.h :
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_eventname,
+ TPPTOTO(int firstarg, struct task_struct *p),
+ TPARGS(firstarg, p));
+
+In subsys/file.c (where the tracing statement must be added) :
+
+#include <trace/subsys.h>
+
+void somefct(void)
+{
+ ...
+ trace_subsys_eventname(arg, task);
+ ...
+}
+
+Where :
+- subsys_eventname is an identifier unique to your event
+ - subsys is the name of your subsystem.
+ - eventname is the name of the event to trace.
+- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function
+ called by this tracepoint.
+- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
+
+Connecting a function (probe) to a tracepoint is done by providing a probe
+(function to call) for the specific tracepoint through
+register_trace_subsys_eventname(). Removing a probe is done through
+unregister_trace_subsys_eventname(); it will remove the probe.
+marker_synchronize_unregister() must be called before the end of the module exit
+function to make sure there is no caller left using the probe. This, and the
+fact that preemption is disabled around the probe call, make sure that probe
+removal and module unload are safe. See the "Probe example" section below for a
+sample probe module.
+
+The tracepoint mechanism supports inserting multiple instances of the same
+tracepoint, but a single definition must be made of a given tracepoint name over
+all the kernel to make sure no type conflict will occur. Name mangling of the
+tracepoints is done using the prototypes to make sure typing is correct.
+Verification of probe type correctness is done at the registration site by the
+compiler. Tracepoints can be put in inline functions, inlined static functions,
+and unrolled loops as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention intended
+to limit collisions. Tracepoint names are global to the kernel: they are
+considered as being the same whether they are in the core kernel image or in
+modules.
+
+
+* Probe / tracepoint example
+
+See the example provided in samples/tracepoints/src
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe tracepoint-example (insmod order is not important)
+modprobe tracepoint-probe-example
+cat /proc/tracepoint-example (returns an expected error)
+rmmod tracepoint-example tracepoint-probe-example
+dmesg
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
+#include <trace/fs.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
*/
void __wait_on_buffer(struct buffer_head * bh)
{
+ trace_fs_buffer_wait_start(bh);
wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+ trace_fs_buffer_wait_end(bh);
}
static void
#include <linux/poll.h>
#include <linux/mm.h>
#include <linux/eventpoll.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
retval = search_binary_handler(bprm, regs);
if (retval >= 0) {
+ trace_fs_exec(filename);
/* execve success */
security_bprm_free(bprm);
acct_update_integrals(current);
#include <linux/cn_proc.h>
#include <linux/audit.h>
#include <linux/tracehook.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
current->flags &= ~PF_KTHREAD;
retval = search_binary_handler(bprm,regs);
if (retval >= 0) {
+ trace_fs_exec(filename);
/* execve success */
security_bprm_free(bprm);
acct_update_integrals(current);
#include <linux/security.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+#include <trace/fs.h>
#include <asm/ioctls.h>
if (!filp)
goto out;
+ trace_fs_ioctl(fd, cmd, arg);
+
error = security_file_ioctl(filp, cmd, arg);
if (error)
goto out_fput;
#include <linux/rcupdate.h>
#include <linux/audit.h>
#include <linux/falloc.h>
+#include <trace/fs.h>
int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
fsnotify_open(f->f_path.dentry);
fd_install(fd, f);
}
+ trace_fs_open(fd, tmp);
}
putname(tmp);
}
filp = fdt->fd[fd];
if (!filp)
goto out_unlock;
+ trace_fs_close(fd);
rcu_assign_pointer(fdt->fd[fd], NULL);
FD_CLR(fd, fdt->close_on_exec);
__put_unused_fd(files, fd);
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
+#include <trace/fs.h>
#include "read_write.h"
#include <asm/uaccess.h>
if (res != (loff_t)retval)
retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
}
+
+ trace_fs_lseek(fd, offset, origin);
+
fput_light(file, fput_needed);
bad:
return retval;
offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
origin);
+ trace_fs_llseek(fd, offset, origin);
+
retval = (int)offset;
if (offset >= 0) {
retval = -EFAULT;
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_read(file, buf, count, &pos);
+ trace_fs_read(fd, buf, count, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_write(file, buf, count, &pos);
+ trace_fs_write(fd, buf, count, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
file = fget_light(fd, &fput_needed);
if (file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PREAD)
+ if (file->f_mode & FMODE_PREAD) {
ret = vfs_read(file, buf, count, &pos);
+ trace_fs_pread64(fd, buf, count, pos, ret);
+ }
+
fput_light(file, fput_needed);
}
file = fget_light(fd, &fput_needed);
if (file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PWRITE)
+ if (file->f_mode & FMODE_PWRITE) {
ret = vfs_write(file, buf, count, &pos);
+ trace_fs_pwrite64(fd, buf, count, pos, ret);
+ }
fput_light(file, fput_needed);
}
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_readv(file, vec, vlen, &pos);
+ trace_fs_readv(fd, vec, vlen, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_writev(file, vec, vlen, &pos);
+ trace_fs_writev(fd, vec, vlen, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/rcupdate.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
file = fget_light(i, &fput_needed);
if (file) {
f_op = file->f_op;
+ trace_fs_select(i, *timeout);
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll)
mask = (*f_op->poll)(file, retval ? NULL : wait);
file = fget_light(fd, &fput_needed);
mask = POLLNVAL;
if (file != NULL) {
+ trace_fs_poll(fd);
mask = DEFAULT_POLLMASK;
if (file->f_op && file->f_op->poll)
mask = file->f_op->poll(file, pwait);
. = ALIGN(8); \
VMLINUX_SYMBOL(__start___markers) = .; \
*(__markers) \
- VMLINUX_SYMBOL(__stop___markers) = .;
+ VMLINUX_SYMBOL(__stop___markers) = .; \
+ VMLINUX_SYMBOL(__start___tracepoints) = .; \
+ *(__tracepoints) \
+ VMLINUX_SYMBOL(__stop___tracepoints) = .;
#define RO_DATA(align) \
. = ALIGN((align)); \
*(.rodata) *(.rodata.*) \
*(__vermagic) /* Kernel version magic */ \
*(__markers_strings) /* Markers: strings */ \
+ *(__tracepoints_strings)/* Tracepoints: strings */ \
} \
\
.rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \
#include <linux/kobject.h>
#include <linux/moduleparam.h>
#include <linux/marker.h>
+#include <linux/tracepoint.h>
#include <asm/local.h>
#include <asm/module.h>
unsigned int num_markers;
#endif
+#ifdef CONFIG_TRACEPOINTS
+ struct tracepoint *tracepoints;
+ unsigned int num_tracepoints;
+#endif
+
#ifdef CONFIG_MODULE_UNLOAD
/* What modules depend on me? */
struct list_head modules_which_use_me;
extern void module_update_markers(void);
+extern void module_update_tracepoints(void);
+extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
+
#else /* !CONFIG_MODULES... */
#define EXPORT_SYMBOL(sym)
#define EXPORT_SYMBOL_GPL(sym)
{
}
+static inline void module_update_tracepoints(void)
+{
+}
+
+static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+ return 0;
+}
+
#endif /* CONFIG_MODULES */
struct device_driver;
#define rcu_read_unlock_bh() __rcu_read_unlock_bh()
/**
+ * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section
+ *
+ * Should be used with either
+ * - synchronize_sched()
+ * or
+ * - call_rcu_sched() and rcu_barrier_sched()
+ * on the write-side to insure proper synchronization.
+ */
+#define rcu_read_lock_sched() preempt_disable()
+
+/*
+ * rcu_read_unlock_sched - marks the end of a RCU-classic critical section
+ *
+ * See rcu_read_lock_sched for more information.
+ */
+#define rcu_read_unlock_sched() preempt_enable()
+
+
+
+/**
* rcu_dereference - fetch an RCU-protected pointer in an
* RCU read-side critical section. This pointer may later
* be safely dereferenced.
--- /dev/null
+#ifndef _LINUX_TRACEPOINT_H
+#define _LINUX_TRACEPOINT_H
+
+/*
+ * Kernel Tracepoint API.
+ *
+ * See Documentation/tracepoint.txt.
+ *
+ * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * Heavily inspired from the Linux Kernel Markers.
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+
+struct module;
+struct tracepoint;
+
+struct tracepoint {
+ const char *name; /* Tracepoint name */
+ int state; /* State. */
+ void **funcs;
+} __attribute__((aligned(8)));
+
+
+#define TPPROTO(args...) args
+#define TPARGS(args...) args
+
+#ifdef CONFIG_TRACEPOINTS
+
+/*
+ * it_func[0] is never NULL because there is at least one element in the array
+ * when the array itself is non NULL.
+ */
+#define __DO_TRACE(tp, proto, args) \
+ do { \
+ void **it_func; \
+ \
+ rcu_read_lock_sched(); \
+ it_func = rcu_dereference((tp)->funcs); \
+ if (it_func) { \
+ do { \
+ ((void(*)(proto))(*it_func))(args); \
+ } while (*(++it_func)); \
+ } \
+ rcu_read_unlock_sched(); \
+ } while (0)
+
+/*
+ * Make sure the alignment of the structure in the __tracepoints section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define DEFINE_TRACE(name, proto, args) \
+ static inline void trace_##name(proto) \
+ { \
+ static const char __tpstrtab_##name[] \
+ __attribute__((section("__tracepoints_strings"))) \
+ = #name ":" #proto; \
+ static struct tracepoint __tracepoint_##name \
+ __attribute__((section("__tracepoints"), aligned(8))) = \
+ { __tpstrtab_##name, 0, NULL }; \
+ if (unlikely(__tracepoint_##name.state)) \
+ __DO_TRACE(&__tracepoint_##name, \
+ TPPROTO(proto), TPARGS(args)); \
+ } \
+ static inline int register_trace_##name(void (*probe)(proto)) \
+ { \
+ return tracepoint_probe_register(#name ":" #proto, \
+ (void *)probe); \
+ } \
+ static inline void unregister_trace_##name(void (*probe)(proto))\
+ { \
+ tracepoint_probe_unregister(#name ":" #proto, \
+ (void *)probe); \
+ }
+
+extern void tracepoint_update_probe_range(struct tracepoint *begin,
+ struct tracepoint *end);
+
+#else /* !CONFIG_TRACEPOINTS */
+#define DEFINE_TRACE(name, proto, args) \
+ static inline void _do_trace_##name(struct tracepoint *tp, proto) \
+ { } \
+ static inline void trace_##name(proto) \
+ { } \
+ static inline int register_trace_##name(void (*probe)(proto)) \
+ { \
+ return -ENOSYS; \
+ } \
+ static inline void unregister_trace_##name(void (*probe)(proto))\
+ { }
+
+static inline void tracepoint_update_probe_range(struct tracepoint *begin,
+ struct tracepoint *end)
+{ }
+#endif /* CONFIG_TRACEPOINTS */
+
+/*
+ * Connect a probe to a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_register(const char *name, void *probe);
+
+/*
+ * Disconnect a probe from a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_unregister(const char *name, void *probe);
+
+struct tracepoint_iter {
+ struct module *module;
+ struct tracepoint *tracepoint;
+};
+
+extern void tracepoint_iter_start(struct tracepoint_iter *iter);
+extern void tracepoint_iter_next(struct tracepoint_iter *iter);
+extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
+extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
+extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+ struct tracepoint *begin, struct tracepoint *end);
+
+/*
+ * tracepoint_synchronize_unregister must be called between the last tracepoint
+ * probe unregistration and the end of module exit to make sure there is no
+ * caller executing a probe when it is freed.
+ */
+#define tracepoint_synchronize_unregister() synchronize_sched()
+
+#endif
--- /dev/null
+#ifndef _TRACE_FILEMAP_H
+#define _TRACE_FILEMAP_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(wait_on_page_start,
+ TPPROTO(struct page *page, int bit_nr),
+ TPARGS(page, bit_nr));
+DEFINE_TRACE(wait_on_page_end,
+ TPPROTO(struct page *page, int bit_nr),
+ TPARGS(page, bit_nr));
+
+#endif
--- /dev/null
+#ifndef _TRACE_FS_H
+#define _TRACE_FS_H
+
+#include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(fs_buffer_wait_start,
+ TPPROTO(struct buffer_head *bh),
+ TPARGS(bh));
+DEFINE_TRACE(fs_buffer_wait_end,
+ TPPROTO(struct buffer_head *bh),
+ TPARGS(bh));
+DEFINE_TRACE(fs_exec,
+ TPPROTO(char *filename),
+ TPARGS(filename));
+DEFINE_TRACE(fs_ioctl,
+ TPPROTO(unsigned int fd, unsigned int cmd, unsigned long arg),
+ TPARGS(fd, cmd, arg));
+DEFINE_TRACE(fs_open,
+ TPPROTO(int fd, char *filename),
+ TPARGS(fd, filename));
+DEFINE_TRACE(fs_close,
+ TPPROTO(unsigned int fd),
+ TPARGS(fd));
+DEFINE_TRACE(fs_lseek,
+ TPPROTO(unsigned int fd, long offset, unsigned int origin),
+ TPARGS(fd, offset, origin));
+DEFINE_TRACE(fs_llseek,
+ TPPROTO(unsigned int fd, loff_t offset, unsigned int origin),
+ TPARGS(fd, offset, origin));
+
+/*
+ * Probes must be aware that __user * may be modified by concurrent userspace
+ * or kernel threads.
+ */
+DEFINE_TRACE(fs_read,
+ TPPROTO(unsigned int fd, char __user *buf, size_t count, ssize_t ret),
+ TPARGS(fd, buf, count, ret));
+DEFINE_TRACE(fs_write,
+ TPPROTO(unsigned int fd, const char __user *buf, size_t count,
+ ssize_t ret),
+ TPARGS(fd, buf, count, ret));
+DEFINE_TRACE(fs_pread64,
+ TPPROTO(unsigned int fd, char __user *buf, size_t count, loff_t pos,
+ ssize_t ret),
+ TPARGS(fd, buf, count, pos, ret));
+DEFINE_TRACE(fs_pwrite64,
+ TPPROTO(unsigned int fd, const char __user *buf, size_t count,
+ loff_t pos, ssize_t ret),
+ TPARGS(fd, buf, count, pos, ret));
+DEFINE_TRACE(fs_readv,
+ TPPROTO(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, ssize_t ret),
+ TPARGS(fd, vec, vlen, ret));
+DEFINE_TRACE(fs_writev,
+ TPPROTO(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, ssize_t ret),
+ TPARGS(fd, vec, vlen, ret));
+DEFINE_TRACE(fs_select,
+ TPPROTO(int fd, s64 timeout),
+ TPARGS(fd, timeout));
+DEFINE_TRACE(fs_poll,
+ TPPROTO(int fd),
+ TPARGS(fd));
+#endif
--- /dev/null
+#ifndef _TRACE_HUGETLB_H
+#define _TRACE_HUGETLB_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(hugetlb_page_release,
+ TPPROTO(struct page *page),
+ TPARGS(page));
+DEFINE_TRACE(hugetlb_page_grab,
+ TPPROTO(struct page *page),
+ TPARGS(page));
+DEFINE_TRACE(hugetlb_buddy_pgalloc,
+ TPPROTO(struct page *page),
+ TPARGS(page));
+DEFINE_TRACE(hugetlb_page_alloc,
+ TPPROTO(struct page *page),
+ TPARGS(page));
+DEFINE_TRACE(hugetlb_page_free,
+ TPPROTO(struct page *page),
+ TPARGS(page));
+DEFINE_TRACE(hugetlb_pages_reserve,
+ TPPROTO(struct inode *inode, long from, long to, int ret),
+ TPARGS(inode, from, to, ret));
+DEFINE_TRACE(hugetlb_pages_unreserve,
+ TPPROTO(struct inode *inode, long offset, long freed),
+ TPARGS(inode, offset, freed));
+
+#endif
--- /dev/null
+#ifndef _TRACE_IPC_H
+#define _TRACE_IPC_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(ipc_msg_create,
+ TPPROTO(long id, int flags),
+ TPARGS(id, flags));
+DEFINE_TRACE(ipc_sem_create,
+ TPPROTO(long id, int flags),
+ TPARGS(id, flags));
+DEFINE_TRACE(ipc_shm_create,
+ TPPROTO(long id, int flags),
+ TPARGS(id, flags));
+#endif
--- /dev/null
+#ifndef _TRACE_IPV4_H
+#define _TRACE_IPV4_H
+
+#include <linux/inetdevice.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(ipv4_addr_add,
+ TPPROTO(struct in_ifaddr *ifa),
+ TPARGS(ifa));
+DEFINE_TRACE(ipv4_addr_del,
+ TPPROTO(struct in_ifaddr *ifa),
+ TPARGS(ifa));
+
+#endif
--- /dev/null
+#ifndef _TRACE_IPV6_H
+#define _TRACE_IPV6_H
+
+#include <net/if_inet6.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(ipv6_addr_add,
+ TPPROTO(struct inet6_ifaddr *ifa),
+ TPARGS(ifa));
+DEFINE_TRACE(ipv6_addr_del,
+ TPPROTO(struct inet6_ifaddr *ifa),
+ TPARGS(ifa));
+
+#endif
--- /dev/null
+#ifndef _TRACE_IRQ_H
+#define _TRACE_IRQ_H
+
+#include <linux/kdebug.h>
+#include <linux/interrupt.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(irq_entry,
+ TPPROTO(unsigned int id, struct pt_regs *regs),
+ TPARGS(id, regs));
+DEFINE_TRACE(irq_exit,
+ TPPROTO(irqreturn_t retval),
+ TPARGS(retval));
+DEFINE_TRACE(irq_softirq_entry,
+ TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec),
+ TPARGS(h, softirq_vec));
+DEFINE_TRACE(irq_softirq_exit,
+ TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec),
+ TPARGS(h, softirq_vec));
+DEFINE_TRACE(irq_softirq_raise,
+ TPPROTO(unsigned int nr),
+ TPARGS(nr));
+DEFINE_TRACE(irq_tasklet_low_entry,
+ TPPROTO(struct tasklet_struct *t),
+ TPARGS(t));
+DEFINE_TRACE(irq_tasklet_low_exit,
+ TPPROTO(struct tasklet_struct *t),
+ TPARGS(t));
+DEFINE_TRACE(irq_tasklet_high_entry,
+ TPPROTO(struct tasklet_struct *t),
+ TPARGS(t));
+DEFINE_TRACE(irq_tasklet_high_exit,
+ TPPROTO(struct tasklet_struct *t),
+ TPARGS(t));
+
+#endif
--- /dev/null
+#ifndef _TRACE_KERNEL_H
+#define _TRACE_KERNEL_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(kernel_module_free,
+ TPPROTO(struct module *mod),
+ TPARGS(mod));
+DEFINE_TRACE(kernel_module_load,
+ TPPROTO(struct module *mod),
+ TPARGS(mod));
+
+#endif
--- /dev/null
+#ifndef _TRACE_MEMORY_H
+#define _TRACE_MEMORY_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(memory_handle_fault_entry,
+ TPPROTO(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, int write_access),
+ TPARGS(mm, vma, address, write_access));
+DEFINE_TRACE(memory_handle_fault_exit,
+ TPPROTO(int res),
+ TPARGS(res));
+
+#endif
--- /dev/null
+#ifndef _TRACE_NET_H
+#define _TRACE_NET_H
+
+#include <net/sock.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(net_dev_xmit,
+ TPPROTO(struct sk_buff *skb),
+ TPARGS(skb));
+DEFINE_TRACE(net_dev_receive,
+ TPPROTO(struct sk_buff *skb),
+ TPARGS(skb));
+
+#endif
--- /dev/null
+#ifndef _TRACE_PAGE_ALLOC_H
+#define _TRACE_PAGE_ALLOC_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * mm_page_alloc : page can be NULL.
+ */
+DEFINE_TRACE(page_alloc,
+ TPPROTO(struct page *page, unsigned int order),
+ TPARGS(page, order));
+DEFINE_TRACE(page_free,
+ TPPROTO(struct page *page, unsigned int order),
+ TPARGS(page, order));
+
+#endif
--- /dev/null
+#ifndef _TRACE_SCHED_H
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(sched_kthread_stop,
+ TPPROTO(struct task_struct *t),
+ TPARGS(t));
+DEFINE_TRACE(sched_kthread_stop_ret,
+ TPPROTO(int ret),
+ TPARGS(ret));
+DEFINE_TRACE(sched_wait_task,
+ TPPROTO(struct rq *rq, struct task_struct *p),
+ TPARGS(rq, p));
+DEFINE_TRACE(sched_wakeup,
+ TPPROTO(struct rq *rq, struct task_struct *p),
+ TPARGS(rq, p));
+DEFINE_TRACE(sched_wakeup_new,
+ TPPROTO(struct rq *rq, struct task_struct *p),
+ TPARGS(rq, p));
+DEFINE_TRACE(sched_switch,
+ TPPROTO(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next),
+ TPARGS(rq, prev, next));
+DEFINE_TRACE(sched_migrate_task,
+ TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
+ TPARGS(rq, p, dest_cpu));
+DEFINE_TRACE(sched_process_free,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_process_exit,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_process_wait,
+ TPPROTO(struct pid *pid),
+ TPARGS(pid));
+DEFINE_TRACE(sched_process_fork,
+ TPPROTO(struct task_struct *parent, struct task_struct *child),
+ TPARGS(parent, child));
+DEFINE_TRACE(sched_signal_send,
+ TPPROTO(int sig, struct task_struct *p),
+ TPARGS(sig, p));
+
+#endif
--- /dev/null
+#ifndef _TRACE_SOCKET_H
+#define _TRACE_SOCKET_H
+
+#include <net/sock.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(socket_sendmsg,
+ TPPROTO(struct socket *sock, struct msghdr *msg, size_t size, int ret),
+ TPARGS(sock, msg, size, ret));
+DEFINE_TRACE(socket_recvmsg,
+ TPPROTO(struct socket *sock, struct msghdr *msg, size_t size, int flags,
+ int ret),
+ TPARGS(sock, msg, size, flags, ret));
+DEFINE_TRACE(socket_create,
+ TPPROTO(struct socket *sock, int fd),
+ TPARGS(sock, fd));
+/*
+ * socket_call
+ *
+ * TODO : This tracepoint should be expanded to cover each element of the
+ * switch in sys_socketcall().
+ */
+DEFINE_TRACE(socket_call,
+ TPPROTO(int call, unsigned long a0),
+ TPARGS(call, a0));
+#endif
--- /dev/null
+#ifndef _TRACE_SWAP_H
+#define _TRACE_SWAP_H
+
+#include <linux/swap.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(swap_in,
+ TPPROTO(struct page *page, swp_entry_t entry),
+ TPARGS(page, entry));
+DEFINE_TRACE(swap_out,
+ TPPROTO(struct page *page),
+ TPARGS(page));
+DEFINE_TRACE(swap_file_open,
+ TPPROTO(struct file *file, char *filename),
+ TPARGS(file, filename));
+DEFINE_TRACE(swap_file_close,
+ TPPROTO(struct file *file),
+ TPARGS(file));
+
+#endif
--- /dev/null
+#ifndef _TRACE_TIMER_H
+#define _TRACE_TIMER_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(timer_itimer_expired,
+ TPPROTO(struct signal_struct *sig),
+ TPARGS(sig));
+DEFINE_TRACE(timer_itimer_set,
+ TPPROTO(int which, struct itimerval *value),
+ TPARGS(which, value));
+DEFINE_TRACE(timer_set,
+ TPPROTO(struct timer_list *timer),
+ TPARGS(timer));
+/*
+ * xtime_lock is taken when kernel_timer_update_time tracepoint is reached.
+ */
+DEFINE_TRACE(timer_update_time,
+ TPPROTO(struct timespec *_xtime, struct timespec *_wall_to_monotonic),
+ TPARGS(_xtime, _wall_to_monotonic));
+DEFINE_TRACE(timer_timeout,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+#endif
Say Y here to enable the extended profiling support mechanisms used
by profilers such as OProfile.
+config TRACEPOINTS
+ bool "Activate tracepoints"
+ default y
+ help
+ Place an empty function call at each tracepoint site. Can be
+ dynamically changed for a probe function.
+
config MARKERS
bool "Activate markers"
help
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
+#include <trace/ipc.h>
#include <asm/current.h>
#include <asm/uaccess.h>
struct ipc_namespace *ns;
struct ipc_ops msg_ops;
struct ipc_params msg_params;
+ long ret;
ns = current->nsproxy->ipc_ns;
msg_params.key = key;
msg_params.flg = msgflg;
- return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
+ ret = ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
+ trace_ipc_msg_create(ret, msgflg);
+ return ret;
}
static inline unsigned long
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
+#include <trace/ipc.h>
#include <asm/uaccess.h>
#include "util.h"
struct ipc_namespace *ns;
struct ipc_ops sem_ops;
struct ipc_params sem_params;
+ long err;
ns = current->nsproxy->ipc_ns;
sem_params.flg = semflg;
sem_params.u.nsems = nsems;
- return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
+ err = ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
+ trace_ipc_sem_create(err, semflg);
+ return err;
}
/*
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>
+#include <trace/ipc.h>
#include <asm/uaccess.h>
struct ipc_namespace *ns;
struct ipc_ops shm_ops;
struct ipc_params shm_params;
+ long err;
ns = current->nsproxy->ipc_ns;
shm_params.flg = shmflg;
shm_params.u.size = size;
- return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
+ err = ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
+ trace_ipc_shm_create(err, shmflg);
+ return err;
}
static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FTRACE) += trace/
#include <linux/blkdev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/tracehook.h>
+#include <trace/sched.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
static void delayed_put_task_struct(struct rcu_head *rhp)
{
- put_task_struct(container_of(rhp, struct task_struct, rcu));
+ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+ trace_sched_process_free(tsk);
+ put_task_struct(tsk);
}
if (group_dead)
acct_process();
+ trace_sched_process_exit(tsk);
+
exit_sem(tsk);
exit_files(tsk);
exit_fs(tsk);
struct task_struct *tsk;
int retval;
+ trace_sched_process_wait(pid);
+
add_wait_queue(¤t->signal->wait_chldexit,&wait);
repeat:
/*
#include <linux/tty.h>
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
+#include <trace/sched.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
if (!IS_ERR(p)) {
struct completion vfork;
+ trace_sched_process_fork(current, p);
+
nr = task_pid_vnr(p);
if (clone_flags & CLONE_PARENT_SETTID)
#include <linux/random.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <trace/irq.h>
#include "internals.h"
{
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
+ struct pt_regs *regs = get_irq_regs();
+
+ trace_irq_entry(irq, regs);
handle_dynamic_tick(action);
add_interrupt_randomness(irq);
local_irq_disable();
+ trace_irq_exit(retval);
+
return retval;
}
#include <linux/time.h>
#include <linux/posix-timers.h>
#include <linux/hrtimer.h>
+#include <trace/timer.h>
#include <asm/uaccess.h>
struct signal_struct *sig =
container_of(timer, struct signal_struct, real_timer);
+ trace_timer_itimer_expired(sig);
+
kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
return HRTIMER_NORESTART;
!timeval_valid(&value->it_interval))
return -EINVAL;
+ trace_timer_itimer_set(which, value);
+
switch (which) {
case ITIMER_REAL:
again:
#include <linux/file.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <trace/sched.h>
#define KTHREAD_NICE_LEVEL (-5)
/* It could exit after stop_info.k set, but before wake_up_process. */
get_task_struct(k);
+ trace_sched_kthread_stop(k);
+
/* Must init completion *before* thread sees kthread_stop_info.k */
init_completion(&kthread_stop_info.done);
smp_wmb();
ret = kthread_stop_info.err;
mutex_unlock(&kthread_stop_lock);
+ trace_sched_kthread_stop_ret(ret);
+
return ret;
}
EXPORT_SYMBOL(kthread_stop);
char ptype;
/*
- * preempt_disable does two things : disabling preemption to make sure
- * the teardown of the callbacks can be done correctly when they are in
- * modules and they insure RCU read coherency.
+ * rcu_read_lock_sched does two things : disabling preemption to make
+ * sure the teardown of the callbacks can be done correctly when they
+ * are in modules and they insure RCU read coherency.
*/
- preempt_disable();
+ rcu_read_lock_sched();
ptype = mdata->ptype;
if (likely(!ptype)) {
marker_probe_func *func;
va_end(args);
}
}
- preempt_enable();
+ rcu_read_unlock_sched();
}
EXPORT_SYMBOL_GPL(marker_probe_cb);
va_list args; /* not initialized */
char ptype;
- preempt_disable();
+ rcu_read_lock_sched();
ptype = mdata->ptype;
if (likely(!ptype)) {
marker_probe_func *func;
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
}
- preempt_enable();
+ rcu_read_unlock_sched();
}
EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
* Disable a marker and its probe callback.
* Note: only waiting an RCU period after setting elem->call to the empty
* function insures that the original callback is not used anymore. This insured
- * by preempt_disable around the call site.
+ * by rcu_read_lock_sched around the call site.
*/
static void disable_marker(struct marker *elem)
{
#include <asm/cacheflush.h>
#include <linux/license.h>
#include <asm/sections.h>
+#include <linux/tracepoint.h>
+#include <trace/kernel.h>
#if 0
#define DEBUGP printk
/* Free a module, remove from lists, etc (must hold module_mutex). */
static void free_module(struct module *mod)
{
+ trace_kernel_module_free(mod);
+
/* Delete from various lists */
stop_machine(__unlink_module, mod, NULL);
remove_notes_attrs(mod);
#endif
unsigned int markersindex;
unsigned int markersstringsindex;
+ unsigned int tracepointsindex;
+ unsigned int tracepointsstringsindex;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
markersstringsindex = find_sec(hdr, sechdrs, secstrings,
"__markers_strings");
+ tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
+ tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
+ "__tracepoints_strings");
/* Now do relocations. */
for (i = 1; i < hdr->e_shnum; i++) {
mod->num_markers =
sechdrs[markersindex].sh_size / sizeof(*mod->markers);
#endif
+#ifdef CONFIG_TRACEPOINTS
+ mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
+ mod->num_tracepoints =
+ sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
+#endif
+
/* Find duplicate symbols */
err = verify_export_symbols(mod);
add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+ if (!mod->taints) {
#ifdef CONFIG_MARKERS
- if (!mod->taints)
marker_update_probe_range(mod->markers,
mod->markers + mod->num_markers);
#endif
+#ifdef CONFIG_TRACEPOINTS
+ tracepoint_update_probe_range(mod->tracepoints,
+ mod->tracepoints + mod->num_tracepoints);
+#endif
+ }
err = module_finalize(hdr, sechdrs, mod);
if (err < 0)
goto cleanup;
/* Get rid of temporary copy */
vfree(hdr);
+ trace_kernel_module_load(mod);
+
/* Done! */
return mod;
mutex_unlock(&module_mutex);
}
#endif
+
+#ifdef CONFIG_TRACEPOINTS
+void module_update_tracepoints(void)
+{
+ struct module *mod;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(mod, &modules, list)
+ if (!mod->taints)
+ tracepoint_update_probe_range(mod->tracepoints,
+ mod->tracepoints + mod->num_tracepoints);
+ mutex_unlock(&module_mutex);
+}
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+ struct module *iter_mod;
+ int found = 0;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(iter_mod, &modules, list) {
+ if (!iter_mod->taints) {
+ /*
+ * Sorted module list
+ */
+ if (iter_mod < iter->module)
+ continue;
+ else if (iter_mod > iter->module)
+ iter->tracepoint = NULL;
+ found = tracepoint_get_iter_range(&iter->tracepoint,
+ iter_mod->tracepoints,
+ iter_mod->tracepoints
+ + iter_mod->num_tracepoints);
+ if (found) {
+ iter->module = iter_mod;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&module_mutex);
+ return found;
+}
+#endif
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
+#include <trace/sched.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
* just go back and repeat.
*/
rq = task_rq_lock(p, &flags);
+ trace_sched_wait_task(rq, p);
running = task_running(rq, p);
on_rq = p->se.on_rq;
ncsw = 0;
success = 1;
out_running:
- trace_mark(kernel_sched_wakeup,
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- p->pid, p->state, rq, p, rq->curr);
+ trace_sched_wakeup(rq, p);
check_preempt_curr(rq, p);
p->state = TASK_RUNNING;
p->sched_class->task_new(rq, p);
inc_nr_running(rq);
}
- trace_mark(kernel_sched_wakeup_new,
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- p->pid, p->state, rq, p, rq->curr);
+ trace_sched_wakeup_new(rq, p);
check_preempt_curr(rq, p);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
- trace_mark(kernel_sched_schedule,
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- prev->pid, next->pid, prev->state,
- rq, prev, next);
+ trace_sched_switch(rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
|| unlikely(!cpu_active(dest_cpu)))
goto out;
+ trace_sched_migrate_task(rq, p, dest_cpu);
/* force the process onto the specified CPU */
if (migrate_task(p, dest_cpu, &req)) {
/* Need to wait for migration thread (might exit: take ref). */
#include <linux/freezer.h>
#include <linux/pid_namespace.h>
#include <linux/nsproxy.h>
+#include <trace/sched.h>
#include <asm/param.h>
#include <asm/uaccess.h>
struct sigpending *pending;
struct sigqueue *q;
+ trace_sched_signal_send(sig, t);
+
assert_spin_locked(&t->sighand->siglock);
if (!prepare_signal(sig, t))
return 0;
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/tick.h>
+#include <trace/irq.h>
#include <asm/irq.h>
/*
do {
if (pending & 1) {
+ trace_irq_softirq_entry(h, softirq_vec);
h->action(h);
+ trace_irq_softirq_exit(h, softirq_vec);
rcu_bh_qsctr_inc(cpu);
}
h++;
*/
inline void raise_softirq_irqoff(unsigned int nr)
{
+ trace_irq_softirq_raise(nr);
__raise_softirq_irqoff(nr);
/*
if (!atomic_read(&t->count)) {
if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
BUG();
+ trace_irq_tasklet_low_entry(t);
t->func(t->data);
+ trace_irq_tasklet_low_exit(t);
tasklet_unlock(t);
continue;
}
if (!atomic_read(&t->count)) {
if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
BUG();
+ trace_irq_tasklet_high_entry(t);
t->func(t->data);
+ trace_irq_tasklet_high_exit(t);
tasklet_unlock(t);
continue;
}
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
+#include <trace/timer.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>
+#include <asm/irq_regs.h>
u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
vec = base->tv5.vec + i;
}
+ trace_timer_set(timer);
/*
* Timers are FIFO:
*/
{
jiffies_64 += ticks;
update_times(ticks);
+ trace_timer_update_time(&xtime, &wall_to_monotonic);
}
#ifdef __ARCH_WANT_SYS_ALARM
static void process_timeout(unsigned long __data)
{
- wake_up_process((struct task_struct *)__data);
+ struct task_struct *task = (struct task_struct *)__data;
+ trace_timer_timeout(task);
+ wake_up_process(task);
}
/**
config FTRACE
bool "Kernel Function Tracer"
depends on HAVE_FTRACE
- select FRAME_POINTER
select TRACING
select CONTEXT_SWITCH_TRACER
help
bool "Sysprof Tracer"
depends on X86
select TRACING
+ select FRAME_POINTER
help
This tracer provides the trace needed by the 'Sysprof' userspace
tool.
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
-#include <linux/marker.h>
#include <linux/ftrace.h>
+#include <trace/sched.h>
#include "trace.h"
static atomic_t sched_ref;
static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
struct task_struct *next)
{
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
+ if (!atomic_read(&sched_ref))
+ return;
+
tracing_record_cmdline(prev);
tracing_record_cmdline(next);
local_irq_save(flags);
cpu = raw_smp_processor_id();
- data = tr->data[cpu];
+ data = ctx_trace->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
- tracing_sched_switch_trace(tr, data, prev, next, flags);
+ tracing_sched_switch_trace(ctx_trace, data, prev, next, flags);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *prev;
- struct task_struct *next;
- struct rq *__rq;
-
- if (!atomic_read(&sched_ref))
- return;
-
- /* skip prev_pid %d next_pid %d prev_state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- __rq = va_arg(*args, typeof(__rq));
- prev = va_arg(*args, typeof(prev));
- next = va_arg(*args, typeof(next));
-
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- sched_switch_func(probe_data, __rq, prev, next);
-}
-
static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
- task_struct *curr)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
{
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
- if (!tracer_enabled)
+ if (!likely(tracer_enabled))
return;
- tracing_record_cmdline(curr);
+ tracing_record_cmdline(current);
local_irq_save(flags);
cpu = raw_smp_processor_id();
- data = tr->data[cpu];
+ data = ctx_trace->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
- tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+ tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+ flags);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *curr;
- struct task_struct *task;
- struct rq *__rq;
-
- if (likely(!tracer_enabled))
- return;
-
- /* Skip pid %d state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- /* now get the meat: "rq %p task %p rq->curr %p" */
- __rq = va_arg(*args, typeof(__rq));
- task = va_arg(*args, typeof(task));
- curr = va_arg(*args, typeof(curr));
-
- tracing_record_cmdline(task);
- tracing_record_cmdline(curr);
-
- wakeup_func(probe_data, __rq, task, curr);
-}
-
static void sched_switch_reset(struct trace_array *tr)
{
int cpu;
{
int ret;
- ret = marker_probe_register("kernel_sched_wakeup",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &ctx_trace);
+ ret = register_trace_sched_wakeup(probe_sched_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return ret;
}
- ret = marker_probe_register("kernel_sched_wakeup_new",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &ctx_trace);
+ ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
- ret = marker_probe_register("kernel_sched_schedule",
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- sched_switch_callback,
- &ctx_trace);
+ ret = register_trace_sched_switch(probe_sched_switch);
if (ret) {
- pr_info("sched trace: Couldn't add marker"
+ pr_info("sched trace: Couldn't activate tracepoint"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
return ret;
fail_deprobe_wake_new:
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
fail_deprobe:
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
return ret;
}
static void tracing_sched_unregister(void)
{
- marker_probe_unregister("kernel_sched_schedule",
- sched_switch_callback,
- &ctx_trace);
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &ctx_trace);
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_switch(probe_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
}
static void tracing_start_sched_switch(void)
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
-#include <linux/marker.h>
+#include <trace/sched.h>
#include "trace.h"
}
static void notrace
-wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
unsigned long latency = 0, t0 = 0, t1 = 0;
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
cycle_t T0, T1, delta;
unsigned long flags;
long disabled;
int cpu;
+ tracing_record_cmdline(prev);
+
if (unlikely(!tracer_enabled))
return;
return;
/* The task we are waiting for is waking up */
- data = tr->data[wakeup_cpu];
+ data = wakeup_trace->data[wakeup_cpu];
/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
- disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
if (likely(disabled != 1))
goto out;
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
- trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+ trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags);
/*
* usecs conversion is slow so we try to delay the conversion
t0 = nsecs_to_usecs(T0);
t1 = nsecs_to_usecs(T1);
- update_max_tr(tr, wakeup_task, wakeup_cpu);
+ update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
out_unlock:
- __wakeup_reset(tr);
+ __wakeup_reset(wakeup_trace);
__raw_spin_unlock(&wakeup_lock);
local_irq_restore(flags);
out:
- atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *prev;
- struct task_struct *next;
- struct rq *__rq;
-
- /* skip prev_pid %d next_pid %d prev_state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- __rq = va_arg(*args, typeof(__rq));
- prev = va_arg(*args, typeof(prev));
- next = va_arg(*args, typeof(next));
-
- tracing_record_cmdline(prev);
-
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- wakeup_sched_switch(probe_data, __rq, prev, next);
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
static void __wakeup_reset(struct trace_array *tr)
}
static void
-wakeup_check_start(struct trace_array *tr, struct task_struct *p,
- struct task_struct *curr)
+probe_wakeup(struct rq *rq, struct task_struct *p)
{
int cpu = smp_processor_id();
unsigned long flags;
long disabled;
+ if (likely(!tracer_enabled))
+ return;
+
+ tracing_record_cmdline(p);
+ tracing_record_cmdline(current);
+
if (likely(!rt_task(p)) ||
p->prio >= wakeup_prio ||
- p->prio >= curr->prio)
+ p->prio >= current->prio)
return;
- disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
if (unlikely(disabled != 1))
goto out;
goto out_locked;
/* reset the trace */
- __wakeup_reset(tr);
+ __wakeup_reset(wakeup_trace);
wakeup_cpu = task_cpu(p);
wakeup_prio = p->prio;
local_save_flags(flags);
- tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
- trace_function(tr, tr->data[wakeup_cpu],
+ wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+ trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
CALLER_ADDR1, CALLER_ADDR2, flags);
out_locked:
__raw_spin_unlock(&wakeup_lock);
out:
- atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct trace_array **ptr = probe_data;
- struct trace_array *tr = *ptr;
- struct task_struct *curr;
- struct task_struct *task;
- struct rq *__rq;
-
- if (likely(!tracer_enabled))
- return;
-
- /* Skip pid %d state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- /* now get the meat: "rq %p task %p rq->curr %p" */
- __rq = va_arg(*args, typeof(__rq));
- task = va_arg(*args, typeof(task));
- curr = va_arg(*args, typeof(curr));
-
- tracing_record_cmdline(task);
- tracing_record_cmdline(curr);
-
- wakeup_check_start(tr, task, curr);
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;
- ret = marker_probe_register("kernel_sched_wakeup",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &wakeup_trace);
+ ret = register_trace_sched_wakeup(probe_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return;
}
- ret = marker_probe_register("kernel_sched_wakeup_new",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &wakeup_trace);
+ ret = register_trace_sched_wakeup_new(probe_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
- ret = marker_probe_register("kernel_sched_schedule",
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- sched_switch_callback,
- &wakeup_trace);
+ ret = register_trace_sched_switch(probe_wakeup_sched_switch);
if (ret) {
- pr_info("sched trace: Couldn't add marker"
+ pr_info("sched trace: Couldn't activate tracepoint"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
return;
fail_deprobe_wake_new:
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
fail_deprobe:
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_wakeup(probe_wakeup);
}
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
- marker_probe_unregister("kernel_sched_schedule",
- sched_switch_callback,
- &wakeup_trace);
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &wakeup_trace);
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_switch(probe_wakeup_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
+ unregister_trace_sched_wakeup(probe_wakeup);
}
static void wakeup_tracer_init(struct trace_array *tr)
--- /dev/null
+/*
+ * Copyright (C) 2008 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/tracepoint.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+extern struct tracepoint __start___tracepoints[];
+extern struct tracepoint __stop___tracepoints[];
+
+/* Set to 1 to enable tracepoint debug output */
+static const int tracepoint_debug;
+
+/*
+ * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
+ * builtin and module tracepoints and the hash table.
+ */
+static DEFINE_MUTEX(tracepoints_mutex);
+
+/*
+ * Tracepoint hash table, containing the active tracepoints.
+ * Protected by tracepoints_mutex.
+ */
+#define TRACEPOINT_HASH_BITS 6
+#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+
+/*
+ * Note about RCU :
+ * It is used to to delay the free of multiple probes array until a quiescent
+ * state is reached.
+ * Tracepoint entries modifications are protected by the tracepoints_mutex.
+ */
+struct tracepoint_entry {
+ struct hlist_node hlist;
+ void **funcs;
+ int refcount; /* Number of times armed. 0 if disarmed. */
+ struct rcu_head rcu;
+ void *oldptr;
+ unsigned char rcu_pending:1;
+ char name[0];
+};
+
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+
+static void free_old_closure(struct rcu_head *head)
+{
+ struct tracepoint_entry *entry = container_of(head,
+ struct tracepoint_entry, rcu);
+ kfree(entry->oldptr);
+ /* Make sure we free the data before setting the pending flag to 0 */
+ smp_wmb();
+ entry->rcu_pending = 0;
+}
+
+static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
+{
+ if (!old)
+ return;
+ entry->oldptr = old;
+ entry->rcu_pending = 1;
+ /* write rcu_pending before calling the RCU callback */
+ smp_wmb();
+#ifdef CONFIG_PREEMPT_RCU
+ synchronize_sched(); /* Until we have the call_rcu_sched() */
+#endif
+ call_rcu(&entry->rcu, free_old_closure);
+}
+
+static void debug_print_probes(struct tracepoint_entry *entry)
+{
+ int i;
+
+ if (!tracepoint_debug)
+ return;
+
+ for (i = 0; entry->funcs[i]; i++)
+ printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
+}
+
+static void *
+tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
+{
+ int nr_probes = 0;
+ void **old, **new;
+
+ WARN_ON(!probe);
+
+ debug_print_probes(entry);
+ old = entry->funcs;
+ if (old) {
+ /* (N -> N+1), (N != 0, 1) probes */
+ for (nr_probes = 0; old[nr_probes]; nr_probes++)
+ if (old[nr_probes] == probe)
+ return ERR_PTR(-EEXIST);
+ }
+ /* + 2 : one for new probe, one for NULL func */
+ new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+ if (new == NULL)
+ return ERR_PTR(-ENOMEM);
+ if (old)
+ memcpy(new, old, nr_probes * sizeof(void *));
+ new[nr_probes] = probe;
+ entry->refcount = nr_probes + 1;
+ entry->funcs = new;
+ debug_print_probes(entry);
+ return old;
+}
+
+static void *
+tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
+{
+ int nr_probes = 0, nr_del = 0, i;
+ void **old, **new;
+
+ old = entry->funcs;
+
+ debug_print_probes(entry);
+ /* (N -> M), (N > 1, M >= 0) probes */
+ for (nr_probes = 0; old[nr_probes]; nr_probes++) {
+ if ((!probe || old[nr_probes] == probe))
+ nr_del++;
+ }
+
+ if (nr_probes - nr_del == 0) {
+ /* N -> 0, (N > 1) */
+ entry->funcs = NULL;
+ entry->refcount = 0;
+ debug_print_probes(entry);
+ return old;
+ } else {
+ int j = 0;
+ /* N -> M, (N > 1, M > 0) */
+ /* + 1 for NULL */
+ new = kzalloc((nr_probes - nr_del + 1)
+ * sizeof(void *), GFP_KERNEL);
+ if (new == NULL)
+ return ERR_PTR(-ENOMEM);
+ for (i = 0; old[i]; i++)
+ if ((probe && old[i] != probe))
+ new[j++] = old[i];
+ entry->refcount = nr_probes - nr_del;
+ entry->funcs = new;
+ }
+ debug_print_probes(entry);
+ return old;
+}
+
+/*
+ * Get tracepoint if the tracepoint is present in the tracepoint hash table.
+ * Must be called with tracepoints_mutex held.
+ * Returns NULL if not present.
+ */
+static struct tracepoint_entry *get_tracepoint(const char *name)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct tracepoint_entry *e;
+ u32 hash = jhash(name, strlen(name), 0);
+
+ head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+ hlist_for_each_entry(e, node, head, hlist) {
+ if (!strcmp(name, e->name))
+ return e;
+ }
+ return NULL;
+}
+
+/*
+ * Add the tracepoint to the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static struct tracepoint_entry *add_tracepoint(const char *name)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct tracepoint_entry *e;
+ size_t name_len = strlen(name) + 1;
+ u32 hash = jhash(name, name_len-1, 0);
+
+ head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+ hlist_for_each_entry(e, node, head, hlist) {
+ if (!strcmp(name, e->name)) {
+ printk(KERN_NOTICE
+ "tracepoint %s busy\n", name);
+ return ERR_PTR(-EEXIST); /* Already there */
+ }
+ }
+ /*
+ * Using kmalloc here to allocate a variable length element. Could
+ * cause some memory fragmentation if overused.
+ */
+ e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
+ if (!e)
+ return ERR_PTR(-ENOMEM);
+ memcpy(&e->name[0], name, name_len);
+ e->funcs = NULL;
+ e->refcount = 0;
+ e->rcu_pending = 0;
+ hlist_add_head(&e->hlist, head);
+ return e;
+}
+
+/*
+ * Remove the tracepoint from the tracepoint hash table. Must be called with
+ * mutex_lock held.
+ */
+static int remove_tracepoint(const char *name)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct tracepoint_entry *e;
+ int found = 0;
+ size_t len = strlen(name) + 1;
+ u32 hash = jhash(name, len-1, 0);
+
+ head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+ hlist_for_each_entry(e, node, head, hlist) {
+ if (!strcmp(name, e->name)) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return -ENOENT;
+ if (e->refcount)
+ return -EBUSY;
+ hlist_del(&e->hlist);
+ /* Make sure the call_rcu has been executed */
+ if (e->rcu_pending)
+ rcu_barrier();
+ kfree(e);
+ return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one tracepoint.
+ */
+static void set_tracepoint(struct tracepoint_entry **entry,
+ struct tracepoint *elem, int active)
+{
+ WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+ /*
+ * rcu_assign_pointer has a smp_wmb() which makes sure that the new
+ * probe callbacks array is consistent before setting a pointer to it.
+ * This array is referenced by __DO_TRACE from
+ * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
+ * is used.
+ */
+ rcu_assign_pointer(elem->funcs, (*entry)->funcs);
+ elem->state = active;
+}
+
+/*
+ * Disable a tracepoint and its probe callback.
+ * Note: only waiting an RCU period after setting elem->call to the empty
+ * function insures that the original callback is not used anymore. This insured
+ * by preempt_disable around the call site.
+ */
+static void disable_tracepoint(struct tracepoint *elem)
+{
+ elem->state = 0;
+}
+
+/**
+ * tracepoint_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Updates the probe callback corresponding to a range of tracepoints.
+ */
+void tracepoint_update_probe_range(struct tracepoint *begin,
+ struct tracepoint *end)
+{
+ struct tracepoint *iter;
+ struct tracepoint_entry *mark_entry;
+
+ mutex_lock(&tracepoints_mutex);
+ for (iter = begin; iter < end; iter++) {
+ mark_entry = get_tracepoint(iter->name);
+ if (mark_entry) {
+ set_tracepoint(&mark_entry, iter,
+ !!mark_entry->refcount);
+ } else {
+ disable_tracepoint(iter);
+ }
+ }
+ mutex_unlock(&tracepoints_mutex);
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ */
+static void tracepoint_update_probes(void)
+{
+ /* Core kernel tracepoints */
+ tracepoint_update_probe_range(__start___tracepoints,
+ __stop___tracepoints);
+ /* tracepoints in modules. */
+ module_update_tracepoints();
+}
+
+/**
+ * tracepoint_probe_register - Connect a probe to a tracepoint
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
+ */
+int tracepoint_probe_register(const char *name, void *probe)
+{
+ struct tracepoint_entry *entry;
+ int ret = 0;
+ void *old;
+
+ mutex_lock(&tracepoints_mutex);
+ entry = get_tracepoint(name);
+ if (!entry) {
+ entry = add_tracepoint(name);
+ if (IS_ERR(entry)) {
+ ret = PTR_ERR(entry);
+ goto end;
+ }
+ }
+ /*
+ * If we detect that a call_rcu is pending for this tracepoint,
+ * make sure it's executed now.
+ */
+ if (entry->rcu_pending)
+ rcu_barrier();
+ old = tracepoint_entry_add_probe(entry, probe);
+ if (IS_ERR(old)) {
+ ret = PTR_ERR(old);
+ goto end;
+ }
+ mutex_unlock(&tracepoints_mutex);
+ tracepoint_update_probes(); /* may update entry */
+ mutex_lock(&tracepoints_mutex);
+ entry = get_tracepoint(name);
+ WARN_ON(!entry);
+ tracepoint_entry_free_old(entry, old);
+end:
+ mutex_unlock(&tracepoints_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+
+/**
+ * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which insures that every preempt disabled section
+ * have finished.
+ */
+int tracepoint_probe_unregister(const char *name, void *probe)
+{
+ struct tracepoint_entry *entry;
+ void *old;
+ int ret = -ENOENT;
+
+ mutex_lock(&tracepoints_mutex);
+ entry = get_tracepoint(name);
+ if (!entry)
+ goto end;
+ if (entry->rcu_pending)
+ rcu_barrier();
+ old = tracepoint_entry_remove_probe(entry, probe);
+ mutex_unlock(&tracepoints_mutex);
+ tracepoint_update_probes(); /* may update entry */
+ mutex_lock(&tracepoints_mutex);
+ entry = get_tracepoint(name);
+ if (!entry)
+ goto end;
+ tracepoint_entry_free_old(entry, old);
+ remove_tracepoint(name); /* Ignore busy error message */
+ ret = 0;
+end:
+ mutex_unlock(&tracepoints_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+/**
+ * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
+ * @tracepoint: current tracepoints (in), next tracepoint (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next tracepoint has been found (1) or not (0).
+ * Will return the first tracepoint in the range if the input tracepoint is
+ * NULL.
+ */
+int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+ struct tracepoint *begin, struct tracepoint *end)
+{
+ if (!*tracepoint && begin != end) {
+ *tracepoint = begin;
+ return 1;
+ }
+ if (*tracepoint >= begin && *tracepoint < end)
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
+
+static void tracepoint_get_iter(struct tracepoint_iter *iter)
+{
+ int found = 0;
+
+ /* Core kernel tracepoints */
+ if (!iter->module) {
+ found = tracepoint_get_iter_range(&iter->tracepoint,
+ __start___tracepoints, __stop___tracepoints);
+ if (found)
+ goto end;
+ }
+ /* tracepoints in modules. */
+ found = module_get_iter_tracepoints(iter);
+end:
+ if (!found)
+ tracepoint_iter_reset(iter);
+}
+
+void tracepoint_iter_start(struct tracepoint_iter *iter)
+{
+ tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_start);
+
+void tracepoint_iter_next(struct tracepoint_iter *iter)
+{
+ iter->tracepoint++;
+ /*
+ * iter->tracepoint may be invalid because we blindly incremented it.
+ * Make sure it is valid by marshalling on the tracepoints, getting the
+ * tracepoints from following modules if necessary.
+ */
+ tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_next);
+
+void tracepoint_iter_stop(struct tracepoint_iter *iter)
+{
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
+
+void tracepoint_iter_reset(struct tracepoint_iter *iter)
+{
+ iter->module = NULL;
+ iter->tracepoint = NULL;
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
#include <linux/memcontrol.h>
#include <linux/page-states.h>
#include "internal.h"
+#include <trace/filemap.h>
/*
* FIXME: remove all knowledge of the buffer layer from the core VM
{
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+ trace_wait_on_page_start(page, bit_nr);
if (test_bit(bit_nr, &page->flags))
__wait_on_bit(page_waitqueue(page), &wait, sync_page,
TASK_UNINTERRUPTIBLE);
+ trace_wait_on_page_end(page, bit_nr);
}
EXPORT_SYMBOL(wait_on_page_bit);
#include <linux/mutex.h>
#include <linux/bootmem.h>
#include <linux/sysfs.h>
+#include <trace/hugetlb.h>
#include <asm/page.h>
#include <asm/pgtable.h>
{
int i;
+ trace_hugetlb_page_release(page);
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
for (i = 0; i < pages_per_huge_page(h); i++) {
int nid = page_to_nid(page);
struct address_space *mapping;
+ trace_hugetlb_page_free(page);
mapping = (struct address_space *) page_private(page);
set_page_private(page, 0);
BUG_ON(page_count(page));
{
struct page *page;
- if (h->order >= MAX_ORDER)
- return NULL;
+ if (h->order >= MAX_ORDER) {
+ page = NULL;
+ goto end;
+ }
page = alloc_pages_node(nid,
htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
if (page) {
if (arch_prepare_hugepage(page)) {
__free_pages(page, huge_page_order(h));
- return NULL;
+ page = NULL;
+ goto end;
}
prep_new_huge_page(h, page, nid);
}
-
+end:
+ trace_hugetlb_page_grab(page);
return page;
}
spin_lock(&hugetlb_lock);
if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
spin_unlock(&hugetlb_lock);
- return NULL;
+ page = NULL;
+ goto end;
} else {
h->nr_huge_pages++;
h->surplus_huge_pages++;
__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
}
spin_unlock(&hugetlb_lock);
-
+end:
+ trace_hugetlb_buddy_pgalloc(page);
return page;
}
vma_commit_reservation(h, vma, addr);
+ trace_hugetlb_page_alloc(page);
return page;
}
long from, long to,
struct vm_area_struct *vma)
{
- long ret, chg;
+ int ret = 0;
+ long chg;
struct hstate *h = hstate_inode(inode);
if (vma && vma->vm_flags & VM_NORESERVE)
- return 0;
+ goto end;
/*
* Shared mappings base their reservation on the number of pages that
chg = region_chg(&inode->i_mapping->private_list, from, to);
else {
struct resv_map *resv_map = resv_map_alloc();
- if (!resv_map)
- return -ENOMEM;
+ if (!resv_map) {
+ ret = -ENOMEM;
+ goto end;
+ }
chg = to - from;
set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
}
- if (chg < 0)
- return chg;
+ if (chg < 0) {
+ ret = chg;
+ goto end;
+ }
- if (hugetlb_get_quota(inode->i_mapping, chg))
- return -ENOSPC;
+ if (hugetlb_get_quota(inode->i_mapping, chg)) {
+ ret = -ENOSPC;
+ goto end;
+ }
ret = hugetlb_acct_memory(h, chg);
if (ret < 0) {
hugetlb_put_quota(inode->i_mapping, chg);
- return ret;
+ goto end;
}
if (!vma || vma->vm_flags & VM_SHARED)
region_add(&inode->i_mapping->private_list, from, to);
- return 0;
+end:
+ trace_hugetlb_pages_reserve(inode, from, to, ret);
+ return ret;
}
void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
{
struct hstate *h = hstate_inode(inode);
- long chg = region_truncate(&inode->i_mapping->private_list, offset);
+ long chg;
+ trace_hugetlb_pages_unreserve(inode, offset, freed);
+ chg = region_truncate(&inode->i_mapping->private_list, offset);
spin_lock(&inode->i_lock);
inode->i_blocks -= blocks_per_huge_page(h);
spin_unlock(&inode->i_lock);
#include <linux/swapops.h>
#include <linux/elf.h>
+#include <trace/memory.h>
#include "internal.h"
+#include <trace/swap.h>
+
#ifndef CONFIG_NEED_MULTIPLE_NODES
/* use the per-pgdat data instead for discontigmem - mbligh */
unsigned long max_mapnr;
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
+ trace_swap_in(page, entry);
}
if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, int write_access)
{
+ int res;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ trace_memory_handle_fault_entry(mm, vma, address, write_access);
+
__set_current_state(TASK_RUNNING);
count_vm_event(PGFAULT);
- if (unlikely(is_vm_hugetlb_page(vma)))
- return hugetlb_fault(mm, vma, address, write_access);
+ if (unlikely(is_vm_hugetlb_page(vma))) {
+ res = hugetlb_fault(mm, vma, address, write_access);
+ goto end;
+ }
pgd = pgd_offset(mm, address);
pud = pud_alloc(mm, pgd, address);
- if (!pud)
- return VM_FAULT_OOM;
+ if (!pud) {
+ res = VM_FAULT_OOM;
+ goto end;
+ }
pmd = pmd_alloc(mm, pud, address);
- if (!pmd)
- return VM_FAULT_OOM;
+ if (!pmd) {
+ res = VM_FAULT_OOM;
+ goto end;
+ }
pte = pte_alloc_map(mm, pmd, address);
- if (!pte)
- return VM_FAULT_OOM;
+ if (!pte) {
+ res = VM_FAULT_OOM;
+ goto end;
+ }
- return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+ res = handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+end:
+ trace_memory_handle_fault_exit(res);
+ return res;
}
EXPORT_SYMBOL_GPL(handle_mm_fault); /* For MoL */
#include <asm/div64.h>
#include "internal.h"
+#include <trace/page_alloc.h>
+
/*
* Array of node states.
*/
int i;
int reserved = 0;
+ trace_page_free(page, order);
+
#ifdef CONFIG_XEN
if (PageForeign(page)) {
PageForeignDestructor(page);
struct per_cpu_pages *pcp;
unsigned long flags;
+ trace_page_free(page, 0);
+
if (unlikely(PageDiscarded(page))) {
if (page_free_discarded(page))
return;
show_mem();
}
got_pg:
+ trace_page_alloc(page, order);
return page;
}
EXPORT_SYMBOL(__alloc_pages_internal);
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
+#include <trace/swap.h>
#include <asm/pgtable.h>
static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
rw |= (1 << BIO_RW_SYNC);
count_vm_event(PSWPOUT);
set_page_writeback(page);
+ trace_swap_out(page);
unlock_page(page);
submit_bio(rw, bio);
out:
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <linux/swapops.h>
+#include <trace/swap.h>
static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
swap_map = p->swap_map;
p->swap_map = NULL;
p->flags = 0;
+ trace_swap_file_close(swap_file);
spin_unlock(&swap_lock);
mutex_unlock(&swapon_mutex);
vfree(swap_map);
} else {
swap_info[prev].next = p - swap_info;
}
+ trace_swap_file_open(swap_file, name);
spin_unlock(&swap_lock);
mutex_unlock(&swapon_mutex);
error = 0;
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
+#include <trace/net.h>
#include "net-sysfs.h"
}
gso:
+ trace_net_dev_xmit(skb);
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
__get_cpu_var(netdev_rx_stat).total++;
+ trace_net_dev_receive(skb);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
+#include <trace/ipv4.h>
static struct ipv4_devconf ipv4_devconf = {
.data = {
struct in_ifaddr **ifap1 = &ifa1->ifa_next;
while ((ifa = *ifap1) != NULL) {
+ trace_ipv4_addr_del(ifa);
if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
ifa1->ifa_scope <= ifa->ifa_scope)
last_prim = ifa;
}
ifa->ifa_flags |= IFA_F_SECONDARY;
}
+ trace_ipv4_addr_add(ifa);
}
if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <trace/ipv6.h>
/* Set to 3 to get tracing... */
#define ACONF_DEBUG 2
/* For caller */
in6_ifa_hold(ifa);
+ trace_ipv6_addr_add(ifa);
+
/* Add to big hash table */
hash = ipv6_addr_hash(addr);
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
+ trace_ipv6_addr_del(ifp);
ipv6_del_addr(ifp);
/* If the last address is deleted administratively,
#include <net/sock.h>
#include <linux/netfilter.h>
+#include <trace/socket.h>
static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
ret = __sock_sendmsg(&iocb, sock, msg, size);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
+ trace_socket_sendmsg(sock, msg, size, ret);
return ret;
}
int ret;
init_sync_kiocb(&iocb, NULL);
+
iocb.private = &siocb;
ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
+ trace_socket_recvmsg(sock, msg, size, flags, ret);
return ret;
}
if (retval < 0)
goto out_release;
+ trace_socket_create(sock, retval);
out:
/* It may be already another descriptor 8) Not kernel problem. */
return retval;
a0 = a[0];
a1 = a[1];
+ trace_socket_call(call, a0);
+
switch (call) {
case SYS_SOCKET:
err = sys_socket(a0, a1, a[2]);
help
This build markers example modules.
+config SAMPLE_TRACEPOINTS
+ tristate "Build tracepoints examples -- loadable modules only"
+ depends on TRACEPOINTS && m
+ help
+ This build tracepoints example modules.
+
config SAMPLE_KOBJECT
tristate "Build kobject examples"
help
# Makefile for Linux samples code
-obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/
+obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/
--- /dev/null
+# builds the tracepoint example kernel modules;
+# then to use one (as root): insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
--- /dev/null
+#ifndef _TP_SAMPLES_TRACE_H
+#define _TP_SAMPLES_TRACE_H
+
+#include <linux/proc_fs.h> /* for struct inode and struct file */
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_event,
+ TPPROTO(struct inode *inode, struct file *file),
+ TPARGS(inode, file));
+DEFINE_TRACE(subsys_eventb,
+ TPPROTO(void),
+ TPARGS());
+#endif
--- /dev/null
+/*
+ * tracepoint-probe-sample.c
+ *
+ * sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/dcache.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+ path_get(&file->f_path);
+ dget(file->f_path.dentry);
+ printk(KERN_INFO "Event is encountered with filename %s\n",
+ file->f_path.dentry->d_name.name);
+ dput(file->f_path.dentry);
+ path_put(&file->f_path);
+}
+
+static void probe_subsys_eventb(void)
+{
+ printk(KERN_INFO "Event B is encountered\n");
+}
+
+int __init tp_sample_trace_init(void)
+{
+ int ret;
+
+ ret = register_trace_subsys_event(probe_subsys_event);
+ WARN_ON(ret);
+ ret = register_trace_subsys_eventb(probe_subsys_eventb);
+ WARN_ON(ret);
+
+ return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+ unregister_trace_subsys_eventb(probe_subsys_eventb);
+ unregister_trace_subsys_event(probe_subsys_event);
+ tracepoint_synchronize_unregister();
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
--- /dev/null
+/*
+ * tracepoint-probe-sample2.c
+ *
+ * 2nd sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+ printk(KERN_INFO "Event is encountered with inode number %lu\n",
+ inode->i_ino);
+}
+
+int __init tp_sample_trace_init(void)
+{
+ int ret;
+
+ ret = register_trace_subsys_event(probe_subsys_event);
+ WARN_ON(ret);
+
+ return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+ unregister_trace_subsys_event(probe_subsys_event);
+ tracepoint_synchronize_unregister();
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
--- /dev/null
+/* tracepoint-sample.c
+ *
+ * Executes a tracepoint when /proc/tracepoint-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include "tp-samples-trace.h"
+
+struct proc_dir_entry *pentry_example;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+ int i;
+
+ trace_subsys_event(inode, file);
+ for (i = 0; i < 10; i++)
+ trace_subsys_eventb();
+ return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+ .open = my_open,
+};
+
+static int example_init(void)
+{
+ printk(KERN_ALERT "example init\n");
+ pentry_example = proc_create("tracepoint-example", 0444, NULL,
+ &mark_ops);
+ if (!pentry_example)
+ return -EPERM;
+ return 0;
+}
+
+static void example_exit(void)
+{
+ printk(KERN_ALERT "example exit\n");
+ remove_proc_entry("tracepoint-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint example");