UBUNTU: SAUCE: (no-up) vfs: Add a trace point in the mark_inode_dirty function
author Arjan van de Ven <arjan@linux.intel.com>
Sun, 25 Oct 2009 22:37:04 +0000 (15:37 -0700)
committer Leann Ogasawara <leann.ogasawara@canonical.com>
Mon, 2 Apr 2012 20:10:07 +0000 (13:10 -0700)
[apw@canonical.com: This has no upstream traction but is used by powertop,
so it's worth carrying.]

PowerTOP would like to be able to show who is keeping the disk
busy by dirtying data. The most logical spot for this is in the vfs
in the mark_inode_dirty() function. Doing this on the block level
is not possible because by the time the IO hits the block layer the
guilty party can no longer be found ("kjournald" and "pdflush" are not
useful answers to "who caused this file to be dirty?").

The trace point follows the same logic/style as the block_dump code
and pretty much dumps the same data, just not to dmesg (and thus to
/var/log/messages) but via the trace events streams.

Note: This patch was posted to lkml and might potentially go into 2.6.33 but I
have not seen which maintainer will take it.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Amit Kucheria <amit.kucheria@canonical.com>
Signed-off-by: Andy Whitcroft <apw@canonical.com>

fs/fs-writeback.c
fs/inode.c
include/trace/events/vfs.h [new file with mode: 0644]

index 54f5786..87fd5bf 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/tracepoint.h>
+#include <trace/events/vfs.h>
 #include "internal.h"
 
 /*
@@ -1072,6 +1073,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
        if ((inode->i_state & flags) == flags)
                return;
 
+       trace_dirty_inode(inode, current);
+
        if (unlikely(block_dump))
                block_dump___mark_inode_dirty(inode);
 
index ee4e66b..6cf14df 100644 (file)
@@ -1678,3 +1678,7 @@ bool inode_owner_or_capable(const struct inode *inode)
        return false;
 }
 EXPORT_SYMBOL(inode_owner_or_capable);
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/vfs.h>
+
diff --git a/include/trace/events/vfs.h b/include/trace/events/vfs.h
new file mode 100644 (file)
index 0000000..3c170f8
--- /dev/null
@@ -0,0 +1,53 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vfs
+
+#if !defined(_TRACE_VFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_VFS_H
+
+/*
+ * Tracepoint for dirtying an inode: records the dirtying task (comm, pid),
+ * the superblock id and the name of one dentry alias ("?" if none is found).
+ */
+TRACE_EVENT(dirty_inode,
+
+       TP_PROTO(struct inode *inode, struct task_struct *task),
+
+       TP_ARGS(inode, task),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __array( char,  dev,    16              )
+               __array( char,  file,   32              )
+       ),
+
+       TP_fast_assign(
+               struct dentry *dentry = NULL;
+
+               /* Fill every field up front so no record carries stale bytes. */
+               memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+               __entry->pid = task->pid;
+               strlcpy(__entry->dev, inode->i_sb->s_id, sizeof(__entry->dev));
+               strlcpy(__entry->file, "?", sizeof(__entry->file));
+
+               /* Skip the alias lookup for "bdev" inodes with inode number 0. */
+               if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
+                       dentry = d_find_alias(inode);
+               if (dentry) {
+                       /* Hold d_lock while the dentry name is copied out. */
+                       spin_lock(&dentry->d_lock);
+                       strlcpy(__entry->file, (const char *) dentry->d_name.name,
+                               sizeof(__entry->file));
+                       spin_unlock(&dentry->d_lock);
+                       dput(dentry);
+               }
+       ),
+
+       TP_printk("task=%i (%s) file=%s dev=%s",
+               __entry->pid, __entry->comm, __entry->file, __entry->dev)
+);
+
+#endif /* _TRACE_VFS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>