UBUNTU: Ubuntu-2.6.38-12.51
[linux-flexiantxendom0-natty.git] / kernel / audit_tree.c
index 9ef5e0a..37b2bea 100644 (file)
@@ -1,7 +1,9 @@
 #include "audit.h"
-#include <linux/inotify.h>
+#include <linux/fsnotify_backend.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
 
 struct audit_tree;
 struct audit_chunk;
@@ -20,10 +22,11 @@ struct audit_tree {
 
 struct audit_chunk {
        struct list_head hash;
-       struct inotify_watch watch;
+       struct fsnotify_mark mark;
        struct list_head trees;         /* with root here */
        int dead;
        int count;
+       atomic_long_t refs;
        struct rcu_head head;
        struct node {
                struct list_head list;
@@ -56,7 +59,8 @@ static LIST_HEAD(prune_list);
  * tree is refcounted; one reference for "some rules on rules_list refer to
  * it", one for each chunk with pointer to it.
  *
- * chunk is refcounted by embedded inotify_watch.
+ * chunk is refcounted by the embedded fsnotify_mark plus .refs (a non-zero
+ * refcount on the mark contributes 1 to .refs).
  *
  * node.index allows to get from node.list to containing chunk.
  * MSB of that sucker is stolen to mark taggings that we might have to
@@ -64,7 +68,7 @@ static LIST_HEAD(prune_list);
  * that makes a difference.  Some.
  */
 
-static struct inotify_handle *rtree_ih;
+static struct fsnotify_group *audit_tree_group;
 
 static struct audit_tree *alloc_tree(const char *s)
 {
@@ -107,6 +111,35 @@ const char *audit_tree_path(struct audit_tree *tree)
        return tree->pathname;
 }
 
+/*
+ * Release a chunk: drop the tree reference held by every occupied owner
+ * slot, then free the chunk's memory.
+ */
+static void free_chunk(struct audit_chunk *chunk)
+{
+       struct node *slot = chunk->owners;
+       struct node *end = slot + chunk->count;
+
+       for (; slot < end; slot++) {
+               /* slots without an owner hold no tree reference */
+               if (!slot->owner)
+                       continue;
+               put_tree(slot->owner);
+       }
+       kfree(chunk);
+}
+
+/* Drop one reference from chunk->refs; the final put frees the chunk. */
+void audit_put_chunk(struct audit_chunk *chunk)
+{
+       if (!atomic_long_dec_and_test(&chunk->refs))
+               return;
+       free_chunk(chunk);
+}
+
+/* RCU callback: map the rcu_head back to its chunk and drop one reference. */
+static void __put_chunk(struct rcu_head *rcu)
+{
+       audit_put_chunk(container_of(rcu, struct audit_chunk, head));
+}
+
+/*
+ * Destructor handed to fsnotify_init_mark() for the embedded mark: queue
+ * the chunk's reference drop (__put_chunk) through call_rcu().
+ */
+static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
+{
+       struct audit_chunk *owner = container_of(entry, struct audit_chunk, mark);
+
+       call_rcu(&owner->head, __put_chunk);
+}
+
 static struct audit_chunk *alloc_chunk(int count)
 {
        struct audit_chunk *chunk;
@@ -121,36 +154,15 @@ static struct audit_chunk *alloc_chunk(int count)
        INIT_LIST_HEAD(&chunk->hash);
        INIT_LIST_HEAD(&chunk->trees);
        chunk->count = count;
+       atomic_long_set(&chunk->refs, 1);
        for (i = 0; i < count; i++) {
                INIT_LIST_HEAD(&chunk->owners[i].list);
                chunk->owners[i].index = i;
        }
-       inotify_init_watch(&chunk->watch);
+       fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch);
        return chunk;
 }
 
-static void __free_chunk(struct rcu_head *rcu)
-{
-       struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
-       int i;
-
-       for (i = 0; i < chunk->count; i++) {
-               if (chunk->owners[i].owner)
-                       put_tree(chunk->owners[i].owner);
-       }
-       kfree(chunk);
-}
-
-static inline void free_chunk(struct audit_chunk *chunk)
-{
-       call_rcu(&chunk->head, __free_chunk);
-}
-
-void audit_put_chunk(struct audit_chunk *chunk)
-{
-       put_inotify_watch(&chunk->watch);
-}
-
 enum {HASH_SIZE = 128};
 static struct list_head chunk_hash_heads[HASH_SIZE];
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
@@ -161,10 +173,15 @@ static inline struct list_head *chunk_hash(const struct inode *inode)
        return chunk_hash_heads + n % HASH_SIZE;
 }
 
-/* hash_lock is held by caller */
+/*
+ * hash_lock and entry->lock are held by the caller.  A chunk whose mark has
+ * no inode is not inserted into the hash.
+ */
 static void insert_hash(struct audit_chunk *chunk)
 {
-       struct list_head *list = chunk_hash(chunk->watch.inode);
+       struct fsnotify_mark *entry = &chunk->mark;
+       struct list_head *list;
+
+       if (!entry->i.inode)
+               return;
+       list = chunk_hash(entry->i.inode);
        list_add_rcu(&chunk->hash, list);
 }
 
@@ -172,12 +189,12 @@ static void insert_hash(struct audit_chunk *chunk)
 struct audit_chunk *audit_tree_lookup(const struct inode *inode)
 {
        struct list_head *list = chunk_hash(inode);
-       struct list_head *pos;
+       struct audit_chunk *p;
 
-       list_for_each_rcu(pos, list) {
-               struct audit_chunk *p = container_of(pos, struct audit_chunk, hash);
-               if (p->watch.inode == inode) {
-                       get_inotify_watch(&p->watch);
+       list_for_each_entry_rcu(p, list, hash) {
+               /* mark.inode may have gone NULL, but who cares? */
+               if (p->mark.i.inode == inode) {
+                       atomic_long_inc(&p->refs);
                        return p;
                }
        }
@@ -195,17 +212,35 @@ int audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
 
 /* tagging and untagging inodes with trees */
 
-static void untag_chunk(struct audit_chunk *chunk, struct node *p)
+/*
+ * Map a node back to the chunk whose owners[] array contains it.  The low
+ * 31 bits of node->index give the node's position in owners[]; the MSB is
+ * a flag bit and is masked off first.
+ */
+static struct audit_chunk *find_chunk(struct node *p)
+{
+       int slot = p->index & ~(1U<<31);
+
+       return container_of(p - slot, struct audit_chunk, owners[0]);
+}
+
+static void untag_chunk(struct node *p)
 {
-       struct audit_chunk *new;
+       struct audit_chunk *chunk = find_chunk(p);
+       struct fsnotify_mark *entry = &chunk->mark;
+       struct audit_chunk *new = NULL;
        struct audit_tree *owner;
        int size = chunk->count - 1;
        int i, j;
 
-       mutex_lock(&chunk->watch.inode->inotify_mutex);
-       if (chunk->dead) {
-               mutex_unlock(&chunk->watch.inode->inotify_mutex);
-               return;
+       fsnotify_get_mark(entry);
+
+       spin_unlock(&hash_lock);
+
+       if (size)
+               new = alloc_chunk(size);
+
+       spin_lock(&entry->lock);
+       if (chunk->dead || !entry->i.inode) {
+               spin_unlock(&entry->lock);
+               if (new)
+                       free_chunk(new);
+               goto out;
        }
 
        owner = p->owner;
@@ -219,16 +254,17 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
                list_del_init(&p->list);
                list_del_rcu(&chunk->hash);
                spin_unlock(&hash_lock);
-               inotify_evict_watch(&chunk->watch);
-               mutex_unlock(&chunk->watch.inode->inotify_mutex);
-               put_inotify_watch(&chunk->watch);
-               return;
+               spin_unlock(&entry->lock);
+               fsnotify_destroy_mark(entry);
+               fsnotify_put_mark(entry);
+               goto out;
        }
 
-       new = alloc_chunk(size);
        if (!new)
                goto Fallback;
-       if (inotify_clone_watch(&chunk->watch, &new->watch) < 0) {
+
+       fsnotify_duplicate_mark(&new->mark, entry);
+       if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
                free_chunk(new);
                goto Fallback;
        }
@@ -241,7 +277,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
                owner->root = NULL;
        }
 
-       for (i = j = 0; i < size; i++, j++) {
+       for (i = j = 0; j <= size; i++, j++) {
                struct audit_tree *s;
                if (&chunk->owners[j] == p) {
                        list_del_init(&p->list);
@@ -254,17 +290,17 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
                if (!s) /* result of earlier fallback */
                        continue;
                get_tree(s);
-               list_replace_init(&chunk->owners[i].list, &new->owners[j].list);
+               list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
        }
 
        list_replace_rcu(&chunk->hash, &new->hash);
        list_for_each_entry(owner, &new->trees, same_root)
                owner->root = new;
        spin_unlock(&hash_lock);
-       inotify_evict_watch(&chunk->watch);
-       mutex_unlock(&chunk->watch.inode->inotify_mutex);
-       put_inotify_watch(&chunk->watch);
-       return;
+       spin_unlock(&entry->lock);
+       fsnotify_destroy_mark(entry);
+       fsnotify_put_mark(entry);
+       goto out;
 
 Fallback:
        // do the best we can
@@ -277,28 +313,33 @@ Fallback:
        p->owner = NULL;
        put_tree(owner);
        spin_unlock(&hash_lock);
-       mutex_unlock(&chunk->watch.inode->inotify_mutex);
+       spin_unlock(&entry->lock);
+out:
+       fsnotify_put_mark(entry);
+       spin_lock(&hash_lock);
 }
 
 static int create_chunk(struct inode *inode, struct audit_tree *tree)
 {
+       struct fsnotify_mark *entry;
        struct audit_chunk *chunk = alloc_chunk(1);
        if (!chunk)
                return -ENOMEM;
 
-       if (inotify_add_watch(rtree_ih, &chunk->watch, inode, IN_IGNORED | IN_DELETE_SELF) < 0) {
+       entry = &chunk->mark;
+       if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
                free_chunk(chunk);
                return -ENOSPC;
        }
 
-       mutex_lock(&inode->inotify_mutex);
+       spin_lock(&entry->lock);
        spin_lock(&hash_lock);
        if (tree->goner) {
                spin_unlock(&hash_lock);
                chunk->dead = 1;
-               inotify_evict_watch(&chunk->watch);
-               mutex_unlock(&inode->inotify_mutex);
-               put_inotify_watch(&chunk->watch);
+               spin_unlock(&entry->lock);
+               fsnotify_destroy_mark(entry);
+               fsnotify_put_mark(entry);
                return 0;
        }
        chunk->owners[0].index = (1U << 31);
@@ -311,52 +352,76 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
        }
        insert_hash(chunk);
        spin_unlock(&hash_lock);
-       mutex_unlock(&inode->inotify_mutex);
+       spin_unlock(&entry->lock);
        return 0;
 }
 
 /* the first tagged inode becomes root of tree */
 static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 {
-       struct inotify_watch *watch;
+       struct fsnotify_mark *old_entry, *chunk_entry;
        struct audit_tree *owner;
        struct audit_chunk *chunk, *old;
        struct node *p;
        int n;
 
-       if (inotify_find_watch(rtree_ih, inode, &watch) < 0)
+       old_entry = fsnotify_find_inode_mark(audit_tree_group, inode);
+       if (!old_entry)
                return create_chunk(inode, tree);
 
-       old = container_of(watch, struct audit_chunk, watch);
+       old = container_of(old_entry, struct audit_chunk, mark);
 
        /* are we already there? */
        spin_lock(&hash_lock);
        for (n = 0; n < old->count; n++) {
                if (old->owners[n].owner == tree) {
                        spin_unlock(&hash_lock);
-                       put_inotify_watch(watch);
+                       fsnotify_put_mark(old_entry);
                        return 0;
                }
        }
        spin_unlock(&hash_lock);
 
        chunk = alloc_chunk(old->count + 1);
-       if (!chunk)
+       if (!chunk) {
+               fsnotify_put_mark(old_entry);
                return -ENOMEM;
+       }
 
-       mutex_lock(&inode->inotify_mutex);
-       if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) {
-               mutex_unlock(&inode->inotify_mutex);
+       chunk_entry = &chunk->mark;
+
+       spin_lock(&old_entry->lock);
+       if (!old_entry->i.inode) {
+               /* old_entry is being shot, let's just lie */
+               spin_unlock(&old_entry->lock);
+               fsnotify_put_mark(old_entry);
                free_chunk(chunk);
+               return -ENOENT;
+       }
+
+       fsnotify_duplicate_mark(chunk_entry, old_entry);
+       if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) {
+               spin_unlock(&old_entry->lock);
+               free_chunk(chunk);
+               fsnotify_put_mark(old_entry);
                return -ENOSPC;
        }
+
+       /* even though we hold old_entry->lock, this is safe since chunk_entry->lock could NEVER have been grabbed before */
+       spin_lock(&chunk_entry->lock);
        spin_lock(&hash_lock);
+
+       /* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */
        if (tree->goner) {
                spin_unlock(&hash_lock);
                chunk->dead = 1;
-               inotify_evict_watch(&chunk->watch);
-               mutex_unlock(&inode->inotify_mutex);
-               put_inotify_watch(&chunk->watch);
+               spin_unlock(&chunk_entry->lock);
+               spin_unlock(&old_entry->lock);
+
+               fsnotify_destroy_mark(chunk_entry);
+
+               fsnotify_put_mark(chunk_entry);
+               fsnotify_put_mark(old_entry);
                return 0;
        }
        list_replace_init(&old->trees, &chunk->trees);
@@ -382,19 +447,14 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
                list_add(&tree->same_root, &chunk->trees);
        }
        spin_unlock(&hash_lock);
-       inotify_evict_watch(&old->watch);
-       mutex_unlock(&inode->inotify_mutex);
-       put_inotify_watch(&old->watch);
+       spin_unlock(&chunk_entry->lock);
+       spin_unlock(&old_entry->lock);
+       fsnotify_destroy_mark(old_entry);
+       fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
+       fsnotify_put_mark(old_entry); /* and kill it */
        return 0;
 }
 
-static struct audit_chunk *find_chunk(struct node *p)
-{
-       int index = p->index & ~(1U<<31);
-       p -= index;
-       return container_of(p, struct audit_chunk, owners[0]);
-}
-
 static void kill_rules(struct audit_tree *tree)
 {
        struct audit_krule *rule, *next;
@@ -408,17 +468,16 @@ static void kill_rules(struct audit_tree *tree)
                if (rule->tree) {
                        /* not a half-baked one */
                        ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-                       audit_log_format(ab, "op=remove rule dir=");
+                       audit_log_format(ab, "op=");
+                       audit_log_string(ab, "remove rule");
+                       audit_log_format(ab, " dir=");
                        audit_log_untrustedstring(ab, rule->tree->pathname);
-                       if (rule->filterkey) {
-                               audit_log_format(ab, " key=");
-                               audit_log_untrustedstring(ab, rule->filterkey);
-                       } else
-                               audit_log_format(ab, " key=(null)");
+                       audit_log_key(ab, rule->filterkey);
                        audit_log_format(ab, " list=%d res=1", rule->listnr);
                        audit_log_end(ab);
                        rule->tree = NULL;
                        list_del_rcu(&entry->list);
+                       list_del(&entry->rule.list);
                        call_rcu(&entry->rcu, audit_free_rule_rcu);
                }
        }
@@ -432,17 +491,10 @@ static void prune_one(struct audit_tree *victim)
        spin_lock(&hash_lock);
        while (!list_empty(&victim->chunks)) {
                struct node *p;
-               struct audit_chunk *chunk;
 
                p = list_entry(victim->chunks.next, struct node, list);
-               chunk = find_chunk(p);
-               get_inotify_watch(&chunk->watch);
-               spin_unlock(&hash_lock);
 
-               untag_chunk(chunk, p);
-
-               put_inotify_watch(&chunk->watch);
-               spin_lock(&hash_lock);
+               untag_chunk(p);
        }
        spin_unlock(&hash_lock);
        put_tree(victim);
@@ -470,7 +522,6 @@ static void trim_marked(struct audit_tree *tree)
 
        while (!list_empty(&tree->chunks)) {
                struct node *node;
-               struct audit_chunk *chunk;
 
                node = list_entry(tree->chunks.next, struct node, list);
 
@@ -478,14 +529,7 @@ static void trim_marked(struct audit_tree *tree)
                if (!(node->index & (1U<<31)))
                        break;
 
-               chunk = find_chunk(node);
-               get_inotify_watch(&chunk->watch);
-               spin_unlock(&hash_lock);
-
-               untag_chunk(chunk, node);
-
-               put_inotify_watch(&chunk->watch);
-               spin_lock(&hash_lock);
+               untag_chunk(node);
        }
        if (!tree->root && !tree->goner) {
                tree->goner = 1;
@@ -500,6 +544,8 @@ static void trim_marked(struct audit_tree *tree)
        }
 }
 
+static void audit_schedule_prune(void);
+
 /* called with audit_filter_mutex */
 int audit_remove_tree_rule(struct audit_krule *rule)
 {
@@ -525,6 +571,11 @@ int audit_remove_tree_rule(struct audit_krule *rule)
        return 0;
 }
 
+/*
+ * iterate_mounts() callback: returns non-zero when the root inode of @mnt
+ * is the inode passed in @arg.
+ */
+static int compare_root(struct vfsmount *mnt, void *arg)
+{
+       struct inode *root = mnt->mnt_root->d_inode;
+
+       return root == arg;
+}
+
 void audit_trim_trees(void)
 {
        struct list_head cursor;
@@ -533,10 +584,9 @@ void audit_trim_trees(void)
        list_add(&cursor, &tree_list);
        while (cursor.next != &tree_list) {
                struct audit_tree *tree;
-               struct nameidata nd;
+               struct path path;
                struct vfsmount *root_mnt;
                struct node *node;
-               struct list_head list;
                int err;
 
                tree = container_of(cursor.next, struct audit_tree, list);
@@ -545,33 +595,27 @@ void audit_trim_trees(void)
                list_add(&cursor, &tree->list);
                mutex_unlock(&audit_filter_mutex);
 
-               err = path_lookup(tree->pathname, 0, &nd);
+               err = kern_path(tree->pathname, 0, &path);
                if (err)
                        goto skip_it;
 
-               root_mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
-               path_put(&nd.path);
+               root_mnt = collect_mounts(&path);
+               path_put(&path);
                if (!root_mnt)
                        goto skip_it;
 
-               list_add_tail(&list, &root_mnt->mnt_list);
                spin_lock(&hash_lock);
                list_for_each_entry(node, &tree->chunks, list) {
                        struct audit_chunk *chunk = find_chunk(node);
-                       struct inode *inode = chunk->watch.inode;
-                       struct vfsmount *mnt;
+                       /* this could be NULL if the watch is dying elsewhere... */
+                       struct inode *inode = chunk->mark.i.inode;
                        node->index |= 1U<<31;
-                       list_for_each_entry(mnt, &list, mnt_list) {
-                               if (mnt->mnt_root->d_inode == inode) {
-                                       node->index &= ~(1U<<31);
-                                       break;
-                               }
-                       }
+                       if (iterate_mounts(compare_root, inode, root_mnt))
+                               node->index &= ~(1U<<31);
                }
                spin_unlock(&hash_lock);
                trim_marked(tree);
                put_tree(tree);
-               list_del_init(&list);
                drop_collected_mounts(root_mnt);
 skip_it:
                mutex_lock(&audit_filter_mutex);
@@ -580,28 +624,12 @@ skip_it:
        mutex_unlock(&audit_filter_mutex);
 }
 
-static int is_under(struct vfsmount *mnt, struct dentry *dentry,
-                   struct nameidata *nd)
-{
-       if (mnt != nd->path.mnt) {
-               for (;;) {
-                       if (mnt->mnt_parent == mnt)
-                               return 0;
-                       if (mnt->mnt_parent == nd->path.mnt)
-                                       break;
-                       mnt = mnt->mnt_parent;
-               }
-               dentry = mnt->mnt_mountpoint;
-       }
-       return is_subdir(dentry, nd->path.dentry);
-}
-
 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
 {
 
        if (pathname[0] != '/' ||
            rule->listnr != AUDIT_FILTER_EXIT ||
-           op & ~AUDIT_EQUAL ||
+           op != Audit_equal ||
            rule->inode_f || rule->watch || rule->tree)
                return -EINVAL;
        rule->tree = alloc_tree(pathname);
@@ -615,13 +643,17 @@ void audit_put_tree(struct audit_tree *tree)
        put_tree(tree);
 }
 
+/* iterate_mounts() callback: tag the root inode of @mnt with the tree in @arg. */
+static int tag_mount(struct vfsmount *mnt, void *arg)
+{
+       struct audit_tree *tree = arg;
+
+       return tag_chunk(mnt->mnt_root->d_inode, tree);
+}
+
 /* called with audit_filter_mutex */
 int audit_add_tree_rule(struct audit_krule *rule)
 {
        struct audit_tree *seed = rule->tree, *tree;
-       struct nameidata nd;
-       struct vfsmount *mnt, *p;
-       struct list_head list;
+       struct path path;
+       struct vfsmount *mnt;
        int err;
 
        list_for_each_entry(tree, &tree_list, list) {
@@ -638,25 +670,18 @@ int audit_add_tree_rule(struct audit_krule *rule)
        /* do not set rule->tree yet */
        mutex_unlock(&audit_filter_mutex);
 
-       err = path_lookup(tree->pathname, 0, &nd);
+       err = kern_path(tree->pathname, 0, &path);
        if (err)
                goto Err;
-       mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
-       path_put(&nd.path);
+       mnt = collect_mounts(&path);
+       path_put(&path);
        if (!mnt) {
                err = -ENOMEM;
                goto Err;
        }
-       list_add_tail(&list, &mnt->mnt_list);
 
        get_tree(tree);
-       list_for_each_entry(p, &list, mnt_list) {
-               err = tag_chunk(p->mnt_root->d_inode, tree);
-               if (err)
-                       break;
-       }
-
-       list_del(&list);
+       err = iterate_mounts(tag_mount, tree, mnt);
        drop_collected_mounts(mnt);
 
        if (!err) {
@@ -691,34 +716,23 @@ int audit_tag_tree(char *old, char *new)
 {
        struct list_head cursor, barrier;
        int failed = 0;
-       struct nameidata nd;
+       struct path path1, path2;
        struct vfsmount *tagged;
-       struct list_head list;
-       struct vfsmount *mnt;
-       struct dentry *dentry;
        int err;
 
-       err = path_lookup(new, 0, &nd);
+       err = kern_path(new, 0, &path2);
        if (err)
                return err;
-       tagged = collect_mounts(nd.path.mnt, nd.path.dentry);
-       path_put(&nd.path);
+       tagged = collect_mounts(&path2);
+       path_put(&path2);
        if (!tagged)
                return -ENOMEM;
 
-       err = path_lookup(old, 0, &nd);
+       err = kern_path(old, 0, &path1);
        if (err) {
                drop_collected_mounts(tagged);
                return err;
        }
-       mnt = mntget(nd.path.mnt);
-       dentry = dget(nd.path.dentry);
-       path_put(&nd.path);
-
-       if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
-               follow_up(&mnt, &dentry);
-
-       list_add_tail(&list, &tagged->mnt_list);
 
        mutex_lock(&audit_filter_mutex);
        list_add(&barrier, &tree_list);
@@ -726,7 +740,7 @@ int audit_tag_tree(char *old, char *new)
 
        while (cursor.next != &tree_list) {
                struct audit_tree *tree;
-               struct vfsmount *p;
+               int good_one = 0;
 
                tree = container_of(cursor.next, struct audit_tree, list);
                get_tree(tree);
@@ -734,30 +748,19 @@ int audit_tag_tree(char *old, char *new)
                list_add(&cursor, &tree->list);
                mutex_unlock(&audit_filter_mutex);
 
-               err = path_lookup(tree->pathname, 0, &nd);
-               if (err) {
-                       put_tree(tree);
-                       mutex_lock(&audit_filter_mutex);
-                       continue;
+               err = kern_path(tree->pathname, 0, &path2);
+               if (!err) {
+                       good_one = path_is_under(&path1, &path2);
+                       path_put(&path2);
                }
 
-               spin_lock(&vfsmount_lock);
-               if (!is_under(mnt, dentry, &nd)) {
-                       spin_unlock(&vfsmount_lock);
-                       path_put(&nd.path);
+               if (!good_one) {
                        put_tree(tree);
                        mutex_lock(&audit_filter_mutex);
                        continue;
                }
-               spin_unlock(&vfsmount_lock);
-               path_put(&nd.path);
-
-               list_for_each_entry(p, &list, mnt_list) {
-                       failed = tag_chunk(p->mnt_root->d_inode, tree);
-                       if (failed)
-                               break;
-               }
 
+               failed = iterate_mounts(tag_mount, tree, tagged);
                if (failed) {
                        put_tree(tree);
                        mutex_lock(&audit_filter_mutex);
@@ -798,20 +801,19 @@ int audit_tag_tree(char *old, char *new)
        }
        list_del(&barrier);
        list_del(&cursor);
-       list_del(&list);
        mutex_unlock(&audit_filter_mutex);
-       dput(dentry);
-       mntput(mnt);
+       path_put(&path1);
        drop_collected_mounts(tagged);
        return failed;
 }
 
 /*
  * That gets run when evict_chunk() ends up needing to kill audit_tree.
- * Runs from a separate thread, with audit_cmd_mutex held.
+ * Runs from a separate thread.
  */
-void audit_prune_trees(void)
+static int prune_tree_thread(void *unused)
 {
+       mutex_lock(&audit_cmd_mutex);
        mutex_lock(&audit_filter_mutex);
 
        while (!list_empty(&prune_list)) {
@@ -828,16 +830,51 @@ void audit_prune_trees(void)
        }
 
        mutex_unlock(&audit_filter_mutex);
+       mutex_unlock(&audit_cmd_mutex);
+       return 0;
+}
+
+/*
+ * Start a kernel thread running prune_tree_thread(), which walks
+ * prune_list.
+ *
+ * kthread_run() hands back an ERR_PTR() when the thread cannot be created;
+ * report that instead of dropping the failure silently.
+ */
+static void audit_schedule_prune(void)
+{
+       struct task_struct *task;
+
+       task = kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
+       if (IS_ERR(task))
+               printk(KERN_ERR "audit: could not start audit_prune_tree thread (%ld)\n",
+                      PTR_ERR(task));
+}
+
+/*
+ * Synchronous counterpart of the prune thread, used when evict_chunk()
+ * postponed the work until the end of the syscall.  Every tree on @list
+ * has its rules killed, is unlinked from the list and is then pruned.
+ */
+void audit_kill_trees(struct list_head *list)
+{
+       struct audit_tree *victim;
+
+       mutex_lock(&audit_cmd_mutex);
+       mutex_lock(&audit_filter_mutex);
+
+       while (!list_empty(list)) {
+               victim = list_first_entry(list, struct audit_tree, list);
+               kill_rules(victim);
+               list_del_init(&victim->list);
+
+               /* prune_one() is called with audit_filter_mutex dropped */
+               mutex_unlock(&audit_filter_mutex);
+               prune_one(victim);
+               mutex_lock(&audit_filter_mutex);
+       }
+
+       mutex_unlock(&audit_filter_mutex);
+       mutex_unlock(&audit_cmd_mutex);
+}
 
 /*
  *  Here comes the stuff asynchronous to auditctl operations
  */
 
-/* inode->inotify_mutex is locked */
 static void evict_chunk(struct audit_chunk *chunk)
 {
        struct audit_tree *owner;
+       struct list_head *postponed = audit_killed_trees();
+       int need_prune = 0;
        int n;
 
        if (chunk->dead)
@@ -853,47 +890,64 @@ static void evict_chunk(struct audit_chunk *chunk)
                owner->root = NULL;
                list_del_init(&owner->same_root);
                spin_unlock(&hash_lock);
-               kill_rules(owner);
-               list_move(&owner->list, &prune_list);
-               audit_schedule_prune();
+               if (!postponed) {
+                       kill_rules(owner);
+                       list_move(&owner->list, &prune_list);
+                       need_prune = 1;
+               } else {
+                       list_move(&owner->list, postponed);
+               }
                spin_lock(&hash_lock);
        }
        list_del_rcu(&chunk->hash);
        for (n = 0; n < chunk->count; n++)
                list_del_init(&chunk->owners[n].list);
        spin_unlock(&hash_lock);
+       if (need_prune)
+               audit_schedule_prune();
        mutex_unlock(&audit_filter_mutex);
 }
 
-static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
-                         u32 cookie, const char *dname, struct inode *inode)
+/*
+ * ->handle_event() for the audit tree group.  The group's
+ * ->should_send_event() hook, audit_tree_send_event(), always returns
+ * false, so this handler is presumably never meant to run; it BUG()s if
+ * it is ever called.
+ */
+static int audit_tree_handle_event(struct fsnotify_group *group,
+                                  struct fsnotify_mark *inode_mark,
+                                  struct fsnotify_mark *vfsmount_mark,
+                                  struct fsnotify_event *event)
 {
-       struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
+       BUG();
+       return -EOPNOTSUPP;
+}
 
-       if (mask & IN_IGNORED) {
-               evict_chunk(chunk);
-               put_inotify_watch(watch);
-       }
+/*
+ * ->freeing_mark() callback: evict the chunk that embeds @entry, then drop
+ * a reference on the mark.
+ */
+static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
+{
+       evict_chunk(container_of(entry, struct audit_chunk, mark));
+       fsnotify_put_mark(entry);
 }
 
-static void destroy_watch(struct inotify_watch *watch)
+/* ->should_send_event() callback: unconditionally answers false. */
+static bool audit_tree_send_event(struct fsnotify_group *group,
+                                 struct inode *inode,
+                                 struct fsnotify_mark *inode_mark,
+                                 struct fsnotify_mark *vfsmount_mark,
+                                 __u32 mask, void *data, int data_type)
 {
-       struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
-       free_chunk(chunk);
+       return false;
 }
 
-static const struct inotify_operations rtree_inotify_ops = {
-       .handle_event   = handle_event,
-       .destroy_watch  = destroy_watch,
+/* Callback table passed to fsnotify_alloc_group() for audit_tree_group. */
+static const struct fsnotify_ops audit_tree_ops = {
+       .handle_event           = audit_tree_handle_event,
+       .should_send_event      = audit_tree_send_event,
+       .freeing_mark           = audit_tree_freeing_mark,
+       .free_group_priv        = NULL,
+       .free_event_priv        = NULL,
 };
 
 static int __init audit_tree_init(void)
 {
        int i;
 
-       rtree_ih = inotify_init(&rtree_inotify_ops);
-       if (IS_ERR(rtree_ih))
-               audit_panic("cannot initialize inotify handle for rectree watches");
+       audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
+       if (IS_ERR(audit_tree_group))
+               audit_panic("cannot initialize fsnotify group for rectree watches");
 
        for (i = 0; i < HASH_SIZE; i++)
                INIT_LIST_HEAD(&chunk_hash_heads[i]);