#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
+#include <linux/bit_spinlock.h>
+#include <linux/rculist_bl.h>
#include "internal.h"
/*
* Usage:
- * dcache_inode_lock protects:
- * - i_dentry, d_alias, d_inode
- * dcache_hash_lock protects:
- * - the dcache hash table, s_anon lists
+ * dcache->d_inode->i_lock protects:
+ * - i_dentry, d_alias, d_inode of aliases
+ * dcache_hash_bucket lock protects:
+ * - the dcache hash table
+ * s_anon bl list spinlock protects:
+ * - the s_anon list (see __d_drop)
* dcache_lru_lock protects:
* - the dcache lru lists and counters
* d_lock protects:
* - d_alias, d_inode
*
* Ordering:
- * dcache_inode_lock
+ * dentry->d_inode->i_lock
* dentry->d_lock
* dcache_lru_lock
- * dcache_hash_lock
+ * dcache_hash_bucket lock
+ * s_anon lock
*
* If there is an ancestor relationship:
* dentry->d_parent->...->d_parent->d_lock
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock);
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock);
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
EXPORT_SYMBOL(rename_lock);
-EXPORT_SYMBOL(dcache_inode_lock);
static struct kmem_cache *dentry_cache __read_mostly;
-#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
-
/*
* This is the single most critical data structure when it comes
* to the dcache: the hashtable for lookups. Somebody should try
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
-static struct hlist_head *dentry_hashtable __read_mostly;
+
+struct dcache_hash_bucket {
+ struct hlist_bl_head head;
+};
+static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
+
+static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
+ unsigned long hash)
+{
+ hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
+ hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
+ return dentry_hashtable + (hash & D_HASHMASK);
+}
+
+static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
+{
+ bit_spin_lock(0, (unsigned long *)&b->head.first);
+}
+
+static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
+{
+ __bit_spin_unlock(0, (unsigned long *)&b->head.first);
+}
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
dentry->d_op->d_release(dentry);
/* if dentry was never inserted into hash, immediate free is OK */
- if (hlist_unhashed(&dentry->d_hash))
+ if (hlist_bl_unhashed(&dentry->d_hash))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
}
+/**
+ * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
+ * @dentry: the target dentry
+ * After this call, in-progress rcu-walk path lookup will fail. This
+ * should be called after unhashing, and after changing d_inode (if
+ * the dentry has not already been unhashed).
+ */
+static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
+{
+ assert_spin_locked(&dentry->d_lock);
+ /* Go through a barrier */
+ write_seqcount_barrier(&dentry->d_seq);
+}
+
/*
* Release the dentry's inode, using the filesystem
- * d_iput() operation if defined.
+ * d_iput() operation if defined. Dentry has no refcount
+ * and is unhashed.
*/
static void dentry_iput(struct dentry * dentry)
__releases(dentry->d_lock)
- __releases(dcache_inode_lock)
+ __releases(dentry->d_inode->i_lock)
{
struct inode *inode = dentry->d_inode;
if (inode) {
dentry->d_inode = NULL;
list_del_init(&dentry->d_alias);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
fsnotify_inoderemove(inode);
if (dentry->d_op && dentry->d_op->d_iput)
iput(inode);
} else {
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_inode_lock);
}
}
/*
+ * Release the dentry's inode, using the filesystem
+ * d_iput() operation if defined. dentry remains in-use.
+ */
+static void dentry_unlink_inode(struct dentry * dentry)
+ __releases(dentry->d_lock)
+ __releases(dentry->d_inode->i_lock)
+{
+ struct inode *inode = dentry->d_inode;
+ dentry->d_inode = NULL;
+ list_del_init(&dentry->d_alias);
+ dentry_rcuwalk_barrier(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&inode->i_lock);
+ if (!inode->i_nlink)
+ fsnotify_inoderemove(inode);
+ if (dentry->d_op && dentry->d_op->d_iput)
+ dentry->d_op->d_iput(dentry, inode);
+ else
+ iput(inode);
+}
+
+/*
* dentry_lru_(add|del|move_tail) must be called with d_lock held.
*/
static void dentry_lru_add(struct dentry *dentry)
/**
* d_kill - kill dentry and return parent
* @dentry: dentry to kill
+ * @parent: parent dentry
*
* The dentry must already be unhashed and removed from the LRU.
*
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
__releases(dentry->d_lock)
__releases(parent->d_lock)
- __releases(dcache_inode_lock)
+ __releases(dentry->d_inode->i_lock)
{
- dentry->d_parent = NULL;
list_del(&dentry->d_u.d_child);
+ /*
+ * Inform try_to_ascend() that we are no longer attached to the
+ * dentry tree
+ */
+ dentry->d_flags |= DCACHE_DISCONNECTED;
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
void __d_drop(struct dentry *dentry)
{
if (!(dentry->d_flags & DCACHE_UNHASHED)) {
- dentry->d_flags |= DCACHE_UNHASHED;
- spin_lock(&dcache_hash_lock);
- hlist_del_rcu(&dentry->d_hash);
- spin_unlock(&dcache_hash_lock);
+ if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
+ bit_spin_lock(0,
+ (unsigned long *)&dentry->d_sb->s_anon.first);
+ dentry->d_flags |= DCACHE_UNHASHED;
+ hlist_bl_del_init(&dentry->d_hash);
+ __bit_spin_unlock(0,
+ (unsigned long *)&dentry->d_sb->s_anon.first);
+ } else {
+ struct dcache_hash_bucket *b;
+ b = d_hash(dentry->d_parent, dentry->d_name.hash);
+ spin_lock_bucket(b);
+ /*
+ * We may not actually need to put DCACHE_UNHASHED
+ * manipulations under the hash lock, but follow
+ * the principle of least surprise.
+ */
+ dentry->d_flags |= DCACHE_UNHASHED;
+ hlist_bl_del_rcu(&dentry->d_hash);
+ spin_unlock_bucket(b);
+ dentry_rcuwalk_barrier(dentry);
+ }
}
}
EXPORT_SYMBOL(__d_drop);
}
EXPORT_SYMBOL(d_drop);
+/*
+ * Finish off a dentry we've decided to kill.
+ * dentry->d_lock must be held, returns with it unlocked.
+ * If ref is non-zero, then decrement the refcount too.
+ * Returns dentry requiring refcount drop, or NULL if we're done.
+ */
+static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
+ __releases(dentry->d_lock)
+{
+ struct inode *inode;
+ struct dentry *parent;
+
+ inode = dentry->d_inode;
+ if (inode && !spin_trylock(&inode->i_lock)) {
+relock:
+ spin_unlock(&dentry->d_lock);
+ cpu_relax();
+ return dentry; /* try again with same dentry */
+ }
+ if (IS_ROOT(dentry))
+ parent = NULL;
+ else
+ parent = dentry->d_parent;
+ if (parent && !spin_trylock(&parent->d_lock)) {
+ if (inode)
+ spin_unlock(&inode->i_lock);
+ goto relock;
+ }
+
+ if (ref)
+ dentry->d_count--;
+ /* if dentry was on the d_lru list delete it from there */
+ dentry_lru_del(dentry);
+ /* if it was on the hash then remove it */
+ __d_drop(dentry);
+ return d_kill(dentry, parent);
+}
+
/*
* This is dput
*
* call the dentry unlink method as well as removing it from the queues and
* releasing its resources. If the parent dentries were scheduled for release
* they too may now get deleted.
- *
- * no dcache lock, please.
*/
-
void dput(struct dentry *dentry)
{
- struct dentry *parent;
if (!dentry)
return;
return;
}
- if (dentry->d_op && dentry->d_op->d_delete) {
+ if (dentry->d_flags & DCACHE_OP_DELETE) {
if (dentry->d_op->d_delete(dentry))
goto kill_it;
}
return;
kill_it:
- if (!spin_trylock(&dcache_inode_lock)) {
-relock:
- spin_unlock(&dentry->d_lock);
- cpu_relax();
- goto repeat;
- }
- if (IS_ROOT(dentry))
- parent = NULL;
- else
- parent = dentry->d_parent;
- if (parent && !spin_trylock(&parent->d_lock)) {
- spin_unlock(&dcache_inode_lock);
- goto relock;
- }
- dentry->d_count--;
- /* if dentry was on the d_lru list delete it from there */
- dentry_lru_del(dentry);
- /* if it was on the hash (d_delete case), then remove it */
- __d_drop(dentry);
- dentry = d_kill(dentry, parent);
+ dentry = dentry_kill(dentry, 1);
if (dentry)
goto repeat;
}
EXPORT_SYMBOL(d_invalidate);
/* This must be called with d_lock held */
-static inline struct dentry * __dget_locked_dlock(struct dentry *dentry)
+static inline void __dget_dlock(struct dentry *dentry)
{
dentry->d_count++;
- dentry_lru_del(dentry);
- return dentry;
}
-/* This must be called with d_lock held */
-static inline struct dentry * __dget_locked(struct dentry *dentry)
+static inline void __dget(struct dentry *dentry)
{
spin_lock(&dentry->d_lock);
- __dget_locked_dlock(dentry);
+ __dget_dlock(dentry);
spin_unlock(&dentry->d_lock);
- return dentry;
-}
-
-struct dentry * dget_locked_dlock(struct dentry *dentry)
-{
- return __dget_locked_dlock(dentry);
-}
-
-struct dentry * dget_locked(struct dentry *dentry)
-{
- return __dget_locked(dentry);
}
-EXPORT_SYMBOL(dget_locked);
struct dentry *dget_parent(struct dentry *dentry)
{
struct dentry *ret;
repeat:
- spin_lock(&dentry->d_lock);
+ /*
+ * Don't need rcu_dereference because we re-check it was correct under
+ * the lock.
+ */
+ rcu_read_lock();
ret = dentry->d_parent;
- if (!ret)
- goto out;
- if (dentry == ret) {
- ret->d_count++;
+ if (!ret) {
+ rcu_read_unlock();
goto out;
}
- if (!spin_trylock(&ret->d_lock)) {
- spin_unlock(&dentry->d_lock);
- cpu_relax();
+ spin_lock(&ret->d_lock);
+ if (unlikely(ret != dentry->d_parent)) {
+ spin_unlock(&ret->d_lock);
+ rcu_read_unlock();
goto repeat;
}
+ rcu_read_unlock();
BUG_ON(!ret->d_count);
ret->d_count++;
spin_unlock(&ret->d_lock);
out:
- spin_unlock(&dentry->d_lock);
return ret;
}
EXPORT_SYMBOL(dget_parent);
(alias->d_flags & DCACHE_DISCONNECTED)) {
discon_alias = alias;
} else if (!want_discon) {
- __dget_locked_dlock(alias);
+ __dget_dlock(alias);
spin_unlock(&alias->d_lock);
return alias;
}
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
if (IS_ROOT(alias) &&
(alias->d_flags & DCACHE_DISCONNECTED)) {
- __dget_locked_dlock(alias);
+ __dget_dlock(alias);
spin_unlock(&alias->d_lock);
return alias;
}
struct dentry *de = NULL;
if (!list_empty(&inode->i_dentry)) {
- spin_lock(&dcache_inode_lock);
+ spin_lock(&inode->i_lock);
de = __d_find_alias(inode, 0);
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
}
return de;
}
{
struct dentry *dentry;
restart:
- spin_lock(&dcache_inode_lock);
+ spin_lock(&inode->i_lock);
list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
spin_lock(&dentry->d_lock);
if (!dentry->d_count) {
- __dget_locked_dlock(dentry);
+ __dget_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
dput(dentry);
goto restart;
}
spin_unlock(&dentry->d_lock);
}
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
/*
- * Throw away a dentry - free the inode, dput the parent. This requires that
- * the LRU list has already been removed.
+ * Try to throw away a dentry - free the inode, dput the parent.
+ * Requires dentry->d_lock is held, and dentry->d_count == 0.
+ * Releases dentry->d_lock.
*
- * Try to prune ancestors as well. This is necessary to prevent
- * quadratic behavior of shrink_dcache_parent(), but is also expected
- * to be beneficial in reducing dentry cache fragmentation.
+ * This may fail if locks cannot be acquired no problem, just try again.
*/
-static void prune_one_dentry(struct dentry *dentry, struct dentry *parent)
+static void try_prune_one_dentry(struct dentry *dentry)
__releases(dentry->d_lock)
- __releases(parent->d_lock)
- __releases(dcache_inode_lock)
{
- __d_drop(dentry);
- dentry = d_kill(dentry, parent);
+ struct dentry *parent;
+ parent = dentry_kill(dentry, 0);
/*
- * Prune ancestors.
+ * If dentry_kill returns NULL, we have nothing more to do.
+ * if it returns the same dentry, trylocks failed. In either
+ * case, just loop again.
+ *
+ * Otherwise, we need to prune ancestors too. This is necessary
+ * to prevent quadratic behavior of shrink_dcache_parent(), but
+ * is also expected to be beneficial in reducing dentry cache
+ * fragmentation.
*/
+ if (!parent)
+ return;
+ if (parent == dentry)
+ return;
+
+ /* Prune ancestors. */
+ dentry = parent;
while (dentry) {
- spin_lock(&dcache_inode_lock);
-again:
spin_lock(&dentry->d_lock);
- if (IS_ROOT(dentry))
- parent = NULL;
- else
- parent = dentry->d_parent;
- if (parent && !spin_trylock(&parent->d_lock)) {
- spin_unlock(&dentry->d_lock);
- goto again;
- }
- dentry->d_count--;
- if (dentry->d_count) {
- if (parent)
- spin_unlock(&parent->d_lock);
+ if (dentry->d_count > 1) {
+ dentry->d_count--;
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_inode_lock);
return;
}
-
- dentry_lru_del(dentry);
- __d_drop(dentry);
- dentry = d_kill(dentry, parent);
+ dentry = dentry_kill(dentry, 1);
}
}
{
struct dentry *dentry;
- while (!list_empty(list)) {
- struct dentry *parent;
-
- dentry = list_entry(list->prev, struct dentry, d_lru);
-
- if (!spin_trylock(&dentry->d_lock)) {
-relock:
- spin_unlock(&dcache_lru_lock);
- cpu_relax();
- spin_lock(&dcache_lru_lock);
+ rcu_read_lock();
+ for (;;) {
+ dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
+ if (&dentry->d_lru == list)
+ break; /* empty */
+ spin_lock(&dentry->d_lock);
+ if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
+ spin_unlock(&dentry->d_lock);
continue;
}
* it - just keep it off the LRU list.
*/
if (dentry->d_count) {
- __dentry_lru_del(dentry);
+ dentry_lru_del(dentry);
spin_unlock(&dentry->d_lock);
continue;
}
- if (IS_ROOT(dentry))
- parent = NULL;
- else
- parent = dentry->d_parent;
- if (parent && !spin_trylock(&parent->d_lock)) {
- spin_unlock(&dentry->d_lock);
- goto relock;
- }
- __dentry_lru_del(dentry);
- spin_unlock(&dcache_lru_lock);
- prune_one_dentry(dentry, parent);
- /* dcache_inode_lock and dentry->d_lock dropped */
- spin_lock(&dcache_inode_lock);
- spin_lock(&dcache_lru_lock);
+ rcu_read_unlock();
+
+ try_prune_one_dentry(dentry);
+
+ rcu_read_lock();
}
+ rcu_read_unlock();
}
/**
LIST_HEAD(tmp);
int cnt = *count;
- spin_lock(&dcache_inode_lock);
relock:
spin_lock(&dcache_lru_lock);
while (!list_empty(&sb->s_dentry_lru)) {
if (!--cnt)
break;
}
- /* XXX: re-add cond_resched_lock when dcache_lock goes away */
+ cond_resched_lock(&dcache_lru_lock);
}
-
- *count = cnt;
- shrink_dentry_list(&tmp);
-
if (!list_empty(&referenced))
list_splice(&referenced, &sb->s_dentry_lru);
spin_unlock(&dcache_lru_lock);
- spin_unlock(&dcache_inode_lock);
+
+ shrink_dentry_list(&tmp);
+
+ *count = cnt;
}
/**
{
LIST_HEAD(tmp);
- spin_lock(&dcache_inode_lock);
spin_lock(&dcache_lru_lock);
while (!list_empty(&sb->s_dentry_lru)) {
list_splice_init(&sb->s_dentry_lru, &tmp);
+ spin_unlock(&dcache_lru_lock);
shrink_dentry_list(&tmp);
+ spin_lock(&dcache_lru_lock);
}
spin_unlock(&dcache_lru_lock);
- spin_unlock(&dcache_inode_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);
spin_unlock(&dentry->d_lock);
shrink_dcache_for_umount_subtree(dentry);
- while (!hlist_empty(&sb->s_anon)) {
- dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
+ while (!hlist_bl_empty(&sb->s_anon)) {
+ dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
shrink_dcache_for_umount_subtree(dentry);
}
}
/*
+ * This tries to ascend one level of parenthood, but
+ * we can race with renaming, so we need to re-check
+ * the parenthood after dropping the lock and check
+ * that the sequence number still matches.
+ */
+static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
+{
+ struct dentry *new = old->d_parent;
+
+ rcu_read_lock();
+ spin_unlock(&old->d_lock);
+ spin_lock(&new->d_lock);
+
+ /*
+ * might go back up the wrong parent if we have had a rename
+ * or deletion
+ */
+ if (new != old->d_parent ||
+ (old->d_flags & DCACHE_DISCONNECTED) ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&new->d_lock);
+ new = NULL;
+ }
+ rcu_read_unlock();
+ return new;
+}
+
+
+/*
* Search for at least 1 mount point in the dentry's subdirs.
* We descend to the next level whenever the d_subdirs
* list is non-empty and continue searching.
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
dentry->d_count = 1;
dentry->d_flags = DCACHE_UNHASHED;
spin_lock_init(&dentry->d_lock);
+ seqcount_init(&dentry->d_seq);
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
- dentry->d_mounted = 0;
- INIT_HLIST_NODE(&dentry->d_hash);
+ INIT_HLIST_BL_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
INIT_LIST_HEAD(&dentry->d_alias);
* don't need child lock because it is not subject
* to concurrency here
*/
- dentry->d_parent = dget_dlock(parent);
+ __dget_dlock(parent);
+ dentry->d_parent = parent;
dentry->d_sb = parent->d_sb;
+ d_set_d_op(dentry, dentry->d_sb->s_d_op);
list_add(&dentry->d_u.d_child, &parent->d_subdirs);
spin_unlock(&parent->d_lock);
}
}
EXPORT_SYMBOL(d_alloc);
+struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
+{
+ struct dentry *dentry = d_alloc(NULL, name);
+ if (dentry) {
+ dentry->d_sb = sb;
+ d_set_d_op(dentry, dentry->d_sb->s_d_op);
+ dentry->d_parent = dentry;
+ dentry->d_flags |= DCACHE_DISCONNECTED;
+ }
+ return dentry;
+}
+EXPORT_SYMBOL(d_alloc_pseudo);
+
struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
struct qstr q;
}
EXPORT_SYMBOL(d_alloc_name);
+void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
+{
+ WARN_ON_ONCE(dentry->d_op);
+ WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH |
+ DCACHE_OP_COMPARE |
+ DCACHE_OP_REVALIDATE |
+ DCACHE_OP_DELETE ));
+ dentry->d_op = op;
+ if (!op)
+ return;
+ if (op->d_hash)
+ dentry->d_flags |= DCACHE_OP_HASH;
+ if (op->d_compare)
+ dentry->d_flags |= DCACHE_OP_COMPARE;
+ if (op->d_revalidate)
+ dentry->d_flags |= DCACHE_OP_REVALIDATE;
+ if (op->d_delete)
+ dentry->d_flags |= DCACHE_OP_DELETE;
+
+}
+EXPORT_SYMBOL(d_set_d_op);
+
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
spin_lock(&dentry->d_lock);
- if (inode)
+ if (inode) {
+ if (unlikely(IS_AUTOMOUNT(inode)))
+ dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
list_add(&dentry->d_alias, &inode->i_dentry);
+ }
dentry->d_inode = inode;
+ dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
fsnotify_d_instantiate(dentry, inode);
}
void d_instantiate(struct dentry *entry, struct inode * inode)
{
BUG_ON(!list_empty(&entry->d_alias));
- spin_lock(&dcache_inode_lock);
+ if (inode)
+ spin_lock(&inode->i_lock);
__d_instantiate(entry, inode);
- spin_unlock(&dcache_inode_lock);
+ if (inode)
+ spin_unlock(&inode->i_lock);
security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
continue;
if (alias->d_parent != entry->d_parent)
continue;
- if (qstr->len != len)
- continue;
- if (memcmp(qstr->name, name, len))
+ if (dentry_cmp(qstr->name, qstr->len, name, len))
continue;
- dget_locked(alias);
+ __dget(alias);
return alias;
}
BUG_ON(!list_empty(&entry->d_alias));
- spin_lock(&dcache_inode_lock);
+ if (inode)
+ spin_lock(&inode->i_lock);
result = __d_instantiate_unique(entry, inode);
- spin_unlock(&dcache_inode_lock);
+ if (inode)
+ spin_unlock(&inode->i_lock);
if (!result) {
security_d_instantiate(entry, inode);
res = d_alloc(NULL, &name);
if (res) {
res->d_sb = root_inode->i_sb;
+ d_set_d_op(res, res->d_sb->s_d_op);
res->d_parent = res;
d_instantiate(res, root_inode);
}
}
EXPORT_SYMBOL(d_alloc_root);
-static inline struct hlist_head *d_hash(struct dentry *parent,
- unsigned long hash)
+static struct dentry * __d_find_any_alias(struct inode *inode)
{
- hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
- hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
- return dentry_hashtable + (hash & D_HASHMASK);
+ struct dentry *alias;
+
+ if (list_empty(&inode->i_dentry))
+ return NULL;
+ alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+ __dget(alias);
+ return alias;
}
+static struct dentry * d_find_any_alias(struct inode *inode)
+{
+ struct dentry *de;
+
+ spin_lock(&inode->i_lock);
+ de = __d_find_any_alias(inode);
+ spin_unlock(&inode->i_lock);
+ return de;
+}
+
+
/**
* d_obtain_alias - find or allocate a dentry for a given inode
* @inode: inode to allocate the dentry for
if (IS_ERR(inode))
return ERR_CAST(inode);
- res = d_find_alias(inode);
+ res = d_find_any_alias(inode);
if (res)
goto out_iput;
tmp->d_parent = tmp; /* make sure dput doesn't croak */
- spin_lock(&dcache_inode_lock);
- res = __d_find_alias(inode, 0);
+ spin_lock(&inode->i_lock);
+ res = __d_find_any_alias(inode);
if (res) {
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
dput(tmp);
goto out_iput;
}
/* attach a disconnected dentry */
spin_lock(&tmp->d_lock);
tmp->d_sb = inode->i_sb;
+ d_set_d_op(tmp, tmp->d_sb->s_d_op);
tmp->d_inode = inode;
tmp->d_flags |= DCACHE_DISCONNECTED;
- tmp->d_flags &= ~DCACHE_UNHASHED;
list_add(&tmp->d_alias, &inode->i_dentry);
- spin_lock(&dcache_hash_lock);
- hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
- spin_unlock(&dcache_hash_lock);
+ bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
+ tmp->d_flags &= ~DCACHE_UNHASHED;
+ hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
+ __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
spin_unlock(&tmp->d_lock);
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
+ security_d_instantiate(tmp, inode);
return tmp;
out_iput:
+ if (res && !IS_ERR(res))
+ security_d_instantiate(res, inode);
iput(inode);
return res;
}
struct dentry *new = NULL;
if (inode && S_ISDIR(inode->i_mode)) {
- spin_lock(&dcache_inode_lock);
+ spin_lock(&inode->i_lock);
new = __d_find_alias(inode, 1);
if (new) {
BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(new, inode);
d_move(new, dentry);
iput(inode);
} else {
- /* already taking dcache_inode_lock, so d_add() by hand */
+ /* already taking inode->i_lock, so d_add() by hand */
__d_instantiate(dentry, inode);
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(dentry, inode);
d_rehash(dentry);
}
* Negative dentry: instantiate it unless the inode is a directory and
* already has a dentry.
*/
- spin_lock(&dcache_inode_lock);
+ spin_lock(&inode->i_lock);
if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
__d_instantiate(found, inode);
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(found, inode);
return found;
}
* reference to it, move it in place and use it.
*/
new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- dget_locked(new);
- spin_unlock(&dcache_inode_lock);
+ __dget(new);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(found, inode);
d_move(new, found);
iput(inode);
EXPORT_SYMBOL(d_add_ci);
/**
+ * __d_lookup_rcu - search for a dentry (racy, store-free)
+ * @parent: parent dentry
+ * @name: qstr of name we wish to find
+ * @seq: returns d_seq value at the point where the dentry was found
+ * @inode: returns dentry->d_inode when the inode was found valid.
+ * Returns: dentry, or NULL
+ *
+ * __d_lookup_rcu is the dcache lookup function for rcu-walk name
+ * resolution (store-free path walking) design described in
+ * Documentation/filesystems/path-lookup.txt.
+ *
+ * This is not to be used outside core vfs.
+ *
+ * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
+ * held, and rcu_read_lock held. The returned dentry must not be stored into
+ * without taking d_lock and checking d_seq sequence count against @seq
+ * returned here.
+ *
+ * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
+ * function.
+ *
+ * Alternatively, __d_lookup_rcu may be called again to look up the child of
+ * the returned dentry, so long as its parent's seqlock is checked after the
+ * child is looked up. Thus, an interlocking stepping of sequence lock checks
+ * is formed, giving integrity down the path walk.
+ */
+struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
+ unsigned *seq, struct inode **inode)
+{
+ unsigned int len = name->len;
+ unsigned int hash = name->hash;
+ const unsigned char *str = name->name;
+ struct dcache_hash_bucket *b = d_hash(parent, hash);
+ struct hlist_bl_node *node;
+ struct dentry *dentry;
+
+ /*
+ * Note: There is significant duplication with __d_lookup_rcu which is
+ * required to prevent single threaded performance regressions
+ * especially on architectures where smp_rmb (in seqcounts) are costly.
+ * Keep the two functions in sync.
+ */
+
+ /*
+ * The hash list is protected using RCU.
+ *
+ * Carefully use d_seq when comparing a candidate dentry, to avoid
+ * races with d_move().
+ *
+ * It is possible that concurrent renames can mess up our list
+ * walk here and result in missing our dentry, resulting in the
+ * false-negative result. d_lookup() protects against concurrent
+ * renames using rename_lock seqlock.
+ *
+ * See Documentation/vfs/dcache-locking.txt for more details.
+ */
+ hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
+ struct inode *i;
+ const char *tname;
+ int tlen;
+
+ if (dentry->d_name.hash != hash)
+ continue;
+
+seqretry:
+ *seq = read_seqcount_begin(&dentry->d_seq);
+ if (dentry->d_parent != parent)
+ continue;
+ if (d_unhashed(dentry))
+ continue;
+ tlen = dentry->d_name.len;
+ tname = dentry->d_name.name;
+ i = dentry->d_inode;
+ prefetch(tname);
+ if (i)
+ prefetch(i);
+ /*
+ * This seqcount check is required to ensure name and
+ * len are loaded atomically, so as not to walk off the
+ * edge of memory when walking. If we could load this
+ * atomically some other way, we could drop this check.
+ */
+ if (read_seqcount_retry(&dentry->d_seq, *seq))
+ goto seqretry;
+ if (parent->d_flags & DCACHE_OP_COMPARE) {
+ if (parent->d_op->d_compare(parent, *inode,
+ dentry, i,
+ tlen, tname, name))
+ continue;
+ } else {
+ if (dentry_cmp(tname, tlen, str, len))
+ continue;
+ }
+ /*
+ * No extra seqcount check is required after the name
+ * compare. The caller must perform a seqcount check in
+ * order to do anything useful with the returned dentry
+ * anyway.
+ */
+ *inode = i;
+ return dentry;
+ }
+ return NULL;
+}
+
+/**
* d_lookup - search for a dentry
* @parent: parent dentry
* @name: qstr of name we wish to find
* dentry is returned. The caller must use dput to free the entry when it has
* finished using it. %NULL is returned if the dentry does not exist.
*/
-struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
{
- struct dentry * dentry = NULL;
+ struct dentry *dentry;
unsigned seq;
do {
}
EXPORT_SYMBOL(d_lookup);
-/*
+/**
* __d_lookup - search for a dentry (racy)
* @parent: parent dentry
* @name: qstr of name we wish to find
*
* __d_lookup callers must be commented.
*/
-struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
const unsigned char *str = name->name;
- struct hlist_head *head = d_hash(parent,hash);
+ struct dcache_hash_bucket *b = d_hash(parent, hash);
+ struct hlist_bl_node *node;
struct dentry *found = NULL;
- struct hlist_node *node;
struct dentry *dentry;
/*
+ * Note: There is significant duplication with __d_lookup_rcu which is
+ * required to prevent single threaded performance regressions
+ * especially on architectures where smp_rmb (in seqcounts) are costly.
+ * Keep the two functions in sync.
+ */
+
+ /*
* The hash list is protected using RCU.
*
* Take d_lock when comparing a candidate dentry, to avoid races
*/
rcu_read_lock();
- hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
- struct qstr *qstr;
+ hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
+ const char *tname;
+ int tlen;
if (dentry->d_name.hash != hash)
continue;
- if (dentry->d_parent != parent)
- continue;
spin_lock(&dentry->d_lock);
-
- /*
- * Recheck the dentry after taking the lock - d_move may have
- * changed things. Don't bother checking the hash because
- * we're about to compare the whole name anyway.
- */
if (dentry->d_parent != parent)
goto next;
-
- /* non-existing due to RCU? */
if (d_unhashed(dentry))
goto next;
* It is safe to compare names since d_move() cannot
* change the qstr (protected by d_lock).
*/
- qstr = &dentry->d_name;
- if (parent->d_op && parent->d_op->d_compare) {
+ tlen = dentry->d_name.len;
+ tname = dentry->d_name.name;
+ if (parent->d_flags & DCACHE_OP_COMPARE) {
if (parent->d_op->d_compare(parent, parent->d_inode,
dentry, dentry->d_inode,
- qstr->len, qstr->name, name))
+ tlen, tname, name))
goto next;
} else {
- if (qstr->len != len)
- goto next;
- if (memcmp(qstr->name, str, len))
+ if (dentry_cmp(tname, tlen, str, len))
goto next;
}
* routine may choose to leave the hash value unchanged.
*/
name->hash = full_name_hash(name->name, name->len);
- if (dir->d_op && dir->d_op->d_hash) {
+ if (dir->d_flags & DCACHE_OP_HASH) {
if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
goto out;
}
list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
if (dentry == child) {
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- __dget_locked_dlock(dentry);
+ __dget_dlock(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&dparent->d_lock);
return 1;
void d_delete(struct dentry * dentry)
{
+ struct inode *inode;
int isdir = 0;
/*
* Are we the only user?
*/
- spin_lock(&dcache_inode_lock);
+again:
spin_lock(&dentry->d_lock);
- isdir = S_ISDIR(dentry->d_inode->i_mode);
+ inode = dentry->d_inode;
+ isdir = S_ISDIR(inode->i_mode);
if (dentry->d_count == 1) {
+ if (inode && !spin_trylock(&inode->i_lock)) {
+ spin_unlock(&dentry->d_lock);
+ cpu_relax();
+ goto again;
+ }
dentry->d_flags &= ~DCACHE_CANT_MOUNT;
- dentry_iput(dentry);
+ dentry_unlink_inode(dentry);
fsnotify_nameremove(dentry, isdir);
return;
}
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_inode_lock);
fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
-static void __d_rehash(struct dentry * entry, struct hlist_head *list)
+static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
{
-
+ BUG_ON(!d_unhashed(entry));
+ spin_lock_bucket(b);
entry->d_flags &= ~DCACHE_UNHASHED;
- hlist_add_head_rcu(&entry->d_hash, list);
+ hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
+ spin_unlock_bucket(b);
}
static void _d_rehash(struct dentry * entry)
void d_rehash(struct dentry * entry)
{
spin_lock(&entry->d_lock);
- spin_lock(&dcache_hash_lock);
_d_rehash(entry);
- spin_unlock(&dcache_hash_lock);
spin_unlock(&entry->d_lock);
}
EXPORT_SYMBOL(d_rehash);
BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
spin_lock(&dentry->d_lock);
+ write_seqcount_begin(&dentry->d_seq);
memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
+ write_seqcount_end(&dentry->d_seq);
spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(dentry_update_name_case);
dentry_lock_for_move(dentry, target);
- /* Move the dentry to the target hash queue, if on different bucket */
- spin_lock(&dcache_hash_lock);
- if (!d_unhashed(dentry))
- hlist_del_rcu(&dentry->d_hash);
+ write_seqcount_begin(&dentry->d_seq);
+ write_seqcount_begin(&target->d_seq);
+
+ /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
+
+ /*
+ * Move the dentry to the target hash queue. Don't bother checking
+ * for the same hash queue because of how unlikely it is.
+ */
+ __d_drop(dentry);
__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
- spin_unlock(&dcache_hash_lock);
/* Unhash the target: dput() will then get rid of it */
__d_drop(target);
list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+ write_seqcount_end(&target->d_seq);
+ write_seqcount_end(&dentry->d_seq);
+
dentry_unlock_parents_for_move(dentry, target);
spin_unlock(&target->d_lock);
fsnotify_d_move(dentry);
* This helper attempts to cope with remotely renamed directories
*
* It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex and the dcache_inode_lock
+ * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
*
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
*/
-static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
- __releases(dcache_inode_lock)
+static struct dentry *__d_unalias(struct inode *inode,
+ struct dentry *dentry, struct dentry *alias)
{
struct mutex *m1 = NULL, *m2 = NULL;
struct dentry *ret;
d_move(alias, dentry);
ret = alias;
out_err:
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
if (m2)
mutex_unlock(m2);
if (m1)
dentry_lock_for_move(anon, dentry);
+ write_seqcount_begin(&dentry->d_seq);
+ write_seqcount_begin(&anon->d_seq);
+
dparent = dentry->d_parent;
aparent = anon->d_parent;
else
INIT_LIST_HEAD(&anon->d_u.d_child);
+ write_seqcount_end(&dentry->d_seq);
+ write_seqcount_end(&anon->d_seq);
+
dentry_unlock_parents_for_move(anon, dentry);
spin_unlock(&dentry->d_lock);
BUG_ON(!d_unhashed(dentry));
- spin_lock(&dcache_inode_lock);
-
if (!inode) {
actual = dentry;
__d_instantiate(dentry, NULL);
- goto found_lock;
+ d_rehash(actual);
+ goto out_nolock;
}
+ spin_lock(&inode->i_lock);
+
if (S_ISDIR(inode->i_mode)) {
struct dentry *alias;
goto found;
}
/* Nope, but we must(!) avoid directory aliasing */
- actual = __d_unalias(dentry, alias);
+ actual = __d_unalias(inode, dentry, alias);
if (IS_ERR(actual))
dput(alias);
goto out_nolock;
actual = __d_instantiate_unique(dentry, inode);
if (!actual)
actual = dentry;
- else if (unlikely(!d_unhashed(actual)))
- goto shouldnt_be_hashed;
+ else
+ BUG_ON(!d_unhashed(actual));
-found_lock:
spin_lock(&actual->d_lock);
found:
- spin_lock(&dcache_hash_lock);
_d_rehash(actual);
- spin_unlock(&dcache_hash_lock);
spin_unlock(&actual->d_lock);
- spin_unlock(&dcache_inode_lock);
+ spin_unlock(&inode->i_lock);
out_nolock:
if (actual == dentry) {
security_d_instantiate(dentry, inode);
iput(inode);
return actual;
-
-shouldnt_be_hashed:
- spin_unlock(&dcache_inode_lock);
- BUG();
}
EXPORT_SYMBOL_GPL(d_materialise_unique);
}
/**
- * Prepend path string to a buffer
- *
+ * prepend_path - Prepend path string to a buffer
* @path: the dentry/vfsmount to report
* @root: root vfsmnt/dentry (may be modified by this function)
* @buffer: pointer to the end of the buffer
spin_unlock(&dentry->d_lock);
}
if (this_parent != root) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
+ struct dentry *child = this_parent;
if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
this_parent->d_flags |= DCACHE_GENOCIDE;
this_parent->d_count--;
}
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
- sizeof(struct hlist_head),
+ sizeof(struct dcache_hash_bucket),
dhash_entries,
13,
HASH_EARLY,
0);
for (loop = 0; loop < (1 << d_hash_shift); loop++)
- INIT_HLIST_HEAD(&dentry_hashtable[loop]);
+ INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}
static void __init dcache_init(void)
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
- sizeof(struct hlist_head),
+ sizeof(struct dcache_hash_bucket),
dhash_entries,
13,
0,
0);
for (loop = 0; loop < (1 << d_hash_shift); loop++)
- INIT_HLIST_HEAD(&dentry_hashtable[loop]);
+ INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}
/* SLAB cache for __getname() consumers */