* 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin: (57 commits)
fs: scale mntget/mntput
fs: rename vfsmount counter helpers
fs: implement faster dentry memcmp
fs: prefetch inode data in dcache lookup
fs: improve scalability of pseudo filesystems
fs: dcache per-inode inode alias locking
fs: dcache per-bucket dcache hash locking
bit_spinlock: add required includes
kernel: add bl_list
xfs: provide simple rcu-walk ACL implementation
btrfs: provide simple rcu-walk ACL implementation
ext2,3,4: provide simple rcu-walk ACL implementation
fs: provide simple rcu-walk generic_check_acl implementation
fs: provide rcu-walk aware permission i_ops
fs: rcu-walk aware d_revalidate method
fs: cache optimise dentry and inode for rcu-walk
fs: dcache reduce branches in lookup path
fs: dcache remove d_mounted
fs: fs_struct use seqlock
fs: rcu-walk for path lookup
...
}
if (!is_root) {
- error = gfs2_permission(dir, MAY_EXEC);
+ error = gfs2_permission(dir, MAY_EXEC, 0);
if (error)
goto out;
}
{
int error;
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
if (error)
return error;
if (error)
return error;
- if ((attr->ia_valid & ATTR_SIZE) &&
- attr->ia_size != i_size_read(inode)) {
- error = vmtruncate(inode, attr->ia_size);
- if (error)
- return error;
- }
-
setattr_copy(inode, attr);
mark_inode_dirty(inode);
-
- gfs2_assert_warn(GFS2_SB(inode), !error);
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
{
struct inode *inode = NULL;
- dentry->d_op = &gfs2_dops;
+ d_set_d_op(dentry, &gfs2_dops);
inode = gfs2_lookupi(dir, &dentry->d_name, 0);
if (inode && IS_ERR(inode))
if (error)
goto out_child;
- error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
if (error)
goto out_gunlock;
if (IS_APPEND(&dip->i_inode))
return -EPERM;
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
if (error)
return error;
}
}
} else {
- error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
if (error)
goto out_gunlock;
/* Check out the dir to be renamed */
if (dir_rename) {
- error = gfs2_permission(odentry->d_inode, MAY_WRITE);
+ error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
if (error)
goto out_gunlock;
}
* Returns: errno
*/
- int gfs2_permission(struct inode *inode, int mask)
+ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
{
- struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_inode *ip;
struct gfs2_holder i_gh;
int error;
int unlock = 0;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
+ ip = GFS2_I(inode);
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
error = -EACCES;
else
- error = generic_permission(inode, mask, gfs2_check_acl);
+ error = generic_permission(inode, mask, flags, gfs2_check_acl);
if (unlock)
gfs2_glock_dq_uninit(&i_gh);
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct buffer_head *dibh;
u32 ouid, ogid, nuid, ngid;
int error;
if (error)
goto out_gunlock_q;
- error = gfs2_meta_inode_buffer(ip, &dibh);
+ error = gfs2_setattr_simple(ip, attr);
if (error)
goto out_end_trans;
- if ((attr->ia_valid & ATTR_SIZE) &&
- attr->ia_size != i_size_read(inode)) {
- int error;
-
- error = vmtruncate(inode, attr->ia_size);
- gfs2_assert_warn(sdp, !error);
- }
-
- setattr_copy(inode, attr);
- mark_inode_dirty(inode);
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(ip, dibh->b_data);
- brelse(dibh);
-
if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
gfs2_quota_change(ip, -blocks, ouid, ogid);
#endif
+#ifdef CONFIG_SCHED_AUTOGROUP
+/*
+ * Print out autogroup related information:
+ */
+static int sched_autogroup_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct task_struct *p;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+ proc_sched_autogroup_show_task(p, m);
+
+ put_task_struct(p);
+
+ return 0;
+}
+
+static ssize_t
+sched_autogroup_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct task_struct *p;
+ char buffer[PROC_NUMBUF];
+ long nice;
+ int err;
+
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count))
+ return -EFAULT;
+
+ err = strict_strtol(strstrip(buffer), 0, &nice);
+ if (err)
+ return -EINVAL;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ err = nice;
+ err = proc_sched_autogroup_set_nice(p, &err);
+ if (err)
+ count = err;
+
+ put_task_struct(p);
+
+ return count;
+}
+
+static int sched_autogroup_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+
+ ret = single_open(filp, sched_autogroup_show, NULL);
+ if (!ret) {
+ struct seq_file *m = filp->private_data;
+
+ m->private = inode;
+ }
+ return ret;
+}
+
+static const struct file_operations proc_pid_sched_autogroup_operations = {
+ .open = sched_autogroup_open,
+ .read = seq_read,
+ .write = sched_autogroup_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
+
static ssize_t comm_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
*/
static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct inode *inode = dentry->d_inode;
- struct task_struct *task = get_proc_task(inode);
+ struct inode *inode;
+ struct task_struct *task;
const struct cred *cred;
+ if (nd && nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ task = get_proc_task(inode);
+
if (task) {
if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
task_dumpable(task)) {
return 0;
}
- static int pid_delete_dentry(struct dentry * dentry)
+ static int pid_delete_dentry(const struct dentry * dentry)
{
/* Is the task we represent dead?
* If so, then don't put the dentry on the lru list,
static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct inode *inode = dentry->d_inode;
- struct task_struct *task = get_proc_task(inode);
- int fd = proc_fd(inode);
+ struct inode *inode;
+ struct task_struct *task;
+ int fd;
struct files_struct *files;
const struct cred *cred;
+ if (nd && nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ task = get_proc_task(inode);
+ fd = proc_fd(inode);
+
if (task) {
files = get_files_struct(task);
if (files) {
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
ei->op.proc_get_link = proc_fd_link;
- dentry->d_op = &tid_fd_dentry_operations;
+ d_set_d_op(dentry, &tid_fd_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, NULL))
* /proc/pid/fd needs a special permission handler so that a process can still
* access /proc/self/fd after it has executed a setuid().
*/
- static int proc_fd_permission(struct inode *inode, int mask)
+ static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
{
int rv;
- rv = generic_permission(inode, mask, NULL);
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+ rv = generic_permission(inode, mask, flags, NULL);
if (rv == 0)
return 0;
if (task_pid(current) == proc_pid(inode))
ei->fd = fd;
inode->i_mode = S_IFREG | S_IRUSR;
inode->i_fop = &proc_fdinfo_file_operations;
- dentry->d_op = &tid_fd_dentry_operations;
+ d_set_d_op(dentry, &tid_fd_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, NULL))
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- dentry->d_op = &pid_dentry_operations;
+ d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, NULL))
*/
static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct inode *inode = dentry->d_inode;
- struct task_struct *task = get_proc_task(inode);
+ struct inode *inode;
+ struct task_struct *task;
+
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ task = get_proc_task(inode);
if (task) {
put_task_struct(task);
return 1;
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- dentry->d_op = &proc_base_dentry_operations;
+ d_set_d_op(dentry, &proc_base_dentry_operations);
d_add(dentry, inode);
error = NULL;
out:
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
+#ifdef CONFIG_SCHED_AUTOGROUP
+ REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
+#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
INF("syscall", S_IRUSR, proc_pid_syscall),
inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
ARRAY_SIZE(tgid_base_stuff));
- dentry->d_op = &pid_dentry_operations;
+ d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
ARRAY_SIZE(tid_base_stuff));
- dentry->d_op = &pid_dentry_operations;
+ d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
* called when the actual read/write operations are performed.
* @inode contains the inode structure to check.
* @mask contains the permission mask.
- * @nd contains the nameidata (may be NULL).
* Return 0 if permission is granted.
* @inode_setattr:
* Check permission before setting file attributes. Note that the kernel
* @unix_stream_connect:
* Check permissions before establishing a Unix domain stream connection
* between @sock and @other.
- * @sock contains the socket structure.
- * @other contains the peer socket structure.
+ * @sock contains the sock structure.
+ * @other contains the peer sock structure.
+ * @newsk contains the new sock structure.
* Return 0 if permission is granted.
* @unix_may_send:
* Check permissions before connecting or sending datagrams from @sock to
int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen);
#ifdef CONFIG_SECURITY_NETWORK
- int (*unix_stream_connect) (struct socket *sock,
- struct socket *other, struct sock *newsk);
+ int (*unix_stream_connect) (struct sock *sock, struct sock *other, struct sock *newsk);
int (*unix_may_send) (struct socket *sock, struct socket *other);
int (*socket_create) (int family, int type, int protocol, int kern);
int security_inode_readlink(struct dentry *dentry);
int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
int security_inode_permission(struct inode *inode, int mask);
+ int security_inode_exec_permission(struct inode *inode, unsigned int flags);
int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
int security_inode_setxattr(struct dentry *dentry, const char *name,
return 0;
}
+ static inline int security_inode_exec_permission(struct inode *inode,
+ unsigned int flags)
+ {
+ return 0;
+ }
+
static inline int security_inode_setattr(struct dentry *dentry,
struct iattr *attr)
{
#ifdef CONFIG_SECURITY_NETWORK
-int security_unix_stream_connect(struct socket *sock, struct socket *other,
- struct sock *newsk);
+int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk);
int security_unix_may_send(struct socket *sock, struct socket *other);
int security_socket_create(int family, int type, int protocol, int kern);
int security_socket_post_create(struct socket *sock, int family,
int security_tun_dev_attach(struct sock *sk);
#else /* CONFIG_SECURITY_NETWORK */
-static inline int security_unix_stream_connect(struct socket *sock,
- struct socket *other,
+static inline int security_unix_stream_connect(struct sock *sock,
+ struct sock *other,
struct sock *newsk)
{
return 0;
*/
static DEFINE_SPINLOCK(net_family_lock);
-static const struct net_proto_family *net_families[NPROTO] __read_mostly;
+static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
/*
* Statistics counters of the socket lists
}
+
static void wq_free_rcu(struct rcu_head *head)
{
struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
if (unlikely(fd < 0))
return fd;
- path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
+ path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
if (unlikely(!path.dentry)) {
put_unused_fd(fd);
return -ENOMEM;
}
path.mnt = mntget(sock_mnt);
- path.dentry->d_op = &sockfs_dentry_operations;
+ d_set_d_op(path.dentry, &sockfs_dentry_operations);
d_instantiate(path.dentry, SOCK_INODE(sock));
SOCK_INODE(sock)->i_fop = &socket_file_ops;
* requested real, full-featured networking support upon configuration.
* Otherwise module support will break!
*/
- if (net_families[family] == NULL)
+ if (rcu_access_pointer(net_families[family]) == NULL)
request_module("net-pf-%d", family);
#endif
}
spin_lock(&net_family_lock);
- if (net_families[ops->family])
+ if (rcu_dereference_protected(net_families[ops->family],
+ lockdep_is_held(&net_family_lock)))
err = -EEXIST;
else {
- net_families[ops->family] = ops;
+ rcu_assign_pointer(net_families[ops->family], ops);
err = 0;
}
spin_unlock(&net_family_lock);
BUG_ON(family < 0 || family >= NPROTO);
spin_lock(&net_family_lock);
- net_families[family] = NULL;
+ rcu_assign_pointer(net_families[family], NULL);
spin_unlock(&net_family_lock);
synchronize_rcu();
static int __init sock_init(void)
{
+ int err;
+
/*
* Initialize sock SLAB cache.
*/
*/
init_inodecache();
- register_filesystem(&sock_fs_type);
+
+ err = register_filesystem(&sock_fs_type);
+ if (err)
+ goto out_fs;
sock_mnt = kern_mount(&sock_fs_type);
+ if (IS_ERR(sock_mnt)) {
+ err = PTR_ERR(sock_mnt);
+ goto out_mount;
+ }
/* The real protocol initialization is performed in later initcalls.
*/
skb_timestamping_init();
#endif
- return 0;
+ out:
+ return err;
+
+ out_mount:
+ unregister_filesystem(&sock_fs_type);
+ out_fs:
+ goto out;
}
core_initcall(sock_init); /* early initcall */
return security_ops->inode_permission(inode, mask);
}
+ int security_inode_exec_permission(struct inode *inode, unsigned int flags)
+ {
+ if (unlikely(IS_PRIVATE(inode)))
+ return 0;
+ if (flags)
+ return -ECHILD;
+ return security_ops->inode_permission(inode, MAY_EXEC);
+ }
+
int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
{
if (unlikely(IS_PRIVATE(dentry->d_inode)))
#ifdef CONFIG_SECURITY_NETWORK
-int security_unix_stream_connect(struct socket *sock, struct socket *other,
- struct sock *newsk)
+int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk)
{
return security_ops->unix_stream_connect(sock, other, newsk);
}