cifs: fix misspelling of "forcedirectio"
[linux-flexiantxendom0-3.2.10.git] / ipc / mqueue.c
index 547d9c8..28bd64d 100644 (file)
@@ -32,6 +32,8 @@
 #include <linux/nsproxy.h>
 #include <linux/pid.h>
 #include <linux/ipc_namespace.h>
+#include <linux/user_namespace.h>
+#include <linux/slab.h>
 
 #include <net/sock.h>
 #include "util.h"
@@ -107,116 +109,108 @@ static struct ipc_namespace *get_ns_from_inode(struct inode *inode)
 }
 
 static struct inode *mqueue_get_inode(struct super_block *sb,
-               struct ipc_namespace *ipc_ns, int mode,
+               struct ipc_namespace *ipc_ns, umode_t mode,
                struct mq_attr *attr)
 {
        struct user_struct *u = current_user();
        struct inode *inode;
+       int ret = -ENOMEM;
 
        inode = new_inode(sb);
-       if (inode) {
-               inode->i_mode = mode;
-               inode->i_uid = current_fsuid();
-               inode->i_gid = current_fsgid();
-               inode->i_mtime = inode->i_ctime = inode->i_atime =
-                               CURRENT_TIME;
+       if (!inode)
+               goto err;
 
-               if (S_ISREG(mode)) {
-                       struct mqueue_inode_info *info;
-                       struct task_struct *p = current;
-                       unsigned long mq_bytes, mq_msg_tblsz;
-
-                       inode->i_fop = &mqueue_file_operations;
-                       inode->i_size = FILENT_SIZE;
-                       /* mqueue specific info */
-                       info = MQUEUE_I(inode);
-                       spin_lock_init(&info->lock);
-                       init_waitqueue_head(&info->wait_q);
-                       INIT_LIST_HEAD(&info->e_wait_q[0].list);
-                       INIT_LIST_HEAD(&info->e_wait_q[1].list);
-                       info->notify_owner = NULL;
-                       info->qsize = 0;
-                       info->user = NULL;      /* set when all is ok */
-                       memset(&info->attr, 0, sizeof(info->attr));
-                       info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
-                       info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
-                       if (attr) {
-                               info->attr.mq_maxmsg = attr->mq_maxmsg;
-                               info->attr.mq_msgsize = attr->mq_msgsize;
-                       }
-                       mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
-                       info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
-                       if (!info->messages)
-                               goto out_inode;
-
-                       mq_bytes = (mq_msg_tblsz +
-                               (info->attr.mq_maxmsg * info->attr.mq_msgsize));
-
-                       spin_lock(&mq_lock);
-                       if (u->mq_bytes + mq_bytes < u->mq_bytes ||
-                           u->mq_bytes + mq_bytes >
-                           p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
-                               spin_unlock(&mq_lock);
-                               kfree(info->messages);
-                               goto out_inode;
-                       }
-                       u->mq_bytes += mq_bytes;
-                       spin_unlock(&mq_lock);
+       inode->i_ino = get_next_ino();
+       inode->i_mode = mode;
+       inode->i_uid = current_fsuid();
+       inode->i_gid = current_fsgid();
+       inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME;
+
+       if (S_ISREG(mode)) {
+               struct mqueue_inode_info *info;
+               unsigned long mq_bytes, mq_msg_tblsz;
+
+               inode->i_fop = &mqueue_file_operations;
+               inode->i_size = FILENT_SIZE;
+               /* mqueue specific info */
+               info = MQUEUE_I(inode);
+               spin_lock_init(&info->lock);
+               init_waitqueue_head(&info->wait_q);
+               INIT_LIST_HEAD(&info->e_wait_q[0].list);
+               INIT_LIST_HEAD(&info->e_wait_q[1].list);
+               info->notify_owner = NULL;
+               info->qsize = 0;
+               info->user = NULL;      /* set when all is ok */
+               memset(&info->attr, 0, sizeof(info->attr));
+               info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
+               info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
+               if (attr) {
+                       info->attr.mq_maxmsg = attr->mq_maxmsg;
+                       info->attr.mq_msgsize = attr->mq_msgsize;
+               }
+               mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
+               info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
+               if (!info->messages)
+                       goto out_inode;
 
-                       /* all is ok */
-                       info->user = get_uid(u);
-               } else if (S_ISDIR(mode)) {
-                       inc_nlink(inode);
-                       /* Some things misbehave if size == 0 on a directory */
-                       inode->i_size = 2 * DIRENT_SIZE;
-                       inode->i_op = &mqueue_dir_inode_operations;
-                       inode->i_fop = &simple_dir_operations;
+               mq_bytes = (mq_msg_tblsz +
+                       (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+
+               spin_lock(&mq_lock);
+               if (u->mq_bytes + mq_bytes < u->mq_bytes ||
+                   u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
+                       spin_unlock(&mq_lock);
+                       /* mqueue_evict_inode() releases info->messages */
+                       ret = -EMFILE;
+                       goto out_inode;
                }
+               u->mq_bytes += mq_bytes;
+               spin_unlock(&mq_lock);
+
+               /* all is ok */
+               info->user = get_uid(u);
+       } else if (S_ISDIR(mode)) {
+               inc_nlink(inode);
+               /* Some things misbehave if size == 0 on a directory */
+               inode->i_size = 2 * DIRENT_SIZE;
+               inode->i_op = &mqueue_dir_inode_operations;
+               inode->i_fop = &simple_dir_operations;
        }
+
        return inode;
 out_inode:
-       make_bad_inode(inode);
        iput(inode);
-       return NULL;
+err:
+       return ERR_PTR(ret);
 }
 
 static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
 {
        struct inode *inode;
        struct ipc_namespace *ns = data;
-       int error;
 
        sb->s_blocksize = PAGE_CACHE_SIZE;
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
        sb->s_magic = MQUEUE_MAGIC;
        sb->s_op = &mqueue_super_ops;
 
-       inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO,
-                               NULL);
-       if (!inode) {
-               error = -ENOMEM;
-               goto out;
-       }
-
-       sb->s_root = d_alloc_root(inode);
-       if (!sb->s_root) {
-               iput(inode);
-               error = -ENOMEM;
-               goto out;
-       }
-       error = 0;
+       inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
+       if (IS_ERR(inode))
+               return PTR_ERR(inode);
 
-out:
-       return error;
+       sb->s_root = d_make_root(inode);
+       if (!sb->s_root)
+               return -ENOMEM;
+       return 0;
 }
 
-static int mqueue_get_sb(struct file_system_type *fs_type,
+static struct dentry *mqueue_mount(struct file_system_type *fs_type,
                         int flags, const char *dev_name,
-                        void *data, struct vfsmount *mnt)
+                        void *data)
 {
        if (!(flags & MS_KERNMOUNT))
                data = current->nsproxy->ipc_ns;
-       return get_sb_ns(fs_type, flags, data, mqueue_fill_super, mnt);
+       return mount_ns(fs_type, flags, data, mqueue_fill_super);
 }
 
 static void init_once(void *foo)
@@ -236,12 +230,18 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb)
        return &ei->vfs_inode;
 }
 
-static void mqueue_destroy_inode(struct inode *inode)
+static void mqueue_i_callback(struct rcu_head *head)
 {
+       struct inode *inode = container_of(head, struct inode, i_rcu);
        kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
 }
 
-static void mqueue_delete_inode(struct inode *inode)
+static void mqueue_destroy_inode(struct inode *inode)
+{
+       call_rcu(&inode->i_rcu, mqueue_i_callback);
+}
+
+static void mqueue_evict_inode(struct inode *inode)
 {
        struct mqueue_inode_info *info;
        struct user_struct *user;
@@ -249,10 +249,11 @@ static void mqueue_delete_inode(struct inode *inode)
        int i;
        struct ipc_namespace *ipc_ns;
 
-       if (S_ISDIR(inode->i_mode)) {
-               clear_inode(inode);
+       end_writeback(inode);
+
+       if (S_ISDIR(inode->i_mode))
                return;
-       }
+
        ipc_ns = get_ns_from_inode(inode);
        info = MQUEUE_I(inode);
        spin_lock(&info->lock);
@@ -261,8 +262,6 @@ static void mqueue_delete_inode(struct inode *inode)
        kfree(info->messages);
        spin_unlock(&info->lock);
 
-       clear_inode(inode);
-
        /* Total amount of bytes accounted for the mqueue */
        mq_bytes = info->attr.mq_maxmsg * (sizeof(struct msg_msg *)
            + info->attr.mq_msgsize);
@@ -286,7 +285,7 @@ static void mqueue_delete_inode(struct inode *inode)
 }
 
 static int mqueue_create(struct inode *dir, struct dentry *dentry,
-                               int mode, struct nameidata *nd)
+                               umode_t mode, struct nameidata *nd)
 {
        struct inode *inode;
        struct mq_attr *attr = dentry->d_fsdata;
@@ -308,8 +307,8 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
        spin_unlock(&mq_lock);
 
        inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
-       if (!inode) {
-               error = -ENOMEM;
+       if (IS_ERR(inode)) {
+               error = PTR_ERR(inode);
                spin_lock(&mq_lock);
                ipc_ns->mq_queues_count--;
                goto out_unlock;
@@ -428,7 +427,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr,
  * sr: SEND or RECV
  */
 static int wq_sleep(struct mqueue_inode_info *info, int sr,
-                       long timeout, struct ext_wait_queue *ewp)
+                   ktime_t *timeout, struct ext_wait_queue *ewp)
 {
        int retval;
        signed long time;
@@ -439,7 +438,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
                set_current_state(TASK_INTERRUPTIBLE);
 
                spin_unlock(&info->lock);
-               time = schedule_timeout(timeout);
+               time = schedule_hrtimeout_range_clock(timeout, 0,
+                       HRTIMER_MODE_ABS, CLOCK_REALTIME);
 
                while (ewp->state == STATE_PENDING)
                        cpu_relax();
@@ -532,9 +532,13 @@ static void __do_notify(struct mqueue_inode_info *info)
                        sig_i.si_errno = 0;
                        sig_i.si_code = SI_MESGQ;
                        sig_i.si_value = info->notify.sigev_value;
+                       /* map current pid/uid into the notify owner's pid ns and info->user's user ns */
+                       rcu_read_lock();
                        sig_i.si_pid = task_tgid_nr_ns(current,
                                                ns_of_pid(info->notify_owner));
-                       sig_i.si_uid = current_uid();
+                       sig_i.si_uid = user_ns_map_uid(info->user->user_ns,
+                                               current_cred(), current_uid());
+                       rcu_read_unlock();
 
                        kill_pid_info(info->notify.sigev_signo,
                                      &sig_i, info->notify_owner);
@@ -551,31 +555,16 @@ static void __do_notify(struct mqueue_inode_info *info)
        wake_up(&info->wait_q);
 }
 
-static long prepare_timeout(struct timespec *p)
+static int prepare_timeout(const struct timespec __user *u_abs_timeout,
+                          ktime_t *expires, struct timespec *ts)
 {
-       struct timespec nowts;
-       long timeout;
-
-       if (p) {
-               if (unlikely(p->tv_nsec < 0 || p->tv_sec < 0
-                       || p->tv_nsec >= NSEC_PER_SEC))
-                       return -EINVAL;
-               nowts = CURRENT_TIME;
-               /* first subtract as jiffies can't be too big */
-               p->tv_sec -= nowts.tv_sec;
-               if (p->tv_nsec < nowts.tv_nsec) {
-                       p->tv_nsec += NSEC_PER_SEC;
-                       p->tv_sec--;
-               }
-               p->tv_nsec -= nowts.tv_nsec;
-               if (p->tv_sec < 0)
-                       return 0;
-
-               timeout = timespec_to_jiffies(p) + 1;
-       } else
-               return MAX_SCHEDULE_TIMEOUT;
+       if (copy_from_user(ts, u_abs_timeout, sizeof(struct timespec)))
+               return -EFAULT;
+       if (!timespec_valid(ts))
+               return -EINVAL;
 
-       return timeout;
+       *expires = timespec_to_ktime(*ts);
+       return 0;
 }
 
 static void remove_notification(struct mqueue_inode_info *info)
@@ -615,7 +604,7 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
  * Invoked when creating a new queue via sys_mq_open
  */
 static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
-                       struct dentry *dentry, int oflag, mode_t mode,
+                       struct dentry *dentry, int oflag, umode_t mode,
                        struct mq_attr *attr)
 {
        const struct cred *cred = current_cred();
@@ -684,7 +673,7 @@ err:
        return ERR_PTR(ret);
 }
 
-SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
+SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
                struct mq_attr __user *, u_attr)
 {
        struct dentry *dentry;
@@ -784,7 +773,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
 
        inode = dentry->d_inode;
        if (inode)
-               atomic_inc(&inode->i_count);
+               ihold(inode);
        err = mnt_want_write(ipc_ns->mq_mnt);
        if (err)
                goto out_err;
@@ -861,22 +850,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
        struct ext_wait_queue *receiver;
        struct msg_msg *msg_ptr;
        struct mqueue_inode_info *info;
-       struct timespec ts, *p = NULL;
-       long timeout;
+       ktime_t expires, *timeout = NULL;
+       struct timespec ts;
        int ret;
 
        if (u_abs_timeout) {
-               if (copy_from_user(&ts, u_abs_timeout, 
-                                       sizeof(struct timespec)))
-                       return -EFAULT;
-               p = &ts;
+               int res = prepare_timeout(u_abs_timeout, &expires, &ts);
+               if (res)
+                       return res;
+               timeout = &expires;
        }
 
        if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
                return -EINVAL;
 
-       audit_mq_sendrecv(mqdes, msg_len, msg_prio, p);
-       timeout = prepare_timeout(p);
+       audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL);
 
        filp = fget(mqdes);
        if (unlikely(!filp)) {
@@ -918,9 +906,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
                if (filp->f_flags & O_NONBLOCK) {
                        spin_unlock(&info->lock);
                        ret = -EAGAIN;
-               } else if (unlikely(timeout < 0)) {
-                       spin_unlock(&info->lock);
-                       ret = timeout;
                } else {
                        wait.task = current;
                        wait.msg = (void *) msg_ptr;
@@ -953,24 +938,23 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
                size_t, msg_len, unsigned int __user *, u_msg_prio,
                const struct timespec __user *, u_abs_timeout)
 {
-       long timeout;
        ssize_t ret;
        struct msg_msg *msg_ptr;
        struct file *filp;
        struct inode *inode;
        struct mqueue_inode_info *info;
        struct ext_wait_queue wait;
-       struct timespec ts, *p = NULL;
+       ktime_t expires, *timeout = NULL;
+       struct timespec ts;
 
        if (u_abs_timeout) {
-               if (copy_from_user(&ts, u_abs_timeout, 
-                                       sizeof(struct timespec)))
-                       return -EFAULT;
-               p = &ts;
+               int res = prepare_timeout(u_abs_timeout, &expires, &ts);
+               if (res)
+                       return res;
+               timeout = &expires;
        }
 
-       audit_mq_sendrecv(mqdes, msg_len, 0, p);
-       timeout = prepare_timeout(p);
+       audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL);
 
        filp = fget(mqdes);
        if (unlikely(!filp)) {
@@ -1002,11 +986,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
                if (filp->f_flags & O_NONBLOCK) {
                        spin_unlock(&info->lock);
                        ret = -EAGAIN;
-                       msg_ptr = NULL;
-               } else if (unlikely(timeout < 0)) {
-                       spin_unlock(&info->lock);
-                       ret = timeout;
-                       msg_ptr = NULL;
                } else {
                        wait.task = current;
                        wait.state = STATE_NONE;
@@ -1244,19 +1223,19 @@ static const struct file_operations mqueue_file_operations = {
        .flush = mqueue_flush_file,
        .poll = mqueue_poll_file,
        .read = mqueue_read_file,
+       .llseek = default_llseek,
 };
 
 static const struct super_operations mqueue_super_ops = {
        .alloc_inode = mqueue_alloc_inode,
        .destroy_inode = mqueue_destroy_inode,
+       .evict_inode = mqueue_evict_inode,
        .statfs = simple_statfs,
-       .delete_inode = mqueue_delete_inode,
-       .drop_inode = generic_delete_inode,
 };
 
 static struct file_system_type mqueue_fs_type = {
        .name = "mqueue",
-       .get_sb = mqueue_get_sb,
+       .mount = mqueue_mount,
        .kill_sb = kill_litter_super,
 };
 
@@ -1283,7 +1262,7 @@ void mq_clear_sbinfo(struct ipc_namespace *ns)
 
 void mq_put_mnt(struct ipc_namespace *ns)
 {
-       mntput(ns->mq_mnt);
+       kern_unmount(ns->mq_mnt);
 }
 
 static int __init init_mqueue_fs(void)
@@ -1296,7 +1275,7 @@ static int __init init_mqueue_fs(void)
        if (mqueue_inode_cachep == NULL)
                return -ENOMEM;
 
-       /* ignore failues - they are not fatal */
+       /* ignore failures - they are not fatal */
        mq_sysctl_table = mq_register_sysctl_table();
 
        error = register_filesystem(&mqueue_fs_type);
@@ -1305,11 +1284,9 @@ static int __init init_mqueue_fs(void)
 
        spin_lock_init(&mq_lock);
 
-       init_ipc_ns.mq_mnt = kern_mount_data(&mqueue_fs_type, &init_ipc_ns);
-       if (IS_ERR(init_ipc_ns.mq_mnt)) {
-               error = PTR_ERR(init_ipc_ns.mq_mnt);
+       error = mq_init_ns(&init_ipc_ns);
+       if (error)
                goto out_filesystem;
-       }
 
        return 0;