*/
static DEFINE_SPINLOCK(net_family_lock);
-static const struct net_proto_family *net_families[NPROTO] __read_mostly;
+static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; /*
+ * __rcu-annotated table indexed by protocol family number. The writers
+ * visible in this patch update a slot with rcu_assign_pointer() while
+ * holding net_family_lock.
+ */
/*
* Statistics counters of the socket lists
}
+
static void wq_free_rcu(struct rcu_head *head)
{
struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
.statfs = simple_statfs,
};
-static int sockfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
-{
- return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
- mnt);
-}
-
-static struct vfsmount *sock_mnt __read_mostly;
-
-static struct file_system_type sock_fs_type = {
- .name = "sockfs",
- .get_sb = sockfs_get_sb,
- .kill_sb = kill_anon_super,
-};
-
/*
* sockfs_dname() is called from d_path().
*/
.d_dname = sockfs_dname,
};
+static struct dentry *sockfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{ /* .mount callback of sock_fs_type, added in place of the removed
+ * sockfs_get_sb(). flags, dev_name and data are not used here; the
+ * work is delegated to mount_pseudo() with the "socket:" name,
+ * sockfs_ops, sockfs_dentry_operations and SOCKFS_MAGIC, and its
+ * return value is handed straight back to the caller.
+ */
+ return mount_pseudo(fs_type, "socket:", &sockfs_ops,
+ &sockfs_dentry_operations, SOCKFS_MAGIC);
+}
+
+static struct vfsmount *sock_mnt __read_mostly; /* assigned from kern_mount(&sock_fs_type) in sock_init() */
+
+static struct file_system_type sock_fs_type = { /* registered via register_filesystem() in sock_init() */
+ .name = "sockfs",
+ .mount = sockfs_mount, /* takes over from the removed .get_sb = sockfs_get_sb */
+ .kill_sb = kill_anon_super,
+};
+
/*
* Obtains the first available file descriptor and sets it up for use.
*
if (unlikely(fd < 0))
return fd;
- path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
+ path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
if (unlikely(!path.dentry)) {
put_unused_fd(fd);
return -ENOMEM;
}
path.mnt = mntget(sock_mnt);
- path.dentry->d_op = &sockfs_dentry_operations;
d_instantiate(path.dentry, SOCK_INODE(sock));
SOCK_INODE(sock)->i_fop = &socket_file_ops;
&socket_file_ops);
if (unlikely(!file)) {
/* drop dentry, keep inode */
- atomic_inc(&path.dentry->d_inode->i_count);
+ ihold(path.dentry->d_inode);
path_put(&path);
put_unused_fd(fd);
return -ENFILE;
sock = SOCKET_I(inode);
kmemcheck_annotate_bitfield(sock, type);
+ inode->i_ino = get_next_ino();
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
return ret;
}
+/**
+ * kernel_recvmsg - Receive a message from a socket (kernel space)
+ * @sock: The socket to receive the message from
+ * @msg: Received message
+ * @vec: Input s/g array for message data
+ * @num: Size of input s/g array
+ * @size: Number of bytes to read
+ * @flags: Message flags (MSG_DONTWAIT, etc...)
+ *
+ * On return the msg structure contains the scatter/gather array passed in the
+ * vec argument. The array is modified so that it consists of the unfilled
+ * portion of the original array.
+ *
+ * The returned value is the total number of bytes received, or an error.
+ */
int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size, int flags)
{
* requested real, full-featured networking support upon configuration.
* Otherwise module support will break!
*/
- if (net_families[family] == NULL)
+ if (rcu_access_pointer(net_families[family]) == NULL)
request_module("net-pf-%d", family);
#endif
struct iovec iov;
int fput_needed;
+ if (len > INT_MAX)
+ len = INT_MAX;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
int err, err2;
int fput_needed;
+ if (size > INT_MAX)
+ size = INT_MAX;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
*/
if (MSG_CMSG_COMPAT & flags) {
err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
- &msg_sys, flags, datagrams);
+ &msg_sys, flags & ~MSG_WAITFORONE,
+ datagrams);
if (err < 0)
break;
err = __put_user(err, &compat_entry->msg_len);
++compat_entry;
} else {
err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
- &msg_sys, flags, datagrams);
+ &msg_sys, flags & ~MSG_WAITFORONE,
+ datagrams);
if (err < 0)
break;
err = put_user(err, &entry->msg_len);
}
spin_lock(&net_family_lock);
- if (net_families[ops->family])
+ if (rcu_dereference_protected(net_families[ops->family],
+ lockdep_is_held(&net_family_lock)))
err = -EEXIST;
else {
- net_families[ops->family] = ops;
+ rcu_assign_pointer(net_families[ops->family], ops);
err = 0;
}
spin_unlock(&net_family_lock);
BUG_ON(family < 0 || family >= NPROTO);
spin_lock(&net_family_lock);
- net_families[family] = NULL;
+ rcu_assign_pointer(net_families[family], NULL);
spin_unlock(&net_family_lock);
synchronize_rcu();
static int __init sock_init(void)
{
+ int err;
+
/*
* Initialize sock SLAB cache.
*/
*/
init_inodecache();
- register_filesystem(&sock_fs_type);
+
+ err = register_filesystem(&sock_fs_type);
+ if (err)
+ goto out_fs;
sock_mnt = kern_mount(&sock_fs_type);
+ if (IS_ERR(sock_mnt)) {
+ err = PTR_ERR(sock_mnt);
+ goto out_mount;
+ }
/* The real protocol initialization is performed in later initcalls.
*/
skb_timestamping_init();
#endif
- return 0;
+out:
+ return err;
+
+out_mount:
+ unregister_filesystem(&sock_fs_type);
+out_fs:
+ goto out;
}
core_initcall(sock_init); /* early initcall */
static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
{
+ struct compat_ethtool_rxnfc __user *compat_rxnfc; /* compat-layout struct that ifr32's ifru_data points to */
+ bool convert_in = false, convert_out = false; /* which directions need a layout conversion */
+ size_t buf_size = ALIGN(sizeof(struct ifreq), 8); /* scratch size; grows in the switch below */
+ struct ethtool_rxnfc __user *rxnfc; /* native-layout copy, placed after ifr in the scratch area */
struct ifreq __user *ifr;
+ u32 rule_cnt = 0, actual_rule_cnt; /* rule_cnt is only read for ETHTOOL_GRXCLSRLALL */
+ u32 ethcmd;
u32 data;
- void __user *datap;
+ int ret;
- ifr = compat_alloc_user_space(sizeof(*ifr));
+ if (get_user(data, &ifr32->ifr_ifru.ifru_data))
+ return -EFAULT;
- if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
+ compat_rxnfc = compat_ptr(data);
+
+ if (get_user(ethcmd, &compat_rxnfc->cmd))
return -EFAULT;
- if (get_user(data, &ifr32->ifr_ifru.ifru_data))
+ /* Most ethtool structures are defined without padding.
+ * Unfortunately struct ethtool_rxnfc is an exception.
+ *
+ * The switch below decides, per command, how much scratch
+ * space to allocate and whether the structure has to be
+ * converted on the way in (convert_in), on the way back out
+ * (convert_out), or not at all (the default case, where the
+ * caller's data pointer is passed through unchanged).
+ */
+ switch (ethcmd) {
+ default:
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ /* Buffer size is variable: room for rule_cnt u32
+ * entries is added on top of the fixed-size part.
+ */
+ if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
+ return -EFAULT;
+ if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
+ return -ENOMEM; /* u32 array would exceed KMALLOC_MAX_SIZE bytes */
+ buf_size += rule_cnt * sizeof(u32);
+ /* fall through */
+ case ETHTOOL_GRXRINGS:
+ case ETHTOOL_GRXCLSRLCNT:
+ case ETHTOOL_GRXCLSRULE:
+ convert_out = true; /* result is copied back to compat_rxnfc after dev_ioctl() */
+ /* fall through */
+ case ETHTOOL_SRXCLSRLDEL:
+ case ETHTOOL_SRXCLSRLINS:
+ buf_size += sizeof(struct ethtool_rxnfc);
+ convert_in = true; /* every command listed above gets a native-layout copy */
+ break;
+ }
+
+ ifr = compat_alloc_user_space(buf_size);
+ rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); /* native struct follows the padded ifreq */
+
+ if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
return -EFAULT;
- datap = compat_ptr(data);
- if (put_user(datap, &ifr->ifr_ifru.ifru_data))
+ if (put_user(convert_in ? rxnfc : compat_ptr(data),
+ &ifr->ifr_ifru.ifru_data)) /* converted copy if needed, else the caller's own pointer */
return -EFAULT;
- return dev_ioctl(net, SIOCETHTOOL, ifr);
+ if (convert_in) {
+ /* We expect there to be holes between fs.m_u and
+ * fs.ring_cookie and at the end of fs, but nowhere else.
+ *
+ * The two BUILD_BUG_ON()s check at compile time that the
+ * end of fs.m_u sits at the same offset in both layouts
+ * and that the distance from fs.ring_cookie to
+ * fs.location is the same in both. The structure is then
+ * copied in three pieces: everything up to the end of
+ * fs.m_u, fs.ring_cookie through fs.location, and
+ * rule_cnt.
+ */
+ BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) +
+ sizeof(compat_rxnfc->fs.m_u) !=
+ offsetof(struct ethtool_rxnfc, fs.m_u) +
+ sizeof(rxnfc->fs.m_u));
+ BUILD_BUG_ON(
+ offsetof(struct compat_ethtool_rxnfc, fs.location) -
+ offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
+ offsetof(struct ethtool_rxnfc, fs.location) -
+ offsetof(struct ethtool_rxnfc, fs.ring_cookie));
+
+ if (copy_in_user(rxnfc, compat_rxnfc,
+ (void *)(&rxnfc->fs.m_u + 1) -
+ (void *)rxnfc) ||
+ copy_in_user(&rxnfc->fs.ring_cookie,
+ &compat_rxnfc->fs.ring_cookie,
+ (void *)(&rxnfc->fs.location + 1) -
+ (void *)&rxnfc->fs.ring_cookie) ||
+ copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
+ sizeof(rxnfc->rule_cnt)))
+ return -EFAULT;
+ }
+
+ ret = dev_ioctl(net, SIOCETHTOOL, ifr);
+ if (ret)
+ return ret; /* non-zero result: returned as-is, nothing is copied back */
+
+ if (convert_out) {
+ if (copy_in_user(compat_rxnfc, rxnfc,
+ (const void *)(&rxnfc->fs.m_u + 1) -
+ (const void *)rxnfc) ||
+ copy_in_user(&compat_rxnfc->fs.ring_cookie,
+ &rxnfc->fs.ring_cookie,
+ (const void *)(&rxnfc->fs.location + 1) -
+ (const void *)&rxnfc->fs.ring_cookie) ||
+ copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
+ sizeof(rxnfc->rule_cnt)))
+ return -EFAULT;
+
+ if (ethcmd == ETHTOOL_GRXCLSRLALL) {
+ /* As an optimisation, we only copy the actual
+ * number of rules that the underlying
+ * function returned. Since Mallory might
+ * change the rule count in user memory, we
+ * re-read it and use it only if it is less
+ * than the rule count originally given (as
+ * the user buffer size), which has been
+ * range-checked; otherwise the original
+ * count is kept.
+ */
+ if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
+ return -EFAULT;
+ if (actual_rule_cnt < rule_cnt)
+ rule_cnt = actual_rule_cnt;
+ if (copy_in_user(&compat_rxnfc->rule_locs[0],
+ &rxnfc->rule_locs[0],
+ rule_cnt * sizeof(u32)))
+ return -EFAULT;
+ }
+ }
+
+ return 0;
}
static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)