#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
-#include <linux/smp_lock.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
EXPORT_SYMBOL(generic_ro_fops);
+static inline int unsigned_offsets(struct file *file)
+{
+ return file->f_mode & FMODE_UNSIGNED_OFFSET;
+}
+
+static loff_t lseek_execute(struct file *file, struct inode *inode,
+ loff_t offset, loff_t maxsize)
+{
+ if (offset < 0 && !unsigned_offsets(file))
+ return -EINVAL;
+ if (offset > maxsize)
+ return -EINVAL;
+
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+ return offset;
+}
+
/**
- * generic_file_llseek_unlocked - lockless generic llseek implementation
+ * generic_file_llseek_size - generic llseek implementation for regular files
* @file: file structure to seek on
* @offset: file offset to seek to
* @origin: type of seek
+ * @size: max size of file system
+ *
+ * This is a variant of generic_file_llseek that allows passing in a custom
+ * file size.
*
- * Updates the file offset to the value specified by @offset and @origin.
- * Locking must be provided by the caller.
+ * Synchronization:
+ * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
+ * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
+ * read/writes behave like SEEK_SET against seeks.
*/
loff_t
-generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
+generic_file_llseek_size(struct file *file, loff_t offset, int origin,
+ loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
switch (origin) {
case SEEK_END:
- offset += inode->i_size;
+ offset += i_size_read(inode);
break;
case SEEK_CUR:
/*
*/
if (offset == 0)
return file->f_pos;
- offset += file->f_pos;
+ /*
+ * f_lock protects against read/modify/write race with other
+ * SEEK_CURs. Note that parallel writes and reads behave
+ * like SEEK_SET.
+ */
+ spin_lock(&file->f_lock);
+ offset = lseek_execute(file, inode, file->f_pos + offset,
+ maxsize);
+ spin_unlock(&file->f_lock);
+ return offset;
+ case SEEK_DATA:
+ /*
+ * In the generic case the entire file is data, so as long as
+ * offset isn't at the end of the file then the offset is data.
+ */
+ if (offset >= i_size_read(inode))
+ return -ENXIO;
+ break;
+ case SEEK_HOLE:
+ /*
+ * There is a virtual hole at the end of the file, so as long as
+ * offset isn't i_size or larger, return i_size.
+ */
+ if (offset >= i_size_read(inode))
+ return -ENXIO;
+ offset = i_size_read(inode);
break;
}
- if (offset < 0 || offset > inode->i_sb->s_maxbytes)
- return -EINVAL;
-
- /* Special lock needed here? */
- if (offset != file->f_pos) {
- file->f_pos = offset;
- file->f_version = 0;
- }
-
- return offset;
+ return lseek_execute(file, inode, offset, maxsize);
}
-EXPORT_SYMBOL(generic_file_llseek_unlocked);
+EXPORT_SYMBOL(generic_file_llseek_size);
/**
* generic_file_llseek - generic llseek implementation for regular files
*/
loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
{
- loff_t rval;
-
- mutex_lock(&file->f_dentry->d_inode->i_mutex);
- rval = generic_file_llseek_unlocked(file, offset, origin);
- mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+ struct inode *inode = file->f_mapping->host;
- return rval;
+ return generic_file_llseek_size(file, offset, origin,
+ inode->i_sb->s_maxbytes);
}
EXPORT_SYMBOL(generic_file_llseek);
+/**
+ * noop_llseek - No Operation Performed llseek implementation
+ * @file: file structure to seek on
+ * @offset: file offset to seek to
+ * @origin: type of seek
+ *
+ * This is an implementation of ->llseek useable for the rare special case when
+ * userspace expects the seek to succeed but the (device) file is actually not
+ * able to perform the seek. In this case you use noop_llseek() instead of
+ * falling back to the default implementation of ->llseek.
+ */
+loff_t noop_llseek(struct file *file, loff_t offset, int origin)
+{
+ return file->f_pos;
+}
+EXPORT_SYMBOL(noop_llseek);
+
loff_t no_llseek(struct file *file, loff_t offset, int origin)
{
return -ESPIPE;
loff_t default_llseek(struct file *file, loff_t offset, int origin)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
loff_t retval;
- lock_kernel();
+ mutex_lock(&inode->i_mutex);
switch (origin) {
case SEEK_END:
- offset += i_size_read(file->f_path.dentry->d_inode);
+ offset += i_size_read(inode);
break;
case SEEK_CUR:
if (offset == 0) {
goto out;
}
offset += file->f_pos;
+ break;
+ case SEEK_DATA:
+ /*
+ * In the generic case the entire file is data, so as
+ * long as offset isn't at the end of the file then the
+ * offset is data.
+ */
+ if (offset >= inode->i_size) {
+ retval = -ENXIO;
+ goto out;
+ }
+ break;
+ case SEEK_HOLE:
+ /*
+ * There is a virtual hole at the end of the file, so
+ * as long as offset isn't i_size or larger, return
+ * i_size.
+ */
+ if (offset >= inode->i_size) {
+ retval = -ENXIO;
+ goto out;
+ }
+ offset = inode->i_size;
+ break;
}
retval = -EINVAL;
- if (offset >= 0) {
+ if (offset >= 0 || unsigned_offsets(file)) {
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
retval = offset;
}
out:
- unlock_kernel();
+ mutex_unlock(&inode->i_mutex);
return retval;
}
EXPORT_SYMBOL(default_llseek);
fn = no_llseek;
if (file->f_mode & FMODE_LSEEK) {
- fn = default_llseek;
if (file->f_op && file->f_op->llseek)
fn = file->f_op->llseek;
}
}
#endif
+
/*
* rw_verify_area doesn't like huge counts. We limit
* them to something that fits in "int" so that others
* won't have to do range checks all the time.
*/
-#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
-
int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
{
struct inode *inode;
if (unlikely((ssize_t) count < 0))
return retval;
pos = *ppos;
- if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
- return retval;
+ if (unlikely(pos < 0)) {
+ if (!unsigned_offsets(file))
+ return retval;
+ if (count >= -pos) /* both values are in 0..LLONG_MAX */
+ return -EOVERFLOW;
+ } else if (unlikely((loff_t) (pos + count) < 0)) {
+ if (!unsigned_offsets(file))
+ return retval;
+ }
if (unlikely(inode->i_flock && mandatory_lock(inode))) {
retval = locks_mandatory_area(
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
kiocb.ki_left = len;
+ kiocb.ki_nbytes = len;
for (;;) {
ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
else
ret = do_sync_read(file, buf, count, pos);
if (ret > 0) {
- fsnotify_access(file->f_path.dentry);
+ fsnotify_access(file);
add_rchar(current, ret);
}
inc_syscr(current);
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
kiocb.ki_left = len;
+ kiocb.ki_nbytes = len;
for (;;) {
ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
else
ret = do_sync_write(file, buf, count, pos);
if (ret > 0) {
- fsnotify_modify(file->f_path.dentry);
+ fsnotify_modify(file);
add_wchar(current, ret);
}
inc_syscw(current);
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_pointer,
- struct iovec **ret_pointer)
- {
+ struct iovec **ret_pointer,
+ int check_access)
+{
unsigned long seg;
- ssize_t ret;
+ ssize_t ret;
struct iovec *iov = fast_pointer;
- /*
- * SuS says "The readv() function *may* fail if the iovcnt argument
- * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
- * traditionally returned zero for zero segments, so...
- */
+ /*
+ * SuS says "The readv() function *may* fail if the iovcnt argument
+ * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
+ * traditionally returned zero for zero segments, so...
+ */
if (nr_segs == 0) {
ret = 0;
- goto out;
+ goto out;
}
- /*
- * First get the "struct iovec" from user memory and
- * verify all the pointers
- */
+ /*
+ * First get the "struct iovec" from user memory and
+ * verify all the pointers
+ */
if (nr_segs > UIO_MAXIOV) {
ret = -EINVAL;
- goto out;
+ goto out;
}
if (nr_segs > fast_segs) {
- iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+ iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
if (iov == NULL) {
ret = -ENOMEM;
- goto out;
+ goto out;
}
- }
+ }
if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
ret = -EFAULT;
- goto out;
+ goto out;
}
- /*
+ /*
* According to the Single Unix Specification we should return EINVAL
* if an element length is < 0 when cast to ssize_t or if the
* total length would overflow the ssize_t return value of the
* system call.
- */
+ *
+ * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
+ * overflow case.
+ */
ret = 0;
- for (seg = 0; seg < nr_segs; seg++) {
- void __user *buf = iov[seg].iov_base;
- ssize_t len = (ssize_t)iov[seg].iov_len;
+ for (seg = 0; seg < nr_segs; seg++) {
+ void __user *buf = iov[seg].iov_base;
+ ssize_t len = (ssize_t)iov[seg].iov_len;
/* see if we we're about to use an invalid len or if
* it's about to overflow ssize_t */
- if (len < 0 || (ret + len < ret)) {
+ if (len < 0) {
ret = -EINVAL;
- goto out;
+ goto out;
}
- if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
+ if (check_access
+ && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
ret = -EFAULT;
- goto out;
+ goto out;
+ }
+ if (len > MAX_RW_COUNT - ret) {
+ len = MAX_RW_COUNT - ret;
+ iov[seg].iov_len = len;
}
-
ret += len;
- }
+ }
out:
*ret_pointer = iov;
return ret;
}
ret = rw_copy_check_uvector(type, uvector, nr_segs,
- ARRAY_SIZE(iovstack), iovstack, &iov);
+ ARRAY_SIZE(iovstack), iovstack, &iov, 1);
if (ret <= 0)
goto out;
kfree(iov);
if ((ret + (type == READ)) > 0) {
if (type == READ)
- fsnotify_access(file->f_path.dentry);
+ fsnotify_access(file);
else
- fsnotify_modify(file->f_path.dentry);
+ fsnotify_modify(file);
}
return ret;
}