- Update to 2.6.25-rc3.
[linux-flexiantxendom0-3.2.10.git] / fs / xfs / linux-2.6 / xfs_file.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_bit.h"
20 #include "xfs_log.h"
21 #include "xfs_inum.h"
22 #include "xfs_sb.h"
23 #include "xfs_ag.h"
24 #include "xfs_dir2.h"
25 #include "xfs_trans.h"
26 #include "xfs_dmapi.h"
27 #include "xfs_mount.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_alloc_btree.h"
30 #include "xfs_ialloc_btree.h"
31 #include "xfs_alloc.h"
32 #include "xfs_btree.h"
33 #include "xfs_attr_sf.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_error.h"
38 #include "xfs_rw.h"
39 #include "xfs_ioctl32.h"
40 #include "xfs_vnodeops.h"
41
42 #include <linux/dcache.h>
43 #include <linux/smp_lock.h>
44
45 static struct vm_operations_struct xfs_file_vm_ops;
46 #ifdef CONFIG_XFS_DMAPI
47 static struct vm_operations_struct xfs_dmapi_file_vm_ops;
48 #endif
49
50 STATIC_INLINE ssize_t
51 __xfs_file_read(
52         struct kiocb            *iocb,
53         const struct iovec      *iov,
54         unsigned long           nr_segs,
55         int                     ioflags,
56         loff_t                  pos)
57 {
58         struct file             *file = iocb->ki_filp;
59
60         BUG_ON(iocb->ki_pos != pos);
61         if (unlikely(file->f_flags & O_DIRECT))
62                 ioflags |= IO_ISDIRECT;
63         return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
64                                 nr_segs, &iocb->ki_pos, ioflags);
65 }
66
67 STATIC ssize_t
68 xfs_file_aio_read(
69         struct kiocb            *iocb,
70         const struct iovec      *iov,
71         unsigned long           nr_segs,
72         loff_t                  pos)
73 {
74         return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
75 }
76
77 STATIC ssize_t
78 xfs_file_aio_read_invis(
79         struct kiocb            *iocb,
80         const struct iovec      *iov,
81         unsigned long           nr_segs,
82         loff_t                  pos)
83 {
84         return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
85 }
86
87 STATIC_INLINE ssize_t
88 __xfs_file_write(
89         struct kiocb            *iocb,
90         const struct iovec      *iov,
91         unsigned long           nr_segs,
92         int                     ioflags,
93         loff_t                  pos)
94 {
95         struct file     *file = iocb->ki_filp;
96
97         BUG_ON(iocb->ki_pos != pos);
98         if (unlikely(file->f_flags & O_DIRECT))
99                 ioflags |= IO_ISDIRECT;
100         return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
101                                 &iocb->ki_pos, ioflags);
102 }
103
104 STATIC ssize_t
105 xfs_file_aio_write(
106         struct kiocb            *iocb,
107         const struct iovec      *iov,
108         unsigned long           nr_segs,
109         loff_t                  pos)
110 {
111         return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
112 }
113
114 STATIC ssize_t
115 xfs_file_aio_write_invis(
116         struct kiocb            *iocb,
117         const struct iovec      *iov,
118         unsigned long           nr_segs,
119         loff_t                  pos)
120 {
121         return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
122 }
123
124 STATIC ssize_t
125 xfs_file_splice_read(
126         struct file             *infilp,
127         loff_t                  *ppos,
128         struct pipe_inode_info  *pipe,
129         size_t                  len,
130         unsigned int            flags)
131 {
132         return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
133                                    infilp, ppos, pipe, len, flags, 0);
134 }
135
136 STATIC ssize_t
137 xfs_file_splice_read_invis(
138         struct file             *infilp,
139         loff_t                  *ppos,
140         struct pipe_inode_info  *pipe,
141         size_t                  len,
142         unsigned int            flags)
143 {
144         return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
145                                    infilp, ppos, pipe, len, flags, IO_INVIS);
146 }
147
148 STATIC ssize_t
149 xfs_file_splice_write(
150         struct pipe_inode_info  *pipe,
151         struct file             *outfilp,
152         loff_t                  *ppos,
153         size_t                  len,
154         unsigned int            flags)
155 {
156         return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
157                                     pipe, outfilp, ppos, len, flags, 0);
158 }
159
160 STATIC ssize_t
161 xfs_file_splice_write_invis(
162         struct pipe_inode_info  *pipe,
163         struct file             *outfilp,
164         loff_t                  *ppos,
165         size_t                  len,
166         unsigned int            flags)
167 {
168         return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
169                                     pipe, outfilp, ppos, len, flags, IO_INVIS);
170 }
171
172 STATIC int
173 xfs_file_open(
174         struct inode    *inode,
175         struct file     *filp)
176 {
177         if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
178                 return -EFBIG;
179         return -xfs_open(XFS_I(inode));
180 }
181
182 STATIC int
183 xfs_file_release(
184         struct inode    *inode,
185         struct file     *filp)
186 {
187         return -xfs_release(XFS_I(inode));
188 }
189
190 STATIC int
191 xfs_file_fsync(
192         struct file     *filp,
193         struct dentry   *dentry,
194         int             datasync)
195 {
196         int             flags = FSYNC_WAIT;
197
198         if (datasync)
199                 flags |= FSYNC_DATA;
200         xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
201         return -xfs_fsync(XFS_I(dentry->d_inode), flags,
202                         (xfs_off_t)0, (xfs_off_t)-1);
203 }
204
#ifdef CONFIG_XFS_DMAPI
/*
 * ->fault handler installed by xfs_file_mmap() for DMAPI mounts.
 *
 * Asserts that the vnode's vfs carries VFS_DMI, sends an mmap event via
 * XFS_SEND_MMAP() and returns VM_FAULT_SIGBUS if that reports a non-zero
 * result; otherwise the fault is serviced by filemap_fault().
 */
STATIC int
xfs_vm_fault(
        struct vm_area_struct   *vma,
        struct vm_fault *vmf)
{
        bhv_vnode_t     *vp;

        vp = vn_from_inode(vma->vm_file->f_path.dentry->d_inode);
        ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);

        return XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0) ?
                        VM_FAULT_SIGBUS : filemap_fault(vma, vmf);
}
#endif /* CONFIG_XFS_DMAPI */
220
/*
 * Unfortunately we can't just use the clean and simple readdir implementation
 * below, because nfs might call back into ->lookup from the filldir callback
 * and that will deadlock the low-level btree code.
 *
 * Hopefully we'll find a better workaround that allows us to use the optimal
 * version at least for local readdirs for 2.6.25.
 */
229 #if 0
230 STATIC int
231 xfs_file_readdir(
232         struct file     *filp,
233         void            *dirent,
234         filldir_t       filldir)
235 {
236         struct inode    *inode = filp->f_path.dentry->d_inode;
237         xfs_inode_t     *ip = XFS_I(inode);
238         int             error;
239         size_t          bufsize;
240
241         /*
242          * The Linux API doesn't pass down the total size of the buffer
243          * we read into down to the filesystem.  With the filldir concept
244          * it's not needed for correct information, but the XFS dir2 leaf
245          * code wants an estimate of the buffer size to calculate it's
246          * readahead window and size the buffers used for mapping to
247          * physical blocks.
248          *
249          * Try to give it an estimate that's good enough, maybe at some
250          * point we can change the ->readdir prototype to include the
251          * buffer size.
252          */
253         bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size);
254
255         error = xfs_readdir(ip, dirent, bufsize,
256                                 (xfs_off_t *)&filp->f_pos, filldir);
257         if (error)
258                 return -error;
259         return 0;
260 }
261 #else
262
263 struct hack_dirent {
264         u64             ino;
265         loff_t          offset;
266         int             namlen;
267         unsigned int    d_type;
268         char            name[];
269 };
270
/*
 * Cursor over the temporary buffer that xfs_hack_filldir() fills with
 * struct hack_dirent records.
 */
struct hack_callback {
        char            *dirent;        /* start of the staging buffer */
        size_t          len;            /* capacity of the buffer in bytes */
        size_t          used;           /* bytes already occupied by records */
};
276
277 STATIC int
278 xfs_hack_filldir(
279         void            *__buf,
280         const char      *name,
281         int             namlen,
282         loff_t          offset,
283         u64             ino,
284         unsigned int    d_type)
285 {
286         struct hack_callback *buf = __buf;
287         struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used);
288         unsigned int reclen;
289
290         reclen = ALIGN(sizeof(struct hack_dirent) + namlen, sizeof(u64));
291         if (buf->used + reclen > buf->len)
292                 return -EINVAL;
293
294         de->namlen = namlen;
295         de->offset = offset;
296         de->ino = ino;
297         de->d_type = d_type;
298         memcpy(de->name, name, namlen);
299         buf->used += reclen;
300         return 0;
301 }
302
303 STATIC int
304 xfs_file_readdir(
305         struct file     *filp,
306         void            *dirent,
307         filldir_t       filldir)
308 {
309         struct inode    *inode = filp->f_path.dentry->d_inode;
310         xfs_inode_t     *ip = XFS_I(inode);
311         struct hack_callback buf;
312         struct hack_dirent *de;
313         int             error;
314         loff_t          size;
315         int             eof = 0;
316         xfs_off_t       start_offset, curr_offset, offset;
317
318         /*
319          * Try fairly hard to get memory
320          */
321         buf.len = PAGE_CACHE_SIZE;
322         do {
323                 buf.dirent = kmalloc(buf.len, GFP_KERNEL);
324                 if (buf.dirent)
325                         break;
326                 buf.len >>= 1;
327         } while (buf.len >= 1024);
328
329         if (!buf.dirent)
330                 return -ENOMEM;
331
332         curr_offset = filp->f_pos;
333         if (curr_offset == 0x7fffffff)
334                 offset = 0xffffffff;
335         else
336                 offset = filp->f_pos;
337
338         while (!eof) {
339                 unsigned int reclen;
340
341                 start_offset = offset;
342
343                 buf.used = 0;
344                 error = -xfs_readdir(ip, &buf, buf.len, &offset,
345                                      xfs_hack_filldir);
346                 if (error || offset == start_offset) {
347                         size = 0;
348                         break;
349                 }
350
351                 size = buf.used;
352                 de = (struct hack_dirent *)buf.dirent;
353                 while (size > 0) {
354                         curr_offset = de->offset /* & 0x7fffffff */;
355                         if (filldir(dirent, de->name, de->namlen,
356                                         curr_offset & 0x7fffffff,
357                                         de->ino, de->d_type)) {
358                                 goto done;
359                         }
360
361                         reclen = ALIGN(sizeof(struct hack_dirent) + de->namlen,
362                                        sizeof(u64));
363                         size -= reclen;
364                         de = (struct hack_dirent *)((char *)de + reclen);
365                 }
366         }
367
368  done:
369         if (!error) {
370                 if (size == 0)
371                         filp->f_pos = offset & 0x7fffffff;
372                 else if (de)
373                         filp->f_pos = curr_offset;
374         }
375
376         kfree(buf.dirent);
377         return error;
378 }
379 #endif
380
381 STATIC int
382 xfs_file_mmap(
383         struct file     *filp,
384         struct vm_area_struct *vma)
385 {
386         vma->vm_ops = &xfs_file_vm_ops;
387         vma->vm_flags |= VM_CAN_NONLINEAR;
388
389 #ifdef CONFIG_XFS_DMAPI
390         if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI)
391                 vma->vm_ops = &xfs_dmapi_file_vm_ops;
392 #endif /* CONFIG_XFS_DMAPI */
393
394         file_accessed(filp);
395         return 0;
396 }
397
398 STATIC long
399 xfs_file_ioctl(
400         struct file     *filp,
401         unsigned int    cmd,
402         unsigned long   p)
403 {
404         int             error;
405         struct inode    *inode = filp->f_path.dentry->d_inode;
406
407         error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
408         xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
409
410         /* NOTE:  some of the ioctl's return positive #'s as a
411          *        byte count indicating success, such as
412          *        readlink_by_handle.  So we don't "sign flip"
413          *        like most other routines.  This means true
414          *        errors need to be returned as a negative value.
415          */
416         return error;
417 }
418
419 STATIC long
420 xfs_file_ioctl_invis(
421         struct file     *filp,
422         unsigned int    cmd,
423         unsigned long   p)
424 {
425         int             error;
426         struct inode    *inode = filp->f_path.dentry->d_inode;
427
428         error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
429         xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
430
431         /* NOTE:  some of the ioctl's return positive #'s as a
432          *        byte count indicating success, such as
433          *        readlink_by_handle.  So we don't "sign flip"
434          *        like most other routines.  This means true
435          *        errors need to be returned as a negative value.
436          */
437         return error;
438 }
439
#ifdef CONFIG_XFS_DMAPI
#ifdef HAVE_VMOP_MPROTECT
/*
 * ->mprotect handler for DMAPI mounts.
 *
 * Sends an mmap event with VM_WRITE via XFS_SEND_MMAP() when a mapping
 * with VM_MAYSHARE that is not currently writable is about to gain
 * VM_WRITE, and returns that call's result unchanged.  In every other
 * case, including mounts without XFS_MOUNT_DMAPI, it returns 0.
 */
STATIC int
xfs_vm_mprotect(
        struct vm_area_struct *vma,
        unsigned int    newflags)
{
        struct inode    *inode = vma->vm_file->f_path.dentry->d_inode;
        struct xfs_mount *mp = XFS_M(inode->i_sb);

        if (!(mp->m_flags & XFS_MOUNT_DMAPI))
                return 0;
        if (!(vma->vm_flags & VM_MAYSHARE))
                return 0;
        /* Only the read-only -> writable transition is of interest. */
        if (!(newflags & VM_WRITE) || (vma->vm_flags & VM_WRITE))
                return 0;

        return XFS_SEND_MMAP(mp, vma, VM_WRITE);
}
#endif /* HAVE_VMOP_MPROTECT */
#endif /* CONFIG_XFS_DMAPI */
460
#ifdef HAVE_FOP_OPEN_EXEC
/*
 * If the user is attempting to execute a file that is offline, a DMAPI
 * READ event has to be triggered before the file is marked as busy;
 * otherwise the invisible I/O will not be able to write to the file to
 * bring it back online.
 *
 * Returns the sign-flipped result of XFS_SEND_DATA() when the mount has
 * XFS_MOUNT_DMAPI set and DM_EVENT_READ is enabled on the inode, and 0
 * in every other case.
 */
STATIC int
xfs_file_open_exec(
        struct inode    *inode)
{
        struct xfs_mount *mp = XFS_M(inode->i_sb);

        if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
            DM_EVENT_ENABLED(XFS_I(inode), DM_EVENT_READ)) {
                bhv_vnode_t *vp = vn_from_inode(inode);

                return -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL);
        }

        return 0;
}
#endif /* HAVE_FOP_OPEN_EXEC */
485
486 /*
487  * mmap()d file has taken write protection fault and is being made
488  * writable. We can set the page state up correctly for a writable
489  * page, which means we can do correct delalloc accounting (ENOSPC
490  * checking!) and unwritten extent mapping.
491  */
492 STATIC int
493 xfs_vm_page_mkwrite(
494         struct vm_area_struct   *vma,
495         struct page             *page)
496 {
497         return block_page_mkwrite(vma, page, xfs_get_blocks);
498 }
499
500 const struct file_operations xfs_file_operations = {
501         .llseek         = generic_file_llseek,
502         .read           = do_sync_read,
503         .write          = do_sync_write,
504         .aio_read       = xfs_file_aio_read,
505         .aio_write      = xfs_file_aio_write,
506         .splice_read    = xfs_file_splice_read,
507         .splice_write   = xfs_file_splice_write,
508         .unlocked_ioctl = xfs_file_ioctl,
509 #ifdef CONFIG_COMPAT
510         .compat_ioctl   = xfs_file_compat_ioctl,
511 #endif
512         .mmap           = xfs_file_mmap,
513         .open           = xfs_file_open,
514         .release        = xfs_file_release,
515         .fsync          = xfs_file_fsync,
516 #ifdef HAVE_FOP_OPEN_EXEC
517         .open_exec      = xfs_file_open_exec,
518 #endif
519 };
520
521 const struct file_operations xfs_invis_file_operations = {
522         .llseek         = generic_file_llseek,
523         .read           = do_sync_read,
524         .write          = do_sync_write,
525         .aio_read       = xfs_file_aio_read_invis,
526         .aio_write      = xfs_file_aio_write_invis,
527         .splice_read    = xfs_file_splice_read_invis,
528         .splice_write   = xfs_file_splice_write_invis,
529         .unlocked_ioctl = xfs_file_ioctl_invis,
530 #ifdef CONFIG_COMPAT
531         .compat_ioctl   = xfs_file_compat_invis_ioctl,
532 #endif
533         .mmap           = xfs_file_mmap,
534         .open           = xfs_file_open,
535         .release        = xfs_file_release,
536         .fsync          = xfs_file_fsync,
537 };
538
539
540 const struct file_operations xfs_dir_file_operations = {
541         .read           = generic_read_dir,
542         .readdir        = xfs_file_readdir,
543         .unlocked_ioctl = xfs_file_ioctl,
544 #ifdef CONFIG_COMPAT
545         .compat_ioctl   = xfs_file_compat_ioctl,
546 #endif
547         .fsync          = xfs_file_fsync,
548 };
549
550 static struct vm_operations_struct xfs_file_vm_ops = {
551         .fault          = filemap_fault,
552         .page_mkwrite   = xfs_vm_page_mkwrite,
553 };
554
#ifdef CONFIG_XFS_DMAPI
/*
 * vm operations installed by xfs_file_mmap() when the mount has
 * XFS_MOUNT_DMAPI set: faults go through xfs_vm_fault() and, where the
 * kernel provides the hook, mprotect goes through xfs_vm_mprotect().
 */
static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
        .page_mkwrite   = xfs_vm_page_mkwrite,
        .fault          = xfs_vm_fault,
#ifdef HAVE_VMOP_MPROTECT
        .mprotect       = xfs_vm_mprotect,
#endif
};
#endif /* CONFIG_XFS_DMAPI */