[PATCH] 2.5.13: remove VALID_PAGE
/*
 *      linux/mm/msync.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * The msync() system call.
 */
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

/*
 * Called with mm->page_table_lock held to protect against other
 * threads/the swapper from ripping pte's out from under us.
 */
static int filemap_sync_pte(pte_t *ptep, struct vm_area_struct *vma,
        unsigned long address, unsigned int flags)
{
        pte_t pte = *ptep;

        if (pte_present(pte) && pte_dirty(pte)) {
                struct page *page;
                unsigned long pfn = pte_pfn(pte);
                if (pfn_valid(pfn)) {
                        page = pfn_to_page(pfn);
                        if (!PageReserved(page) && ptep_test_and_clear_dirty(ptep)) {
                                flush_tlb_page(vma, address);
                                set_page_dirty(page);
                        }
                }
        }
        return 0;
}

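/*
 * Walk the ptes mapped by one pmd entry, clamping the range so it never
 * crosses a pmd boundary, and sync the dirty bit of each present pte
 * into its struct page via filemap_sync_pte().
 */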
static inline int filemap_sync_pte_range(pmd_t * pmd,
        unsigned long address, unsigned long end,
        struct vm_area_struct *vma, unsigned int flags)
{
        pte_t *pte;
        int error;

        if (pmd_none(*pmd))
                return 0;
        if (pmd_bad(*pmd)) {
                pmd_ERROR(*pmd);
                pmd_clear(pmd);
                return 0;
        }
        pte = pte_offset_map(pmd, address);
        if ((address & PMD_MASK) != (end & PMD_MASK))
                end = (address & PMD_MASK) + PMD_SIZE;
        error = 0;
        do {
                error |= filemap_sync_pte(pte, vma, address, flags);
                address += PAGE_SIZE;
                pte++;
        } while (address && (address < end));

        pte_unmap(pte - 1);

        return error;
}

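/*
 * Walk the pmds covered by one pgd entry, clamping the range to the
 * current pgd, and sync each pte range in turn.
 */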
static inline int filemap_sync_pmd_range(pgd_t * pgd,
        unsigned long address, unsigned long end,
        struct vm_area_struct *vma, unsigned int flags)
{
        pmd_t * pmd;
        int error;

        if (pgd_none(*pgd))
                return 0;
        if (pgd_bad(*pgd)) {
                pgd_ERROR(*pgd);
                pgd_clear(pgd);
                return 0;
        }
        pmd = pmd_offset(pgd, address);
        if ((address & PGDIR_MASK) != (end & PGDIR_MASK))
                end = (address & PGDIR_MASK) + PGDIR_SIZE;
        error = 0;
        do {
                error |= filemap_sync_pte_range(pmd, address, end, vma, flags);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address && (address < end));
        return error;
}

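/*
 * Walk the page tables for [address, address + size), propagating pte
 * dirty bits into the page cache so that writeout can find the dirty
 * pages.  The mm's page_table_lock is held across the whole walk.
 */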
int filemap_sync(struct vm_area_struct * vma, unsigned long address,
        size_t size, unsigned int flags)
{
        pgd_t * dir;
        unsigned long end = address + size;
        int error = 0;

        /* Acquire the lock early; it may be possible to avoid dropping
         * and reacquiring it repeatedly.
         */
        spin_lock(&vma->vm_mm->page_table_lock);

        dir = pgd_offset(vma->vm_mm, address);
        flush_cache_range(vma, address, end);
        if (address >= end)
                BUG();
        do {
                error |= filemap_sync_pmd_range(dir, address, end, vma, flags);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        } while (address && (address < end));
        flush_tlb_range(vma, end - size, end);

        spin_unlock(&vma->vm_mm->page_table_lock);

        return error;
}

/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC initiates writeout of just the dirty mapped data.
 * This provides no guarantee of file integrity - things like indirect
 * blocks may not have started writeout.  MS_ASYNC is primarily useful
 * where the application knows that it has finished with the data and
 * wishes to intelligently schedule its own I/O traffic.
 */
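/*
 * For reference, a typical user of these two modes maps the file with
 * MAP_SHARED, dirties it through the mapping, and then either schedules
 * or forces writeout (illustrative userspace sketch, not kernel code):
 *
 *      void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *      memcpy(p, data, len);
 *      msync(p, len, MS_ASYNC);        - start writeout, return immediately
 *      msync(p, len, MS_SYNC);         - write out and wait for completion
 */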
static int msync_interval(struct vm_area_struct * vma,
        unsigned long start, unsigned long end, int flags)
{
        int ret = 0;
        struct file * file = vma->vm_file;

        if (file && (vma->vm_flags & VM_SHARED)) {
                ret = filemap_sync(vma, start, end-start, flags);

                if (!ret && (flags & (MS_SYNC|MS_ASYNC))) {
                        struct inode * inode = file->f_dentry->d_inode;
                        int err;

                        down(&inode->i_sem);
                        ret = filemap_fdatawait(inode->i_mapping);
                        err = filemap_fdatawrite(inode->i_mapping);
                        if (!ret)
                                ret = err;
                        if (flags & MS_SYNC) {
                                if (file->f_op && file->f_op->fsync) {
                                        err = file->f_op->fsync(file, file->f_dentry, 1);
                                        if (err && !ret)
                                                ret = err;
                                }
                                err = filemap_fdatawait(inode->i_mapping);
                                if (!ret)
                                        ret = err;
                        }
                        up(&inode->i_sem);
                }
        }
        return ret;
}

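/*
 * The msync system call itself: validate the arguments, then walk the
 * vmas covering [start, start+len), calling msync_interval() on each
 * mapped piece.  Holes in the range are skipped but turn the final
 * return value into -EFAULT.
 */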
asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
        unsigned long end;
        struct vm_area_struct * vma;
        int unmapped_error, error = -EINVAL;

        down_read(&current->mm->mmap_sem);
        if (start & ~PAGE_MASK)
                goto out;
        len = (len + ~PAGE_MASK) & PAGE_MASK;
        end = start + len;
        if (end < start)
                goto out;
        if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
                goto out;
        error = 0;
        if (end == start)
                goto out;
        /*
         * If the interval [start,end) covers some unmapped address ranges,
         * just ignore them, but return -EFAULT at the end.
         */
        vma = find_vma(current->mm, start);
        unmapped_error = 0;
        for (;;) {
                /* Still start < end. */
                error = -EFAULT;
                if (!vma)
                        goto out;
                /* Here start < vma->vm_end. */
                if (start < vma->vm_start) {
                        unmapped_error = -EFAULT;
                        start = vma->vm_start;
                }
                /* Here vma->vm_start <= start < vma->vm_end. */
                if (end <= vma->vm_end) {
                        if (start < end) {
                                error = msync_interval(vma, start, end, flags);
                                if (error)
                                        goto out;
                        }
                        error = unmapped_error;
                        goto out;
                }
                /* Here vma->vm_start <= start < vma->vm_end < end. */
                error = msync_interval(vma, start, vma->vm_end, flags);
                if (error)
                        goto out;
                start = vma->vm_end;
                vma = vma->vm_next;
        }
out:
        up_read(&current->mm->mmap_sem);
        return error;
}