Update to 3.4-final.

[linux-flexiantxendom0-3.2.10.git] / mm / filemap.c
diff --git a/mm/filemap.c b/mm/filemap.c

index c0018f2..79c4b2b 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -101,9 +101,8 @@
   *    ->inode->i_lock          (zap_pte_range->set_page_dirty)
   *    ->private_lock           (zap_pte_range->__set_page_dirty_buffers)
   *
- *  (code doesn't rely on that order, so you could switch it around)
- *  ->tasklist_lock             (memory_failure, collect_procs_ao)
- *    ->i_mmap_mutex
+ * ->i_mmap_mutex
+ *   ->tasklist_lock            (memory_failure, collect_procs_ao)
   */
  
  /*
@@ -123,7 +122,7 @@ void __delete_from_page_cache(struct page *page)
         if (PageUptodate(page) && PageMappedToDisk(page))
                 cleancache_put_page(page);
         else
-               cleancache_flush_page(mapping, page);
+               cleancache_invalidate_page(mapping, page);
  
         radix_tree_delete(&mapping->page_tree, page->index);
         page->mapping = NULL;
@@ -393,24 +392,11 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
  int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
  {
         int error;
-       struct mem_cgroup *memcg = NULL;
  
         VM_BUG_ON(!PageLocked(old));
         VM_BUG_ON(!PageLocked(new));
         VM_BUG_ON(new->mapping);
  
-       /*
-        * This is not page migration, but prepare_migration and
-        * end_migration does enough work for charge replacement.
-        *
-        * In the longer term we probably want a specialized function
-        * for moving the charge from old to new in a more efficient
-        * manner.
-        */
-       error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
-       if (error)
-               return error;
-
         error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
         if (!error) {
                 struct address_space *mapping = old->mapping;
@@ -432,13 +418,12 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
                 if (PageSwapBacked(new))
                         __inc_zone_page_state(new, NR_SHMEM);
                 spin_unlock_irq(&mapping->tree_lock);
+               /* mem_cgroup code must not be called under tree_lock */
+               mem_cgroup_replace_page_cache(old, new);
                 radix_tree_preload_end();
                 if (freepage)
                         freepage(old);
                 page_cache_release(old);
-               mem_cgroup_end_migration(memcg, old, new, true);
-       } else {
-               mem_cgroup_end_migration(memcg, old, new, false);
         }
  
         return error;
@@ -514,10 +499,13 @@ struct page *__page_cache_alloc(gfp_t gfp)
         struct page *page;
  
         if (cpuset_do_page_mem_spread()) {
-               get_mems_allowed();
-               n = cpuset_mem_spread_node();
-               page = alloc_pages_exact_node(n, gfp, 0);
-               put_mems_allowed();
+               unsigned int cpuset_mems_cookie;
+               do {
+                       cpuset_mems_cookie = get_mems_allowed();
+                       n = cpuset_mem_spread_node();
+                       page = alloc_pages_exact_node(n, gfp, 0);
+               } while (!put_mems_allowed(cpuset_mems_cookie) && !page);
+
                 return page;
         }
         return alloc_pages(gfp, 0);
@@ -825,20 +813,19 @@ EXPORT_SYMBOL(find_or_create_page);
  unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
                             unsigned int nr_pages, struct page **pages)
  {
-       unsigned int i;
-       unsigned int ret;
-       unsigned int nr_found, nr_skip;
+       struct radix_tree_iter iter;
+       void **slot;
+       unsigned ret = 0;
+
+       if (unlikely(!nr_pages))
+               return 0;
  
         rcu_read_lock();
  restart:
-       nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-                               (void ***)pages, NULL, start, nr_pages);
-       ret = 0;
-       nr_skip = 0;
-       for (i = 0; i < nr_found; i++) {
+       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
                 struct page *page;
  repeat:
-               page = radix_tree_deref_slot((void **)pages[i]);
+               page = radix_tree_deref_slot(slot);
                 if (unlikely(!page))
                         continue;
  
@@ -849,7 +836,7 @@ repeat:
                                  * when entry at index 0 moves out of or back
                                  * to root: none yet gotten, safe to restart.
                                  */
-                               WARN_ON(start | i);
+                               WARN_ON(iter.index);
                                 goto restart;
                         }
                         /*
@@ -857,7 +844,6 @@ repeat:
                          * here as an exceptional entry: so skip over it -
                          * we only reach this from invalidate_mapping_pages().
                          */
-                       nr_skip++;
                         continue;
                 }
  
@@ -865,21 +851,16 @@ repeat:
                         goto repeat;
  
                 /* Has the page moved? */
-               if (unlikely(page != *((void **)pages[i]))) {
+               if (unlikely(page != *slot)) {
                         page_cache_release(page);
                         goto repeat;
                 }
  
                 pages[ret] = page;
-               ret++;
+               if (++ret == nr_pages)
+                       break;
         }
  
-       /*
-        * If all entries were removed before we could secure them,
-        * try again, because callers stop trying once 0 is returned.
-        */
-       if (unlikely(!ret && nr_found > nr_skip))
-               goto restart;
         rcu_read_unlock();
         return ret;
  }
@@ -899,21 +880,22 @@ repeat:
  unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
                                unsigned int nr_pages, struct page **pages)
  {
-       unsigned int i;
-       unsigned int ret;
-       unsigned int nr_found;
+       struct radix_tree_iter iter;
+       void **slot;
+       unsigned int ret = 0;
+
+       if (unlikely(!nr_pages))
+               return 0;
  
         rcu_read_lock();
  restart:
-       nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-                               (void ***)pages, NULL, index, nr_pages);
-       ret = 0;
-       for (i = 0; i < nr_found; i++) {
+       radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
                 struct page *page;
  repeat:
-               page = radix_tree_deref_slot((void **)pages[i]);
+               page = radix_tree_deref_slot(slot);
+               /* A hole: there is no reason to continue */
                 if (unlikely(!page))
-                       continue;
+                       break;
  
                 if (radix_tree_exception(page)) {
                         if (radix_tree_deref_retry(page)) {
@@ -936,7 +918,7 @@ repeat:
                         goto repeat;
  
                 /* Has the page moved? */
-               if (unlikely(page != *((void **)pages[i]))) {
+               if (unlikely(page != *slot)) {
                         page_cache_release(page);
                         goto repeat;
                 }
@@ -946,14 +928,14 @@ repeat:
                  * otherwise we can get both false positives and false
                  * negatives, which is just confusing to the caller.
                  */
-               if (page->mapping == NULL || page->index != index) {
+               if (page->mapping == NULL || page->index != iter.index) {
                         page_cache_release(page);
                         break;
                 }
  
                 pages[ret] = page;
-               ret++;
-               index++;
+               if (++ret == nr_pages)
+                       break;
         }
         rcu_read_unlock();
         return ret;
@@ -974,19 +956,20 @@ EXPORT_SYMBOL(find_get_pages_contig);
  unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
                         int tag, unsigned int nr_pages, struct page **pages)
  {
-       unsigned int i;
-       unsigned int ret;
-       unsigned int nr_found;
+       struct radix_tree_iter iter;
+       void **slot;
+       unsigned ret = 0;
+
+       if (unlikely(!nr_pages))
+               return 0;
  
         rcu_read_lock();
  restart:
-       nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
-                               (void ***)pages, *index, nr_pages, tag);
-       ret = 0;
-       for (i = 0; i < nr_found; i++) {
+       radix_tree_for_each_tagged(slot, &mapping->page_tree,
+                                  &iter, *index, tag) {
                 struct page *page;
  repeat:
-               page = radix_tree_deref_slot((void **)pages[i]);
+               page = radix_tree_deref_slot(slot);
                 if (unlikely(!page))
                         continue;
  
@@ -1010,21 +993,16 @@ repeat:
                         goto repeat;
  
                 /* Has the page moved? */
-               if (unlikely(page != *((void **)pages[i]))) {
+               if (unlikely(page != *slot)) {
                         page_cache_release(page);
                         goto repeat;
                 }
  
                 pages[ret] = page;
-               ret++;
+               if (++ret == nr_pages)
+                       break;
         }
  
-       /*
-        * If all entries were removed before we could secure them,
-        * try again, because callers stop trying once 0 is returned.
-        */
-       if (unlikely(!ret && nr_found))
-               goto restart;
         rcu_read_unlock();
  
         if (ret)
@@ -1332,10 +1310,10 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
          * taking the kmap.
          */
         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = kmap_atomic(page);
                 left = __copy_to_user_inatomic(desc->arg.buf,
                                                 kaddr + offset, size);
-               kunmap_atomic(kaddr, KM_USER0);
+               kunmap_atomic(kaddr);
                 if (left == 0)
                         goto success;
         }
@@ -1414,15 +1392,12 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
         unsigned long seg = 0;
         size_t count;
         loff_t *ppos = &iocb->ki_pos;
-       struct blk_plug plug;
  
         count = 0;
         retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
         if (retval)
                 return retval;
  
-       blk_start_plug(&plug);
-
         /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
         if (filp->f_flags & O_DIRECT) {
                 loff_t size;
@@ -1438,8 +1413,12 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                         retval = filemap_write_and_wait_range(mapping, pos,
                                         pos + iov_length(iov, nr_segs) - 1);
                         if (!retval) {
+                               struct blk_plug plug;
+
+                               blk_start_plug(&plug);
                                 retval = mapping->a_ops->direct_IO(READ, iocb,
                                                         iov, pos, nr_segs);
+                               blk_finish_plug(&plug);
                         }
                         if (retval > 0) {
                                 *ppos = pos + retval;
@@ -1495,7 +1474,6 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                         break;
         }
  out:
-       blk_finish_plug(&plug);
         return retval;
  }
  EXPORT_SYMBOL(generic_file_aio_read);
@@ -1828,7 +1806,7 @@ repeat:
                 page = __page_cache_alloc(gfp | __GFP_COLD);
                 if (!page)
                         return ERR_PTR(-ENOMEM);
-               err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+               err = add_to_page_cache_lru(page, mapping, index, gfp);
                 if (unlikely(err)) {
                         page_cache_release(page);
                         if (err == -EEXIST)
@@ -1925,10 +1903,7 @@ static struct page *wait_on_page_read(struct page *page)
   * @gfp:       the page allocator flags to use if allocating
   *
   * This is the same as "read_mapping_page(mapping, index, NULL)", but with
- * any new page allocations done using the specified allocation flags. Note
- * that the Radix tree operations will still use GFP_KERNEL, so you can't
- * expect to do this atomically or anything like that - but you can pass in
- * other page requirements.
+ * any new page allocations done using the specified allocation flags.
   *
   * If the page does not get brought uptodate, return -EIO.
   */
@@ -1971,7 +1946,7 @@ EXPORT_SYMBOL(read_cache_page);
   */
  int should_remove_suid(struct dentry *dentry)
  {
-       mode_t mode = dentry->d_inode->i_mode;
+       umode_t mode = dentry->d_inode->i_mode;
         int kill = 0;
  
         /* suid always must be killed */
@@ -2062,7 +2037,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
         size_t copied;
  
         BUG_ON(!in_atomic());
-       kaddr = kmap_atomic(page, KM_USER0);
+       kaddr = kmap_atomic(page);
         if (likely(i->nr_segs == 1)) {
                 int left;
                 char __user *buf = i->iov->iov_base + i->iov_offset;
@@ -2072,7 +2047,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
                 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
                                                 i->iov, i->iov_offset, bytes);
         }
-       kunmap_atomic(kaddr, KM_USER0);
+       kunmap_atomic(kaddr);
  
         return copied;
  }
@@ -2354,8 +2329,13 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
                                         pgoff_t index, unsigned flags)
  {
         int status;
+       gfp_t gfp_mask;
         struct page *page;
         gfp_t gfp_notmask = 0;
+
+       gfp_mask = mapping_gfp_mask(mapping);
+       if (mapping_cap_account_dirty(mapping))
+               gfp_mask |= __GFP_WRITE;
         if (flags & AOP_FLAG_NOFS)
                 gfp_notmask = __GFP_FS;
  repeat:
@@ -2363,7 +2343,7 @@ repeat:
         if (page)
                 goto found;
  
-       page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
+       page = __page_cache_alloc(gfp_mask & ~gfp_notmask);
         if (!page)
                 return NULL;
         status = add_to_page_cache_lru(page, mapping, index,
@@ -2407,7 +2387,6 @@ static ssize_t generic_perform_write(struct file *file,
                                                 iov_iter_count(i));
  
  again:
-
                 /*
                  * Bring in the user page that we will copy from _first_.
                  * Otherwise there's a nasty deadlock on copying from the
@@ -2463,7 +2442,10 @@ again:
                 written += copied;
  
                 balance_dirty_pages_ratelimited(mapping);
-
+               if (fatal_signal_pending(current)) {
+                       status = -EINTR;
+                       break;
+               }
         } while (iov_iter_count(i));
  
         return written ? written : status;