fs: dcache remove dcache_lock
diff --git a/mm/filemap.c b/mm/filemap.c
index 829ac9c..ca38939 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
  *    ->inode_lock             (zap_pte_range->set_page_dirty)
  *    ->private_lock           (zap_pte_range->__set_page_dirty_buffers)
  *
- *  ->task->proc_lock
- *    ->dcache_lock            (proc_pid_lookup)
- *
  *  (code doesn't rely on that order, so you could switch it around)
  *  ->tasklist_lock             (memory_failure, collect_procs_ao)
  *    ->i_mmap_lock
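
With dcache_lock removed by this series, proc_pid_lookup() no longer takes a global dcache lock beneath ->task->proc_lock; dentry lookup is serialized by RCU and the per-dentry d_lock instead, so that nesting rule is dropped from the ordering table above.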
@@ -143,14 +140,20 @@ void __remove_from_page_cache(struct page *page)
 void remove_from_page_cache(struct page *page)
 {
        struct address_space *mapping = page->mapping;
+       void (*freepage)(struct page *);
 
        BUG_ON(!PageLocked(page));
 
+       freepage = mapping->a_ops->freepage;
        spin_lock_irq(&mapping->tree_lock);
        __remove_from_page_cache(page);
        spin_unlock_irq(&mapping->tree_lock);
        mem_cgroup_uncharge_cache_page(page);
+
+       if (freepage)
+               freepage(page);
 }
+EXPORT_SYMBOL(remove_from_page_cache);
 
 static int sync_page(void *word)
 {
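
The new ->freepage() callback lets a filesystem tear down per-page state once the page has been removed from the radix tree and tree_lock has been dropped. The function pointer has to be sampled up front because __remove_from_page_cache() clears page->mapping. A minimal sketch of a filesystem wiring this up; the myfs_* names and the page_private() payload are hypothetical:

	/* Hypothetical: release per-page metadata once the page has left
	 * the page cache and can no longer be found by new lookups. */
	static void myfs_freepage(struct page *page)
	{
		kfree((void *)page_private(page));	/* assumed payload */
	}

	static const struct address_space_operations myfs_aops = {
		.readpage	= myfs_readpage,	/* hypothetical */
		.freepage	= myfs_freepage,
	};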
@@ -441,7 +444,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
        /*
         * Splice_read and readahead add shmem/tmpfs pages into the page cache
         * before shmem_readpage has a chance to mark them as SwapBacked: they
-        * need to go on the active_anon lru below, and mem_cgroup_cache_charge
+        * need to go on the anon lru below, and mem_cgroup_cache_charge
         * (called in add_to_page_cache) needs to know where they're going too.
         */
        if (mapping_cap_swap_backed(mapping))
@@ -452,7 +455,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
                if (page_is_file_cache(page))
                        lru_cache_add_file(page);
                else
-                       lru_cache_add_active_anon(page);
+                       lru_cache_add_anon(page);
        }
        return ret;
 }
@@ -461,9 +464,15 @@ EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
 #ifdef CONFIG_NUMA
 struct page *__page_cache_alloc(gfp_t gfp)
 {
+       int n;
+       struct page *page;
+
        if (cpuset_do_page_mem_spread()) {
-               int n = cpuset_mem_spread_node();
-               return alloc_pages_exact_node(n, gfp, 0);
+               get_mems_allowed();
+               n = cpuset_mem_spread_node();
+               page = alloc_pages_exact_node(n, gfp, 0);
+               put_mems_allowed();
+               return page;
        }
        return alloc_pages(gfp, 0);
 }
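
Bracketing the node selection with get_mems_allowed()/put_mems_allowed() closes a race with cpuset rebinding: without it, current->mems_allowed could be rewritten between cpuset_mem_spread_node() and alloc_pages_exact_node(), so the allocation might target a node the task is no longer allowed to use. Every page-cache allocation inherits the fix through the pagemap.h wrapper, which (quoted from memory for this kernel generation, so treat it as a sketch) is roughly:

	static inline struct page *page_cache_alloc(struct address_space *x)
	{
		return __page_cache_alloc(mapping_gfp_mask(x));
	}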
@@ -605,6 +614,19 @@ void __lock_page_nosync(struct page *page)
                                                        TASK_UNINTERRUPTIBLE);
 }
 
+int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
+                        unsigned int flags)
+{
+       if (!(flags & FAULT_FLAG_ALLOW_RETRY)) {
+               __lock_page(page);
+               return 1;
+       } else {
+               up_read(&mm->mmap_sem);
+               wait_on_page_locked(page);
+               return 0;
+       }
+}
+
 /**
  * find_get_page - find and get a page reference
  * @mapping: the address_space to search
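
__lock_page_or_retry() is the slow path of a new helper: when the fault carries FAULT_FLAG_ALLOW_RETRY, the semaphore is dropped and we merely wait for the page to be unlocked instead of sleeping on the page lock with mmap_sem held. The companion inline from <linux/pagemap.h> in the same series looks roughly like this (reproduced from memory, a sketch rather than the verbatim header):

	/*
	 * Returns 1 with the page locked, or 0 after having dropped
	 * mm->mmap_sem and waited for the page lock to be released.
	 */
	static inline int lock_page_or_retry(struct page *page,
					     struct mm_struct *mm,
					     unsigned int flags)
	{
		might_sleep();
		return trylock_page(page) ||
			__lock_page_or_retry(page, mm, flags);
	}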
@@ -624,7 +646,9 @@ repeat:
        pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
        if (pagep) {
                page = radix_tree_deref_slot(pagep);
-               if (unlikely(!page || page == RADIX_TREE_RETRY))
+               if (unlikely(!page))
+                       goto out;
+               if (radix_tree_deref_retry(page))
                        goto repeat;
 
                if (!page_cache_get_speculative(page))
@@ -640,6 +664,7 @@ repeat:
                        goto repeat;
                }
        }
+out:
        rcu_read_unlock();
 
        return page;
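
radix_tree_deref_slot() can now return an indirect pointer while the tree is being grown or shrunk, so the old comparison against the RADIX_TREE_RETRY sentinel is replaced by a predicate on the indirect bit. For reference, the helper introduced alongside these hunks is roughly (again from memory):

	static inline int radix_tree_deref_retry(void *arg)
	{
		return unlikely((unsigned long)arg & RADIX_TREE_INDIRECT_PTR);
	}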
@@ -757,12 +782,11 @@ repeat:
                page = radix_tree_deref_slot((void **)pages[i]);
                if (unlikely(!page))
                        continue;
-               /*
-                * this can only trigger if nr_found == 1, making livelock
-                * a non issue.
-                */
-               if (unlikely(page == RADIX_TREE_RETRY))
+               if (radix_tree_deref_retry(page)) {
+                       if (ret)
+                               start = pages[ret-1]->index;
                        goto restart;
+               }
 
                if (!page_cache_get_speculative(page))
                        goto repeat;
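
The deleted comment justified a bare restart on the grounds that the old sentinel could only appear when nr_found == 1, i.e. before any page had been gotten. radix_tree_deref_retry() can in principle trigger partway through the scan, so the restart now conservatively resumes from the index of the last page already gotten instead of rescanning the whole range from the original start.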
@@ -810,11 +834,7 @@ repeat:
                page = radix_tree_deref_slot((void **)pages[i]);
                if (unlikely(!page))
                        continue;
-               /*
-                * this can only trigger if nr_found == 1, making livelock
-                * a non issue.
-                */
-               if (unlikely(page == RADIX_TREE_RETRY))
+               if (radix_tree_deref_retry(page))
                        goto restart;
 
                if (page->mapping == NULL || page->index != index)
@@ -867,11 +887,7 @@ repeat:
                page = radix_tree_deref_slot((void **)pages[i]);
                if (unlikely(!page))
                        continue;
-               /*
-                * this can only trigger if nr_found == 1, making livelock
-                * a non issue.
-                */
-               if (unlikely(page == RADIX_TREE_RETRY))
+               if (radix_tree_deref_retry(page))
                        goto restart;
 
                if (!page_cache_get_speculative(page))
@@ -1009,6 +1025,9 @@ find_page:
                                goto page_not_up_to_date;
                        if (!trylock_page(page))
                                goto page_not_up_to_date;
+                       /* Did it get truncated before we got the lock? */
+                       if (!page->mapping)
+                               goto page_not_up_to_date_locked;
                        if (!mapping->a_ops->is_partially_uptodate(page,
                                                                desc, offset))
                                goto page_not_up_to_date_locked;
@@ -1099,6 +1118,12 @@ page_not_up_to_date_locked:
                }
 
 readpage:
+               /*
+                * A previous I/O error may have been due to temporary
+                * failures, eg. multipath errors.
+                * PG_error will be set again if readpage fails.
+                */
+               ClearPageError(page);
                /* Start the actual read. The read will unlock the page. */
                error = mapping->a_ops->readpage(filp, page);
 
@@ -1526,25 +1551,30 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                 * waiting for the lock.
                 */
                do_async_mmap_readahead(vma, ra, file, page, offset);
-               lock_page(page);
-
-               /* Did it get truncated? */
-               if (unlikely(page->mapping != mapping)) {
-                       unlock_page(page);
-                       put_page(page);
-                       goto no_cached_page;
-               }
        } else {
                /* No page in the page cache at all */
                do_sync_mmap_readahead(vma, ra, file, offset);
                count_vm_event(PGMAJFAULT);
                ret = VM_FAULT_MAJOR;
 retry_find:
-               page = find_lock_page(mapping, offset);
+               page = find_get_page(mapping, offset);
                if (!page)
                        goto no_cached_page;
        }
 
+       if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
+               page_cache_release(page);
+               return ret | VM_FAULT_RETRY;
+       }
+
+       /* Did it get truncated? */
+       if (unlikely(page->mapping != mapping)) {
+               unlock_page(page);
+               put_page(page);
+               goto retry_find;
+       }
+       VM_BUG_ON(page->index != offset);
+
        /*
         * We have a locked page in the page cache, now we need to check
         * that it's up-to-date. If not, it is going to be due to an error.
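
filemap_fault() can now return with mmap_sem already dropped; VM_FAULT_RETRY tells the arch fault handler to retake the semaphore and repeat the fault once, this time without FAULT_FLAG_ALLOW_RETRY so the second attempt blocks in lock_page(). A condensed sketch of the caller-side pattern from the arch page-fault handlers of this series (details elided, names as in <linux/mm.h>):

	unsigned int flags = FAULT_FLAG_ALLOW_RETRY;
	int fault;

retry:
	down_read(&mm->mmap_sem);
	/* ... look up and validate the vma ... */
	fault = handle_mm_fault(mm, vma, address, flags);
	if (fault & VM_FAULT_RETRY) {
		/*
		 * mmap_sem was released inside filemap_fault();
		 * retry exactly once, now waiting for the page lock.
		 */
		flags &= ~FAULT_FLAG_ALLOW_RETRY;
		goto retry;
	}
	up_read(&mm->mmap_sem);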
@@ -2164,12 +2194,12 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
        }
 
        if (written > 0) {
-               loff_t end = pos + written;
-               if (end > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
-                       i_size_write(inode,  end);
+               pos += written;
+               if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
+                       i_size_write(inode, pos);
                        mark_inode_dirty(inode);
                }
-               *ppos = end;
+               *ppos = pos;
        }
 out:
        return written;
@@ -2225,14 +2255,12 @@ static ssize_t generic_perform_write(struct file *file,
 
        do {
                struct page *page;
-               pgoff_t index;          /* Pagecache index for current page */
                unsigned long offset;   /* Offset into pagecache page */
                unsigned long bytes;    /* Bytes to write to page */
                size_t copied;          /* Bytes copied from user */
                void *fsdata;
 
                offset = (pos & (PAGE_CACHE_SIZE - 1));
-               index = pos >> PAGE_CACHE_SHIFT;
                bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
                                                iov_iter_count(i));