- Update to 3.4-rc7.
[linux-flexiantxendom0-3.2.10.git] / mm / page_alloc.c
index 70bbd0f..e5a3966 100644 (file)
@@ -692,6 +692,13 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
        int i;
        int bad = 0;
 
+#ifdef CONFIG_XEN
+       if (PageForeign(page)) {
+               PageForeignDestructor(page, order);
+               return false;
+       }
+#endif
+
        trace_mm_page_free(page, order);
        kmemcheck_free_shadow(page, order);
 
@@ -718,6 +725,9 @@ static void __free_pages_ok(struct page *page, unsigned int order)
        unsigned long flags;
        int wasMlocked = __TestClearPageMlocked(page);
 
+#ifdef CONFIG_XEN
+       WARN_ON(PageForeign(page) && wasMlocked);
+#endif
        if (!free_pages_prepare(page, order))
                return;
 
@@ -1161,11 +1171,47 @@ void drain_local_pages(void *arg)
 }
 
 /*
- * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
+ *
+ * Note that this code is protected against sending an IPI to an offline
+ * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
+ * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
+ * nothing keeps CPUs from showing up after we populated the cpumask and
+ * before the call to on_each_cpu_mask().
  */
 void drain_all_pages(void)
 {
-       on_each_cpu(drain_local_pages, NULL, 1);
+       int cpu;
+       struct per_cpu_pageset *pcp;
+       struct zone *zone;
+
+       /*
+        * Allocate in the BSS so we won't require allocation in
+        * the direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
+        */
+       static cpumask_t cpus_with_pcps;
+
+       /*
+        * We don't care about racing with a CPU hotplug event
+        * as the offline notification will cause the notified
+        * cpu to drain that CPU's pcps, and on_each_cpu_mask
+        * disables preemption as part of its processing
+        */
+       for_each_online_cpu(cpu) {
+               bool has_pcps = false;
+               for_each_populated_zone(zone) {
+                       pcp = per_cpu_ptr(zone->pageset, cpu);
+                       if (pcp->pcp.count) {
+                               has_pcps = true;
+                               break;
+                       }
+               }
+               if (has_pcps)
+                       cpumask_set_cpu(cpu, &cpus_with_pcps);
+               else
+                       cpumask_clear_cpu(cpu, &cpus_with_pcps);
+       }
+       on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -1216,6 +1262,9 @@ void free_hot_cold_page(struct page *page, int cold)
        int migratetype;
        int wasMlocked = __TestClearPageMlocked(page);
 
+#ifdef CONFIG_XEN
+       WARN_ON(PageForeign(page) && wasMlocked);
+#endif
        if (!free_pages_prepare(page, 0))
                return;
 
@@ -1874,7 +1923,13 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
                va_end(args);
        }
 
-       pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n",
+       if (!(gfp_mask & __GFP_WAIT)) {
+               pr_info("The following is only an harmless informational message.\n");
+               pr_info("Unless you get a _continuous_flood_ of these messages it means\n");
+               pr_info("everything is working fine. Allocations from irqs cannot be\n");
+               pr_info("perfectly reliable and the kernel is designed to handle that.\n");
+       }
+       pr_info("%s: page allocation failure. order:%d, mode:0x%x\n",
                current->comm, order, gfp_mask);
 
        dump_stack();
@@ -2308,6 +2363,10 @@ rebalance:
                if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
                        if (oom_killer_disabled)
                                goto nopage;
+                       /* Coredumps can quickly deplete all memory reserves */
+                       if ((current->flags & PF_DUMPCORE) &&
+                           !(gfp_mask & __GFP_NOFAIL))
+                               goto nopage;
                        page = __alloc_pages_may_oom(gfp_mask, order,
                                        zonelist, high_zoneidx,
                                        nodemask, preferred_zone,
@@ -4524,7 +4583,7 @@ static unsigned long __init early_calculate_totalpages(void)
  * memory. When they don't, some nodes will have more kernelcore than
  * others
  */
-static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
+static void __init find_zone_movable_pfns_for_nodes(void)
 {
        int i, nid;
        unsigned long usable_startpfn;
@@ -4716,7 +4775,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
        /* Find the PFNs that ZONE_MOVABLE begins at in each node */
        memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
-       find_zone_movable_pfns_for_nodes(zone_movable_pfn);
+       find_zone_movable_pfns_for_nodes();
 
        /* Print out the zone ranges */
        printk("Zone PFN ranges:\n");
@@ -4994,6 +5053,22 @@ void setup_per_zone_wmarks(void)
                spin_unlock_irqrestore(&zone->lock, flags);
        }
 
+#ifdef CONFIG_XEN
+       for_each_populated_zone(zone) {
+               unsigned int cpu;
+
+               for_each_online_cpu(cpu) {
+                       unsigned long high;
+
+                       high = percpu_pagelist_fraction
+                              ? zone->present_pages / percpu_pagelist_fraction
+                              : 5 * zone_batchsize(zone);
+                       setup_pagelist_highmark(
+                               per_cpu_ptr(zone->pageset, cpu), high);
+               }
+       }
+#endif
+
        /* update totalreserve_pages */
        calculate_totalreserve_pages();
 }
@@ -5163,7 +5238,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
        int ret;
 
        ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
-       if (!write || (ret == -EINVAL))
+       if (!write || (ret < 0))
                return ret;
        for_each_populated_zone(zone) {
                for_each_possible_cpu(cpu) {