Update to 3.4-final.

[linux-flexiantxendom0-3.2.10.git] / mm / slab.c
diff --git a/mm/slab.c b/mm/slab.c

index 8270ba3..e901a36 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,6 @@
  #include       <linux/cpu.h>
  #include       <linux/sysctl.h>
  #include       <linux/module.h>
-#include       <linux/kmemtrace.h>
  #include       <linux/rcupdate.h>
  #include       <linux/string.h>
  #include       <linux/uaccess.h>
@@ -116,11 +115,14 @@
  #include       <linux/debugobjects.h>
  #include       <linux/kmemcheck.h>
  #include       <linux/memory.h>
+#include       <linux/prefetch.h>
  
  #include       <asm/cacheflush.h>
  #include       <asm/tlbflush.h>
  #include       <asm/page.h>
  
+#include <trace/events/kmem.h>
+
  /*
   * DEBUG       - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
   *               0 for faster, smaller code (especially in the critical paths).
@@ -192,22 +194,6 @@ typedef unsigned int kmem_bufctl_t;
  #define        SLAB_LIMIT      (((kmem_bufctl_t)(~0U))-3)
  
  /*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
-       struct list_head list;
-       unsigned long colouroff;
-       void *s_mem;            /* including colour offset */
-       unsigned int inuse;     /* num of objs active in slab */
-       kmem_bufctl_t free;
-       unsigned short nodeid;
-};
-
-/*
   * struct slab_rcu
   *
   * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
@@ -220,8 +206,6 @@ struct slab {
   *
   * rcu_read_lock before reading the address, then rcu_read_unlock after
   * taking the spinlock within the structure expected at that address.
- *
- * We assume struct slab_rcu can overlay struct slab when destroying.
   */
  struct slab_rcu {
         struct rcu_head head;
@@ -230,6 +214,27 @@ struct slab_rcu {
  };
  
  /*
+ * struct slab
+ *
+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
+ * for a slab, or allocated from a general cache.
+ * Slabs are chained into three lists: fully used, partial, fully free slabs.
+ */
+struct slab {
+       union {                 /* overlays the live slab fields with struct slab_rcu */
+               struct {
+                       struct list_head list;  /* chains the slab on a full/partial/free list */
+                       unsigned long colouroff;
+                       void *s_mem;            /* including colour offset */
+                       unsigned int inuse;     /* num of objs active in slab */
+                       kmem_bufctl_t free;
+                       unsigned short nodeid;
+               };
+               struct slab_rcu __slab_cover_slab_rcu;  /* union member guarantees sizeof(struct slab) >= sizeof(struct slab_rcu) */
+       };
+};
+
+/*
   * struct array_cache
   *
   * Purpose:
@@ -285,7 +290,7 @@ struct kmem_list3 {
   * Need this for bootstrapping a per node allocator.
   */
  #define NUM_INIT_LISTS (3 * MAX_NUMNODES)
-struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
+static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
  #define        CACHE_CACHE 0
  #define        SIZE_AC MAX_NUMNODES
  #define        SIZE_L3 (2 * MAX_NUMNODES)
@@ -395,7 +400,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
  #define        STATS_DEC_ACTIVE(x)     do { } while (0)
  #define        STATS_INC_ALLOCED(x)    do { } while (0)
  #define        STATS_INC_GROWN(x)      do { } while (0)
-#define        STATS_ADD_REAPED(x,y)   do { } while (0)
+#define        STATS_ADD_REAPED(x,y)   do { (void)(y); } while (0)
  #define        STATS_SET_HIGH(x)       do { } while (0)
  #define        STATS_INC_ERR(x)        do { } while (0)
  #define        STATS_INC_NODEALLOCS(x) do { } while (0)
@@ -476,11 +481,13 @@ EXPORT_SYMBOL(slab_buffer_size);
  #endif
  
  /*
- * Do not go above this order unless 0 objects fit into the slab.
+ * Do not go above this order unless 0 objects fit into the slab or
+ * overridden on the command line.
   */
-#define        BREAK_GFP_ORDER_HI      1
-#define        BREAK_GFP_ORDER_LO      0
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+#define        SLAB_MAX_ORDER_HI       1
+#define        SLAB_MAX_ORDER_LO       0
+static int slab_max_order = SLAB_MAX_ORDER_LO;
+static bool slab_max_order_set __initdata;
  
  /*
   * Functions for storing/retrieving the cachep and or slab from the page
@@ -571,7 +578,9 @@ static struct arraycache_init initarray_generic =
      { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
  
  /* internal cache of cache description objs */
+static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
  static struct kmem_cache cache_cache = {
+       .nodelists = cache_cache_nodelists,
         .batchcount = 1,
         .limit = BOOT_CPUCACHE_ENTRIES,
         .shared = 1,
@@ -590,6 +599,7 @@ static enum {
         PARTIAL_AC,
         PARTIAL_L3,
         EARLY,
+       LATE,
         FULL
  } g_cpucache_up;
  
@@ -617,37 +627,67 @@ int slab_is_available(void)
  static struct lock_class_key on_slab_l3_key;
  static struct lock_class_key on_slab_alc_key;
  
+static struct lock_class_key debugobj_l3_key;
+static struct lock_class_key debugobj_alc_key;
+
+static void slab_set_lock_classes(struct kmem_cache *cachep,
+               struct lock_class_key *l3_key, struct lock_class_key *alc_key,
+               int q)          /* q: index into cachep->nodelists[] */
+{      /* Tag node q's list_lock with l3_key and each of its alien-cache locks with alc_key. */
+       struct array_cache **alc;
+       struct kmem_list3 *l3;
+       int r;
+
+       l3 = cachep->nodelists[q];
+       if (!l3)                /* no list3 for this node: nothing to annotate */
+               return;
+
+       lockdep_set_class(&l3->list_lock, l3_key);
+       alc = l3->alien;
+       /*
+        * FIXME: This check for BAD_ALIEN_MAGIC
+        * should go away when common slab code is taught to
+        * work even without alien caches.
+        * Currently, non-NUMA code returns BAD_ALIEN_MAGIC
+        * from alloc_alien_cache.
+        */
+       if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+               return;
+       for_each_node(r) {      /* r indexes the alien array; NULL slots are skipped */
+               if (alc[r])
+                       lockdep_set_class(&alc[r]->lock, alc_key);
+       }
+}
+
+static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
+{      /* Apply the debugobj_l3_key / debugobj_alc_key lock classes to one node of cachep. */
+       slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
+}
+
+static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
+{      /* Apply the debugobj lock classes to cachep on every online node. */
+       int node;
+
+       for_each_online_node(node)
+               slab_set_debugobj_lock_classes_node(cachep, node);
+}
+
  static void init_node_lock_keys(int q)
  {
         struct cache_sizes *s = malloc_sizes;
  
-       if (g_cpucache_up != FULL)
+       if (g_cpucache_up < LATE)
                 return;
  
         for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
-               struct array_cache **alc;
                 struct kmem_list3 *l3;
-               int r;
  
                 l3 = s->cs_cachep->nodelists[q];
                 if (!l3 || OFF_SLAB(s->cs_cachep))
                         continue;
-               lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
-               alc = l3->alien;
-               /*
-                * FIXME: This check for BAD_ALIEN_MAGIC
-                * should go away when common slab code is taught to
-                * work even without alien caches.
-                * Currently, non NUMA code returns BAD_ALIEN_MAGIC
-                * for alloc_alien_cache,
-                */
-               if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
-                       continue;
-               for_each_node(r) {
-                       if (alc[r])
-                               lockdep_set_class(&alc[r]->lock,
-                                       &on_slab_alc_key);
-               }
+
+               slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key,
+                               &on_slab_alc_key, q);
         }
  }
  
@@ -666,6 +706,14 @@ static void init_node_lock_keys(int q)
  static inline void init_lock_keys(void)
  {
  }
+
+static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
+{      /* Empty stub; the preprocessor condition that selects it is outside this hunk. */
+}
+
+static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
+{      /* Empty stub; the preprocessor condition that selects it is outside this hunk. */
+}
  #endif
  
  /*
@@ -808,6 +856,17 @@ static int __init noaliencache_setup(char *s)
  }
  __setup("noaliencache", noaliencache_setup);
  
+static int __init slab_max_order_setup(char *str)
+{      /* Parse "slab_max_order=<n>"; a parsed n is clamped to [0, MAX_ORDER - 1]. */
+       if (get_option(&str, &slab_max_order)) {        /* 0 means no integer was parsed */
+               slab_max_order = slab_max_order < 0 ? 0 :
+                               min(slab_max_order, MAX_ORDER - 1);
+               slab_max_order_set = true;      /* makes kmem_cache_init() skip its >32MB default */
+       }
+       return 1;
+}
+__setup("slab_max_order=", slab_max_order_setup);
+
  #ifdef CONFIG_NUMA
  /*
   * Special reaping functions for NUMA systems called from cache_reap().
@@ -821,7 +880,7 @@ static void init_reap_node(int cpu)
  {
         int node;
  
-       node = next_node(cpu_to_node(cpu), node_online_map);
+       node = next_node(cpu_to_mem(cpu), node_online_map);
         if (node == MAX_NUMNODES)
                 node = first_node(node_online_map);
  
@@ -830,12 +889,12 @@ static void init_reap_node(int cpu)
  
  static void next_reap_node(void)
  {
-       int node = __get_cpu_var(slab_reap_node);
+       int node = __this_cpu_read(slab_reap_node);
  
         node = next_node(node, node_online_map);
         if (unlikely(node >= MAX_NUMNODES))
                 node = first_node(node_online_map);
-       __get_cpu_var(slab_reap_node) = node;
+       __this_cpu_write(slab_reap_node, node);
  }
  
  #else
@@ -861,7 +920,7 @@ static void __cpuinit start_cpu_timer(int cpu)
          */
         if (keventd_up() && reap_work->work.func == NULL) {
                 init_reap_node(cpu);
-               INIT_DELAYED_WORK(reap_work, cache_reap);
+               INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
                 schedule_delayed_work_on(cpu, reap_work,
                                         __round_jiffies_relative(HZ, cpu));
         }
@@ -876,7 +935,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
         nc = kmalloc_node(memsize, gfp, node);
         /*
          * The array_cache structures contain pointers to free object.
-        * However, when such objects are allocated or transfered to another
+        * However, when such objects are allocated or transferred to another
          * cache the pointers are not cleared and they could be counted as
          * valid references during a kmemleak scan. Therefore, kmemleak must
          * not scan such objects.
@@ -902,7 +961,7 @@ static int transfer_objects(struct array_cache *to,
                 struct array_cache *from, unsigned int max)
  {
         /* Figure out how many entries to transfer */
-       int nr = min(min(from->avail, max), to->limit - to->avail);
+       int nr = min3(from->avail, max, to->limit - to->avail);
  
         if (!nr)
                 return 0;
@@ -1013,7 +1072,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
   */
  static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
  {
-       int node = __get_cpu_var(slab_reap_node);
+       int node = __this_cpu_read(slab_reap_node);
  
         if (l3->alien) {
                 struct array_cache *ac = l3->alien[node];
@@ -1050,7 +1109,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
         struct array_cache *alien = NULL;
         int node;
  
-       node = numa_node_id();
+       node = numa_mem_id();
  
         /*
          * Make sure we are not freeing a object from another node to the array
@@ -1129,7 +1188,7 @@ static void __cpuinit cpuup_canceled(long cpu)
  {
         struct kmem_cache *cachep;
         struct kmem_list3 *l3 = NULL;
-       int node = cpu_to_node(cpu);
+       int node = cpu_to_mem(cpu);
         const struct cpumask *mask = cpumask_of_node(node);
  
         list_for_each_entry(cachep, &cache_chain, next) {
@@ -1194,7 +1253,7 @@ static int __cpuinit cpuup_prepare(long cpu)
  {
         struct kmem_cache *cachep;
         struct kmem_list3 *l3 = NULL;
-       int node = cpu_to_node(cpu);
+       int node = cpu_to_mem(cpu);
         int err;
  
         /*
@@ -1259,6 +1318,8 @@ static int __cpuinit cpuup_prepare(long cpu)
                 spin_unlock_irq(&l3->list_lock);
                 kfree(shared);
                 free_alien_cache(alien);
+               if (cachep->flags & SLAB_DEBUG_OBJECTS)
+                       slab_set_debugobj_lock_classes_node(cachep, node);
         }
         init_node_lock_keys(node);
  
@@ -1294,7 +1355,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
                  * anything expensive but will only modify reap_work
                  * and reschedule the timer.
                 */
-               cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu));
+               cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
                 /* Now the cache_reaper is guaranteed to be not running. */
                 per_cpu(slab_reap_work, cpu).work.func = NULL;
                 break;
@@ -1321,7 +1382,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
                 mutex_unlock(&cache_chain_mutex);
                 break;
         }
-       return err ? NOTIFY_BAD : NOTIFY_OK;
+       return notifier_from_errno(err);
  }
  
  static struct notifier_block __cpuinitdata cpucache_notifier = {
@@ -1388,7 +1449,7 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
                 break;
         }
  out:
-       return ret ? notifier_from_errno(ret) : NOTIFY_OK;
+       return notifier_from_errno(ret);
  }
  #endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
  
@@ -1454,10 +1515,11 @@ void __init kmem_cache_init(void)
  
         /*
          * Fragmentation resistance on low memory - only use bigger
-        * page orders on machines with more than 32MB of memory.
+        * page orders on machines with more than 32MB of memory if
+        * not overridden on the command line.
          */
-       if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
-               slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+       if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+               slab_max_order = SLAB_MAX_ORDER_HI;
  
         /* Bootstrap is tricky, because several objects are allocated
          * from caches that do not exist yet:
@@ -1479,7 +1541,7 @@ void __init kmem_cache_init(void)
          * 6) Resize the head arrays of the kmalloc caches to their final sizes.
          */
  
-       node = numa_node_id();
+       node = numa_mem_id();
  
         /* 1) create the cache_cache */
         INIT_LIST_HEAD(&cache_chain);
@@ -1489,11 +1551,10 @@ void __init kmem_cache_init(void)
         cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
  
         /*
-        * struct kmem_cache size depends on nr_node_ids, which
-        * can be less than MAX_NUMNODES.
+        * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
          */
-       cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
-                                nr_node_ids * sizeof(struct kmem_list3 *);
+       cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+                                 nr_node_ids * sizeof(struct kmem_list3 *);
  #if DEBUG
         cache_cache.obj_size = cache_cache.buffer_size;
  #endif
@@ -1622,6 +1683,11 @@ void __init kmem_cache_init_late(void)
  {
         struct kmem_cache *cachep;
  
+       g_cpucache_up = LATE;
+
+       /* Annotate slab for lockdep -- annotate the malloc caches */
+       init_lock_keys();
+
         /* 6) resize the head arrays to their final sizes */
         mutex_lock(&cache_chain_mutex);
         list_for_each_entry(cachep, &cache_chain, next)
@@ -1632,9 +1698,6 @@ void __init kmem_cache_init_late(void)
         /* Done! */
         g_cpucache_up = FULL;
  
-       /* Annotate slab for lockdep -- annotate the malloc caches */
-       init_lock_keys();
-
         /*
          * Register a cpu startup notifier callback that initializes
          * cpu_cache_get for all new cpus
@@ -1668,6 +1731,52 @@ static int __init cpucache_init(void)
  }
  __initcall(cpucache_init);
  
+static noinline void
+slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
+{      /* Log a failed page allocation for cachep plus per-node slab/object counts. */
+       struct kmem_list3 *l3;
+       struct slab *slabp;
+       unsigned long flags;
+       int node;
+
+       printk(KERN_WARNING
+               "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
+               nodeid, gfpflags);
+       printk(KERN_WARNING "  cache: %s, object size: %d, order: %d\n",
+               cachep->name, cachep->buffer_size, cachep->gfporder);
+
+       for_each_online_node(node) {
+               unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
+               unsigned long active_slabs = 0, num_slabs = 0;
+
+               l3 = cachep->nodelists[node];
+               if (!l3)        /* this cache has no lists on that node */
+                       continue;
+
+               spin_lock_irqsave(&l3->list_lock, flags);       /* lists are walked under list_lock */
+               list_for_each_entry(slabp, &l3->slabs_full, list) {
+                       active_objs += cachep->num;     /* a full slab has all num objects in use */
+                       active_slabs++;
+               }
+               list_for_each_entry(slabp, &l3->slabs_partial, list) {
+                       active_objs += slabp->inuse;
+                       active_slabs++;
+               }
+               list_for_each_entry(slabp, &l3->slabs_free, list)
+                       num_slabs++;    /* free slabs only; active ones are added below */
+
+               free_objects += l3->free_objects;
+               spin_unlock_irqrestore(&l3->list_lock, flags);
+
+               num_slabs += active_slabs;
+               num_objs = num_slabs * cachep->num;
+               printk(KERN_WARNING     /* counters are unsigned long, hence %lu */
+                       "  node %d: slabs: %lu/%lu, objs: %lu/%lu, free: %lu\n",
+                       node, active_slabs, num_slabs, active_objs, num_objs,
+                       free_objects);
+       }
+}
+
  /*
   * Interface to system's page allocator. No need to hold the cache-lock.
   *
@@ -1694,8 +1803,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                 flags |= __GFP_RECLAIMABLE;
  
         page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
-       if (!page)
+       if (!page) {
+               if (!(flags & __GFP_NOWARN) && printk_ratelimit())
+                       slab_out_of_memory(cachep, flags, nodeid);
                 return NULL;
+       }
  
         nr_pages = (1 << cachep->gfporder);
         if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1807,15 +1919,15 @@ static void dump_line(char *data, int offset, int limit)
         unsigned char error = 0;
         int bad_count = 0;
  
-       printk(KERN_ERR "%03x:", offset);
+       printk(KERN_ERR "%03x: ", offset);
         for (i = 0; i < limit; i++) {
                 if (data[offset + i] != POISON_FREE) {
                         error = data[offset + i];
                         bad_count++;
                 }
-               printk(" %02x", (unsigned char)data[offset + i]);
         }
-       printk("\n");
+       print_hex_dump(KERN_CONT, "", 0, 16, 1,
+                       &data[offset], limit, 1);
  
         if (bad_count == 1) {
                 error ^= POISON_FREE;
@@ -1883,8 +1995,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
                         /* Print header */
                         if (lines == 0) {
                                 printk(KERN_ERR
-                                       "Slab corruption: %s start=%p, len=%d\n",
-                                       cachep->name, realobj, size);
+                                       "Slab corruption (%s): %s start=%p, len=%d\n",
+                                       print_tainted(), cachep->name, realobj, size);
                                 print_objinfo(cachep, objp, 0);
                         }
                         /* Hexdump the affected line */
@@ -2068,7 +2180,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
                  * Large number of objects is good, but very large slabs are
                  * currently bad for the gfp()s.
                  */
-               if (gfporder >= slab_break_gfp_order)
+               if (gfporder >= slab_max_order)
                         break;
  
                 /*
@@ -2121,7 +2233,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
                         }
                 }
         }
-       cachep->nodelists[numa_node_id()]->next_reap =
+       cachep->nodelists[numa_mem_id()]->next_reap =
                         jiffies + REAPTIMEOUT_LIST3 +
                         ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
  
@@ -2148,8 +2260,6 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
   *
   * @name must be valid until the cache is destroyed. This implies that
   * the module calling this has to destroy the cache before getting unloaded.
- * Note that kmem_cache_name() is not guaranteed to return the same pointer,
- * therefore applications must manage it themselves.
   *
   * The flags are
   *
@@ -2289,8 +2399,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         if (ralign < align) {
                 ralign = align;
         }
-       /* disable debug if not aligning with REDZONE_ALIGN */
-       if (ralign & (__alignof__(unsigned long long) - 1))
+       /* disable debug if necessary */
+       if (ralign > __alignof__(unsigned long long))
                 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
         /*
          * 4) Store it.
@@ -2307,6 +2417,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         if (!cachep)
                 goto oops;
  
+       cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
  #if DEBUG
         cachep->obj_size = size;
  
@@ -2316,8 +2427,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
          */
         if (flags & SLAB_RED_ZONE) {
                 /* add space for red zone words */
-               cachep->obj_offset += align;
-               size += align + sizeof(unsigned long long);
+               cachep->obj_offset += sizeof(unsigned long long);
+               size += 2 * sizeof(unsigned long long);
         }
         if (flags & SLAB_STORE_USER) {
                 /* user store requires one word storage behind the end of
@@ -2331,8 +2442,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         }
  #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
         if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
-           && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
-               cachep->obj_offset += PAGE_SIZE - size;
+           && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
+               cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
                 size = PAGE_SIZE;
         }
  #endif
@@ -2423,6 +2534,16 @@ kmem_cache_create (const char *name, size_t size, size_t align,
                 goto oops;
         }
  
+       if (flags & SLAB_DEBUG_OBJECTS) {
+               /*
+                * Would deadlock through slab_destroy()->call_rcu()->
+                * debug_object_activate()->kmem_cache_alloc().
+                */
+               WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
+
+               slab_set_debugobj_lock_classes(cachep);
+       }
+
         /* cache setup completed, link it into the list */
         list_add(&cachep->next, &cache_chain);
  oops:
@@ -2452,7 +2573,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
  {
  #ifdef CONFIG_SMP
         check_irq_off();
-       assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock);
+       assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock);
  #endif
  }
  
@@ -2479,7 +2600,7 @@ static void do_drain(void *arg)
  {
         struct kmem_cache *cachep = arg;
         struct array_cache *ac;
-       int node = numa_node_id();
+       int node = numa_mem_id();
  
         check_irq_off();
         ac = cpu_cache_get(cachep);
@@ -2606,7 +2727,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
   *
   * The cache must be empty before calling this function.
   *
- * The caller must guarantee that noone will allocate memory from the cache
+ * The caller must guarantee that no one will allocate memory from the cache
   * during the kmem_cache_destroy().
   */
  void kmem_cache_destroy(struct kmem_cache *cachep)
@@ -2782,7 +2903,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
  /*
   * Map pages beginning at addr to the given cache and slab. This is required
   * for the slab allocator to be able to lookup the cache and slab of a
- * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.
+ * virtual address for kfree, ksize, and slab debugging.
   */
  static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
                            void *addr)
@@ -2984,16 +3105,12 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
         if (entries != cachep->num - slabp->inuse) {
  bad:
                 printk(KERN_ERR "slab: Internal list corruption detected in "
-                               "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
-                       cachep->name, cachep->num, slabp, slabp->inuse);
-               for (i = 0;
-                    i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
-                    i++) {
-                       if (i % 16 == 0)
-                               printk("\n%03x:", i);
-                       printk(" %02x", ((unsigned char *)slabp)[i]);
-               }
-               printk("\n");
+                       "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
+                       cachep->name, cachep->num, slabp, slabp->inuse,
+                       print_tainted());
+               print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
+                       sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
+                       1);
                 BUG();
         }
  }
@@ -3012,7 +3129,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
  
  retry:
         check_irq_off();
-       node = numa_node_id();
+       node = numa_mem_id();
         ac = cpu_cache_get(cachep);
         batchcount = ac->batchcount;
         if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3152,12 +3269,11 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
         objp += obj_offset(cachep);
         if (cachep->ctor && cachep->flags & SLAB_POISON)
                 cachep->ctor(objp);
-#if ARCH_SLAB_MINALIGN
-       if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
+       if (ARCH_SLAB_MINALIGN &&
+           ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
                 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
-                      objp, ARCH_SLAB_MINALIGN);
+                      objp, (int)ARCH_SLAB_MINALIGN);
         }
-#endif
         return objp;
  }
  #else
@@ -3216,13 +3332,11 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
  
         if (in_interrupt() || (flags & __GFP_THISNODE))
                 return NULL;
-       nid_alloc = nid_here = numa_node_id();
-       get_mems_allowed();
+       nid_alloc = nid_here = numa_mem_id();
         if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
                 nid_alloc = cpuset_slab_spread_node();
         else if (current->mempolicy)
                 nid_alloc = slab_node(current->mempolicy);
-       put_mems_allowed();
         if (nid_alloc != nid_here)
                 return ____cache_alloc_node(cachep, flags, nid_alloc);
         return NULL;
@@ -3245,14 +3359,17 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
         enum zone_type high_zoneidx = gfp_zone(flags);
         void *obj = NULL;
         int nid;
+       unsigned int cpuset_mems_cookie;
  
         if (flags & __GFP_THISNODE)
                 return NULL;
  
-       get_mems_allowed();
-       zonelist = node_zonelist(slab_node(current->mempolicy), flags);
         local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
  
+retry_cpuset:
+       cpuset_mems_cookie = get_mems_allowed();
+       zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+
  retry:
         /*
          * Look through allowed nodes for objects available
@@ -3281,7 +3398,7 @@ retry:
                 if (local_flags & __GFP_WAIT)
                         local_irq_enable();
                 kmem_flagcheck(cache, flags);
-               obj = kmem_getpages(cache, local_flags, numa_node_id());
+               obj = kmem_getpages(cache, local_flags, numa_mem_id());
                 if (local_flags & __GFP_WAIT)
                         local_irq_disable();
                 if (obj) {
@@ -3305,7 +3422,9 @@ retry:
                         }
                 }
         }
-       put_mems_allowed();
+
+       if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj))
+               goto retry_cpuset;
         return obj;
  }
  
@@ -3389,6 +3508,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
  {
         unsigned long save_flags;
         void *ptr;
+       int slab_node = numa_mem_id();
  
         flags &= gfp_allowed_mask;
  
@@ -3400,8 +3520,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
         cache_alloc_debugcheck_before(cachep, flags);
         local_irq_save(save_flags);
  
-       if (nodeid == -1)
-               nodeid = numa_node_id();
+       if (nodeid == NUMA_NO_NODE)
+               nodeid = slab_node;
  
         if (unlikely(!cachep->nodelists[nodeid])) {
                 /* Node not bootstrapped yet */
@@ -3409,7 +3529,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
                 goto out;
         }
  
-       if (nodeid == numa_node_id()) {
+       if (nodeid == slab_node) {
                 /*
                  * Use the locally cached objects if possible.
                  * However ____cache_alloc does not allow fallback
@@ -3453,8 +3573,8 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
          * We may just have run out of memory on the local node.
          * ____cache_alloc_node() knows how to locate memory on other nodes
          */
-       if (!objp)
-               objp = ____cache_alloc_node(cache, flags, numa_node_id());
+       if (!objp)
+               objp = ____cache_alloc_node(cache, flags, numa_mem_id());
  
    out:
         return objp;
@@ -3551,7 +3671,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
  {
         int batchcount;
         struct kmem_list3 *l3;
-       int node = numa_node_id();
+       int node = numa_mem_id();
  
         batchcount = ac->batchcount;
  #if DEBUG
@@ -3602,13 +3722,14 @@ free_done:
   * Release an obj back to its cache. If the obj has a constructed state, it must
   * be in this state _before_ it is released.  Called with disabled ints.
   */
-static inline void __cache_free(struct kmem_cache *cachep, void *objp)
+static inline void __cache_free(struct kmem_cache *cachep, void *objp,
+    void *caller)
  {
         struct array_cache *ac = cpu_cache_get(cachep);
  
         check_irq_off();
         kmemleak_free_recursive(objp, cachep->flags);
-       objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
+       objp = cache_free_debugcheck(cachep, objp, caller);
  
         kmemcheck_slab_free(cachep, objp, obj_size(cachep));
  
@@ -3624,13 +3745,12 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
  
         if (likely(ac->avail < ac->limit)) {
                 STATS_INC_FREEHIT(cachep);
-               ac->entry[ac->avail++] = objp;
-               return;
         } else {
                 STATS_INC_FREEMISS(cachep);
                 cache_flusharray(cachep, ac);
-               ac->entry[ac->avail++] = objp;
         }
+
+       ac->entry[ac->avail++] = objp;
  }
  
  /**
@@ -3653,42 +3773,19 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
  EXPORT_SYMBOL(kmem_cache_alloc);
  
  #ifdef CONFIG_TRACING
-void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+void *
+kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags)
  {
-       return __cache_alloc(cachep, flags, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(kmem_cache_alloc_notrace);
-#endif
+       void *ret;
  
-/**
- * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
- * @cachep: the cache we're checking against
- * @ptr: pointer to validate
- *
- * This verifies that the untrusted pointer looks sane;
- * it is _not_ a guarantee that the pointer is actually
- * part of the slab cache in question, but it at least
- * validates that the pointer can be dereferenced and
- * looks half-way sane.
- *
- * Currently only used for dentry validation.
- */
-int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
-{
-       unsigned long size = cachep->buffer_size;
-       struct page *page;
+       ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
  
-       if (unlikely(!kern_ptr_validate(ptr, size)))
-               goto out;
-       page = virt_to_page(ptr);
-       if (unlikely(!PageSlab(page)))
-               goto out;
-       if (unlikely(page_get_cache(page) != cachep))
-               goto out;
-       return 1;
-out:
-       return 0;
+       trace_kmalloc(_RET_IP_, ret,
+                     size, slab_buffer_size(cachep), flags);
+       return ret;
  }
+EXPORT_SYMBOL(kmem_cache_alloc_trace);
+#endif
  
  #ifdef CONFIG_NUMA
  void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
@@ -3705,31 +3802,32 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
  EXPORT_SYMBOL(kmem_cache_alloc_node);
  
  #ifdef CONFIG_TRACING
-void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
-                                   gfp_t flags,
-                                   int nodeid)
+void *kmem_cache_alloc_node_trace(size_t size,
+                                 struct kmem_cache *cachep,
+                                 gfp_t flags,
+                                 int nodeid)
  {
-       return __cache_alloc_node(cachep, flags, nodeid,
+       void *ret;
+
+       ret = __cache_alloc_node(cachep, flags, nodeid,
                                   __builtin_return_address(0));
+       trace_kmalloc_node(_RET_IP_, ret,
+                          size, slab_buffer_size(cachep),
+                          flags, nodeid);
+       return ret;
  }
-EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
  #endif
  
  static __always_inline void *
  __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
  {
         struct kmem_cache *cachep;
-       void *ret;
  
         cachep = kmem_find_general_cachep(size, flags);
         if (unlikely(ZERO_OR_NULL_PTR(cachep)))
                 return cachep;
-       ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
-
-       trace_kmalloc_node((unsigned long) caller, ret,
-                          size, cachep->buffer_size, flags, node);
-
-       return ret;
+       return kmem_cache_alloc_node_trace(size, cachep, flags, node);
  }
  
  #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
@@ -3821,7 +3919,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
         debug_check_no_locks_freed(objp, obj_size(cachep));
         if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
                 debug_check_no_obj_freed(objp, obj_size(cachep));
-       __cache_free(cachep, objp);
+       __cache_free(cachep, objp, __builtin_return_address(0));
         local_irq_restore(flags);
  
         trace_kmem_cache_free(_RET_IP_, objp);
@@ -3851,7 +3949,7 @@ void kfree(const void *objp)
         c = virt_to_cache(objp);
         debug_check_no_locks_freed(objp, obj_size(c));
         debug_check_no_obj_freed(objp, obj_size(c));
-       __cache_free(c, (void *)objp);
+       __cache_free(c, (void *)objp, __builtin_return_address(0));
         local_irq_restore(flags);
  }
  EXPORT_SYMBOL(kfree);
@@ -3862,12 +3960,6 @@ unsigned int kmem_cache_size(struct kmem_cache *cachep)
  }
  EXPORT_SYMBOL(kmem_cache_size);
  
-const char *kmem_cache_name(struct kmem_cache *cachep)
-{
-       return cachep->name;
-}
-EXPORT_SYMBOL_GPL(kmem_cache_name);
-
  /*
   * This initializes kmem_list3 or resizes various caches for all nodes.
   */
@@ -3958,7 +4050,7 @@ fail:
  
  struct ccupdate_struct {
         struct kmem_cache *cachep;
-       struct array_cache *new[NR_CPUS];
+       struct array_cache *new[0];
  };
  
  static void do_ccupdate_local(void *info)
@@ -3980,12 +4072,13 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
         struct ccupdate_struct *new;
         int i;
  
-       new = kzalloc(sizeof(*new), gfp);
+       new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
+                     gfp);
         if (!new)
                 return -ENOMEM;
  
         for_each_online_cpu(i) {
-               new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
+               new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
                                                 batchcount, gfp);
                 if (!new->new[i]) {
                         for (i--; i >= 0; i--)
@@ -4007,9 +4100,9 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
                 struct array_cache *ccold = new->new[i];
                 if (!ccold)
                         continue;
-               spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
-               free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
-               spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
+               spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
+               free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
+               spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
                 kfree(ccold);
         }
         kfree(new);
@@ -4075,7 +4168,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
   * necessary. Note that the l3 listlock also protects the array_cache
   * if drain_array() is used on the shared array.
   */
-void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
+static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
                          struct array_cache *ac, int force, int node)
  {
         int tofree;
@@ -4115,7 +4208,7 @@ static void cache_reap(struct work_struct *w)
  {
         struct kmem_cache *searchp;
         struct kmem_list3 *l3;
-       int node = numa_node_id();
+       int node = numa_mem_id();
         struct delayed_work *work = to_delayed_work(w);
  
         if (!mutex_trylock(&cache_chain_mutex))
@@ -4339,7 +4432,7 @@ static const struct seq_operations slabinfo_op = {
   * @count: data length
   * @ppos: unused
   */
-ssize_t slabinfo_write(struct file *file, const char __user * buffer,
+static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
                        size_t count, loff_t *ppos)
  {
         char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
@@ -4557,7 +4650,7 @@ static const struct file_operations proc_slabstats_operations = {
  
  static int __init slab_proc_init(void)
  {
-       proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
+       proc_create("slabinfo",S_IWUSR|S_IRUSR,NULL,&proc_slabinfo_operations);
  #ifdef CONFIG_DEBUG_SLAB_LEAK
         proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
  #endif