tracing: Fix build breakage without CONFIG_PERF_EVENTS (again)

[linux-flexiantxendom0-3.2.10.git] / kernel / cpuset.c
diff --git a/kernel/cpuset.c b/kernel/cpuset.c

index 1a10978..14f7070 100644 (file)
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -37,7 +37,7 @@
  #include <linux/mempolicy.h>
  #include <linux/mm.h>
  #include <linux/memory.h>
-#include <linux/module.h>
+#include <linux/export.h>
  #include <linux/mount.h>
  #include <linux/namei.h>
  #include <linux/pagemap.h>
@@ -55,7 +55,7 @@
  #include <linux/sort.h>
  
  #include <asm/uaccess.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
  #include <linux/mutex.h>
  #include <linux/workqueue.h>
  #include <linux/cgroup.h>
@@ -123,6 +123,19 @@ static inline struct cpuset *task_cs(struct task_struct *task)
                             struct cpuset, css);
  }
  
+#ifdef CONFIG_NUMA
+static inline bool task_has_mempolicy(struct task_struct *task)
+{
+       return task->mempolicy;
+}
+#else
+static inline bool task_has_mempolicy(struct task_struct *task)
+{
+       return false;
+}
+#endif
+
+
  /* bits in struct cpuset flags field */
  typedef enum {
         CS_CPU_EXCLUSIVE,
@@ -231,18 +244,17 @@ static DEFINE_SPINLOCK(cpuset_buffer_lock);
   * users. If someone tries to mount the "cpuset" filesystem, we
   * silently switch it to mount "cgroup" instead
   */
-static int cpuset_get_sb(struct file_system_type *fs_type,
-                        int flags, const char *unused_dev_name,
-                        void *data, struct vfsmount *mnt)
+static struct dentry *cpuset_mount(struct file_system_type *fs_type,
+                        int flags, const char *unused_dev_name, void *data)
  {
         struct file_system_type *cgroup_fs = get_fs_type("cgroup");
-       int ret = -ENODEV;
+       struct dentry *ret = ERR_PTR(-ENODEV);
         if (cgroup_fs) {
                 char mountopts[] =
                         "cpuset,noprefix,"
                         "release_agent=/sbin/cpuset_release_agent";
-               ret = cgroup_fs->get_sb(cgroup_fs, flags,
-                                          unused_dev_name, mountopts, mnt);
+               ret = cgroup_fs->mount(cgroup_fs, flags,
+                                          unused_dev_name, mountopts);
                 put_filesystem(cgroup_fs);
         }
         return ret;
@@ -250,7 +262,7 @@ static int cpuset_get_sb(struct file_system_type *fs_type,
  
  static struct file_system_type cpuset_fs_type = {
         .name = "cpuset",
-       .get_sb = cpuset_get_sb,
+       .mount = cpuset_mount,
  };
  
  /*
@@ -258,11 +270,11 @@ static struct file_system_type cpuset_fs_type = {
   * are online.  If none are online, walk up the cpuset hierarchy
   * until we find one that does have some online cpus.  If we get
   * all the way to the top and still haven't found any online cpus,
- * return cpu_online_map.  Or if passed a NULL cs from an exit'ing
- * task, return cpu_online_map.
+ * return cpu_online_mask.  Or if passed a NULL cs from an exit'ing
+ * task, return cpu_online_mask.
   *
   * One way or another, we guarantee to return some non-empty subset
- * of cpu_online_map.
+ * of cpu_online_mask.
   *
   * Call with callback_mutex held.
   */
@@ -855,7 +867,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
         int retval;
         int is_load_balanced;
  
-       /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
+       /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
         if (cs == &top_cpuset)
                 return -EACCES;
  
@@ -946,16 +958,44 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
   * In order to avoid seeing no nodes if the old and new nodes are disjoint,
   * we structure updates as setting all new allowed nodes, then clearing newly
   * disallowed ones.
- *
- * Called with task's alloc_lock held
   */
  static void cpuset_change_task_nodemask(struct task_struct *tsk,
                                         nodemask_t *newmems)
  {
+       bool need_loop;
+
+       /*
+        * Allow tasks that have access to memory reserves because they have
+        * been OOM killed to get memory anywhere.
+        */
+       if (unlikely(test_thread_flag(TIF_MEMDIE)))
+               return;
+       if (current->flags & PF_EXITING) /* Let dying task have memory */
+               return;
+
+       task_lock(tsk);
+       /*
+        * Determine if a loop is necessary if another thread is doing
+        * get_mems_allowed().  If at least one node remains unchanged and
+        * tsk does not have a mempolicy, then an empty nodemask will not be
+        * possible when mems_allowed is larger than a word.
+        */
+       need_loop = task_has_mempolicy(tsk) ||
+                       !nodes_intersects(*newmems, tsk->mems_allowed);
+
+       if (need_loop)
+               write_seqcount_begin(&tsk->mems_allowed_seq);
+
         nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
-       mpol_rebind_task(tsk, &tsk->mems_allowed);
-       mpol_rebind_task(tsk, newmems);
+       mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
+
+       mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
         tsk->mems_allowed = *newmems;
+
+       if (need_loop)
+               write_seqcount_end(&tsk->mems_allowed_seq);
+
+       task_unlock(tsk);
  }
  
  /*
@@ -970,19 +1010,12 @@ static void cpuset_change_nodemask(struct task_struct *p,
         struct cpuset *cs;
         int migrate;
         const nodemask_t *oldmem = scan->data;
-       NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
-
-       if (!newmems)
-               return;
+       static nodemask_t newmems;      /* protected by cgroup_mutex */
  
         cs = cgroup_cs(scan->cg);
-       guarantee_online_mems(cs, newmems);
-
-       task_lock(p);
-       cpuset_change_task_nodemask(p, newmems);
-       task_unlock(p);
+       guarantee_online_mems(cs, &newmems);
  
-       NODEMASK_FREE(newmems);
+       cpuset_change_task_nodemask(p, &newmems);
  
         mm = get_task_mm(p);
         if (!mm)
@@ -1121,7 +1154,7 @@ int current_cpuset_is_being_rebound(void)
  static int update_relax_domain_level(struct cpuset *cs, s64 val)
  {
  #ifdef CONFIG_SMP
-       if (val < -1 || val >= SD_LV_MAX)
+       if (val < -1 || val >= sched_domain_level_max)
                 return -EINVAL;
  #endif
  
@@ -1329,112 +1362,86 @@ static int fmeter_getrate(struct fmeter *fmp)
         return val;
  }
  
-/* Protected by cgroup_lock */
+/*
+ * Protected by cgroup_lock. The nodemasks must be stored globally because
+ * dynamically allocating them is not allowed in can_attach, and they must
+ * persist until attach.
+ */
  static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_from;
+static nodemask_t cpuset_attach_nodemask_to;
  
  /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                            struct task_struct *tsk, bool threadgroup)
+static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
  {
+       struct cpuset *cs = cgroup_cs(cgrp);
+       struct task_struct *task;
         int ret;
-       struct cpuset *cs = cgroup_cs(cont);
  
         if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
                 return -ENOSPC;
  
-       /*
-        * Kthreads bound to specific cpus cannot be moved to a new cpuset; we
-        * cannot change their cpu affinity and isolating such threads by their
-        * set of allowed nodes is unnecessary.  Thus, cpusets are not
-        * applicable for such threads.  This prevents checking for success of
-        * set_cpus_allowed_ptr() on all attached tasks before cpus_allowed may
-        * be changed.
-        */
-       if (tsk->flags & PF_THREAD_BOUND)
-               return -EINVAL;
-
-       ret = security_task_setscheduler(tsk, 0, NULL);
-       if (ret)
-               return ret;
-       if (threadgroup) {
-               struct task_struct *c;
-
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       ret = security_task_setscheduler(c, 0, NULL);
-                       if (ret) {
-                               rcu_read_unlock();
-                               return ret;
-                       }
-               }
-               rcu_read_unlock();
+       cgroup_taskset_for_each(task, cgrp, tset) {
+               /*
+                * Kthreads bound to specific cpus cannot be moved to a new
+                * cpuset; we cannot change their cpu affinity and
+                * isolating such threads by their set of allowed nodes is
+                * unnecessary.  Thus, cpusets are not applicable for such
+                * threads.  This prevents checking for success of
+                * set_cpus_allowed_ptr() on all attached tasks before
+                * cpus_allowed may be changed.
+                */
+               if (task->flags & PF_THREAD_BOUND)
+                       return -EINVAL;
+               if ((ret = security_task_setscheduler(task)))
+                       return ret;
         }
-       return 0;
-}
  
-static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
-                              struct cpuset *cs)
-{
-       int err;
-       /*
-        * can_attach beforehand should guarantee that this doesn't fail.
-        * TODO: have a better way to handle failure here
-        */
-       err = set_cpus_allowed_ptr(tsk, cpus_attach);
-       WARN_ON_ONCE(err);
+       /* prepare for attach */
+       if (cs == &top_cpuset)
+               cpumask_copy(cpus_attach, cpu_possible_mask);
+       else
+               guarantee_online_cpus(cs, cpus_attach);
  
-       task_lock(tsk);
-       cpuset_change_task_nodemask(tsk, to);
-       task_unlock(tsk);
-       cpuset_update_task_spread_flag(cs, tsk);
+       guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
  
+       return 0;
  }
  
-static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                         struct cgroup *oldcont, struct task_struct *tsk,
-                         bool threadgroup)
+static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
  {
         struct mm_struct *mm;
-       struct cpuset *cs = cgroup_cs(cont);
-       struct cpuset *oldcs = cgroup_cs(oldcont);
-       NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
-       NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
+       struct task_struct *task;
+       struct task_struct *leader = cgroup_taskset_first(tset);
+       struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset);
+       struct cpuset *cs = cgroup_cs(cgrp);
+       struct cpuset *oldcs = cgroup_cs(oldcgrp);
  
-       if (from == NULL || to == NULL)
-               goto alloc_fail;
+       cgroup_taskset_for_each(task, cgrp, tset) {
+               /*
+                * can_attach beforehand should guarantee that this doesn't
+                * fail.  TODO: have a better way to handle failure here
+                */
+               WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
  
-       if (cs == &top_cpuset) {
-               cpumask_copy(cpus_attach, cpu_possible_mask);
-       } else {
-               guarantee_online_cpus(cs, cpus_attach);
-       }
-       guarantee_online_mems(cs, to);
-
-       /* do per-task migration stuff possibly for each in the threadgroup */
-       cpuset_attach_task(tsk, to, cs);
-       if (threadgroup) {
-               struct task_struct *c;
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       cpuset_attach_task(c, to, cs);
-               }
-               rcu_read_unlock();
+               cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+               cpuset_update_task_spread_flag(cs, task);
         }
  
-       /* change mm; only needs to be done once even if threadgroup */
-       *from = oldcs->mems_allowed;
-       *to = cs->mems_allowed;
-       mm = get_task_mm(tsk);
+       /*
+        * Change mm, possibly for multiple threads in a threadgroup. This is
+        * expensive and may sleep.
+        */
+       cpuset_attach_nodemask_from = oldcs->mems_allowed;
+       cpuset_attach_nodemask_to = cs->mems_allowed;
+       mm = get_task_mm(leader);
         if (mm) {
-               mpol_rebind_mm(mm, to);
+               mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
                 if (is_memory_migrate(cs))
-                       cpuset_migrate_mm(mm, from, to);
+                       cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
+                                         &cpuset_attach_nodemask_to);
                 mmput(mm);
         }
-
-alloc_fail:
-       NODEMASK_FREE(from);
-       NODEMASK_FREE(to);
  }
  
  /* The various types of files and directories in a cpuset file system */
@@ -1534,8 +1541,10 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
                 return -ENODEV;
  
         trialcs = alloc_trial_cpuset(cs);
-       if (!trialcs)
-               return -ENOMEM;
+       if (!trialcs) {
+               retval = -ENOMEM;
+               goto out;
+       }
  
         switch (cft->private) {
         case FILE_CPULIST:
@@ -1550,6 +1559,7 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
         }
  
         free_trial_cpuset(trialcs);
+out:
         cgroup_unlock();
         return retval;
  }
@@ -1566,34 +1576,26 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
   * across a page fault.
   */
  
-static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
  {
-       int ret;
+       size_t count;
  
         mutex_lock(&callback_mutex);
-       ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
+       count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
         mutex_unlock(&callback_mutex);
  
-       return ret;
+       return count;
  }
  
-static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
  {
-       NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
-       int retval;
-
-       if (mask == NULL)
-               return -ENOMEM;
+       size_t count;
  
         mutex_lock(&callback_mutex);
-       *mask = cs->mems_allowed;
+       count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
         mutex_unlock(&callback_mutex);
  
-       retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
-
-       NODEMASK_FREE(mask);
-
-       return retval;
+       return count;
  }
  
  static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1787,10 +1789,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
  }
  
  /*
- * post_clone() is called at the end of cgroup_clone().
- * 'cgroup' was just created automatically as a result of
- * a cgroup_clone(), and the current task is about to
- * be moved into 'cgroup'.
+ * post_clone() is called during cgroup_create() when the
+ * clone_children mount argument was specified.  The cgroup
+ * can not yet have any tasks.
   *
   * Currently we refuse to set up the cgroup - thereby
   * refusing the task to be entered, and as a result refusing
@@ -1803,8 +1804,7 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
   * (and likewise for mems) to the new cgroup. Called with cgroup_mutex
   * held.
   */
-static void cpuset_post_clone(struct cgroup_subsys *ss,
-                             struct cgroup *cgroup)
+static void cpuset_post_clone(struct cgroup *cgroup)
  {
         struct cgroup *parent, *child;
         struct cpuset *cs, *parent_cs;
@@ -1818,20 +1818,19 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
         cs = cgroup_cs(cgroup);
         parent_cs = cgroup_cs(parent);
  
+       mutex_lock(&callback_mutex);
         cs->mems_allowed = parent_cs->mems_allowed;
         cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
+       mutex_unlock(&callback_mutex);
         return;
  }
  
  /*
   *     cpuset_create - create a cpuset
- *     ss:     cpuset cgroup subsystem
   *     cont:   control group that the new cpuset will be part of
   */
  
-static struct cgroup_subsys_state *cpuset_create(
-       struct cgroup_subsys *ss,
-       struct cgroup *cont)
+static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont)
  {
         struct cpuset *cs;
         struct cpuset *parent;
@@ -1870,7 +1869,7 @@ static struct cgroup_subsys_state *cpuset_create(
   * will call async_rebuild_sched_domains().
   */
  
-static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+static void cpuset_destroy(struct cgroup *cont)
  {
         struct cpuset *cs = cgroup_cs(cont);
  
@@ -2022,10 +2021,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
         struct cpuset *cp;      /* scans cpusets being updated */
         struct cpuset *child;   /* scans child cpusets of cp */
         struct cgroup *cont;
-       NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
-       if (oldmems == NULL)
-               return;
+       static nodemask_t oldmems;      /* protected by cgroup_mutex */
  
         list_add_tail((struct list_head *)&root->stack_list, &queue);
  
@@ -2042,7 +2038,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
                     nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
                         continue;
  
-               *oldmems = cp->mems_allowed;
+               oldmems = cp->mems_allowed;
  
                 /* Remove offline cpus and mems from this cpuset. */
                 mutex_lock(&callback_mutex);
@@ -2058,10 +2054,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
                         remove_tasks_in_empty_cpuset(cp);
                 else {
                         update_tasks_cpumask(cp, NULL);
-                       update_tasks_nodemask(cp, oldmems, NULL);
+                       update_tasks_nodemask(cp, &oldmems, NULL);
                 }
         }
-       NODEMASK_FREE(oldmems);
  }
  
  /*
@@ -2071,31 +2066,17 @@ static void scan_for_empty_cpusets(struct cpuset *root)
   * but making no active use of cpusets.
   *
   * This routine ensures that top_cpuset.cpus_allowed tracks
- * cpu_online_map on each CPU hotplug (cpuhp) event.
+ * cpu_active_mask on each CPU hotplug (cpuhp) event.
   *
   * Called within get_online_cpus().  Needs to call cgroup_lock()
   * before calling generate_sched_domains().
   */
-static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
-                               unsigned long phase, void *unused_cpu)
+void cpuset_update_active_cpus(void)
  {
         struct sched_domain_attr *attr;
         cpumask_var_t *doms;
         int ndoms;
  
-       switch (phase) {
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
-       case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-               break;
-
-       default:
-               return NOTIFY_DONE;
-       }
-
         cgroup_lock();
         mutex_lock(&callback_mutex);
         cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
@@ -2106,8 +2087,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
  
         /* Have scheduler rebuild the domains */
         partition_sched_domains(ndoms, doms, attr);
-
-       return NOTIFY_OK;
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
@@ -2119,19 +2098,16 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
  static int cpuset_track_online_nodes(struct notifier_block *self,
                                 unsigned long action, void *arg)
  {
-       NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
-       if (oldmems == NULL)
-               return NOTIFY_DONE;
+       static nodemask_t oldmems;      /* protected by cgroup_mutex */
  
         cgroup_lock();
         switch (action) {
         case MEM_ONLINE:
-               *oldmems = top_cpuset.mems_allowed;
+               oldmems = top_cpuset.mems_allowed;
                 mutex_lock(&callback_mutex);
                 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
                 mutex_unlock(&callback_mutex);
-               update_tasks_nodemask(&top_cpuset, oldmems, NULL);
+               update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
                 break;
         case MEM_OFFLINE:
                 /*
@@ -2145,7 +2121,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
         }
         cgroup_unlock();
  
-       NODEMASK_FREE(oldmems);
         return NOTIFY_OK;
  }
  #endif
@@ -2161,7 +2136,6 @@ void __init cpuset_init_smp(void)
         cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
         top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
  
-       hotcpu_notifier(cpuset_track_online_cpus, 0);
         hotplug_memory_notifier(cpuset_track_online_nodes, 10);
  
         cpuset_wq = create_singlethread_workqueue("cpuset");
@@ -2175,7 +2149,7 @@ void __init cpuset_init_smp(void)
   *
   * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
   * attached to the specified @tsk.  Guaranteed to return some non-empty
- * subset of cpu_online_map, even if this means going outside the
+ * subset of cpu_online_mask, even if this means going outside the
   * tasks cpuset.
   **/
  
@@ -2188,15 +2162,14 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
         mutex_unlock(&callback_mutex);
  }
  
-int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
  {
         const struct cpuset *cs;
-       int cpu;
  
         rcu_read_lock();
         cs = task_cs(tsk);
         if (cs)
-               cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
+               do_set_cpus_allowed(tsk, cs->cpus_allowed);
         rcu_read_unlock();
  
         /*
@@ -2212,22 +2185,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
          * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
          * set any mask even if it is not right from task_cs() pov,
          * the pending set_cpus_allowed_ptr() will fix things.
+        *
+        * select_fallback_rq() will fix things up and set cpu_possible_mask
+        * if required.
          */
-
-       cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
-       if (cpu >= nr_cpu_ids) {
-               /*
-                * Either tsk->cpus_allowed is wrong (see above) or it
-                * is actually empty. The latter case is only possible
-                * if we are racing with remove_tasks_in_empty_cpuset().
-                * Like above we can temporary set any mask and rely on
-                * set_cpus_allowed_ptr() as synchronization point.
-                */
-               cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
-               cpu = cpumask_any(cpu_active_mask);
-       }
-
-       return cpu;
  }
  
  void cpuset_init_current_mems_allowed(void)
@@ -2427,7 +2388,8 @@ void cpuset_unlock(void)
  }
  
  /**
- * cpuset_mem_spread_node() - On which node to begin search for a page
+ * cpuset_mem_spread_node() - On which node to begin search for a file page
+ * cpuset_slab_spread_node() - On which node to begin search for a slab page
   *
   * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
   * tasks in a cpuset with is_spread_page or is_spread_slab set),
@@ -2452,16 +2414,35 @@ void cpuset_unlock(void)
   * See kmem_cache_alloc_node().
   */
  
-int cpuset_mem_spread_node(void)
+static int cpuset_spread_node(int *rotor)
  {
         int node;
  
-       node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed);
+       node = next_node(*rotor, current->mems_allowed);
         if (node == MAX_NUMNODES)
                 node = first_node(current->mems_allowed);
-       current->cpuset_mem_spread_rotor = node;
+       *rotor = node;
         return node;
  }
+
+int cpuset_mem_spread_node(void)
+{
+       if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE)
+               current->cpuset_mem_spread_rotor =
+                       node_random(&current->mems_allowed);
+
+       return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
+}
+
+int cpuset_slab_spread_node(void)
+{
+       if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE)
+               current->cpuset_slab_spread_rotor =
+                       node_random(&current->mems_allowed);
+
+       return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
+}
+
  EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
  
  /**