sched: Fix OOPS when build_sched_domains() percpu allocation fails

[linux-flexiantxendom0-3.2.10.git] / kernel / cgroup.c
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 865d89a..ed64cca 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1472,7 +1472,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
  
         struct inode *inode =
                 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
-       struct dentry *dentry;
  
         if (!inode)
                 return -ENOMEM;
@@ -1481,12 +1480,9 @@ static int cgroup_get_rootdir(struct super_block *sb)
         inode->i_op = &cgroup_dir_inode_operations;
         /* directories start off with i_nlink == 2 (for "." entry) */
         inc_nlink(inode);
-       dentry = d_alloc_root(inode);
-       if (!dentry) {
-               iput(inode);
+       sb->s_root = d_make_root(inode);
+       if (!sb->s_root)
                 return -ENOMEM;
-       }
-       sb->s_root = dentry;
         /* for everything else we want ->d_op set */
         sb->s_d_op = &cgroup_dops;
         return 0;
@@ -1887,7 +1883,7 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
   */
  int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
  {
-       int retval;
+       int retval = 0;
         struct cgroup_subsys *ss, *failed_ss = NULL;
         struct cgroup *oldcgrp;
         struct cgroupfs_root *root = cgrp->root;
@@ -2701,15 +2697,20 @@ static void cgroup_advance_iter(struct cgroup *cgrp,
   * using their cgroups capability, we don't maintain the lists running
   * through each css_set to its tasks until we see the list actually
   * used - in other words after the first call to cgroup_iter_start().
- *
- * The tasklist_lock is not held here, as do_each_thread() and
- * while_each_thread() are protected by RCU.
   */
  static void cgroup_enable_task_cg_lists(void)
  {
         struct task_struct *p, *g;
         write_lock(&css_set_lock);
         use_task_css_set_links = 1;
+       /*
+        * We need tasklist_lock because RCU is not safe against
+        * while_each_thread(). Besides, a forking task that has passed
+        * cgroup_post_fork() without seeing use_task_css_set_links = 1
+        * is not guaranteed to have its child immediately visible in the
+        * tasklist if we walk through it with RCU.
+        */
+       read_lock(&tasklist_lock);
         do_each_thread(g, p) {
                 task_lock(p);
                 /*
@@ -2721,6 +2722,7 @@ static void cgroup_enable_task_cg_lists(void)
                         list_add(&p->cg_list, &p->cgroups->tasks);
                 task_unlock(p);
         } while_each_thread(g, p);
+       read_unlock(&tasklist_lock);
         write_unlock(&css_set_lock);
  }
  
@@ -4525,6 +4527,17 @@ void cgroup_fork_callbacks(struct task_struct *child)
   */
  void cgroup_post_fork(struct task_struct *child)
  {
+       /*
+        * use_task_css_set_links is set to 1 before we walk the tasklist
+        * under the tasklist_lock and we read it here after we added the child
+        * to the tasklist under the tasklist_lock as well. If the child wasn't
+        * yet in the tasklist when we walked through it from
+        * cgroup_enable_task_cg_lists(), then use_task_css_set_links value
+        * should be visible now due to the paired locking and barriers implied
+        * by LOCK/UNLOCK: it is written before the tasklist_lock unlock
+        * in cgroup_enable_task_cg_lists() and read here after the tasklist_lock
+        * lock on fork.
+        */
         if (use_task_css_set_links) {
                 write_lock(&css_set_lock);
                 if (list_empty(&child->cg_list)) {
@@ -4868,9 +4881,9 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
  
         rcu_assign_pointer(id->css, NULL);
         rcu_assign_pointer(css->id, NULL);
-       write_lock(&ss->id_lock);
+       spin_lock(&ss->id_lock);
         idr_remove(&ss->idr, id->id);
-       write_unlock(&ss->id_lock);
+       spin_unlock(&ss->id_lock);
         kfree_rcu(id, rcu_head);
  }
  EXPORT_SYMBOL_GPL(free_css_id);
@@ -4896,10 +4909,10 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
                 error = -ENOMEM;
                 goto err_out;
         }
-       write_lock(&ss->id_lock);
+       spin_lock(&ss->id_lock);
         /* Don't use 0. allocates an ID of 1-65535 */
         error = idr_get_new_above(&ss->idr, newid, 1, &myid);
-       write_unlock(&ss->id_lock);
+       spin_unlock(&ss->id_lock);
  
         /* Returns error when there are no free spaces for new ID.*/
         if (error) {
@@ -4914,9 +4927,9 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
         return newid;
  remove_idr:
         error = -ENOSPC;
-       write_lock(&ss->id_lock);
+       spin_lock(&ss->id_lock);
         idr_remove(&ss->idr, myid);
-       write_unlock(&ss->id_lock);
+       spin_unlock(&ss->id_lock);
  err_out:
         kfree(newid);
         return ERR_PTR(error);
@@ -4928,7 +4941,7 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
  {
         struct css_id *newid;
  
-       rwlock_init(&ss->id_lock);
+       spin_lock_init(&ss->id_lock);
         idr_init(&ss->idr);
  
         newid = get_new_cssid(ss, 0);
@@ -5016,6 +5029,8 @@ css_get_next(struct cgroup_subsys *ss, int id,
                 return NULL;
  
         BUG_ON(!ss->use_id);
+       WARN_ON_ONCE(!rcu_read_lock_held());
+
         /* fill start point for scan */
         tmpid = id;
         while (1) {
@@ -5023,10 +5038,7 @@ css_get_next(struct cgroup_subsys *ss, int id,
                  * scan next entry from bitmap(tree), tmpid is updated after
                  * idr_get_next().
                  */
-               read_lock(&ss->id_lock);
                 tmp = idr_get_next(&ss->idr, &tmpid);
-               read_unlock(&ss->id_lock);
-
                 if (!tmp)
                         break;
                 if (tmp->depth >= depth && tmp->stack[depth] == rootid) {