auxv: require the target to be traceable (or yourself), CVE-2011-1020

[linux-flexiantxendom0-natty.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 114a0de..1cc9328 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -278,14 +278,12 @@ struct task_group {
  #endif
  };
  
-#define root_task_group init_task_group
-
  /* task_group_lock serializes the addition/removal of task groups */
  static DEFINE_SPINLOCK(task_group_lock);
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
-# define INIT_TASK_GROUP_LOAD  NICE_0_LOAD
+# define ROOT_TASK_GROUP_LOAD  NICE_0_LOAD
  
  /*
   * A weight of 0 or 1 can cause arithmetics problems.
@@ -298,13 +296,13 @@ static DEFINE_SPINLOCK(task_group_lock);
  #define MIN_SHARES     2
  #define MAX_SHARES     (1UL << 18)
  
-static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
  #endif
  
  /* Default task group.
   *     Every task in system belong to this group at bootup.
   */
-struct task_group init_task_group;
+struct task_group root_task_group;
  
  #endif /* CONFIG_CGROUP_SCHED */
  
@@ -555,9 +553,6 @@ struct rq {
         /* try_to_wake_up() stats */
         unsigned int ttwu_count;
         unsigned int ttwu_local;
-
-       /* BKL stats */
-       unsigned int bkl_count;
  #endif
  };
  
@@ -611,6 +606,9 @@ static inline struct task_group *task_group(struct task_struct *p)
         struct task_group *tg;
         struct cgroup_subsys_state *css;
  
+       if (p->flags & PF_EXITING)
+               return &root_task_group;
+
         css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
                         lockdep_is_held(&task_rq(p)->lock));
         tg = container_of(css, struct task_group, css);
@@ -743,7 +741,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
         buf[cnt] = 0;
         cmp = strstrip(buf);
  
-       if (strncmp(buf, "NO_", 3) == 0) {
+       if (strncmp(cmp, "NO_", 3) == 0) {
                 neg = 1;
                 cmp += 3;
         }
@@ -2507,7 +2505,7 @@ out:
   * try_to_wake_up_local - try to wake up a local task with rq lock held
   * @p: the thread to be awakened
   *
- * Put @p on the run-queue if it's not alredy there.  The caller must
+ * Put @p on the run-queue if it's not already there.  The caller must
   * ensure that this_rq() is locked, @p is bound to this_rq() and not
   * the current task.  this_rq() stays locked over invocation.
   */
@@ -3889,7 +3887,7 @@ static inline void schedule_debug(struct task_struct *prev)
         schedstat_inc(this_rq(), sched_count);
  #ifdef CONFIG_SCHEDSTATS
         if (unlikely(prev->lock_depth >= 0)) {
-               schedstat_inc(this_rq(), bkl_count);
+               schedstat_inc(this_rq(), rq_sched_info.bkl_count);
                 schedstat_inc(prev, sched_info.bkl_count);
         }
  #endif
@@ -4215,6 +4213,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
  {
         __wake_up_common(q, mode, 1, 0, key);
  }
+EXPORT_SYMBOL_GPL(__wake_up_locked_key);
  
  /**
   * __wake_up_sync_key - wake up threads blocked on a waitqueue.
@@ -4873,7 +4872,8 @@ recheck:
                  * assigned.
                  */
                 if (rt_bandwidth_enabled() && rt_policy(policy) &&
-                               task_group(p)->rt_bandwidth.rt_runtime == 0) {
+                               task_group(p)->rt_bandwidth.rt_runtime == 0 &&
+                               !task_group_is_autogroup(task_group(p))) {
                         __task_rq_unlock(rq);
                         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
                         return -EPERM;
@@ -5572,7 +5572,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
          * The idle tasks have their own, simple scheduling class:
          */
         idle->sched_class = &idle_sched_class;
-       ftrace_graph_init_task(idle);
+       ftrace_graph_init_idle_task(idle, cpu);
  }
  
  /*
@@ -7848,7 +7848,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
         cfs_rq->tg = tg;
  
         tg->se[cpu] = se;
-       /* se could be NULL for init_task_group */
+       /* se could be NULL for root_task_group */
         if (!se)
                 return;
  
@@ -7858,7 +7858,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
                 se->cfs_rq = parent->my_q;
  
         se->my_q = cfs_rq;
-       update_load_set(&se->load, 0);
+       update_load_set(&se->load, tg->shares);
         se->parent = parent;
  }
  #endif
@@ -7908,18 +7908,18 @@ void __init sched_init(void)
                 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
-               init_task_group.se = (struct sched_entity **)ptr;
+               root_task_group.se = (struct sched_entity **)ptr;
                 ptr += nr_cpu_ids * sizeof(void **);
  
-               init_task_group.cfs_rq = (struct cfs_rq **)ptr;
+               root_task_group.cfs_rq = (struct cfs_rq **)ptr;
                 ptr += nr_cpu_ids * sizeof(void **);
  
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  #ifdef CONFIG_RT_GROUP_SCHED
-               init_task_group.rt_se = (struct sched_rt_entity **)ptr;
+               root_task_group.rt_se = (struct sched_rt_entity **)ptr;
                 ptr += nr_cpu_ids * sizeof(void **);
  
-               init_task_group.rt_rq = (struct rt_rq **)ptr;
+               root_task_group.rt_rq = (struct rt_rq **)ptr;
                 ptr += nr_cpu_ids * sizeof(void **);
  
  #endif /* CONFIG_RT_GROUP_SCHED */
@@ -7939,13 +7939,13 @@ void __init sched_init(void)
                         global_rt_period(), global_rt_runtime());
  
  #ifdef CONFIG_RT_GROUP_SCHED
-       init_rt_bandwidth(&init_task_group.rt_bandwidth,
+       init_rt_bandwidth(&root_task_group.rt_bandwidth,
                         global_rt_period(), global_rt_runtime());
  #endif /* CONFIG_RT_GROUP_SCHED */
  
  #ifdef CONFIG_CGROUP_SCHED
-       list_add(&init_task_group.list, &task_groups);
-       INIT_LIST_HEAD(&init_task_group.children);
+       list_add(&root_task_group.list, &task_groups);
+       INIT_LIST_HEAD(&root_task_group.children);
         autogroup_init(&init_task);
  #endif /* CONFIG_CGROUP_SCHED */
  
@@ -7960,34 +7960,34 @@ void __init sched_init(void)
                 init_cfs_rq(&rq->cfs, rq);
                 init_rt_rq(&rq->rt, rq);
  #ifdef CONFIG_FAIR_GROUP_SCHED
-               init_task_group.shares = init_task_group_load;
+               root_task_group.shares = root_task_group_load;
                 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
                 /*
-                * How much cpu bandwidth does init_task_group get?
+                * How much cpu bandwidth does root_task_group get?
                  *
                  * In case of task-groups formed thr' the cgroup filesystem, it
                  * gets 100% of the cpu resources in the system. This overall
                  * system cpu resource is divided among the tasks of
-                * init_task_group and its child task-groups in a fair manner,
+                * root_task_group and its child task-groups in a fair manner,
                  * based on each entity's (task or task-group's) weight
                  * (se->load.weight).
                  *
-                * In other words, if init_task_group has 10 tasks of weight
+                * In other words, if root_task_group has 10 tasks (of weight
                  * 1024) and two child groups A0 and A1 (of weight 1024 each),
                  * then A0's share of the cpu resource is:
                  *
                  *      A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
                  *
-                * We achieve this by letting init_task_group's tasks sit
-                * directly in rq->cfs (i.e init_task_group->se[] = NULL).
+                * We achieve this by letting root_task_group's tasks sit
+                * directly in rq->cfs (i.e root_task_group->se[] = NULL).
                  */
-               init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL);
+               init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  
                 rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
  #ifdef CONFIG_RT_GROUP_SCHED
                 INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
-               init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL);
+               init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
  #endif
  
                 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -8067,8 +8067,6 @@ void __init sched_init(void)
                 zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
  #endif /* SMP */
  
-       perf_event_init();
-
         scheduler_running = 1;
  }
  
@@ -8381,6 +8379,7 @@ static void free_sched_group(struct task_group *tg)
  {
         free_fair_sched_group(tg);
         free_rt_sched_group(tg);
+       autogroup_free(tg);
         kfree(tg);
  }
  
@@ -8814,7 +8813,7 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
  
         if (!cgrp->parent) {
                 /* This is early initialization for the top cgroup */
-               return &init_task_group.css;
+               return &root_task_group.css;
         }
  
         parent = cgroup_tg(cgrp->parent);
@@ -8885,6 +8884,20 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
         }
  }
  
+static void
+cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task)
+{
+       /*
+        * cgroup_exit() is called in the copy_process() failure path.
+        * Ignore this case since the task hasn't run yet; this avoids
+        * trying to poke a half-freed task state from generic code.
+        */
+       if (!(task->flags & PF_EXITING))
+               return;
+
+       sched_move_task(task);
+}
+
  #ifdef CONFIG_FAIR_GROUP_SCHED
  static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
                                 u64 shareval)
@@ -8957,6 +8970,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
         .destroy        = cpu_cgroup_destroy,
         .can_attach     = cpu_cgroup_can_attach,
         .attach         = cpu_cgroup_attach,
+       .exit           = cpu_cgroup_exit,
         .populate       = cpu_cgroup_populate,
         .subsys_id      = cpu_cgroup_subsys_id,
         .early_init     = 1,
@@ -9241,72 +9255,3 @@ struct cgroup_subsys cpuacct_subsys = {
  };
  #endif /* CONFIG_CGROUP_CPUACCT */
  
-#ifndef CONFIG_SMP
-
-void synchronize_sched_expedited(void)
-{
-       barrier();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#else /* #ifndef CONFIG_SMP */
-
-static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
-
-static int synchronize_sched_expedited_cpu_stop(void *data)
-{
-       /*
-        * There must be a full memory barrier on each affected CPU
-        * between the time that try_stop_cpus() is called and the
-        * time that it returns.
-        *
-        * In the current initial implementation of cpu_stop, the
-        * above condition is already met when the control reaches
-        * this point and the following smp_mb() is not strictly
-        * necessary.  Do smp_mb() anyway for documentation and
-        * robustness against future implementation changes.
-        */
-       smp_mb(); /* See above comment block. */
-       return 0;
-}
-
-/*
- * Wait for an rcu-sched grace period to elapse, but use "big hammer"
- * approach to force grace period to end quickly.  This consumes
- * significant time on all CPUs, and is thus not recommended for
- * any sort of common-case code.
- *
- * Note that it is illegal to call this function while holding any
- * lock that is acquired by a CPU-hotplug notifier.  Failing to
- * observe this restriction will result in deadlock.
- */
-void synchronize_sched_expedited(void)
-{
-       int snap, trycount = 0;
-
-       smp_mb();  /* ensure prior mod happens before capturing snap. */
-       snap = atomic_read(&synchronize_sched_expedited_count) + 1;
-       get_online_cpus();
-       while (try_stop_cpus(cpu_online_mask,
-                            synchronize_sched_expedited_cpu_stop,
-                            NULL) == -EAGAIN) {
-               put_online_cpus();
-               if (trycount++ < 10)
-                       udelay(trycount * num_online_cpus());
-               else {
-                       synchronize_sched();
-                       return;
-               }
-               if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
-                       smp_mb(); /* ensure test happens before caller kfree */
-                       return;
-               }
-               get_online_cpus();
-       }
-       atomic_inc(&synchronize_sched_expedited_count);
-       smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
-       put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#endif /* #else #ifndef CONFIG_SMP */