- Update to 2.6.25-rc3.
[linux-flexiantxendom0-3.2.10.git] / kernel / fork.c
index 8dd8ff2..dd249c3 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/ptrace.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/memcontrol.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
@@ -51,6 +52,7 @@
 #include <linux/random.h>
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
+#include <linux/blkdev.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -324,7 +326,7 @@ static inline int mm_alloc_pgd(struct mm_struct * mm)
 
 static inline void mm_free_pgd(struct mm_struct * mm)
 {
-       pgd_free(mm->pgd);
+       pgd_free(mm, mm->pgd);
 }
 #else
 #define dup_mmap(mm, oldmm)    (0)
@@ -339,7 +341,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
 
 #include <linux/init_task.h>
 
-static struct mm_struct * mm_init(struct mm_struct * mm)
+static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
        atomic_set(&mm->mm_users, 1);
        atomic_set(&mm->mm_count, 1);
@@ -356,11 +358,14 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
        mm->ioctx_list = NULL;
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
+       mm_init_cgroup(mm, p);
 
        if (likely(!mm_alloc_pgd(mm))) {
                mm->def_flags = 0;
                return mm;
        }
+
+       mm_free_cgroup(mm);
        free_mm(mm);
        return NULL;
 }
@@ -375,7 +380,7 @@ struct mm_struct * mm_alloc(void)
        mm = allocate_mm();
        if (mm) {
                memset(mm, 0, sizeof(*mm));
-               mm = mm_init(mm);
+               mm = mm_init(mm, current);
        }
        return mm;
 }
@@ -385,13 +390,15 @@ struct mm_struct * mm_alloc(void)
  * is dropped: either by a lazy thread or by
  * mmput. Free the page directory and the mm.
  */
-void fastcall __mmdrop(struct mm_struct *mm)
+void __mmdrop(struct mm_struct *mm)
 {
        BUG_ON(mm == &init_mm);
        mm_free_pgd(mm);
+       mm_free_cgroup(mm);
        destroy_context(mm);
        free_mm(mm);
 }
+EXPORT_SYMBOL_GPL(__mmdrop);
 
 /*
  * Decrement the use count and release all resources for an mm.
@@ -509,7 +516,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
        mm->token_priority = 0;
        mm->last_interval = 0;
 
-       if (!mm_init(mm))
+       if (!mm_init(mm, tsk))
                goto fail_nomem;
 
        if (init_new_context(tsk, mm))
@@ -593,16 +600,16 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
                rwlock_init(&fs->lock);
                fs->umask = old->umask;
                read_lock(&old->lock);
-               fs->rootmnt = mntget(old->rootmnt);
-               fs->root = dget(old->root);
-               fs->pwdmnt = mntget(old->pwdmnt);
-               fs->pwd = dget(old->pwd);
-               if (old->altroot) {
-                       fs->altrootmnt = mntget(old->altrootmnt);
-                       fs->altroot = dget(old->altroot);
+               fs->root = old->root;
+               path_get(&old->root);
+               fs->pwd = old->pwd;
+               path_get(&old->pwd);
+               if (old->altroot.dentry) {
+                       fs->altroot = old->altroot;
+                       path_get(&old->altroot);
                } else {
-                       fs->altrootmnt = NULL;
-                       fs->altroot = NULL;
+                       fs->altroot.mnt = NULL;
+                       fs->altroot.dentry = NULL;
                }
                read_unlock(&old->lock);
        }
@@ -791,6 +798,31 @@ out:
        return error;
 }
 
+static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
+{
+#ifdef CONFIG_BLOCK
+       struct io_context *ioc = current->io_context;
+
+       if (!ioc)
+               return 0;
+       /*
+        * Share io context with parent, if CLONE_IO is set
+        */
+       if (clone_flags & CLONE_IO) {
+               tsk->io_context = ioc_task_link(ioc);
+               if (unlikely(!tsk->io_context))
+                       return -ENOMEM;
+       } else if (ioprio_valid(ioc->ioprio)) {
+               tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
+               if (unlikely(!tsk->io_context))
+                       return -ENOMEM;
+
+               tsk->io_context->ioprio = ioc->ioprio;
+       }
+#endif
+       return 0;
+}
+
 /*
  *     Helper to unshare the files of the current task.
  *     We don't want to expose copy_files internals to
@@ -877,7 +909,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        sig->it_real_incr.tv64 = 0;
        sig->real_timer.function = it_real_fn;
-       sig->tsk = tsk;
 
        sig->it_virt_expires = cputime_zero;
        sig->it_virt_incr = cputime_zero;
@@ -1045,6 +1076,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        copy_flags(clone_flags, p);
        INIT_LIST_HEAD(&p->children);
        INIT_LIST_HEAD(&p->sibling);
+#ifdef CONFIG_PREEMPT_RCU
+       p->rcu_read_lock_nesting = 0;
+       p->rcu_flipctr_idx = 0;
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
        p->vfork_done = NULL;
        spin_lock_init(&p->alloc_lock);
 
@@ -1059,6 +1094,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->prev_utime = cputime_zero;
        p->prev_stime = cputime_zero;
 
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+       p->last_switch_count = 0;
+       p->last_switch_timestamp = 0;
+#endif
+
 #ifdef CONFIG_TASK_XACCT
        p->rchar = 0;           /* I/O counter: bytes read */
        p->wchar = 0;           /* I/O counter: bytes written */
@@ -1082,6 +1122,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_SECURITY
        p->security = NULL;
 #endif
+       p->cap_bset = current->cap_bset;
        p->io_context = NULL;
        p->audit_context = NULL;
        cgroup_fork(p);
@@ -1147,15 +1188,17 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto bad_fork_cleanup_mm;
        if ((retval = copy_namespaces(clone_flags, p)))
                goto bad_fork_cleanup_keys;
+       if ((retval = copy_io(clone_flags, p)))
+               goto bad_fork_cleanup_namespaces;
        retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
        if (retval)
-               goto bad_fork_cleanup_namespaces;
+               goto bad_fork_cleanup_io;
 
        if (pid != &init_struct_pid) {
                retval = -ENOMEM;
                pid = alloc_pid(task_active_pid_ns(p));
                if (!pid)
-                       goto bad_fork_cleanup_namespaces;
+                       goto bad_fork_cleanup_io;
 
                if (clone_flags & CLONE_NEWPID) {
                        retval = pid_ns_prepare_proc(task_active_pid_ns(p));
@@ -1196,6 +1239,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
 #endif
+       clear_all_latency_tracing(p);
 
        /* Our parent execution domain becomes current domain
           These must match for thread signalling to apply */
@@ -1224,9 +1268,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
 
-       /* for sys_ioprio_set(IOPRIO_WHO_PGRP) */
-       p->ioprio = current->ioprio;
-
        /*
         * The task hasn't been attached yet, so its cpus_allowed mask will
         * not be changed, nor will its assigned CPU.
@@ -1237,6 +1278,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         * parent's CPU). This avoids alot of nasty races.
         */
        p->cpus_allowed = current->cpus_allowed;
+       p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
        if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
                        !cpu_online(task_cpu(p))))
                set_task_cpu(p, smp_processor_id());
@@ -1295,6 +1337,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                        if (clone_flags & CLONE_NEWPID)
                                p->nsproxy->pid_ns->child_reaper = p;
 
+                       p->signal->leader_pid = pid;
                        p->signal->tty = current->signal->tty;
                        set_task_pgrp(p, task_pgrp_nr(current));
                        set_task_session(p, task_session_nr(current));
@@ -1317,6 +1360,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 bad_fork_free_pid:
        if (pid != &init_struct_pid)
                free_pid(pid);
+bad_fork_cleanup_io:
+       put_io_context(p->io_context);
 bad_fork_cleanup_namespaces:
        exit_task_namespaces(p);
 bad_fork_cleanup_keys:
@@ -1359,7 +1404,7 @@ fork_out:
        return ERR_PTR(retval);
 }
 
-noinline struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
+noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
 {
        memset(regs, 0, sizeof(struct pt_regs));
        return regs;
@@ -1411,6 +1456,23 @@ long do_fork(unsigned long clone_flags,
        int trace = 0;
        long nr;
 
+       /*
+        * We hope to recycle these flags after 2.6.26
+        */
+       if (unlikely(clone_flags & CLONE_STOPPED)) {
+               static int __read_mostly count = 100;
+
+               if (count > 0 && printk_ratelimit()) {
+                       char comm[TASK_COMM_LEN];
+
+                       count--;
+                       printk(KERN_INFO "fork(): process `%s' used deprecated "
+                                       "clone flags 0x%lx\n",
+                               get_task_comm(comm, current),
+                               clone_flags & CLONE_STOPPED);
+               }
+       }
+
        if (unlikely(current->ptrace)) {
                trace = fork_traceflag (clone_flags);
                if (trace)
@@ -1426,13 +1488,7 @@ long do_fork(unsigned long clone_flags,
        if (!IS_ERR(p)) {
                struct completion vfork;
 
-               /*
-                * this is enough to call pid_nr_ns here, but this if
-                * improves optimisation of regular fork()
-                */
-               nr = (clone_flags & CLONE_NEWPID) ?
-                       task_pid_nr_ns(p, current->nsproxy->pid_ns) :
-                               task_pid_vnr(p);
+               nr = task_pid_vnr(p);
 
                if (clone_flags & CLONE_PARENT_SETTID)
                        put_user(nr, parent_tidptr);
@@ -1453,7 +1509,7 @@ long do_fork(unsigned long clone_flags,
                if (!(clone_flags & CLONE_STOPPED))
                        wake_up_new_task(p, clone_flags);
                else
-                       p->state = TASK_STOPPED;
+                       __set_task_state(p, TASK_STOPPED);
 
                if (unlikely (trace)) {
                        current->ptrace_message = nr;