/*
 *  linux/kernel/exit.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/tty.h>
#ifdef CONFIG_BSD_PROCESS_ACCT
#include <linux/acct.h>
#endif

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

extern void sem_exit (void);
extern struct task_struct *child_reaper;

int getrusage(struct task_struct *, int, struct rusage *);

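/*
 * Final teardown of a dead child: wait until it is off any runqueue,
 * drop its per-user accounting, unhash it, fold its fault/swap
 * counters and remaining timeslice into the parent, and free the
 * task structure.
 */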
static void release_task(struct task_struct * p)
{
        if (p != current) {
#ifdef CONFIG_SMP
                /*
                 * Wait to make sure the process isn't on the
                 * runqueue (active on some other CPU still)
                 */
                for (;;) {
                        task_lock(p);
                        if (!task_has_cpu(p))
                                break;
                        task_unlock(p);
                        do {
                                cpu_relax();
                                barrier();
                        } while (task_has_cpu(p));
                }
                task_unlock(p);
#endif
                atomic_dec(&p->user->processes);
                free_uid(p->user);
                unhash_process(p);

                release_thread(p);
                current->cmin_flt += p->min_flt + p->cmin_flt;
                current->cmaj_flt += p->maj_flt + p->cmaj_flt;
                current->cnswap += p->nswap + p->cnswap;
                /*
                 * Potentially available timeslices are retrieved
                 * here - this way the parent does not get penalized
                 * for creating too many processes.
                 *
                 * (this cannot be used to artificially 'generate'
                 * timeslices, because any timeslice recovered here
                 * was given away by the parent in the first place.)
                 */
                current->counter += p->counter;
                if (current->counter >= MAX_COUNTER)
                        current->counter = MAX_COUNTER;
                p->pid = 0;
                free_task_struct(p);
        } else {
                printk("task releasing itself\n");
        }
}

/*
 * This checks not only the pgrp, but falls back on the pid if no
 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 * without this...
 */
int session_of_pgrp(int pgrp)
{
        struct task_struct *p;
        int fallback;

        fallback = -1;
        read_lock(&tasklist_lock);
        for_each_task(p) {
                if (p->session <= 0)
                        continue;
                if (p->pgrp == pgrp) {
                        fallback = p->session;
                        break;
                }
                if (p->pid == pgrp)
                        fallback = p->session;
        }
        read_unlock(&tasklist_lock);
        return fallback;
}

/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 * by terminal-generated stop signals.  Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
        struct task_struct *p;

        read_lock(&tasklist_lock);
        for_each_task(p) {
                if ((p == ignored_task) || (p->pgrp != pgrp) ||
                    (p->state == TASK_ZOMBIE) ||
                    (p->p_pptr->pid == 1))
                        continue;
                if ((p->p_pptr->pgrp != pgrp) &&
                    (p->p_pptr->session == p->session)) {
                        read_unlock(&tasklist_lock);
                        return 0;
                }
        }
        read_unlock(&tasklist_lock);
        return 1;       /* (sighing) "Often!" */
}

int is_orphaned_pgrp(int pgrp)
{
        return will_become_orphaned_pgrp(pgrp, NULL);
}

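/*
 * Returns nonzero if any member of the given process group is
 * currently stopped.  Used together with the orphaned-pgrp checks
 * above to decide whether a newly orphaned group must be sent
 * SIGHUP and SIGCONT.
 */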
static inline int has_stopped_jobs(int pgrp)
{
        int retval = 0;
        struct task_struct * p;

        read_lock(&tasklist_lock);
        for_each_task(p) {
                if (p->pgrp != pgrp)
                        continue;
                if (p->state != TASK_STOPPED)
                        continue;
                retval = 1;
                break;
        }
        read_unlock(&tasklist_lock);
        return retval;
}

/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our process
 * group, and if no such member exists, give them to
 * the global child reaper process (ie "init")
 */
static inline void forget_original_parent(struct task_struct * father)
{
        struct task_struct * p, *reaper;

        read_lock(&tasklist_lock);

        /* Next in our thread group */
        reaper = next_thread(father);
        if (reaper == father)
                reaper = child_reaper;

        for_each_task(p) {
                if (p->p_opptr == father) {
                        /* We don't want people slaying init */
                        p->exit_signal = SIGCHLD;
                        p->self_exec_id++;

                        /* Make sure we're not reparenting to ourselves */
                        if (p == reaper)
                                p->p_opptr = child_reaper;
                        else
                                p->p_opptr = reaper;

                        if (p->pdeath_signal)
                                send_sig(p->pdeath_signal, p, 0);
                }
        }
        read_unlock(&tasklist_lock);
}

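/*
 * Walk the open-fd bitmap one word at a time and filp_close() every
 * file still installed in the table.  xchg() clears each slot
 * atomically, so a descriptor can never be closed twice.
 */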
static inline void close_files(struct files_struct * files)
{
        int i, j;

        j = 0;
        for (;;) {
                unsigned long set;
                i = j * __NFDBITS;
                if (i >= files->max_fdset || i >= files->max_fds)
                        break;
                set = files->open_fds->fds_bits[j++];
                while (set) {
                        if (set & 1) {
                                struct file * file = xchg(&files->fd[i], NULL);
                                if (file)
                                        filp_close(file, files);
                        }
                        i++;
                        set >>= 1;
                }
        }
}

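/*
 * Drop a reference on a files_struct.  The last reference closes all
 * remaining files and frees the descriptor tables, including the
 * separately allocated arrays if the table ever grew beyond the
 * embedded defaults.
 */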
void put_files_struct(struct files_struct *files)
{
        if (atomic_dec_and_test(&files->count)) {
                close_files(files);
                /*
                 * Free the fd and fdset arrays if we expanded them.
                 */
                if (files->fd != &files->fd_array[0])
                        free_fd_array(files->fd, files->max_fds);
                if (files->max_fdset > __FD_SETSIZE) {
                        free_fdset(files->open_fds, files->max_fdset);
                        free_fdset(files->close_on_exec, files->max_fdset);
                }
                kmem_cache_free(files_cachep, files);
        }
}

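/*
 * Detach the files_struct from the task under task_lock() (so other
 * CPUs see either the old table or NULL, never a half-freed one),
 * then drop our reference to it.
 */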
static inline void __exit_files(struct task_struct *tsk)
{
        struct files_struct * files = tsk->files;

        if (files) {
                task_lock(tsk);
                tsk->files = NULL;
                task_unlock(tsk);
                put_files_struct(files);
        }
}

void exit_files(struct task_struct *tsk)
{
        __exit_files(tsk);
}

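/*
 * Drop a reference on an fs_struct.  The last reference releases the
 * dentry and vfsmount references for root, pwd and (if set) altroot,
 * then frees the structure itself.
 */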
static inline void __put_fs_struct(struct fs_struct *fs)
{
        /* No need to hold fs->lock if we are killing it */
        if (atomic_dec_and_test(&fs->count)) {
                dput(fs->root);
                mntput(fs->rootmnt);
                dput(fs->pwd);
                mntput(fs->pwdmnt);
                if (fs->altroot) {
                        dput(fs->altroot);
                        mntput(fs->altrootmnt);
                }
                kmem_cache_free(fs_cachep, fs);
        }
}

void put_fs_struct(struct fs_struct *fs)
{
        __put_fs_struct(fs);
}

static inline void __exit_fs(struct task_struct *tsk)
{
        struct fs_struct * fs = tsk->fs;

        if (fs) {
                task_lock(tsk);
                tsk->fs = NULL;
                task_unlock(tsk);
                __put_fs_struct(fs);
        }
}

void exit_fs(struct task_struct *tsk)
{
        __exit_fs(tsk);
}

/*
 * We can use these to temporarily drop into
 * "lazy TLB" mode and back.
 */
struct mm_struct * start_lazy_tlb(void)
{
        struct mm_struct *mm = current->mm;
        current->mm = NULL;
        /* active_mm is still 'mm' */
        atomic_inc(&mm->mm_count);
        enter_lazy_tlb(mm, current, smp_processor_id());
        return mm;
}

void end_lazy_tlb(struct mm_struct *mm)
{
        struct mm_struct *active_mm = current->active_mm;

        current->mm = mm;
        if (mm != active_mm) {
                current->active_mm = mm;
                activate_mm(active_mm, mm);
        }
        mmdrop(active_mm);
}

/*
 * Turn us into a lazy TLB process if we
 * aren't already..
 */
static inline void __exit_mm(struct task_struct * tsk)
{
        struct mm_struct * mm = tsk->mm;

        mm_release();
        if (mm) {
                atomic_inc(&mm->mm_count);
                if (mm != tsk->active_mm) BUG();
                /* more a memory barrier than a real lock */
                task_lock(tsk);
                tsk->mm = NULL;
                task_unlock(tsk);
                enter_lazy_tlb(mm, current, smp_processor_id());
                mmput(mm);
        }
}

void exit_mm(struct task_struct *tsk)
{
        __exit_mm(tsk);
}

/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us..
 */
static void exit_notify(void)
{
        struct task_struct * p, *t;

        forget_original_parent(current);
        /*
         * Check to see if any process groups have become orphaned
         * as a result of our exiting, and if they have any stopped
         * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
         *
         * Case i: Our father is in a different pgrp than we are
         * and we were the only connection outside, so our pgrp
         * is about to become orphaned.
         */

        t = current->p_pptr;

        if ((t->pgrp != current->pgrp) &&
            (t->session == current->session) &&
            will_become_orphaned_pgrp(current->pgrp, current) &&
            has_stopped_jobs(current->pgrp)) {
                kill_pg(current->pgrp, SIGHUP, 1);
                kill_pg(current->pgrp, SIGCONT, 1);
        }

        /* Let father know we died
         *
         * Thread signals are configurable, but you aren't going to use
         * that to send signals to arbitrary processes.
         * That stops right now.
         *
         * If the parent exec id doesn't match the exec id we saved
         * when we started then we know the parent has changed security
         * domain.
         *
         * If our self_exec id doesn't match our parent_exec_id then
         * we have changed execution domain as these two values started
         * the same after a fork.
         */

        if (current->exit_signal != SIGCHLD &&
            (current->parent_exec_id != t->self_exec_id ||
             current->self_exec_id != current->parent_exec_id) &&
            !capable(CAP_KILL))
                current->exit_signal = SIGCHLD;

        /*
         * This loop does two things:
         *
         * A.  Make init inherit all the child processes
         * B.  Check to see if any process groups have become orphaned
         *      as a result of our exiting, and if they have any stopped
         *      jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
         */

        write_lock_irq(&tasklist_lock);
        current->state = TASK_ZOMBIE;
        do_notify_parent(current, current->exit_signal);
        while (current->p_cptr != NULL) {
                p = current->p_cptr;
                current->p_cptr = p->p_osptr;
                p->p_ysptr = NULL;
                p->ptrace = 0;

                p->p_pptr = p->p_opptr;
                p->p_osptr = p->p_pptr->p_cptr;
                if (p->p_osptr)
                        p->p_osptr->p_ysptr = p;
                p->p_pptr->p_cptr = p;
                if (p->state == TASK_ZOMBIE)
                        do_notify_parent(p, p->exit_signal);
                /*
                 * process group orphan check
                 * Case ii: Our child is in a different pgrp
                 * than we are, and it was the only connection
                 * outside, so the child pgrp is now orphaned.
                 */
                if ((p->pgrp != current->pgrp) &&
                    (p->session == current->session)) {
                        int pgrp = p->pgrp;

                        write_unlock_irq(&tasklist_lock);
                        if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
                                kill_pg(pgrp, SIGHUP, 1);
                                kill_pg(pgrp, SIGCONT, 1);
                        }
                        write_lock_irq(&tasklist_lock);
                }
        }
        write_unlock_irq(&tasklist_lock);
}

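/*
 * The final exit path for every task.  Releases every resource the
 * task still owns (mm, semaphores, files, fs, signal handlers, thread
 * state), notifies the parent, marks the task TASK_ZOMBIE via
 * exit_notify(), and schedules away for the last time.
 */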
NORET_TYPE void do_exit(long code)
{
        struct task_struct *tsk = current;

        if (in_interrupt())
                panic("Aiee, killing interrupt handler!");
        if (!tsk->pid)
                panic("Attempted to kill the idle task!");
        if (tsk->pid == 1)
                panic("Attempted to kill init!");
        tsk->flags |= PF_EXITING;
        del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
        acct_process(code);
#endif
        __exit_mm(tsk);

        lock_kernel();
        sem_exit();
        __exit_files(tsk);
        __exit_fs(tsk);
        exit_sighand(tsk);
        exit_thread();

        if (current->leader)
                disassociate_ctty(1);

        put_exec_domain(tsk->exec_domain);
        if (tsk->binfmt && tsk->binfmt->module)
                __MOD_DEC_USE_COUNT(tsk->binfmt->module);

        tsk->exit_code = code;
        exit_notify();
        schedule();
        BUG();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc to think do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state = TASK_ZOMBIE, schedule() never
 * returns.
 *
 * In fact the natural way to do all this is to have the label and the
 * goto right after each other, but I put the fake_volatile label at
 * the start of the function just in case something /really/ bad
 * happens, and the schedule returns. This way we can try again. I'm
 * not paranoid: it's just that everybody is out to get me.
 */
        goto fake_volatile;
}

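/*
 * Signal the given completion (if any) and then exit.  Typically lets
 * a waiter know this thread is finished with shared state before the
 * task goes away.
 */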
NORET_TYPE void complete_and_exit(struct completion *comp, long code)
{
        if (comp)
                complete(comp);

        do_exit(code);
}

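/*
 * The exit status is packed into wait-status format here: the low
 * eight bits of the user-supplied code end up in bits 8-15, where
 * sys_wait4() (and WEXITSTATUS in userspace) expect to find them.
 */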
asmlinkage long sys_exit(int error_code)
{
        do_exit((error_code & 0xff) << 8);
}

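/*
 * wait4() pid semantics, as implemented by the loop below:
 *   pid >  0   wait for the child with that exact pid
 *   pid == 0   wait for any child in the caller's process group
 *   pid <  -1  wait for any child in process group -pid
 *   pid == -1  wait for any child at all
 */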
asmlinkage long sys_wait4(pid_t pid, unsigned int * stat_addr, int options, struct rusage * ru)
{
        int flag, retval;
        DECLARE_WAITQUEUE(wait, current);
        struct task_struct *tsk;

        if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
                return -EINVAL;

        add_wait_queue(&current->wait_chldexit, &wait);
repeat:
        flag = 0;
        current->state = TASK_INTERRUPTIBLE;
        read_lock(&tasklist_lock);
        tsk = current;
        do {
                struct task_struct *p;
                for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
                        if (pid > 0) {
                                if (p->pid != pid)
                                        continue;
                        } else if (!pid) {
                                if (p->pgrp != current->pgrp)
                                        continue;
                        } else if (pid != -1) {
                                if (p->pgrp != -pid)
                                        continue;
                        }
                        /* Wait for all children (clone and not) if __WALL is set;
                         * otherwise, wait for clone children *only* if __WCLONE is
                         * set; otherwise, wait for non-clone children *only*.  (Note:
                         * A "clone" child here is one that reports to its parent
                         * using a signal other than SIGCHLD.) */
                        if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
                            && !(options & __WALL))
                                continue;
                        flag = 1;
                        switch (p->state) {
                        case TASK_STOPPED:
                                if (!p->exit_code)
                                        continue;
                                if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
                                        continue;
                                read_unlock(&tasklist_lock);
                                retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
                                if (!retval && stat_addr)
                                        retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
                                if (!retval) {
                                        p->exit_code = 0;
                                        retval = p->pid;
                                }
                                goto end_wait4;
                        case TASK_ZOMBIE:
                                current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
                                current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
                                read_unlock(&tasklist_lock);
                                retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
                                if (!retval && stat_addr)
                                        retval = put_user(p->exit_code, stat_addr);
                                if (retval)
                                        goto end_wait4;
                                retval = p->pid;
                                if (p->p_opptr != p->p_pptr) {
                                        write_lock_irq(&tasklist_lock);
                                        REMOVE_LINKS(p);
                                        p->p_pptr = p->p_opptr;
                                        SET_LINKS(p);
                                        do_notify_parent(p, SIGCHLD);
                                        write_unlock_irq(&tasklist_lock);
                                } else
                                        release_task(p);
                                goto end_wait4;
                        default:
                                continue;
                        }
                }
                if (options & __WNOTHREAD)
                        break;
                tsk = next_thread(tsk);
        } while (tsk != current);
        read_unlock(&tasklist_lock);
        if (flag) {
                retval = 0;
                if (options & WNOHANG)
                        goto end_wait4;
                retval = -ERESTARTSYS;
                if (signal_pending(current))
                        goto end_wait4;
                schedule();
                goto repeat;
        }
        retval = -ECHILD;
end_wait4:
        current->state = TASK_RUNNING;
        remove_wait_queue(&current->wait_chldexit, &wait);
        return retval;
}

#if !defined(__alpha__) && !defined(__ia64__)

/*
 * sys_waitpid() remains for compatibility. waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */
asmlinkage long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options)
{
        return sys_wait4(pid, stat_addr, options, NULL);
}

#endif