/*
 *  linux/kernel/exit.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/tty.h>
#ifdef CONFIG_BSD_PROCESS_ACCT
#include <linux/acct.h>
#endif

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

extern void sem_exit (void);
extern struct task_struct *child_reaper;

int getrusage(struct task_struct *, int, struct rusage *);

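/*
 * Final clean-up of an exited task: fold its fault and timeslice
 * statistics into the parent, drop its per-user accounting, unhash
 * it and free the task structure.  On SMP we must first wait until
 * the task has really stopped running on another CPU.
 */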
static void release_task(struct task_struct * p)
{
        if (p != current) {
#ifdef CONFIG_SMP
                /*
                 * Wait to make sure the process isn't on the
                 * runqueue (active on some other CPU still)
                 */
                for (;;) {
                        task_lock(p);
                        if (!p->has_cpu)
                                break;
                        task_unlock(p);
                        do {
                                barrier();
                        } while (p->has_cpu);
                }
                task_unlock(p);
#endif
                atomic_dec(&p->user->processes);
                free_uid(p->user);
                unhash_process(p);

                release_thread(p);
                current->cmin_flt += p->min_flt + p->cmin_flt;
                current->cmaj_flt += p->maj_flt + p->cmaj_flt;
                current->cnswap += p->nswap + p->cnswap;
                /*
                 * Potentially available timeslices are retrieved
                 * here - this way the parent does not get penalized
                 * for creating too many processes.
                 *
                 * (this cannot be used to artificially 'generate'
                 * timeslices, because any timeslice recovered here
                 * was given away by the parent in the first place.)
                 */
                current->counter += p->counter;
                if (current->counter >= MAX_COUNTER)
                        current->counter = MAX_COUNTER;
                p->pid = 0;
                free_task_struct(p);
        } else {
                printk("task releasing itself\n");
        }
}

/*
 * This checks not only the pgrp, but falls back on the pid if no
 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 * without this...
 */
int session_of_pgrp(int pgrp)
{
        struct task_struct *p;
        int fallback;

        fallback = -1;
        read_lock(&tasklist_lock);
        for_each_task(p) {
                if (p->session <= 0)
                        continue;
                if (p->pgrp == pgrp) {
                        fallback = p->session;
                        break;
                }
                if (p->pid == pgrp)
                        fallback = p->session;
        }
        read_unlock(&tasklist_lock);
        return fallback;
}

/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 * by terminal-generated stop signals.  Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
        struct task_struct *p;

        read_lock(&tasklist_lock);
        for_each_task(p) {
                if ((p == ignored_task) || (p->pgrp != pgrp) ||
                    (p->state == TASK_ZOMBIE) ||
                    (p->p_pptr->pid == 1))
                        continue;
                if ((p->p_pptr->pgrp != pgrp) &&
                    (p->p_pptr->session == p->session)) {
                        read_unlock(&tasklist_lock);
                        return 0;
                }
        }
        read_unlock(&tasklist_lock);
        return 1;       /* (sighing) "Often!" */
}

int is_orphaned_pgrp(int pgrp)
{
        return will_become_orphaned_pgrp(pgrp, NULL);
}

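/*
 * Return 1 if any task in the given process group is currently
 * stopped, 0 otherwise.
 */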
static inline int has_stopped_jobs(int pgrp)
{
        int retval = 0;
        struct task_struct * p;

        read_lock(&tasklist_lock);
        for_each_task(p) {
                if (p->pgrp != pgrp)
                        continue;
                if (p->state != TASK_STOPPED)
                        continue;
                retval = 1;
                break;
        }
        read_unlock(&tasklist_lock);
        return retval;
}

/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our process
 * group, and if no such member exists, give them to
 * the global child reaper process (ie "init")
 */
static inline void forget_original_parent(struct task_struct * father)
{
        struct task_struct * p, *reaper;

        read_lock(&tasklist_lock);

        /* Next in our thread group */
        reaper = next_thread(father);
        if (reaper == father)
                reaper = child_reaper;

        for_each_task(p) {
                if (p->p_opptr == father) {
                        /* We don't want people slaying init */
                        p->exit_signal = SIGCHLD;
                        p->self_exec_id++;
                        p->p_opptr = reaper;
                        if (p->pdeath_signal)
                                send_sig(p->pdeath_signal, p, 0);
                }
        }
        read_unlock(&tasklist_lock);
}

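/*
 * Close every file descriptor that is still open in the given
 * files_struct, walking the open_fds bitmap one word at a time.
 */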
static inline void close_files(struct files_struct * files)
{
        int i, j;

        j = 0;
        for (;;) {
                unsigned long set;
                i = j * __NFDBITS;
                if (i >= files->max_fdset || i >= files->max_fds)
                        break;
                set = files->open_fds->fds_bits[j++];
                while (set) {
                        if (set & 1) {
                                struct file * file = xchg(&files->fd[i], NULL);
                                if (file)
                                        filp_close(file, files);
                        }
                        i++;
                        set >>= 1;
                }
        }
}

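/*
 * Drop a reference to a files_struct.  When the last reference goes
 * away, close all remaining files and free the structure itself.
 */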
void put_files_struct(struct files_struct *files)
{
        if (atomic_dec_and_test(&files->count)) {
                close_files(files);
                /*
                 * Free the fd and fdset arrays if we expanded them.
                 */
                if (files->fd != &files->fd_array[0])
                        free_fd_array(files->fd, files->max_fds);
                if (files->max_fdset > __FD_SETSIZE) {
                        free_fdset(files->open_fds, files->max_fdset);
                        free_fdset(files->close_on_exec, files->max_fdset);
                }
                kmem_cache_free(files_cachep, files);
        }
}

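/*
 * Detach the task from its files_struct under the task lock, then
 * drop the reference we held.
 */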
static inline void __exit_files(struct task_struct *tsk)
{
        struct files_struct * files = tsk->files;

        if (files) {
                task_lock(tsk);
                tsk->files = NULL;
                task_unlock(tsk);
                put_files_struct(files);
        }
}

void exit_files(struct task_struct *tsk)
{
        __exit_files(tsk);
}

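/*
 * Drop a reference to an fs_struct.  On the final put, release the
 * root, pwd and (if set) altroot dentries together with their
 * vfsmounts, then free the structure.
 */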
static inline void __put_fs_struct(struct fs_struct *fs)
{
        /* No need to hold fs->lock if we are killing it */
        if (atomic_dec_and_test(&fs->count)) {
                dput(fs->root);
                mntput(fs->rootmnt);
                dput(fs->pwd);
                mntput(fs->pwdmnt);
                if (fs->altroot) {
                        dput(fs->altroot);
                        mntput(fs->altrootmnt);
                }
                kmem_cache_free(fs_cachep, fs);
        }
}

void put_fs_struct(struct fs_struct *fs)
{
        __put_fs_struct(fs);
}

static inline void __exit_fs(struct task_struct *tsk)
{
        struct fs_struct * fs = tsk->fs;

        if (fs) {
                task_lock(tsk);
                tsk->fs = NULL;
                task_unlock(tsk);
                __put_fs_struct(fs);
        }
}

void exit_fs(struct task_struct *tsk)
{
        __exit_fs(tsk);
}

/*
 * We can use these to temporarily drop into
 * "lazy TLB" mode and back.
 */
struct mm_struct * start_lazy_tlb(void)
{
        struct mm_struct *mm = current->mm;
        current->mm = NULL;
        /* active_mm is still 'mm' */
        atomic_inc(&mm->mm_count);
        enter_lazy_tlb(mm, current, smp_processor_id());
        return mm;
}

void end_lazy_tlb(struct mm_struct *mm)
{
        struct mm_struct *active_mm = current->active_mm;

        current->mm = mm;
        if (mm != active_mm) {
                current->active_mm = mm;
                activate_mm(active_mm, mm);
        }
        mmdrop(active_mm);
}

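/*
 * The two are meant to be used as a bracketing pair, roughly
 * (a sketch, assuming the caller does nothing in between that
 * requires the user address space):
 *
 *      mm = start_lazy_tlb();
 *      ... run without a user address space ...
 *      end_lazy_tlb(mm);
 */
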
/*
 * Turn us into a lazy TLB process if we
 * aren't already..
 */
static inline void __exit_mm(struct task_struct * tsk)
{
        struct mm_struct * mm = tsk->mm;

        mm_release();
        if (mm) {
                atomic_inc(&mm->mm_count);
                if (mm != tsk->active_mm)
                        BUG();
                /* more a memory barrier than a real lock */
                task_lock(tsk);
                tsk->mm = NULL;
                task_unlock(tsk);
                enter_lazy_tlb(mm, current, smp_processor_id());
                mmput(mm);
        }
}

void exit_mm(struct task_struct *tsk)
{
        __exit_mm(tsk);
}

/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us..
 */
static void exit_notify(void)
{
        struct task_struct * p, *t;

        forget_original_parent(current);
        /*
         * Check to see if any process groups have become orphaned
         * as a result of our exiting, and if they have any stopped
         * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
         *
         * Case i: Our father is in a different pgrp than we are
         * and we were the only connection outside, so our pgrp
         * is about to become orphaned.
         */

        t = current->p_pptr;

        if ((t->pgrp != current->pgrp) &&
            (t->session == current->session) &&
            will_become_orphaned_pgrp(current->pgrp, current) &&
            has_stopped_jobs(current->pgrp)) {
                kill_pg(current->pgrp, SIGHUP, 1);
                kill_pg(current->pgrp, SIGCONT, 1);
        }

        /* Let father know we died
         *
         * Thread signals are configurable, but you aren't going to use
         * that to send signals to arbitrary processes.
         * That stops right now.
         *
         * If the parent exec id doesn't match the exec id we saved
         * when we started then we know the parent has changed security
         * domain.
         *
         * If our self_exec id doesn't match our parent_exec_id then
         * we have changed execution domain as these two values started
         * the same after a fork.
         */

        if (current->exit_signal != SIGCHLD &&
            (current->parent_exec_id != t->self_exec_id ||
             current->self_exec_id != current->parent_exec_id) &&
            !capable(CAP_KILL))
                current->exit_signal = SIGCHLD;

        /*
         * This loop does two things:
         *
         * A.  Make init inherit all the child processes
         * B.  Check to see if any process groups have become orphaned
         *      as a result of our exiting, and if they have any stopped
         *      jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
         */

        write_lock_irq(&tasklist_lock);
        current->state = TASK_ZOMBIE;
        do_notify_parent(current, current->exit_signal);
        while (current->p_cptr != NULL) {
                p = current->p_cptr;
                current->p_cptr = p->p_osptr;
                p->p_ysptr = NULL;
                p->ptrace = 0;

                p->p_pptr = p->p_opptr;
                p->p_osptr = p->p_pptr->p_cptr;
                if (p->p_osptr)
                        p->p_osptr->p_ysptr = p;
                p->p_pptr->p_cptr = p;
                if (p->state == TASK_ZOMBIE)
                        do_notify_parent(p, p->exit_signal);
                /*
                 * process group orphan check
                 * Case ii: Our child is in a different pgrp
                 * than we are, and it was the only connection
                 * outside, so the child pgrp is now orphaned.
                 */
                if ((p->pgrp != current->pgrp) &&
                    (p->session == current->session)) {
                        int pgrp = p->pgrp;

                        write_unlock_irq(&tasklist_lock);
                        if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
                                kill_pg(pgrp, SIGHUP, 1);
                                kill_pg(pgrp, SIGCONT, 1);
                        }
                        write_lock_irq(&tasklist_lock);
                }
        }
        write_unlock_irq(&tasklist_lock);
}

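/*
 * The final exit path.  Called (indirectly) from sys_exit() and from
 * various kernel error paths: releases the task's mm, files, fs and
 * signal state, notifies the parent, and schedules away for good.
 */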
NORET_TYPE void do_exit(long code)
{
        struct task_struct *tsk = current;

        if (in_interrupt())
                panic("Aiee, killing interrupt handler!");
        if (!tsk->pid)
                panic("Attempted to kill the idle task!");
        if (tsk->pid == 1)
                panic("Attempted to kill init!");
        tsk->flags |= PF_EXITING;
        del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
        acct_process(code);
#endif
        __exit_mm(tsk);

        lock_kernel();
        sem_exit();
        __exit_files(tsk);
        __exit_fs(tsk);
        exit_sighand(tsk);
        exit_thread();

        if (current->leader)
                disassociate_ctty(1);

        put_exec_domain(tsk->exec_domain);
        if (tsk->binfmt && tsk->binfmt->module)
                __MOD_DEC_USE_COUNT(tsk->binfmt->module);

        tsk->exit_code = code;
        exit_notify();
        schedule();
        BUG();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc to think do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state = TASK_ZOMBIE, schedule() never
 * returns.
 *
 * In fact the natural way to do all this is to have the label and the
 * goto right after each other, but I put the fake_volatile label at
 * the start of the function just in case something /really/ bad
 * happens, and the schedule returns. This way we can try again. I'm
 * not paranoid: it's just that everybody is out to get me.
 */
        goto fake_volatile;
}

NORET_TYPE void complete_and_exit(struct completion *comp, long code)
{
        if (comp)
                complete(comp);

        do_exit(code);
}

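/*
 * The user-visible exit status lives in bits 8-15 of the wait
 * status word, so an exit(1) from user space becomes the status
 * 0x0100 as seen by wait(2).
 */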
asmlinkage long sys_exit(int error_code)
{
        do_exit((error_code & 0xff) << 8);
}

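/*
 * Wait for a child to exit or stop.  The pid argument selects which
 * children qualify:
 *
 *      pid >  0        exactly the child with that pid
 *      pid == 0        any child in our own process group
 *      pid <  -1       any child in process group -pid
 *      pid == -1       any child at all
 *
 * A stopped child is reported with status ((exit_code << 8) | 0x7f);
 * an exited child is reported with the status saved in its exit_code.
 */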
asmlinkage long sys_wait4(pid_t pid, unsigned int * stat_addr, int options, struct rusage * ru)
{
        int flag, retval;
        DECLARE_WAITQUEUE(wait, current);
        struct task_struct *tsk;

        if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
                return -EINVAL;

        add_wait_queue(&current->wait_chldexit, &wait);
repeat:
        flag = 0;
        current->state = TASK_INTERRUPTIBLE;
        read_lock(&tasklist_lock);
        tsk = current;
        do {
                struct task_struct *p;
                for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
                        if (pid > 0) {
                                if (p->pid != pid)
                                        continue;
                        } else if (!pid) {
                                if (p->pgrp != current->pgrp)
                                        continue;
                        } else if (pid != -1) {
                                if (p->pgrp != -pid)
                                        continue;
                        }
                        /* Wait for all children (clone and not) if __WALL is set;
                         * otherwise, wait for clone children *only* if __WCLONE is
                         * set; otherwise, wait for non-clone children *only*.  (Note:
                         * A "clone" child here is one that reports to its parent
                         * using a signal other than SIGCHLD.) */
                        if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
                            && !(options & __WALL))
                                continue;
                        flag = 1;
                        switch (p->state) {
                        case TASK_STOPPED:
                                if (!p->exit_code)
                                        continue;
                                if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
                                        continue;
                                read_unlock(&tasklist_lock);
                                retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
                                if (!retval && stat_addr)
                                        retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
                                if (!retval) {
                                        p->exit_code = 0;
                                        retval = p->pid;
                                }
                                goto end_wait4;
                        case TASK_ZOMBIE:
                                current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
                                current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
                                read_unlock(&tasklist_lock);
                                retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
                                if (!retval && stat_addr)
                                        retval = put_user(p->exit_code, stat_addr);
                                if (retval)
                                        goto end_wait4;
                                retval = p->pid;
                                if (p->p_opptr != p->p_pptr) {
                                        /* Ptraced child: hand it back to its original parent. */
                                        write_lock_irq(&tasklist_lock);
                                        REMOVE_LINKS(p);
                                        p->p_pptr = p->p_opptr;
                                        SET_LINKS(p);
                                        do_notify_parent(p, SIGCHLD);
                                        write_unlock_irq(&tasklist_lock);
                                } else
                                        release_task(p);
                                goto end_wait4;
                        default:
                                continue;
                        }
                }
                if (options & __WNOTHREAD)
                        break;
                tsk = next_thread(tsk);
        } while (tsk != current);
        read_unlock(&tasklist_lock);
        if (flag) {
                retval = 0;
                if (options & WNOHANG)
                        goto end_wait4;
                retval = -ERESTARTSYS;
                if (signal_pending(current))
                        goto end_wait4;
                schedule();
                goto repeat;
        }
        retval = -ECHILD;
end_wait4:
        current->state = TASK_RUNNING;
        remove_wait_queue(&current->wait_chldexit, &wait);
        return retval;
}

#if !defined(__alpha__) && !defined(__ia64__)

/*
 * sys_waitpid() remains for compatibility. waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */
asmlinkage long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options)
{
        return sys_wait4(pid, stat_addr, options, NULL);
}

#endif