v2.4.9.9 -> v2.4.9.10
[opensuse:kernel.git] / fs / exec.c
1 /*
2  *  linux/fs/exec.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6
7 /*
8  * #!-checking implemented by tytso.
9  */
10 /*
11  * Demand-loading implemented 01.12.91 - no need to read anything but
12  * the header into memory. The inode of the executable is put into
13  * "current->executable", and page faults do the actual loading. Clean.
14  *
15  * Once more I can proudly say that linux stood up to being changed: it
16  * was less than 2 hours work to get demand-loading completely implemented.
17  *
18  * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
19  * current->executable is only used by the procfs.  This allows a dispatch
20  * table to check for several different types  of binary formats.  We keep
21  * trying until we recognize the file or we run out of supported binary
22  * formats. 
23  */
24
25 #include <linux/config.h>
26 #include <linux/slab.h>
27 #include <linux/file.h>
28 #include <linux/mman.h>
29 #include <linux/a.out.h>
30 #include <linux/stat.h>
31 #include <linux/fcntl.h>
32 #include <linux/smp_lock.h>
33 #include <linux/init.h>
34 #include <linux/pagemap.h>
35 #include <linux/highmem.h>
36 #include <linux/spinlock.h>
37 #define __NO_VERSION__
38 #include <linux/module.h>
39
40 #include <asm/uaccess.h>
41 #include <asm/pgalloc.h>
42 #include <asm/mmu_context.h>
43
44 #ifdef CONFIG_KMOD
45 #include <linux/kmod.h>
46 #endif
47
/* When non-zero, core files are named "core.<pid>" (see do_coredump();
 * presumably toggled via sysctl -- not visible in this file). */
int core_uses_pid;

/* Singly-linked list of registered binary format handlers, protected
 * by binfmt_lock. */
static struct linux_binfmt *formats;
static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;
52
53 int register_binfmt(struct linux_binfmt * fmt)
54 {
55         struct linux_binfmt ** tmp = &formats;
56
57         if (!fmt)
58                 return -EINVAL;
59         if (fmt->next)
60                 return -EBUSY;
61         write_lock(&binfmt_lock);
62         while (*tmp) {
63                 if (fmt == *tmp) {
64                         write_unlock(&binfmt_lock);
65                         return -EBUSY;
66                 }
67                 tmp = &(*tmp)->next;
68         }
69         fmt->next = formats;
70         formats = fmt;
71         write_unlock(&binfmt_lock);
72         return 0;       
73 }
74
75 int unregister_binfmt(struct linux_binfmt * fmt)
76 {
77         struct linux_binfmt ** tmp = &formats;
78
79         write_lock(&binfmt_lock);
80         while (*tmp) {
81                 if (fmt == *tmp) {
82                         *tmp = fmt->next;
83                         write_unlock(&binfmt_lock);
84                         return 0;
85                 }
86                 tmp = &(*tmp)->next;
87         }
88         write_unlock(&binfmt_lock);
89         return -EINVAL;
90 }
91
/* Drop the module reference taken via try_inc_mod_count() when this
 * binfmt was selected; built-in formats have a NULL ->module. */
static inline void put_binfmt(struct linux_binfmt * fmt)
{
	if (fmt->module)
		__MOD_DEC_USE_COUNT(fmt->module);
}
97
98 /*
99  * Note that a shared library must be both readable and executable due to
100  * security reasons.
101  *
102  * Also note that we take the address to load from from the file itself.
103  */
asmlinkage long sys_uselib(const char * library)
{
	struct file * file;
	struct nameidata nd;
	int error;

	/* Resolve the user-supplied path; nothing is held on failure. */
	error = user_path_walk(library, &nd);
	if (error)
		goto out;

	/* Shared libraries must be regular files. */
	error = -EINVAL;
	if (!S_ISREG(nd.dentry->d_inode->i_mode))
		goto exit;

	/* Must be both readable and executable (see comment above). */
	error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC);
	if (error)
		goto exit;

	/* NOTE: relies on dentry_open() consuming the dentry/mnt
	 * references even on failure, hence no path_release() here. */
	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
	error = PTR_ERR(file);
	if (IS_ERR(file))
		goto out;

	error = -ENOEXEC;
	if(file->f_op && file->f_op->read) {
		struct linux_binfmt * fmt;

		/* Offer the file to every handler with a load_shlib()
		 * hook until one gives a verdict other than -ENOEXEC.
		 * binfmt_lock is dropped around the call; the module
		 * refcount keeps fmt alive meanwhile. */
		read_lock(&binfmt_lock);
		for (fmt = formats ; fmt ; fmt = fmt->next) {
			if (!fmt->load_shlib)
				continue;
			if (!try_inc_mod_count(fmt->module))
				continue;
			read_unlock(&binfmt_lock);
			error = fmt->load_shlib(file);
			read_lock(&binfmt_lock);
			put_binfmt(fmt);
			if (error != -ENOEXEC)
				break;
		}
		read_unlock(&binfmt_lock);
	}
	fput(file);
out:
	return error;
exit:
	path_release(&nd);
	goto out;
}
153
154 /*
155  * count() counts the number of arguments/envelopes
156  */
157 static int count(char ** argv, int max)
158 {
159         int i = 0;
160
161         if (argv != NULL) {
162                 for (;;) {
163                         char * p;
164
165                         if (get_user(p, argv))
166                                 return -EFAULT;
167                         if (!p)
168                                 break;
169                         argv++;
170                         if(++i > max)
171                                 return -E2BIG;
172                 }
173         }
174         return i;
175 }
176
177 /*
178  * 'copy_strings()' copies argument/envelope strings from user
179  * memory to free pages in kernel mem. These are in a format ready
180  * to be put directly into the top of new user memory.
181  */
int copy_strings(int argc,char ** argv, struct linux_binprm *bprm) 
{
	/* Strings are copied highest-index first so they land at the top
	 * of the argument area; bprm->p counts down from the top. */
	while (argc-- > 0) {
		char *str;
		int len;
		unsigned long pos;

		/* len includes the trailing NUL (strnlen_user semantics);
		 * a zero result means the string was unreadable. */
		if (get_user(str, argv+argc) || !str || !(len = strnlen_user(str, bprm->p))) 
			return -EFAULT;
		if (bprm->p < len) 
			return -E2BIG; 

		bprm->p -= len;
		/* XXX: add architecture specific overflow check here. */ 

		pos = bprm->p;
		while (len > 0) {
			char *kaddr;
			int i, new, err;
			struct page *page;
			int offset, bytes_to_copy;

			offset = pos % PAGE_SIZE;
			i = pos/PAGE_SIZE;
			page = bprm->page[i];
			new = 0;
			if (!page) {
				/* Argument pages are allocated lazily. */
				page = alloc_page(GFP_HIGHUSER);
				bprm->page[i] = page;
				if (!page)
					return -ENOMEM;
				new = 1;
			}
			kaddr = kmap(page);

			/* Zero the parts of a fresh page that this copy
			 * won't cover, so no stale kernel data can leak
			 * into the new process image. */
			if (new && offset)
				memset(kaddr, 0, offset);
			bytes_to_copy = PAGE_SIZE - offset;
			if (bytes_to_copy > len) {
				bytes_to_copy = len;
				if (new)
					memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len);
			}
			err = copy_from_user(kaddr + offset, str, bytes_to_copy);
			kunmap(page);

			if (err)
				return -EFAULT; 

			pos += bytes_to_copy;
			str += bytes_to_copy;
			len -= bytes_to_copy;
		}
	}
	return 0;
}
238
239 /*
240  * Like copy_strings, but get argv and its values from kernel memory.
241  */
242 int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
243 {
244         int r;
245         mm_segment_t oldfs = get_fs();
246         set_fs(KERNEL_DS); 
247         r = copy_strings(argc, argv, bprm);
248         set_fs(oldfs);
249         return r; 
250 }
251
252 /*
253  * This routine is used to map in a page into an address space: needed by
254  * execve() for the initial stack and environment pages.
255  *
256  * tsk->mmap_sem is held for writing.
257  */
void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * pte;

	/* The caller should hold the only reference to this page. */
	if (page_count(page) != 1)
		printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
	pgd = pgd_offset(tsk->mm, address);

	spin_lock(&tsk->mm->page_table_lock);
	pmd = pmd_alloc(tsk->mm, pgd, address);
	if (!pmd)
		goto out;
	pte = pte_alloc(tsk->mm, pmd, address);
	if (!pte)
		goto out;
	/* Something is already mapped here -- treat it as fatal. */
	if (!pte_none(*pte))
		goto out;
	flush_dcache_page(page);
	flush_page_to_ram(page);
	/* Map the page writable and already dirty: execve() filled it. */
	set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY))));
	tsk->mm->rss++;
	spin_unlock(&tsk->mm->page_table_lock);

	/* no need for flush_tlb */
	return;
out:
	/* No page tables (or lost a race): drop the page and kill the
	 * task -- this function has no way to report failure. */
	spin_unlock(&tsk->mm->page_table_lock);
	__free_page(page);
	force_sig(SIGKILL, tsk);
	return;
}
291
/*
 * Create the stack vma for the new image and move the pages holding
 * the copied argument/environment strings into it.
 * Returns 0 or -ENOMEM.
 */
int setup_arg_pages(struct linux_binprm *bprm)
{
	unsigned long stack_base;
	struct vm_area_struct *mpnt;
	int i;

	stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;

	/* Turn offsets within the arg area into real user addresses. */
	bprm->p += stack_base;
	if (bprm->loader)
		bprm->loader += stack_base;
	bprm->exec += stack_base;

	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!mpnt) 
		return -ENOMEM; 
	
	down_write(&current->mm->mmap_sem);
	{
		/* Stack vma runs from the first used arg page to STACK_TOP. */
		mpnt->vm_mm = current->mm;
		mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
		mpnt->vm_end = STACK_TOP;
		mpnt->vm_page_prot = PAGE_COPY;
		mpnt->vm_flags = VM_STACK_FLAGS;
		mpnt->vm_ops = NULL;
		mpnt->vm_pgoff = 0;
		mpnt->vm_file = NULL;
		mpnt->vm_private_data = (void *) 0;
		insert_vm_struct(current->mm, mpnt);
		current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
	} 

	/* Hand each populated arg page over to the new vma; the page
	 * reference moves into the page tables (see put_dirty_page). */
	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
		struct page *page = bprm->page[i];
		if (page) {
			bprm->page[i] = NULL;
			put_dirty_page(current,page,stack_base);
		}
		stack_base += PAGE_SIZE;
	}
	up_write(&current->mm->mmap_sem);
	
	return 0;
}
336
337 struct file *open_exec(const char *name)
338 {
339         struct nameidata nd;
340         struct inode *inode;
341         struct file *file;
342         int err = 0;
343
344         if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
345                 err = path_walk(name, &nd);
346         file = ERR_PTR(err);
347         if (!err) {
348                 inode = nd.dentry->d_inode;
349                 file = ERR_PTR(-EACCES);
350                 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
351                     S_ISREG(inode->i_mode)) {
352                         int err = permission(inode, MAY_EXEC);
353                         if (!err && !(inode->i_mode & 0111))
354                                 err = -EACCES;
355                         file = ERR_PTR(err);
356                         if (!err) {
357                                 file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
358                                 if (!IS_ERR(file)) {
359                                         err = deny_write_access(file);
360                                         if (err) {
361                                                 fput(file);
362                                                 file = ERR_PTR(err);
363                                         }
364                                 }
365 out:
366                                 return file;
367                         }
368                 }
369                 path_release(&nd);
370         }
371         goto out;
372 }
373
374 int kernel_read(struct file *file, unsigned long offset,
375         char * addr, unsigned long count)
376 {
377         mm_segment_t old_fs;
378         loff_t pos = offset;
379         int result = -ENOSYS;
380
381         if (!file->f_op->read)
382                 goto fail;
383         old_fs = get_fs();
384         set_fs(get_ds());
385         result = file->f_op->read(file, addr, count, &pos);
386         set_fs(old_fs);
387 fail:
388         return result;
389 }
390
/*
 * Give the exec'ing task a fresh address space.
 *
 * Fast path: if current is the sole user of its mm, strip all the
 * mappings in place and reuse it.  Otherwise allocate a new mm,
 * publish it under task_lock(), switch to it, and drop the reference
 * on the old one.  Returns 0 or -ENOMEM.
 */
static int exec_mmap(void)
{
	struct mm_struct * mm, * old_mm;

	old_mm = current->mm;
	if (old_mm && atomic_read(&old_mm->mm_users) == 1) {
		/* Sole user: complete vfork, drop all vmas, reuse the mm. */
		mm_release();
		exit_mmap(old_mm);
		return 0;
	}

	mm = mm_alloc();
	if (mm) {
		struct mm_struct *active_mm;

		if (init_new_context(current, mm)) {
			mmdrop(mm);
			return -ENOMEM;
		}

		/* Add it to the list of mm's */
		spin_lock(&mmlist_lock);
		list_add(&mm->mmlist, &init_mm.mmlist);
		mmlist_nr++;
		spin_unlock(&mmlist_lock);

		/* Publish the new mm atomically w.r.t. task_lock() readers. */
		task_lock(current);
		active_mm = current->active_mm;
		current->mm = mm;
		current->active_mm = mm;
		task_unlock(current);
		activate_mm(active_mm, mm);
		mm_release();
		if (old_mm) {
			/* A task with a user mm must have had it active. */
			if (active_mm != old_mm) BUG();
			mmput(old_mm);
			return 0;
		}
		/* Was a lazy-TLB kernel thread: drop the borrowed mm. */
		mmdrop(active_mm);
		return 0;
	}
	return -ENOMEM;
}
434
435 /*
436  * This function makes sure the current process has its own signal table,
437  * so that flush_signal_handlers can later reset the handlers without
438  * disturbing other processes.  (Other processes might share the signal
439  * table via the CLONE_SIGNAL option to clone().)
440  */
441  
static inline int make_private_signals(void)
{
	struct signal_struct * newsig;

	/* Already unshared -- nothing to do. */
	if (atomic_read(&current->sig->count) <= 1)
		return 0;
	newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
	if (newsig == NULL)
		return -ENOMEM;
	spin_lock_init(&newsig->siglock);
	atomic_set(&newsig->count, 1);
	memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
	/* Swap in the copy.  The old table's refcount is dropped later
	 * by release_old_signals(), so flush_old_exec() can still back
	 * out on error (see comment below). */
	spin_lock_irq(&current->sigmask_lock);
	current->sig = newsig;
	spin_unlock_irq(&current->sigmask_lock);
	return 0;
}
459         
460 /*
461  * If make_private_signals() made a copy of the signal table, decrement the
462  * refcount of the original table, and free it if necessary.
463  * We don't do that in make_private_signals() so that we can back off
464  * in flush_old_exec() if an error occurs after calling make_private_signals().
465  */
466
467 static inline void release_old_signals(struct signal_struct * oldsig)
468 {
469         if (current->sig == oldsig)
470                 return;
471         if (atomic_dec_and_test(&oldsig->count))
472                 kmem_cache_free(sigact_cachep, oldsig);
473 }
474
475 /*
476  * These functions flushes out all traces of the currently running executable
477  * so that a new one can be started
478  */
479
/* Close every fd marked close-on-exec, clearing the bits as we go. */
static inline void flush_old_files(struct files_struct * files)
{
	long j = -1;

	write_lock(&files->file_lock);
	/* Walk the close-on-exec bitmap one word at a time. */
	for (;;) {
		unsigned long set, i;

		j++;
		i = j * __NFDBITS;
		if (i >= files->max_fds || i >= files->max_fdset)
			break;
		set = files->close_on_exec->fds_bits[j];
		if (!set)
			continue;
		/* Snapshot and clear the whole word, then drop the lock:
		 * sys_close() takes file_lock itself. */
		files->close_on_exec->fds_bits[j] = 0;
		write_unlock(&files->file_lock);
		for ( ; set ; i++,set >>= 1) {
			if (set & 1) {
				sys_close(i);
			}
		}
		write_lock(&files->file_lock);

	}
	write_unlock(&files->file_lock);
}
507
508 /*
509  * An execve() will automatically "de-thread" the process.
510  * Note: we don't have to hold the tasklist_lock to test
511  * whether we migth need to do this. If we're not part of
512  * a thread group, there is no way we can become one
513  * dynamically. And if we are, we only need to protect the
514  * unlink - even if we race with the last other thread exit,
515  * at worst the list_del_init() might end up being a no-op.
516  */
517 static inline void de_thread(struct task_struct *tsk)
518 {
519         if (!list_empty(&tsk->thread_group)) {
520                 write_lock_irq(&tasklist_lock);
521                 list_del_init(&tsk->thread_group);
522                 write_unlock_irq(&tasklist_lock);
523         }
524
525         /* Minor oddity: this might stay the same. */
526         tsk->tgid = tsk->pid;
527 }
528
529 int flush_old_exec(struct linux_binprm * bprm)
530 {
531         char * name;
532         int i, ch, retval;
533         struct signal_struct * oldsig;
534
535         /*
536          * Make sure we have a private signal table
537          */
538         oldsig = current->sig;
539         retval = make_private_signals();
540         if (retval) goto flush_failed;
541
542         /* 
543          * Release all of the old mmap stuff
544          */
545         retval = exec_mmap();
546         if (retval) goto mmap_failed;
547
548         /* This is the point of no return */
549         release_old_signals(oldsig);
550
551         current->sas_ss_sp = current->sas_ss_size = 0;
552
553         if (current->euid == current->uid && current->egid == current->gid)
554                 current->mm->dumpable = 1;
555         name = bprm->filename;
556         for (i=0; (ch = *(name++)) != '\0';) {
557                 if (ch == '/')
558                         i = 0;
559                 else
560                         if (i < 15)
561                                 current->comm[i++] = ch;
562         }
563         current->comm[i] = '\0';
564
565         flush_thread();
566
567         de_thread(current);
568
569         if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || 
570             permission(bprm->file->f_dentry->d_inode,MAY_READ))
571                 current->mm->dumpable = 0;
572
573         /* An exec changes our domain. We are no longer part of the thread
574            group */
575            
576         current->self_exec_id++;
577                         
578         flush_signal_handlers(current);
579         flush_old_files(current->files);
580
581         return 0;
582
583 mmap_failed:
584 flush_failed:
585         spin_lock_irq(&current->sigmask_lock);
586         if (current->sig != oldsig) {
587                 kfree(current->sig);
588                 current->sig = oldsig;
589         }
590         spin_unlock_irq(&current->sigmask_lock);
591         return retval;
592 }
593
594 /*
595  * We mustn't allow tracing of suid binaries, unless
596  * the tracer has the capability to trace anything..
597  */
598 static inline int must_not_trace_exec(struct task_struct * p)
599 {
600         return (p->ptrace & PT_PTRACED) && !cap_raised(p->p_pptr->cap_effective, CAP_SYS_PTRACE);
601 }
602
603 /* 
604  * Fill the binprm structure from the inode. 
605  * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
606  */
int prepare_binprm(struct linux_binprm *bprm)
{
	int mode;
	struct inode * inode = bprm->file->f_dentry->d_inode;

	mode = inode->i_mode;
	/*
	 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
	 * vfs_permission lets a non-executable through
	 */
	if (!(mode & 0111))	/* with at least _one_ execute bit set */
		return -EACCES;
	if (bprm->file->f_op == NULL)
		return -EACCES;

	/* Start from the caller's effective ids ... */
	bprm->e_uid = current->euid;
	bprm->e_gid = current->egid;

	/* ... and honour setuid/setgid bits unless the mount forbids it. */
	if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
		/* Set-uid? */
		if (mode & S_ISUID)
			bprm->e_uid = inode->i_uid;

		/* Set-gid? */
		/*
		 * If setgid is set but no group execute bit then this
		 * is a candidate for mandatory locking, not a setgid
		 * executable.
		 */
		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
			bprm->e_gid = inode->i_gid;
	}

	/* We don't have VFS support for capabilities yet */
	cap_clear(bprm->cap_inheritable);
	cap_clear(bprm->cap_permitted);
	cap_clear(bprm->cap_effective);

	/*  To support inheritance of root-permissions and suid-root
	 *  executables under compatibility mode, we raise all three
	 *  capability sets for the file.
	 *
	 *  If only the real uid is 0, we only raise the inheritable
	 *  and permitted sets of the executable file.
	 */

	if (!issecure(SECURE_NOROOT)) {
		if (bprm->e_uid == 0 || current->uid == 0) {
			cap_set_full(bprm->cap_inheritable);
			cap_set_full(bprm->cap_permitted);
		}
		if (bprm->e_uid == 0) 
			cap_set_full(bprm->cap_effective);
	}

	/* Finally slurp in the header bytes the format probes look at. */
	memset(bprm->buf,0,BINPRM_BUF_SIZE);
	return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
}
665
666 /*
667  * This function is used to produce the new IDs and capabilities
668  * from the old ones and the file's capabilities.
669  *
670  * The formula used for evolving capabilities is:
671  *
672  *       pI' = pI
673  * (***) pP' = (fP & X) | (fI & pI)
674  *       pE' = pP' & fE          [NB. fE is 0 or ~0]
675  *
676  * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
677  * ' indicates post-exec(), and X is the global 'cap_bset'.
678  *
679  */
680
void compute_creds(struct linux_binprm *bprm) 
{
	kernel_cap_t new_permitted, working;
	int do_unlock = 0;

	/* pP' = (fP & cap_bset) | (fI & pI) -- the (***) formula above. */
	new_permitted = cap_intersect(bprm->cap_permitted, cap_bset);
	working = cap_intersect(bprm->cap_inheritable,
				current->cap_inheritable);
	new_permitted = cap_combine(new_permitted, working);

	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
	    !cap_issubset(new_permitted, current->cap_permitted)) {
		/* Privilege is changing: forbid core dumps of this image. */
		current->mm->dumpable = 0;
		
		/* If the task is traced, or shares fs/files/signals with
		 * another task, the new privileges could be abused by the
		 * sharer -- strip them unless the task is allowed to keep
		 * setuid/setpcap powers anyway. */
		lock_kernel();
		if (must_not_trace_exec(current)
		    || atomic_read(&current->fs->count) > 1
		    || atomic_read(&current->files->count) > 1
		    || atomic_read(&current->sig->count) > 1) {
			if(!capable(CAP_SETUID)) {
				bprm->e_uid = current->uid;
				bprm->e_gid = current->gid;
			}
			if(!capable(CAP_SETPCAP)) {
				new_permitted = cap_intersect(new_permitted,
							current->cap_permitted);
			}
		}
		do_unlock = 1;
	}


	/* For init, we want to retain the capabilities set
	 * in the init_task struct. Thus we skip the usual
	 * capability rules */
	if (current->pid != 1) {
		current->cap_permitted = new_permitted;
		/* pE' = pP' & fE */
		current->cap_effective =
			cap_intersect(new_permitted, bprm->cap_effective);
	}
	
	/* AUD: Audit candidate if current->cap_effective is set */

	current->suid = current->euid = current->fsuid = bprm->e_uid;
	current->sgid = current->egid = current->fsgid = bprm->e_gid;

	if(do_unlock)
		unlock_kernel();
	current->keep_capabilities = 0;
}
731
732
/*
 * Drop argv[0] from the copied argument pages: advance bprm->p past
 * the first NUL-terminated string (which may cross page boundaries)
 * and decrement argc.  Used by interpreter handlers that substitute
 * their own argv[0].
 */
void remove_arg_zero(struct linux_binprm *bprm)
{
	if (bprm->argc) {
		unsigned long offset;
		char * kaddr;
		struct page *page;

		offset = bprm->p % PAGE_SIZE;
		/* Jump into the loop body so the first page is mapped
		 * before the loop condition dereferences kaddr. */
		goto inside;

		while (bprm->p++, *(kaddr+offset++)) {
			if (offset != PAGE_SIZE)
				continue;
			/* Crossed into the next page: remap. */
			offset = 0;
			kunmap(page);
inside:
			page = bprm->page[bprm->p/PAGE_SIZE];
			kaddr = kmap(page);
		}
		kunmap(page);
		bprm->argc--;
	}
}
756
757 /*
758  * cycle the list of binary formats handler, until one recognizes the image
759  */
int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
	int try,retval=0;
	struct linux_binfmt *fmt;
#ifdef __alpha__
	/* handle /sbin/loader.. */
	{
	    struct exec * eh = (struct exec *) bprm->buf;

	    /* Magic/flag values identify a dynamic executable that must
	     * be started through /sbin/loader instead of directly. */
	    if (!bprm->loader && eh->fh.f_magic == 0x183 &&
		(eh->fh.f_flags & 0x3000) == 0x3000)
	    {
		char * dynloader[] = { "/sbin/loader" };
		struct file * file;
		unsigned long loader;

		/* Swap the original file for the loader binary. */
		allow_write_access(bprm->file);
		fput(bprm->file);
		bprm->file = NULL;

		loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);

		file = open_exec(dynloader[0]);
		retval = PTR_ERR(file);
		if (IS_ERR(file))
			return retval;
		bprm->file = file;
		bprm->loader = loader;
		retval = prepare_binprm(bprm);
		if (retval<0)
			return retval;
		/* should call search_binary_handler recursively here,
		   but it does not matter */
	    }
	}
#endif
	/* kernel module loader fixup */
	/* so we don't try to load run modprobe in kernel space. */
	set_fs(USER_DS);
	/* Two passes: if no handler matched the first time, pass two
	 * runs after request_module() may have loaded a binfmt module. */
	for (try=0; try<2; try++) {
		read_lock(&binfmt_lock);
		for (fmt = formats ; fmt ; fmt = fmt->next) {
			int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
			if (!fn)
				continue;
			/* Pin the module so binfmt_lock can be dropped
			 * across the load_binary() call. */
			if (!try_inc_mod_count(fmt->module))
				continue;
			read_unlock(&binfmt_lock);
			retval = fn(bprm, regs);
			if (retval >= 0) {
				/* Handler accepted the image; the exec
				 * file is released here. */
				put_binfmt(fmt);
				allow_write_access(bprm->file);
				if (bprm->file)
					fput(bprm->file);
				bprm->file = NULL;
				current->did_exec = 1;
				return retval;
			}
			read_lock(&binfmt_lock);
			put_binfmt(fmt);
			if (retval != -ENOEXEC)
				break;
			/* A handler consumed the file but still failed:
			 * nothing left to probe. */
			if (!bprm->file) {
				read_unlock(&binfmt_lock);
				return retval;
			}
		}
		read_unlock(&binfmt_lock);
		if (retval != -ENOEXEC) {
			break;
#ifdef CONFIG_KMOD
		}else{
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
			char modname[20];
			/* Header looks like text? Probably a script with
			 * a broken interpreter -- don't modprobe. */
			if (printable(bprm->buf[0]) &&
			    printable(bprm->buf[1]) &&
			    printable(bprm->buf[2]) &&
			    printable(bprm->buf[3]))
				break; /* -ENOEXEC */
			sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
			request_module(modname);
#endif
		}
	}
	return retval;
}
846
847
848 /*
849  * sys_execve() executes a new program.
850  */
int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
{
	struct linux_binprm bprm;
	struct file *file;
	int retval;
	int i;

	file = open_exec(filename);

	retval = PTR_ERR(file);
	if (IS_ERR(file))
		return retval;

	/* p counts down from the top of the MAX_ARG_PAGES arg area;
	 * one pointer-sized slot is reserved at the very top. */
	bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
	memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); 

	bprm.file = file;
	bprm.filename = filename;
	bprm.sh_bang = 0;
	bprm.loader = 0;
	bprm.exec = 0;
	/* No arg pages exist yet, so a counting failure only needs to
	 * re-allow writes (denied by open_exec) and drop the file. */
	if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
		allow_write_access(file);
		fput(file);
		return bprm.argc;
	}

	if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
		allow_write_access(file);
		fput(file);
		return bprm.envc;
	}

	retval = prepare_binprm(&bprm);
	if (retval < 0) 
		goto out; 

	/* Copy filename, then environment, then arguments -- top down. */
	retval = copy_strings_kernel(1, &bprm.filename, &bprm);
	if (retval < 0) 
		goto out; 

	bprm.exec = bprm.p;
	retval = copy_strings(bprm.envc, envp, &bprm);
	if (retval < 0) 
		goto out; 

	retval = copy_strings(bprm.argc, argv, &bprm);
	if (retval < 0) 
		goto out; 

	retval = search_binary_handler(&bprm,regs);
	if (retval >= 0)
		/* execve success */
		return retval;

out:
	/* Something went wrong, return the inode and free the argument pages*/
	allow_write_access(bprm.file);
	if (bprm.file)
		fput(bprm.file);

	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
		struct page * page = bprm.page[i];
		if (page)
			__free_page(page);
	}

	return retval;
}
920
/*
 * Install the binfmt backing current.  The new format's module
 * reference is taken before the old one's is dropped, so the switch
 * is never left without a pinned module.
 */
void set_binfmt(struct linux_binfmt *new)
{
	struct linux_binfmt *old = current->binfmt;
	if (new && new->module)
		__MOD_INC_USE_COUNT(new->module);
	current->binfmt = new;
	if (old && old->module)
		__MOD_DEC_USE_COUNT(old->module);
}
930
931 int do_coredump(long signr, struct pt_regs * regs)
932 {
933         struct linux_binfmt * binfmt;
934         char corename[6+sizeof(current->comm)+10];
935         struct file * file;
936         struct inode * inode;
937         int retval = 0;
938
939         lock_kernel();
940         binfmt = current->binfmt;
941         if (!binfmt || !binfmt->core_dump)
942                 goto fail;
943         if (!current->mm->dumpable)
944                 goto fail;
945         current->mm->dumpable = 0;
946         if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
947                 goto fail;
948
949         memcpy(corename,"core.", 5);
950         corename[4] = '\0';
951         if (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)
952                 sprintf(&corename[4], ".%d", current->pid);
953         file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
954         if (IS_ERR(file))
955                 goto fail;
956         inode = file->f_dentry->d_inode;
957         if (inode->i_nlink > 1)
958                 goto close_fail;        /* multiple links - don't dump */
959         if (d_unhashed(file->f_dentry))
960                 goto close_fail;
961
962         if (!S_ISREG(inode->i_mode))
963                 goto close_fail;
964         if (!file->f_op)
965                 goto close_fail;
966         if (!file->f_op->write)
967                 goto close_fail;
968         if (do_truncate(file->f_dentry, 0) != 0)
969                 goto close_fail;
970
971         down_read(&current->mm->mmap_sem);
972         retval = binfmt->core_dump(signr, regs, file);
973         up_read(&current->mm->mmap_sem);
974
975 close_fail:
976         filp_close(file, NULL);
977 fail:
978         unlock_kernel();
979         return retval;
980 }