UBUNTU: Ubuntu-2.6.38-12.51
[linux-flexiantxendom0-natty.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
37 #include <asm/page.h>
38
/*
 * Forward declarations of the handlers that elf_format points at, plus
 * elf_map(), which is called before its definition.
 */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *file);
static unsigned long elf_map(struct file *filep, unsigned long addr,
                             struct elf_phdr *eppnt, int prot, int type,
                             unsigned long total_size);

/*
 * Core dumping is optional.  When CONFIG_ELF_CORE is not set the handler
 * name expands to NULL, so elf_format carries no core dumper at all.
 */
#ifndef CONFIG_ELF_CORE
#define elf_core_dump   NULL
#else
static int elf_core_dump(struct coredump_params *cprm);
#endif
53
/*
 * ELF_MIN_ALIGN is the granularity used for all segment rounding below:
 * the larger of the ELF page size and the MMU page size.
 */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

/* Architectures that do not define ELF_CORE_EFLAGS get 0. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

/*
 * Address rounding helpers, all relative to ELF_MIN_ALIGN:
 *   ELF_PAGESTART(v)  - round v down to an alignment boundary
 *   ELF_PAGEOFFSET(v) - offset of v inside its aligned unit
 *   ELF_PAGEALIGN(v)  - round v up to an alignment boundary
 *
 * Both rounding masks are widened to unsigned long before being inverted.
 * Without the cast, a 32-bit unsigned ELF_MIN_ALIGN would produce a mask
 * whose upper bits are zero, silently truncating 64-bit addresses.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
        (((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
67
68 static struct linux_binfmt elf_format = {
69         .module         = THIS_MODULE,
70         .load_binary    = load_elf_binary,
71         .load_shlib     = load_elf_library,
72         .core_dump      = elf_core_dump,
73         .min_coredump   = ELF_EXEC_PAGESIZE,
74 };
75
/*
 * BAD_ADDR(x) is true when x, viewed as an unsigned long, lies at or above
 * TASK_SIZE.  Negative error codes returned in an address-sized value also
 * land in that range on the usual layouts, so the callers in this file use
 * it to detect failed mappings.
 */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 down_write(&current->mm->mmap_sem);
85                 addr = do_brk(start, end - start);
86                 up_write(&current->mm->mmap_sem);
87                 if (BAD_ADDR(addr))
88                         return addr;
89         }
90         current->mm->start_brk = current->mm->brk = end;
91         return 0;
92 }
93
94 /* We need to explicitly zero any fractional pages
95    after the data section (i.e. bss).  This would
96    contain the junk from the file that should not
97    be in memory
98  */
99 static int padzero(unsigned long elf_bss)
100 {
101         unsigned long nbyte;
102
103         nbyte = ELF_PAGEOFFSET(elf_bss);
104         if (nbyte) {
105                 nbyte = ELF_MIN_ALIGN - nbyte;
106                 if (clear_user((void __user *) elf_bss, nbyte))
107                         return -EFAULT;
108         }
109         return 0;
110 }
111
/*
 * Stack manipulation helpers; the direction depends on whether the stack
 * grows up or down.
 *
 *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                            growth direction
 *   STACK_ROUND(sp, items) - address 'items' slots past sp, aligned to 16
 *                            bytes in the growth direction
 *   STACK_ALLOC(sp, len)   - reserve 'len' units on sp (sp is modified, so
 *                            it must be an lvalue) and yield the start of
 *                            the reserved area
 *
 * Every argument is parenthesized in the expansion so that callers may
 * pass compound expressions (e.g. "a + b") without precedence surprises.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
        (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
126
/*
 * AT_BASE_PLATFORM describes the "real" hardware/microarchitecture.  An
 * architecture opts in by defining ELF_BASE_PLATFORM (in asm/elf.h); the
 * string is then copied to the user stack the same way AT_PLATFORM is.
 * Architectures that do not define it get NULL, which disables the entry.
 */
#if !defined(ELF_BASE_PLATFORM)
#define ELF_BASE_PLATFORM NULL
#endif
135
136 static int
137 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
138                 unsigned long load_addr, unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         elf_addr_t __user *u_base_platform;
148         elf_addr_t __user *u_rand_bytes;
149         const char *k_platform = ELF_PLATFORM;
150         const char *k_base_platform = ELF_BASE_PLATFORM;
151         unsigned char k_rand_bytes[16];
152         int items;
153         elf_addr_t *elf_info;
154         int ei_index = 0;
155         const struct cred *cred = current_cred();
156         struct vm_area_struct *vma;
157
158         /*
159          * In some cases (e.g. Hyper-Threading), we want to avoid L1
160          * evictions by the processes running on the same package. One
161          * thing we can do is to shuffle the initial stack for them.
162          */
163
164         p = arch_align_stack(p);
165
166         /*
167          * If this architecture has a platform capability string, copy it
168          * to userspace.  In some cases (Sparc), this info is impossible
169          * for userspace to get any other way, in others (i386) it is
170          * merely difficult.
171          */
172         u_platform = NULL;
173         if (k_platform) {
174                 size_t len = strlen(k_platform) + 1;
175
176                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
177                 if (__copy_to_user(u_platform, k_platform, len))
178                         return -EFAULT;
179         }
180
181         /*
182          * If this architecture has a "base" platform capability
183          * string, copy it to userspace.
184          */
185         u_base_platform = NULL;
186         if (k_base_platform) {
187                 size_t len = strlen(k_base_platform) + 1;
188
189                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190                 if (__copy_to_user(u_base_platform, k_base_platform, len))
191                         return -EFAULT;
192         }
193
194         /*
195          * Generate 16 random bytes for userspace PRNG seeding.
196          */
197         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
198         u_rand_bytes = (elf_addr_t __user *)
199                        STACK_ALLOC(p, sizeof(k_rand_bytes));
200         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201                 return -EFAULT;
202
203         /* Create the ELF interpreter info */
204         elf_info = (elf_addr_t *)current->mm->saved_auxv;
205         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
206 #define NEW_AUX_ENT(id, val) \
207         do { \
208                 elf_info[ei_index++] = id; \
209                 elf_info[ei_index++] = val; \
210         } while (0)
211
212 #ifdef ARCH_DLINFO
213         /* 
214          * ARCH_DLINFO must come first so PPC can do its special alignment of
215          * AUXV.
216          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
217          * ARCH_DLINFO changes
218          */
219         ARCH_DLINFO;
220 #endif
221         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
222         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
223         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
224         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
225         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
226         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
227         NEW_AUX_ENT(AT_BASE, interp_load_addr);
228         NEW_AUX_ENT(AT_FLAGS, 0);
229         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
230         NEW_AUX_ENT(AT_UID, cred->uid);
231         NEW_AUX_ENT(AT_EUID, cred->euid);
232         NEW_AUX_ENT(AT_GID, cred->gid);
233         NEW_AUX_ENT(AT_EGID, cred->egid);
234         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
235         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
236         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
237         if (k_platform) {
238                 NEW_AUX_ENT(AT_PLATFORM,
239                             (elf_addr_t)(unsigned long)u_platform);
240         }
241         if (k_base_platform) {
242                 NEW_AUX_ENT(AT_BASE_PLATFORM,
243                             (elf_addr_t)(unsigned long)u_base_platform);
244         }
245         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
246                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247         }
248 #undef NEW_AUX_ENT
249         /* AT_NULL is zero; clear the rest too */
250         memset(&elf_info[ei_index], 0,
251                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
252
253         /* And advance past the AT_NULL entry.  */
254         ei_index += 2;
255
256         sp = STACK_ADD(p, ei_index);
257
258         items = (argc + 1) + (envc + 1) + 1;
259         bprm->p = STACK_ROUND(sp, items);
260
261         /* Point sp at the lowest address on the stack */
262 #ifdef CONFIG_STACK_GROWSUP
263         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
264         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
265 #else
266         sp = (elf_addr_t __user *)bprm->p;
267 #endif
268
269
270         /*
271          * Grow the stack manually; some architectures have a limit on how
272          * far ahead a user-space access may be in order to grow the stack.
273          */
274         vma = find_extend_vma(current->mm, bprm->p);
275         if (!vma)
276                 return -EFAULT;
277
278         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
279         if (__put_user(argc, sp++))
280                 return -EFAULT;
281         argv = sp;
282         envp = argv + argc + 1;
283
284         /* Populate argv and envp */
285         p = current->mm->arg_end = current->mm->arg_start;
286         while (argc-- > 0) {
287                 size_t len;
288                 if (__put_user((elf_addr_t)p, argv++))
289                         return -EFAULT;
290                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
291                 if (!len || len > MAX_ARG_STRLEN)
292                         return -EINVAL;
293                 p += len;
294         }
295         if (__put_user(0, argv))
296                 return -EFAULT;
297         current->mm->arg_end = current->mm->env_start = p;
298         while (envc-- > 0) {
299                 size_t len;
300                 if (__put_user((elf_addr_t)p, envp++))
301                         return -EFAULT;
302                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
303                 if (!len || len > MAX_ARG_STRLEN)
304                         return -EINVAL;
305                 p += len;
306         }
307         if (__put_user(0, envp))
308                 return -EFAULT;
309         current->mm->env_end = p;
310
311         /* Put the elf_info on the stack in the right place.  */
312         sp = (elf_addr_t __user *)envp + 1;
313         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
314                 return -EFAULT;
315         return 0;
316 }
317
318 static unsigned long elf_map(struct file *filep, unsigned long addr,
319                 struct elf_phdr *eppnt, int prot, int type,
320                 unsigned long total_size)
321 {
322         unsigned long map_addr;
323         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
324         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
325         addr = ELF_PAGESTART(addr);
326         size = ELF_PAGEALIGN(size);
327
328         /* mmap() will return -EINVAL if given a zero size, but a
329          * segment with zero filesize is perfectly valid */
330         if (!size)
331                 return addr;
332
333         down_write(&current->mm->mmap_sem);
334         /*
335         * total_size is the size of the ELF (interpreter) image.
336         * The _first_ mmap needs to know the full size, otherwise
337         * randomization might put this image into an overlapping
338         * position with the ELF binary image. (since size < total_size)
339         * So we first map the 'big' image - and unmap the remainder at
340         * the end. (which unmap is needed for ELF images with holes.)
341         */
342         if (total_size) {
343                 total_size = ELF_PAGEALIGN(total_size);
344                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
345                 if (!BAD_ADDR(map_addr))
346                         do_munmap(current->mm, map_addr+size, total_size-size);
347         } else
348                 map_addr = do_mmap(filep, addr, size, prot, type, off);
349
350         up_write(&current->mm->mmap_sem);
351         return(map_addr);
352 }
353
354 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
355 {
356         int i, first_idx = -1, last_idx = -1;
357
358         for (i = 0; i < nr; i++) {
359                 if (cmds[i].p_type == PT_LOAD) {
360                         last_idx = i;
361                         if (first_idx == -1)
362                                 first_idx = i;
363                 }
364         }
365         if (first_idx == -1)
366                 return 0;
367
368         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
369                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
370 }
371
372
373 /* This is much more generalized than the library routine read function,
374    so we keep this separate.  Technically the library read function
375    is only provided so that we can read a.out libraries that have
376    an ELF header */
377
378 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
379                 struct file *interpreter, unsigned long *interp_map_addr,
380                 unsigned long no_base)
381 {
382         struct elf_phdr *elf_phdata;
383         struct elf_phdr *eppnt;
384         unsigned long load_addr = 0;
385         int load_addr_set = 0;
386         unsigned long last_bss = 0, elf_bss = 0;
387         unsigned long error = ~0UL;
388         unsigned long total_size;
389         int retval, i, size;
390
391         /* First of all, some simple consistency checks */
392         if (interp_elf_ex->e_type != ET_EXEC &&
393             interp_elf_ex->e_type != ET_DYN)
394                 goto out;
395         if (!elf_check_arch(interp_elf_ex))
396                 goto out;
397         if (!interpreter->f_op || !interpreter->f_op->mmap)
398                 goto out;
399
400         /*
401          * If the size of this structure has changed, then punt, since
402          * we will be doing the wrong thing.
403          */
404         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
405                 goto out;
406         if (interp_elf_ex->e_phnum < 1 ||
407                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
408                 goto out;
409
410         /* Now read in all of the header information */
411         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
412         if (size > ELF_MIN_ALIGN)
413                 goto out;
414         elf_phdata = kmalloc(size, GFP_KERNEL);
415         if (!elf_phdata)
416                 goto out;
417
418         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
419                              (char *)elf_phdata, size);
420         error = -EIO;
421         if (retval != size) {
422                 if (retval < 0)
423                         error = retval; 
424                 goto out_close;
425         }
426
427         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
428         if (!total_size) {
429                 error = -EINVAL;
430                 goto out_close;
431         }
432
433         eppnt = elf_phdata;
434         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
435                 if (eppnt->p_type == PT_LOAD) {
436                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
437                         int elf_prot = 0;
438                         unsigned long vaddr = 0;
439                         unsigned long k, map_addr;
440
441                         if (eppnt->p_flags & PF_R)
442                                 elf_prot = PROT_READ;
443                         if (eppnt->p_flags & PF_W)
444                                 elf_prot |= PROT_WRITE;
445                         if (eppnt->p_flags & PF_X)
446                                 elf_prot |= PROT_EXEC;
447                         vaddr = eppnt->p_vaddr;
448                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
449                                 elf_type |= MAP_FIXED;
450                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
451                                 load_addr = -vaddr;
452
453                         map_addr = elf_map(interpreter, load_addr + vaddr,
454                                         eppnt, elf_prot, elf_type, total_size);
455                         total_size = 0;
456                         if (!*interp_map_addr)
457                                 *interp_map_addr = map_addr;
458                         error = map_addr;
459                         if (BAD_ADDR(map_addr))
460                                 goto out_close;
461
462                         if (!load_addr_set &&
463                             interp_elf_ex->e_type == ET_DYN) {
464                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
465                                 load_addr_set = 1;
466                         }
467
468                         /*
469                          * Check to see if the section's size will overflow the
470                          * allowed task size. Note that p_filesz must always be
471                          * <= p_memsize so it's only necessary to check p_memsz.
472                          */
473                         k = load_addr + eppnt->p_vaddr;
474                         if (BAD_ADDR(k) ||
475                             eppnt->p_filesz > eppnt->p_memsz ||
476                             eppnt->p_memsz > TASK_SIZE ||
477                             TASK_SIZE - eppnt->p_memsz < k) {
478                                 error = -ENOMEM;
479                                 goto out_close;
480                         }
481
482                         /*
483                          * Find the end of the file mapping for this phdr, and
484                          * keep track of the largest address we see for this.
485                          */
486                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
487                         if (k > elf_bss)
488                                 elf_bss = k;
489
490                         /*
491                          * Do the same thing for the memory mapping - between
492                          * elf_bss and last_bss is the bss section.
493                          */
494                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
495                         if (k > last_bss)
496                                 last_bss = k;
497                 }
498         }
499
500         if (last_bss > elf_bss) {
501                 /*
502                  * Now fill out the bss section.  First pad the last page up
503                  * to the page boundary, and then perform a mmap to make sure
504                  * that there are zero-mapped pages up to and including the
505                  * last bss page.
506                  */
507                 if (padzero(elf_bss)) {
508                         error = -EFAULT;
509                         goto out_close;
510                 }
511
512                 /* What we have mapped so far */
513                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
514
515                 /* Map the last of the bss segment */
516                 down_write(&current->mm->mmap_sem);
517                 error = do_brk(elf_bss, last_bss - elf_bss);
518                 up_write(&current->mm->mmap_sem);
519                 if (BAD_ADDR(error))
520                         goto out_close;
521         }
522
523         error = load_addr;
524
525 out_close:
526         kfree(elf_phdata);
527 out:
528         return error;
529 }
530
531 /*
532  * These are the functions used to load ELF style executables and shared
533  * libraries.  There is no binary dependent code anywhere else.
534  */
535
/* Interpreter kinds. */
#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

/*
 * Mask applied to the random page count used for stack randomization.
 * The default covers 8MB of virtual address space; an architecture may
 * provide its own value.
 */
#if !defined(STACK_RND_MASK)
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif
542
543 static unsigned long randomize_stack_top(unsigned long stack_top)
544 {
545         unsigned int random_variable = 0;
546
547         if ((current->flags & PF_RANDOMIZE) &&
548                 !(current->personality & ADDR_NO_RANDOMIZE)) {
549                 random_variable = get_random_int() & STACK_RND_MASK;
550                 random_variable <<= PAGE_SHIFT;
551         }
552 #ifdef CONFIG_STACK_GROWSUP
553         return PAGE_ALIGN(stack_top) + random_variable;
554 #else
555         return PAGE_ALIGN(stack_top) - random_variable;
556 #endif
557 }
558
559 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
560 {
561         struct file *interpreter = NULL; /* to shut gcc up */
562         unsigned long load_addr = 0, load_bias = 0;
563         int load_addr_set = 0;
564         char * elf_interpreter = NULL;
565         unsigned long error;
566         struct elf_phdr *elf_ppnt, *elf_phdata;
567         unsigned long elf_bss, elf_brk;
568         int retval, i;
569         unsigned int size;
570         unsigned long elf_entry;
571         unsigned long interp_load_addr = 0;
572         unsigned long start_code, end_code, start_data, end_data;
573         unsigned long reloc_func_desc = 0;
574         int executable_stack = EXSTACK_DEFAULT;
575         unsigned long def_flags = 0;
576         struct {
577                 struct elfhdr elf_ex;
578                 struct elfhdr interp_elf_ex;
579         } *loc;
580
581         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
582         if (!loc) {
583                 retval = -ENOMEM;
584                 goto out_ret;
585         }
586         
587         /* Get the exec-header */
588         loc->elf_ex = *((struct elfhdr *)bprm->buf);
589
590         retval = -ENOEXEC;
591         /* First of all, some simple consistency checks */
592         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
593                 goto out;
594
595         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
596                 goto out;
597         if (!elf_check_arch(&loc->elf_ex))
598                 goto out;
599         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
600                 goto out;
601
602         /* Now read in all of the header information */
603         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
604                 goto out;
605         if (loc->elf_ex.e_phnum < 1 ||
606                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
607                 goto out;
608         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
609         retval = -ENOMEM;
610         elf_phdata = kmalloc(size, GFP_KERNEL);
611         if (!elf_phdata)
612                 goto out;
613
614         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
615                              (char *)elf_phdata, size);
616         if (retval != size) {
617                 if (retval >= 0)
618                         retval = -EIO;
619                 goto out_free_ph;
620         }
621
622         elf_ppnt = elf_phdata;
623         elf_bss = 0;
624         elf_brk = 0;
625
626         start_code = ~0UL;
627         end_code = 0;
628         start_data = 0;
629         end_data = 0;
630
631         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
632                 if (elf_ppnt->p_type == PT_INTERP) {
633                         /* This is the program interpreter used for
634                          * shared libraries - for now assume that this
635                          * is an a.out format binary
636                          */
637                         retval = -ENOEXEC;
638                         if (elf_ppnt->p_filesz > PATH_MAX || 
639                             elf_ppnt->p_filesz < 2)
640                                 goto out_free_ph;
641
642                         retval = -ENOMEM;
643                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
644                                                   GFP_KERNEL);
645                         if (!elf_interpreter)
646                                 goto out_free_ph;
647
648                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
649                                              elf_interpreter,
650                                              elf_ppnt->p_filesz);
651                         if (retval != elf_ppnt->p_filesz) {
652                                 if (retval >= 0)
653                                         retval = -EIO;
654                                 goto out_free_interp;
655                         }
656                         /* make sure path is NULL terminated */
657                         retval = -ENOEXEC;
658                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
659                                 goto out_free_interp;
660
661                         interpreter = open_exec(elf_interpreter);
662                         retval = PTR_ERR(interpreter);
663                         if (IS_ERR(interpreter))
664                                 goto out_free_interp;
665
666                         /*
667                          * If the binary is not readable then enforce
668                          * mm->dumpable = 0 regardless of the interpreter's
669                          * permissions.
670                          */
671                         if (file_permission(interpreter, MAY_READ) < 0)
672                                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
673
674                         retval = kernel_read(interpreter, 0, bprm->buf,
675                                              BINPRM_BUF_SIZE);
676                         if (retval != BINPRM_BUF_SIZE) {
677                                 if (retval >= 0)
678                                         retval = -EIO;
679                                 goto out_free_dentry;
680                         }
681
682                         /* Get the exec headers */
683                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
684                         break;
685                 }
686                 elf_ppnt++;
687         }
688
689         elf_ppnt = elf_phdata;
690         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
691                 if (elf_ppnt->p_type == PT_GNU_STACK) {
692                         if (elf_ppnt->p_flags & PF_X)
693                                 executable_stack = EXSTACK_ENABLE_X;
694                         else
695                                 executable_stack = EXSTACK_DISABLE_X;
696                         break;
697                 }
698
699         /* Some simple consistency checks for the interpreter */
700         if (elf_interpreter) {
701                 retval = -ELIBBAD;
702                 /* Not an ELF interpreter */
703                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
704                         goto out_free_dentry;
705                 /* Verify the interpreter has a valid arch */
706                 if (!elf_check_arch(&loc->interp_elf_ex))
707                         goto out_free_dentry;
708         }
709
710         /* Flush all traces of the currently running executable */
711         retval = flush_old_exec(bprm);
712         if (retval)
713                 goto out_free_dentry;
714
715 #ifdef CONFIG_X86_32
716         /*
717          * Turn off the CS limit completely if exec-shield disabled or
718          * NX active:
719          */
720         if (disable_nx || executable_stack != EXSTACK_DISABLE_X || (__supported_pte_mask & _PAGE_NX))
721                 arch_add_exec_range(current->mm, -1);
722 #endif
723
724         /* OK, This is the point of no return */
725         current->flags &= ~PF_FORKNOEXEC;
726         current->mm->def_flags = def_flags;
727
728         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
729            may depend on the personality.  */
730         SET_PERSONALITY(loc->elf_ex);
731         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
732                 current->personality |= READ_IMPLIES_EXEC;
733
734         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
735                 current->flags |= PF_RANDOMIZE;
736
737         setup_new_exec(bprm);
738
739         /* Do this so that we can load the interpreter, if need be.  We will
740            change some of these later */
741         current->mm->free_area_cache = current->mm->mmap_base;
742         current->mm->cached_hole_size = 0;
743         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
744                                  executable_stack);
745         if (retval < 0) {
746                 send_sig(SIGKILL, current, 0);
747                 goto out_free_dentry;
748         }
749         
750         current->mm->start_stack = bprm->p;
751
752         /* Now we do a little grungy work by mmapping the ELF image into
753            the correct location in memory. */
754         for(i = 0, elf_ppnt = elf_phdata;
755             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
756                 int elf_prot = 0, elf_flags;
757                 unsigned long k, vaddr;
758
759                 if (elf_ppnt->p_type != PT_LOAD)
760                         continue;
761
762                 if (unlikely (elf_brk > elf_bss)) {
763                         unsigned long nbyte;
764                     
765                         /* There was a PT_LOAD segment with p_memsz > p_filesz
766                            before this one. Map anonymous pages, if needed,
767                            and clear the area.  */
768                         retval = set_brk(elf_bss + load_bias,
769                                          elf_brk + load_bias);
770                         if (retval) {
771                                 send_sig(SIGKILL, current, 0);
772                                 goto out_free_dentry;
773                         }
774                         nbyte = ELF_PAGEOFFSET(elf_bss);
775                         if (nbyte) {
776                                 nbyte = ELF_MIN_ALIGN - nbyte;
777                                 if (nbyte > elf_brk - elf_bss)
778                                         nbyte = elf_brk - elf_bss;
779                                 if (clear_user((void __user *)elf_bss +
780                                                         load_bias, nbyte)) {
781                                         /*
782                                          * This bss-zeroing can fail if the ELF
783                                          * file specifies odd protections. So
784                                          * we don't check the return value
785                                          */
786                                 }
787                         }
788                 }
789
790                 if (elf_ppnt->p_flags & PF_R)
791                         elf_prot |= PROT_READ;
792                 if (elf_ppnt->p_flags & PF_W)
793                         elf_prot |= PROT_WRITE;
794                 if (elf_ppnt->p_flags & PF_X)
795                         elf_prot |= PROT_EXEC;
796
797                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
798
799                 vaddr = elf_ppnt->p_vaddr;
800                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
801                         elf_flags |= MAP_FIXED;
802                 } else if (loc->elf_ex.e_type == ET_DYN) {
803                         /* Try and get dynamic programs out of the way of the
804                          * default mmap base, as well as whatever program they
805                          * might try to exec.  This is because the brk will
806                          * follow the loader, and is not movable.  */
807 #if defined(CONFIG_X86) || defined(CONFIG_ARM)
808                         load_bias = 0;
809 #else
810                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
811 #endif
812                 }
813
814                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
815                                 elf_prot, elf_flags, 0);
816                 if (BAD_ADDR(error)) {
817                         send_sig(SIGKILL, current, 0);
818                         retval = IS_ERR((void *)error) ?
819                                 PTR_ERR((void*)error) : -EINVAL;
820                         goto out_free_dentry;
821                 }
822
823                 if (!load_addr_set) {
824                         load_addr_set = 1;
825                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
826                         if (loc->elf_ex.e_type == ET_DYN) {
827                                 load_bias += error -
828                                              ELF_PAGESTART(load_bias + vaddr);
829                                 load_addr += load_bias;
830                                 reloc_func_desc = load_bias;
831                         }
832                 }
833                 k = elf_ppnt->p_vaddr;
834                 if (k < start_code)
835                         start_code = k;
836                 if (start_data < k)
837                         start_data = k;
838
839                 /*
840                  * Check to see if the section's size will overflow the
841                  * allowed task size. Note that p_filesz must always be
842                  * <= p_memsz so it is only necessary to check p_memsz.
843                  */
844                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
845                     elf_ppnt->p_memsz > TASK_SIZE ||
846                     TASK_SIZE - elf_ppnt->p_memsz < k) {
847                         /* set_brk can never work. Avoid overflows. */
848                         send_sig(SIGKILL, current, 0);
849                         retval = -EINVAL;
850                         goto out_free_dentry;
851                 }
852
853                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
854
855                 if (k > elf_bss)
856                         elf_bss = k;
857                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
858                         end_code = k;
859                 if (end_data < k)
860                         end_data = k;
861                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
862                 if (k > elf_brk)
863                         elf_brk = k;
864         }
865
866         loc->elf_ex.e_entry += load_bias;
867         elf_bss += load_bias;
868         elf_brk += load_bias;
869         start_code += load_bias;
870         end_code += load_bias;
871         start_data += load_bias;
872         end_data += load_bias;
873
874         /* Calling set_brk effectively mmaps the pages that we need
875          * for the bss and break sections.  We must do this before
876          * mapping in the interpreter, to make sure it doesn't wind
877          * up getting placed where the bss needs to go.
878          */
879         retval = set_brk(elf_bss, elf_brk);
880         if (retval) {
881                 send_sig(SIGKILL, current, 0);
882                 goto out_free_dentry;
883         }
884         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
885                 send_sig(SIGSEGV, current, 0);
886                 retval = -EFAULT; /* Nobody gets to see this, but.. */
887                 goto out_free_dentry;
888         }
889
890         if (elf_interpreter) {
891                 unsigned long uninitialized_var(interp_map_addr);
892
893                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
894                                             interpreter,
895                                             &interp_map_addr,
896                                             load_bias);
897                 if (!IS_ERR((void *)elf_entry)) {
898                         /*
899                          * load_elf_interp() returns relocation
900                          * adjustment
901                          */
902                         interp_load_addr = elf_entry;
903                         elf_entry += loc->interp_elf_ex.e_entry;
904                 }
905                 if (BAD_ADDR(elf_entry)) {
906                         force_sig(SIGSEGV, current);
907                         retval = IS_ERR((void *)elf_entry) ?
908                                         (int)elf_entry : -EINVAL;
909                         goto out_free_dentry;
910                 }
911                 reloc_func_desc = interp_load_addr;
912
913                 allow_write_access(interpreter);
914                 fput(interpreter);
915                 kfree(elf_interpreter);
916         } else {
917                 elf_entry = loc->elf_ex.e_entry;
918                 if (BAD_ADDR(elf_entry)) {
919                         force_sig(SIGSEGV, current);
920                         retval = -EINVAL;
921                         goto out_free_dentry;
922                 }
923         }
924
925         kfree(elf_phdata);
926
927         set_binfmt(&elf_format);
928
929 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
930         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
931         if (retval < 0) {
932                 send_sig(SIGKILL, current, 0);
933                 goto out;
934         }
935 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
936
937         install_exec_creds(bprm);
938         current->flags &= ~PF_FORKNOEXEC;
939         retval = create_elf_tables(bprm, &loc->elf_ex,
940                           load_addr, interp_load_addr);
941         if (retval < 0) {
942                 send_sig(SIGKILL, current, 0);
943                 goto out;
944         }
945         /* N.B. passed_fileno might not be initialized? */
946         current->mm->end_code = end_code;
947         current->mm->start_code = start_code;
948         current->mm->start_data = start_data;
949         current->mm->end_data = end_data;
950         current->mm->start_stack = bprm->p;
951
952 #ifdef arch_randomize_brk
953         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
954                 current->mm->brk = current->mm->start_brk =
955                         arch_randomize_brk(current->mm);
956 #ifdef CONFIG_COMPAT_BRK
957                 current->brk_randomized = 1;
958 #endif
959         }
960 #endif
961
962         if (current->personality & MMAP_PAGE_ZERO) {
963                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
964                    and some applications "depend" upon this behavior.
965                    Since we do not have the power to recompile these, we
966                    emulate the SVr4 behavior. Sigh. */
967                 down_write(&current->mm->mmap_sem);
968                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
969                                 MAP_FIXED | MAP_PRIVATE, 0);
970                 up_write(&current->mm->mmap_sem);
971         }
972
973 #ifdef ELF_PLAT_INIT
974         /*
975          * The ABI may specify that certain registers be set up in special
976          * ways (on i386 %edx is the address of a DT_FINI function, for
977          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
978          * that the e_entry field is the address of the function descriptor
979          * for the startup routine, rather than the address of the startup
980          * routine itself.  This macro performs whatever initialization to
981          * the regs structure is required as well as any relocations to the
982          * function descriptor entries when executing dynamically links apps.
983          */
984         ELF_PLAT_INIT(regs, reloc_func_desc);
985 #endif
986
987         start_thread(regs, elf_entry, bprm->p);
988         retval = 0;
989 out:
990         kfree(loc);
991 out_ret:
992         return retval;
993
994         /* error cleanup */
995 out_free_dentry:
996         allow_write_access(interpreter);
997         if (interpreter)
998                 fput(interpreter);
999 out_free_interp:
1000         kfree(elf_interpreter);
1001 out_free_ph:
1002         kfree(elf_phdata);
1003         goto out;
1004 }
1005
1006 /* This is really simpleminded and specialized - we are loading an
1007    a.out library that is given an ELF header. */
1008 static int load_elf_library(struct file *file)
1009 {
1010         struct elf_phdr *elf_phdata;
1011         struct elf_phdr *eppnt;
1012         unsigned long elf_bss, bss, len;
1013         int retval, error, i, j;
1014         struct elfhdr elf_ex;
1015
1016         error = -ENOEXEC;
1017         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1018         if (retval != sizeof(elf_ex))
1019                 goto out;
1020
1021         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1022                 goto out;
1023
1024         /* First of all, some simple consistency checks */
1025         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1026             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1027                 goto out;
1028
1029         /* Now read in all of the header information */
1030
1031         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1032         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1033
1034         error = -ENOMEM;
1035         elf_phdata = kmalloc(j, GFP_KERNEL);
1036         if (!elf_phdata)
1037                 goto out;
1038
1039         eppnt = elf_phdata;
1040         error = -ENOEXEC;
1041         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1042         if (retval != j)
1043                 goto out_free_ph;
1044
1045         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1046                 if ((eppnt + i)->p_type == PT_LOAD)
1047                         j++;
1048         if (j != 1)
1049                 goto out_free_ph;
1050
1051         while (eppnt->p_type != PT_LOAD)
1052                 eppnt++;
1053
1054         /* Now use mmap to map the library into memory. */
1055         down_write(&current->mm->mmap_sem);
1056         error = do_mmap(file,
1057                         ELF_PAGESTART(eppnt->p_vaddr),
1058                         (eppnt->p_filesz +
1059                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1060                         PROT_READ | PROT_WRITE | PROT_EXEC,
1061                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1062                         (eppnt->p_offset -
1063                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1064         up_write(&current->mm->mmap_sem);
1065         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1066                 goto out_free_ph;
1067
1068         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1069         if (padzero(elf_bss)) {
1070                 error = -EFAULT;
1071                 goto out_free_ph;
1072         }
1073
1074         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1075                             ELF_MIN_ALIGN - 1);
1076         bss = eppnt->p_memsz + eppnt->p_vaddr;
1077         if (bss > len) {
1078                 down_write(&current->mm->mmap_sem);
1079                 do_brk(len, bss - len);
1080                 up_write(&current->mm->mmap_sem);
1081         }
1082         error = 0;
1083
1084 out_free_ph:
1085         kfree(elf_phdata);
1086 out:
1087         return error;
1088 }
1089
1090 #ifdef CONFIG_ELF_CORE
1091 /*
1092  * ELF core dumper
1093  *
1094  * Modelled on fs/exec.c:aout_core_dump()
1095  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1096  */
1097
1098 /*
1099  * Decide what to dump of a segment, part, all or none.
1100  */
1101 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1102                                    unsigned long mm_flags)
1103 {
1104 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1105
1106         /* The vma can be set up to tell us the answer directly.  */
1107         if (vma->vm_flags & VM_ALWAYSDUMP)
1108                 goto whole;
1109
1110         /* Hugetlb memory check */
1111         if (vma->vm_flags & VM_HUGETLB) {
1112                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1113                         goto whole;
1114                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1115                         goto whole;
1116         }
1117
1118         /* Do not dump I/O mapped devices or special mappings */
1119         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1120                 return 0;
1121
1122         /* By default, dump shared memory if mapped from an anonymous file. */
1123         if (vma->vm_flags & VM_SHARED) {
1124                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1125                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1126                         goto whole;
1127                 return 0;
1128         }
1129
1130         /* Dump segments that have been written to.  */
1131         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1132                 goto whole;
1133         if (vma->vm_file == NULL)
1134                 return 0;
1135
1136         if (FILTER(MAPPED_PRIVATE))
1137                 goto whole;
1138
1139         /*
1140          * If this looks like the beginning of a DSO or executable mapping,
1141          * check for an ELF header.  If we find one, dump the first page to
1142          * aid in determining what was mapped here.
1143          */
1144         if (FILTER(ELF_HEADERS) &&
1145             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1146                 u32 __user *header = (u32 __user *) vma->vm_start;
1147                 u32 word;
1148                 mm_segment_t fs = get_fs();
1149                 /*
1150                  * Doing it this way gets the constant folded by GCC.
1151                  */
1152                 union {
1153                         u32 cmp;
1154                         char elfmag[SELFMAG];
1155                 } magic;
1156                 BUILD_BUG_ON(SELFMAG != sizeof word);
1157                 magic.elfmag[EI_MAG0] = ELFMAG0;
1158                 magic.elfmag[EI_MAG1] = ELFMAG1;
1159                 magic.elfmag[EI_MAG2] = ELFMAG2;
1160                 magic.elfmag[EI_MAG3] = ELFMAG3;
1161                 /*
1162                  * Switch to the user "segment" for get_user(),
1163                  * then put back what elf_core_dump() had in place.
1164                  */
1165                 set_fs(USER_DS);
1166                 if (unlikely(get_user(word, header)))
1167                         word = 0;
1168                 set_fs(fs);
1169                 if (word == magic.cmp)
1170                         return PAGE_SIZE;
1171         }
1172
1173 #undef  FILTER
1174
1175         return 0;
1176
1177 whole:
1178         return vma->vm_end - vma->vm_start;
1179 }
1180
/* One ELF note held in memory until it is written to the core file. */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, emitted as n_type */
	unsigned int datasz;	/* length of the payload in bytes */
	void *data;		/* the payload itself */
};
1189
1190 static int notesize(struct memelfnote *en)
1191 {
1192         int sz;
1193
1194         sz = sizeof(struct elf_note);
1195         sz += roundup(strlen(en->name) + 1, 4);
1196         sz += roundup(en->datasz, 4);
1197
1198         return sz;
1199 }
1200
/*
 * Write @nr bytes starting at @addr with dump_write() and advance *@foffset
 * by @nr.  Relies on a variable named `file' in the caller's scope and
 * makes the *enclosing function* return 0 when the write fails.  @nr is
 * evaluated twice.
 */
#define DUMP_WRITE(addr, nr, foffset)			\
	do {						\
		if (!dump_write(file, (addr), (nr)))	\
			return 0;			\
		*foffset += (nr);			\
	} while(0)
1203
1204 static int alignfile(struct file *file, loff_t *foffset)
1205 {
1206         static const char buf[4] = { 0, };
1207         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1208         return 1;
1209 }
1210
1211 static int writenote(struct memelfnote *men, struct file *file,
1212                         loff_t *foffset)
1213 {
1214         struct elf_note en;
1215         en.n_namesz = strlen(men->name) + 1;
1216         en.n_descsz = men->datasz;
1217         en.n_type = men->type;
1218
1219         DUMP_WRITE(&en, sizeof(en), foffset);
1220         DUMP_WRITE(men->name, en.n_namesz, foffset);
1221         if (!alignfile(file, foffset))
1222                 return 0;
1223         DUMP_WRITE(men->data, men->datasz, foffset);
1224         if (!alignfile(file, foffset))
1225                 return 0;
1226
1227         return 1;
1228 }
1229 #undef DUMP_WRITE
1230
1231 static void fill_elf_header(struct elfhdr *elf, int segs,
1232                             u16 machine, u32 flags, u8 osabi)
1233 {
1234         memset(elf, 0, sizeof(*elf));
1235
1236         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1237         elf->e_ident[EI_CLASS] = ELF_CLASS;
1238         elf->e_ident[EI_DATA] = ELF_DATA;
1239         elf->e_ident[EI_VERSION] = EV_CURRENT;
1240         elf->e_ident[EI_OSABI] = ELF_OSABI;
1241
1242         elf->e_type = ET_CORE;
1243         elf->e_machine = machine;
1244         elf->e_version = EV_CURRENT;
1245         elf->e_phoff = sizeof(struct elfhdr);
1246         elf->e_flags = flags;
1247         elf->e_ehsize = sizeof(struct elfhdr);
1248         elf->e_phentsize = sizeof(struct elf_phdr);
1249         elf->e_phnum = segs;
1250
1251         return;
1252 }
1253
1254 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1255 {
1256         phdr->p_type = PT_NOTE;
1257         phdr->p_offset = offset;
1258         phdr->p_vaddr = 0;
1259         phdr->p_paddr = 0;
1260         phdr->p_filesz = sz;
1261         phdr->p_memsz = 0;
1262         phdr->p_flags = 0;
1263         phdr->p_align = 0;
1264         return;
1265 }
1266
1267 static void fill_note(struct memelfnote *note, const char *name, int type, 
1268                 unsigned int sz, void *data)
1269 {
1270         note->name = name;
1271         note->type = type;
1272         note->datasz = sz;
1273         note->data = data;
1274         return;
1275 }
1276
1277 /*
1278  * fill up all the fields in prstatus from the given task struct, except
1279  * registers which need to be filled up separately.
1280  */
1281 static void fill_prstatus(struct elf_prstatus *prstatus,
1282                 struct task_struct *p, long signr)
1283 {
1284         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1285         prstatus->pr_sigpend = p->pending.signal.sig[0];
1286         prstatus->pr_sighold = p->blocked.sig[0];
1287         rcu_read_lock();
1288         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1289         rcu_read_unlock();
1290         prstatus->pr_pid = task_pid_vnr(p);
1291         prstatus->pr_pgrp = task_pgrp_vnr(p);
1292         prstatus->pr_sid = task_session_vnr(p);
1293         if (thread_group_leader(p)) {
1294                 struct task_cputime cputime;
1295
1296                 /*
1297                  * This is the record for the group leader.  It shows the
1298                  * group-wide total, not its individual thread total.
1299                  */
1300                 thread_group_cputime(p, &cputime);
1301                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1302                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1303         } else {
1304                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1305                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1306         }
1307         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1308         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1309 }
1310
1311 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1312                        struct mm_struct *mm)
1313 {
1314         const struct cred *cred;
1315         unsigned int i, len;
1316         
1317         /* first copy the parameters from user space */
1318         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1319
1320         len = mm->arg_end - mm->arg_start;
1321         if (len >= ELF_PRARGSZ)
1322                 len = ELF_PRARGSZ-1;
1323         if (copy_from_user(&psinfo->pr_psargs,
1324                            (const char __user *)mm->arg_start, len))
1325                 return -EFAULT;
1326         for(i = 0; i < len; i++)
1327                 if (psinfo->pr_psargs[i] == 0)
1328                         psinfo->pr_psargs[i] = ' ';
1329         psinfo->pr_psargs[len] = 0;
1330
1331         rcu_read_lock();
1332         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1333         rcu_read_unlock();
1334         psinfo->pr_pid = task_pid_vnr(p);
1335         psinfo->pr_pgrp = task_pgrp_vnr(p);
1336         psinfo->pr_sid = task_session_vnr(p);
1337
1338         i = p->state ? ffz(~p->state) + 1 : 0;
1339         psinfo->pr_state = i;
1340         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1341         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1342         psinfo->pr_nice = task_nice(p);
1343         psinfo->pr_flag = p->flags;
1344         rcu_read_lock();
1345         cred = __task_cred(p);
1346         SET_UID(psinfo->pr_uid, cred->uid);
1347         SET_GID(psinfo->pr_gid, cred->gid);
1348         rcu_read_unlock();
1349         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1350         
1351         return 0;
1352 }
1353
1354 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1355 {
1356         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1357         int i = 0;
1358         do
1359                 i += 2;
1360         while (auxv[i - 2] != AT_NULL);
1361         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1362 }
1363
1364 #ifdef CORE_DUMP_USE_REGSET
1365 #include <linux/regset.h>
1366
1367 struct elf_thread_core_info {
1368         struct elf_thread_core_info *next;
1369         struct task_struct *task;
1370         struct elf_prstatus prstatus;
1371         struct memelfnote notes[0];
1372 };
1373
1374 struct elf_note_info {
1375         struct elf_thread_core_info *thread;
1376         struct memelfnote psinfo;
1377         struct memelfnote auxv;
1378         size_t size;
1379         int thread_notes;
1380 };
1381
1382 /*
1383  * When a regset has a writeback hook, we call it on each thread before
1384  * dumping user memory.  On register window machines, this makes sure the
1385  * user memory backing the register data is up to date before we read it.
1386  */
1387 static void do_thread_regset_writeback(struct task_struct *task,
1388                                        const struct user_regset *regset)
1389 {
1390         if (regset->writeback)
1391                 regset->writeback(task, regset, 1);
1392 }
1393
1394 static int fill_thread_core_info(struct elf_thread_core_info *t,
1395                                  const struct user_regset_view *view,
1396                                  long signr, size_t *total)
1397 {
1398         unsigned int i;
1399
1400         /*
1401          * NT_PRSTATUS is the one special case, because the regset data
1402          * goes into the pr_reg field inside the note contents, rather
1403          * than being the whole note contents.  We fill the reset in here.
1404          * We assume that regset 0 is NT_PRSTATUS.
1405          */
1406         fill_prstatus(&t->prstatus, t->task, signr);
1407         (void) view->regsets[0].get(t->task, &view->regsets[0],
1408                                     0, sizeof(t->prstatus.pr_reg),
1409                                     &t->prstatus.pr_reg, NULL);
1410
1411         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1412                   sizeof(t->prstatus), &t->prstatus);
1413         *total += notesize(&t->notes[0]);
1414
1415         do_thread_regset_writeback(t->task, &view->regsets[0]);
1416
1417         /*
1418          * Each other regset might generate a note too.  For each regset
1419          * that has no core_note_type or is inactive, we leave t->notes[i]
1420          * all zero and we'll know to skip writing it later.
1421          */
1422         for (i = 1; i < view->n; ++i) {
1423                 const struct user_regset *regset = &view->regsets[i];
1424                 do_thread_regset_writeback(t->task, regset);
1425                 if (regset->core_note_type &&
1426                     (!regset->active || regset->active(t->task, regset))) {
1427                         int ret;
1428                         size_t size = regset->n * regset->size;
1429                         void *data = kmalloc(size, GFP_KERNEL);
1430                         if (unlikely(!data))
1431                                 return 0;
1432                         ret = regset->get(t->task, regset,
1433                                           0, size, data, NULL);
1434                         if (unlikely(ret))
1435                                 kfree(data);
1436                         else {
1437                                 if (regset->core_note_type != NT_PRFPREG)
1438                                         fill_note(&t->notes[i], "LINUX",
1439                                                   regset->core_note_type,
1440                                                   size, data);
1441                                 else {
1442                                         t->prstatus.pr_fpvalid = 1;
1443                                         fill_note(&t->notes[i], "CORE",
1444                                                   NT_PRFPREG, size, data);
1445                                 }
1446                                 *total += notesize(&t->notes[i]);
1447                         }
1448                 }
1449         }
1450
1451         return 1;
1452 }
1453
1454 static int fill_note_info(struct elfhdr *elf, int phdrs,
1455                           struct elf_note_info *info,
1456                           long signr, struct pt_regs *regs)
1457 {
1458         struct task_struct *dump_task = current;
1459         const struct user_regset_view *view = task_user_regset_view(dump_task);
1460         struct elf_thread_core_info *t;
1461         struct elf_prpsinfo *psinfo;
1462         struct core_thread *ct;
1463         unsigned int i;
1464
1465         info->size = 0;
1466         info->thread = NULL;
1467
1468         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1469         if (psinfo == NULL)
1470                 return 0;
1471
1472         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1473
1474         /*
1475          * Figure out how many notes we're going to need for each thread.
1476          */
1477         info->thread_notes = 0;
1478         for (i = 0; i < view->n; ++i)
1479                 if (view->regsets[i].core_note_type != 0)
1480                         ++info->thread_notes;
1481
1482         /*
1483          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1484          * since it is our one special case.
1485          */
1486         if (unlikely(info->thread_notes == 0) ||
1487             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1488                 WARN_ON(1);
1489                 return 0;
1490         }
1491
1492         /*
1493          * Initialize the ELF file header.
1494          */
1495         fill_elf_header(elf, phdrs,
1496                         view->e_machine, view->e_flags, view->ei_osabi);
1497
1498         /*
1499          * Allocate a structure for each thread.
1500          */
1501         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1502                 t = kzalloc(offsetof(struct elf_thread_core_info,
1503                                      notes[info->thread_notes]),
1504                             GFP_KERNEL);
1505                 if (unlikely(!t))
1506                         return 0;
1507
1508                 t->task = ct->task;
1509                 if (ct->task == dump_task || !info->thread) {
1510                         t->next = info->thread;
1511                         info->thread = t;
1512                 } else {
1513                         /*
1514                          * Make sure to keep the original task at
1515                          * the head of the list.
1516                          */
1517                         t->next = info->thread->next;
1518                         info->thread->next = t;
1519                 }
1520         }
1521
1522         /*
1523          * Now fill in each thread's information.
1524          */
1525         for (t = info->thread; t != NULL; t = t->next)
1526                 if (!fill_thread_core_info(t, view, signr, &info->size))
1527                         return 0;
1528
1529         /*
1530          * Fill in the two process-wide notes.
1531          */
1532         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1533         info->size += notesize(&info->psinfo);
1534
1535         fill_auxv_note(&info->auxv, current->mm);
1536         info->size += notesize(&info->auxv);
1537
1538         return 1;
1539 }
1540
1541 static size_t get_note_info_size(struct elf_note_info *info)
1542 {
1543         return info->size;
1544 }
1545
1546 /*
1547  * Write all the notes for each thread.  When writing the first thread, the
1548  * process-wide notes are interleaved after the first thread-specific note.
1549  */
1550 static int write_note_info(struct elf_note_info *info,
1551                            struct file *file, loff_t *foffset)
1552 {
1553         bool first = 1;
1554         struct elf_thread_core_info *t = info->thread;
1555
1556         do {
1557                 int i;
1558
1559                 if (!writenote(&t->notes[0], file, foffset))
1560                         return 0;
1561
1562                 if (first && !writenote(&info->psinfo, file, foffset))
1563                         return 0;
1564                 if (first && !writenote(&info->auxv, file, foffset))
1565                         return 0;
1566
1567                 for (i = 1; i < info->thread_notes; ++i)
1568                         if (t->notes[i].data &&
1569                             !writenote(&t->notes[i], file, foffset))
1570                                 return 0;
1571
1572                 first = 0;
1573                 t = t->next;
1574         } while (t);
1575
1576         return 1;
1577 }
1578
1579 static void free_note_info(struct elf_note_info *info)
1580 {
1581         struct elf_thread_core_info *threads = info->thread;
1582         while (threads) {
1583                 unsigned int i;
1584                 struct elf_thread_core_info *t = threads;
1585                 threads = t->next;
1586                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1587                 for (i = 1; i < info->thread_notes; ++i)
1588                         kfree(t->notes[i].data);
1589                 kfree(t);
1590         }
1591         kfree(info->psinfo.data);
1592 }
1593
1594 #else
1595
/*
 * Here is the structure in which status of each thread is captured.
 *
 * One instance is allocated per additional thread and linked into
 * elf_note_info.thread_list.  elf_dump_thread_status() fills the register
 * buffers and builds the notes that point at them.
 */
struct elf_thread_status
{
	struct list_head list;		/* link in elf_note_info.thread_list */
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;	/* task this status describes */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];	/* notes referencing the buffers above */
	int num_notes;			/* number of notes filled in */
};
1609
1610 /*
1611  * In order to add the specific thread information for the elf file format,
1612  * we need to keep a linked list of every threads pr_status and then create
1613  * a single section for them in the final core file.
1614  */
1615 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1616 {
1617         int sz = 0;
1618         struct task_struct *p = t->thread;
1619         t->num_notes = 0;
1620
1621         fill_prstatus(&t->prstatus, p, signr);
1622         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1623         
1624         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1625                   &(t->prstatus));
1626         t->num_notes++;
1627         sz += notesize(&t->notes[0]);
1628
1629         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1630                                                                 &t->fpu))) {
1631                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1632                           &(t->fpu));
1633                 t->num_notes++;
1634                 sz += notesize(&t->notes[1]);
1635         }
1636
1637 #ifdef ELF_CORE_COPY_XFPREGS
1638         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1639                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1640                           sizeof(t->xfpu), &t->xfpu);
1641                 t->num_notes++;
1642                 sz += notesize(&t->notes[2]);
1643         }
1644 #endif  
1645         return sz;
1646 }
1647
/*
 * Everything collected for the notes segment of a core dump.
 *
 * The pointer members are allocated by elf_note_info_init() and released
 * by free_note_info().
 */
struct elf_note_info {
	struct memelfnote *notes;	/* notes of the dumping task; numnote are in use */
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;	/* elf_thread_status of the other threads */
	elf_fpregset_t *fpu;		/* NT_PRFPREG buffer */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;		/* ELF_CORE_XFPREG_TYPE buffer */
#endif
	int thread_status_size;		/* summed note size of thread_list entries */
	int numnote;			/* number of entries used in notes[] */
};
1660
1661 static int elf_note_info_init(struct elf_note_info *info)
1662 {
1663         memset(info, 0, sizeof(*info));
1664         INIT_LIST_HEAD(&info->thread_list);
1665
1666         /* Allocate space for six ELF notes */
1667         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1668         if (!info->notes)
1669                 return 0;
1670         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1671         if (!info->psinfo)
1672                 goto notes_free;
1673         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1674         if (!info->prstatus)
1675                 goto psinfo_free;
1676         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1677         if (!info->fpu)
1678                 goto prstatus_free;
1679 #ifdef ELF_CORE_COPY_XFPREGS
1680         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1681         if (!info->xfpu)
1682                 goto fpu_free;
1683 #endif
1684         return 1;
1685 #ifdef ELF_CORE_COPY_XFPREGS
1686  fpu_free:
1687         kfree(info->fpu);
1688 #endif
1689  prstatus_free:
1690         kfree(info->prstatus);
1691  psinfo_free:
1692         kfree(info->psinfo);
1693  notes_free:
1694         kfree(info->notes);
1695         return 0;
1696 }
1697
1698 static int fill_note_info(struct elfhdr *elf, int phdrs,
1699                           struct elf_note_info *info,
1700                           long signr, struct pt_regs *regs)
1701 {
1702         struct list_head *t;
1703
1704         if (!elf_note_info_init(info))
1705                 return 0;
1706
1707         if (signr) {
1708                 struct core_thread *ct;
1709                 struct elf_thread_status *ets;
1710
1711                 for (ct = current->mm->core_state->dumper.next;
1712                                                 ct; ct = ct->next) {
1713                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1714                         if (!ets)
1715                                 return 0;
1716
1717                         ets->thread = ct->task;
1718                         list_add(&ets->list, &info->thread_list);
1719                 }
1720
1721                 list_for_each(t, &info->thread_list) {
1722                         int sz;
1723
1724                         ets = list_entry(t, struct elf_thread_status, list);
1725                         sz = elf_dump_thread_status(signr, ets);
1726                         info->thread_status_size += sz;
1727                 }
1728         }
1729         /* now collect the dump for the current */
1730         memset(info->prstatus, 0, sizeof(*info->prstatus));
1731         fill_prstatus(info->prstatus, current, signr);
1732         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1733
1734         /* Set up header */
1735         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1736
1737         /*
1738          * Set up the notes in similar form to SVR4 core dumps made
1739          * with info from their /proc.
1740          */
1741
1742         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1743                   sizeof(*info->prstatus), info->prstatus);
1744         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1745         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1746                   sizeof(*info->psinfo), info->psinfo);
1747
1748         info->numnote = 2;
1749
1750         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1751
1752         /* Try to dump the FPU. */
1753         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1754                                                                info->fpu);
1755         if (info->prstatus->pr_fpvalid)
1756                 fill_note(info->notes + info->numnote++,
1757                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1758 #ifdef ELF_CORE_COPY_XFPREGS
1759         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1760                 fill_note(info->notes + info->numnote++,
1761                           "LINUX", ELF_CORE_XFPREG_TYPE,
1762                           sizeof(*info->xfpu), info->xfpu);
1763 #endif
1764
1765         return 1;
1766 }
1767
1768 static size_t get_note_info_size(struct elf_note_info *info)
1769 {
1770         int sz = 0;
1771         int i;
1772
1773         for (i = 0; i < info->numnote; i++)
1774                 sz += notesize(info->notes + i);
1775
1776         sz += info->thread_status_size;
1777
1778         return sz;
1779 }
1780
1781 static int write_note_info(struct elf_note_info *info,
1782                            struct file *file, loff_t *foffset)
1783 {
1784         int i;
1785         struct list_head *t;
1786
1787         for (i = 0; i < info->numnote; i++)
1788                 if (!writenote(info->notes + i, file, foffset))
1789                         return 0;
1790
1791         /* write out the thread status notes section */
1792         list_for_each(t, &info->thread_list) {
1793                 struct elf_thread_status *tmp =
1794                                 list_entry(t, struct elf_thread_status, list);
1795
1796                 for (i = 0; i < tmp->num_notes; i++)
1797                         if (!writenote(&tmp->notes[i], file, foffset))
1798                                 return 0;
1799         }
1800
1801         return 1;
1802 }
1803
1804 static void free_note_info(struct elf_note_info *info)
1805 {
1806         while (!list_empty(&info->thread_list)) {
1807                 struct list_head *tmp = info->thread_list.next;
1808                 list_del(tmp);
1809                 kfree(list_entry(tmp, struct elf_thread_status, list));
1810         }
1811
1812         kfree(info->prstatus);
1813         kfree(info->psinfo);
1814         kfree(info->notes);
1815         kfree(info->fpu);
1816 #ifdef ELF_CORE_COPY_XFPREGS
1817         kfree(info->xfpu);
1818 #endif
1819 }
1820
1821 #endif
1822
1823 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1824                                         struct vm_area_struct *gate_vma)
1825 {
1826         struct vm_area_struct *ret = tsk->mm->mmap;
1827
1828         if (ret)
1829                 return ret;
1830         return gate_vma;
1831 }
1832 /*
1833  * Helper function for iterating across a vma list.  It ensures that the caller
1834  * will visit `gate_vma' prior to terminating the search.
1835  */
1836 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1837                                         struct vm_area_struct *gate_vma)
1838 {
1839         struct vm_area_struct *ret;
1840
1841         ret = this_vma->vm_next;
1842         if (ret)
1843                 return ret;
1844         if (this_vma == gate_vma)
1845                 return NULL;
1846         return gate_vma;
1847 }
1848
1849 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1850                              elf_addr_t e_shoff, int segs)
1851 {
1852         elf->e_shoff = e_shoff;
1853         elf->e_shentsize = sizeof(*shdr4extnum);
1854         elf->e_shnum = 1;
1855         elf->e_shstrndx = SHN_UNDEF;
1856
1857         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1858
1859         shdr4extnum->sh_type = SHT_NULL;
1860         shdr4extnum->sh_size = elf->e_shnum;
1861         shdr4extnum->sh_link = elf->e_shstrndx;
1862         shdr4extnum->sh_info = segs;
1863 }
1864
1865 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1866                                      unsigned long mm_flags)
1867 {
1868         struct vm_area_struct *vma;
1869         size_t size = 0;
1870
1871         for (vma = first_vma(current, gate_vma); vma != NULL;
1872              vma = next_vma(vma, gate_vma))
1873                 size += vma_dump_size(vma, mm_flags);
1874         return size;
1875 }
1876
/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 *
 * The file is laid out as:
 *   ELF header
 *   program headers (notes, one PT_LOAD per vma, arch extras)
 *   notes
 *   padding up to ELF_EXEC_PAGESIZE
 *   vma contents
 *   arch extra data
 *   one section header (only with extended numbering)
 *
 * @cprm: dump parameters; file, limit, signr, regs and mm_flags are used.
 *
 * Returns 1 once the note information has been collected (even if writing
 * is cut short afterwards), 0 if the dump failed before that point.
 */
static int elf_core_dump(struct coredump_params *cprm)
{
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;	/* bytes accounted against cprm->limit */
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	/*
	 * offset:  file offset being planned (first pass)
	 * dataoff: page-aligned start of the vma contents
	 * foffset: file position tracked while the notes are written
	 */
	loff_t offset = 0, dataoff, foffset;
	struct elf_note_info info;
	struct elf_phdr *phdr4note = NULL;
	struct elf_shdr *shdr4extnum = NULL;
	Elf_Half e_phnum;
	elf_addr_t e_shoff;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto out;
	/*
	 * The number of segs is recorded into the ELF header as a 16bit value.
	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
	 */
	segs = current->mm->map_count;
	segs += elf_core_extra_phdrs();

	gate_vma = get_gate_vma(current);
	if (gate_vma != NULL)
		segs++;

	/* for notes section */
	segs++;

	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
	 * this, kernel supports extended numbering. Have a look at
	 * include/linux/elf.h for further information. */
	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

	/*
	 * Collect all the non-memory information about the process for the
	 * notes.  This also sets up the file header.
	 */
	if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
		goto cleanup;

	/* From here on the dump counts as made, even if it gets truncated. */
	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	/* Address limit is switched for the dump and restored at end_coredump. */
	fs = get_fs();
	set_fs(KERNEL_DS);

	/* First pass: work out where each part of the file will live. */
	offset += sizeof(*elf);				/* Elf header */
	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
	foffset = offset;

	/* Write notes phdr entry */
	{
		size_t sz = get_note_info_size(&info);

		sz += elf_coredump_extra_notes_size();

		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
		if (!phdr4note)
			goto end_coredump;

		fill_elf_note_phdr(phdr4note, sz, offset);
		offset += sz;
	}

	/* vma contents start on the next ELF_EXEC_PAGESIZE boundary. */
	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/* The section header, if needed, goes after all dumped data. */
	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
	offset += elf_core_extra_data_size();
	e_shoff = offset;

	if (e_phnum == PN_XNUM) {
		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
		if (!shdr4extnum)
			goto end_coredump;
		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
	}

	/* Rewind: p_offset of the PT_LOAD headers is counted from dataoff. */
	offset = dataoff;

	/* Second pass: write everything out, honouring cprm->limit. */
	size += sizeof(*elf);
	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
		goto end_coredump;

	size += sizeof(*phdr4note);
	if (size > cprm->limit
	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
		goto end_coredump;

	/* Write program headers for segments dump */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		/* p_filesz is what gets dumped; p_memsz is the full vma. */
		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
		phdr.p_memsz = vma->vm_end - vma->vm_start;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		size += sizeof(phdr);
		if (size > cprm->limit
		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
			goto end_coredump;
	}

	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
		goto end_coredump;

	/* write out the notes section */
	if (!write_note_info(&info, cprm->file, &foffset))
		goto end_coredump;

	/* Unlike the helpers above, a non-zero return here aborts the dump. */
	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
		goto end_coredump;

	/* Align to page */
	if (!dump_seek(cprm->file, dataoff - foffset))
		goto end_coredump;

	/* Dump the contents of each vma, one page at a time. */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long addr;
		unsigned long end;

		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);

		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
			struct page *page;
			int stop;

			page = get_dump_page(addr);
			if (page) {
				void *kaddr = kmap(page);
				stop = ((size += PAGE_SIZE) > cprm->limit) ||
					!dump_write(cprm->file, kaddr,
						    PAGE_SIZE);
				kunmap(page);
				page_cache_release(page);
			} else
				/* No page to dump: skip ahead in the file. */
				stop = !dump_seek(cprm->file, PAGE_SIZE);
			if (stop)
				goto end_coredump;
		}
	}

	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
		goto end_coredump;

	/* With extended numbering the section header closes the file. */
	if (e_phnum == PN_XNUM) {
		size += sizeof(*shdr4extnum);
		if (size > cprm->limit
		    || !dump_write(cprm->file, shdr4extnum,
				   sizeof(*shdr4extnum)))
			goto end_coredump;
	}

end_coredump:
	set_fs(fs);

cleanup:
	free_note_info(&info);
	kfree(shdr4extnum);
	kfree(phdr4note);
	kfree(elf);
out:
	return has_dumped;
}
2076
2077 #endif          /* CONFIG_ELF_CORE */
2078
2079 static int __init init_elf_binfmt(void)
2080 {
2081         return register_binfmt(&elf_format);
2082 }
2083
/* Undo init_elf_binfmt(). */
static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}
2089
/*
 * Hook the registration and unregistration routines above into kernel
 * initialisation and module exit, and declare the licence.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");