/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
#include <asm/exec.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

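/*
 * Map (via vm_brk) and record the program break region [start, end).
 * Both ends are rounded up to ELF_MIN_ALIGN; on success, mm->start_brk
 * and mm->brk are set to the aligned end.
 */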
static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would otherwise
   contain junk from the file that should not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
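
/*
 * Note the asymmetry: on a downward-growing stack, STACK_ALLOC moves sp
 * down by len and yields the new (lower) address, so the caller writes
 * into [sp, sp + len).  On CONFIG_STACK_GROWSUP it yields the old sp and
 * bumps it up by len.  STACK_ROUND keeps the pointer 16-byte aligned,
 * which the ABI typically requires at process entry.
 */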

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

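        /*
         * The auxiliary vector is an array of (id, value) pairs ending
         * with an AT_NULL entry.  It is staged in mm->saved_auxv here
         * and copied out to the user stack further down.
         */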
        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, cred->uid);
        NEW_AUX_ENT(AT_EUID, cred->euid);
        NEW_AUX_ENT(AT_GID, cred->gid);
        NEW_AUX_ENT(AT_EGID, cred->egid);
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

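        /*
         * Final layout, from sp upward: argc, argv[0..argc-1], NULL,
         * envp[0..envc-1], NULL, then the auxiliary vector.  "items"
         * counts the pointer slots; ei_index counts the auxv words.
         */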
        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        down_write(&current->mm->mmap_sem);
        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        do_munmap(current->mm, map_addr+size, total_size-size);
        } else
                map_addr = do_mmap(filep, addr, size, prot, type, off);

        up_write(&current->mm->mmap_sem);
        return map_addr;
}

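/*
 * Span from the page-start of the first PT_LOAD segment to the end of
 * the last one; this is how much address space the whole image needs.
 */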
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

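        /*
         * Map each PT_LOAD segment.  For an ET_DYN interpreter the first
         * mapping (done with the full total_size) picks the base; every
         * later segment is then placed MAP_FIXED relative to that base.
         */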
        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

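/*
 * The main ELF binfmt entry point: validate the headers, map the
 * PT_LOAD segments, load the PT_INTERP interpreter if there is one,
 * build the stack and auxiliary vector via create_elf_tables(), and
 * finally transfer control to the entry point with start_thread().
 */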
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

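        /*
         * PT_GNU_STACK records whether the toolchain wants an executable
         * stack; absent the header we fall back to EXSTACK_DEFAULT.
         */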
        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, this is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

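                /*
                 * The first successful mapping settles the load address.
                 * For ET_DYN, fold the kernel-chosen placement back into
                 * load_bias so later segments (and e_entry below) can be
                 * relocated by the same amount.
                 */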
                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long uninitialized_var(interp_map_addr);

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall page, and other
 * architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

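/*
 * The MMF_DUMP_* bits in mm_flags mirror /proc/<pid>/coredump_filter;
 * FILTER(type) below tests whether that class of mapping should be
 * written out.
 */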
/*
 * Decide how much of a segment to dump: all, part, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_NODUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

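/*
 * On disk a note is an elf_note header followed by the name and then the
 * descriptor data, each padded to a 4-byte boundary; notesize() above and
 * writenote() below must agree on this layout.
 */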
#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, cred->uid);
        SET_GID(psinfo->pr_gid, cred->gid);
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

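/*
 * Walk the saved auxiliary vector two words at a time so the note
 * includes the terminating AT_NULL pair.
 */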
static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
        struct elf_thread_core_info *next;
        struct task_struct *task;
        struct elf_prstatus prstatus;
        struct memelfnote notes[0];
};

struct elf_note_info {
        struct elf_thread_core_info *thread;
        struct memelfnote psinfo;
        struct memelfnote auxv;
        size_t size;
        int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
                                       const struct user_regset *regset)
{
        if (regset->writeback)
                regset->writeback(task, regset, 1);
}

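/*
 * These macros let an architecture (via asm/elf.h) override how much of
 * prstatus/pr_reg is emitted and where pr_reg lives; the defaults simply
 * use the generic structure layout.
 */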
1407 #ifndef PR_REG_SIZE
1408 #define PR_REG_SIZE(S) sizeof(S)
1409 #endif
1410
1411 #ifndef PRSTATUS_SIZE
1412 #define PRSTATUS_SIZE(S) sizeof(S)
1413 #endif
1414
1415 #ifndef PR_REG_PTR
1416 #define PR_REG_PTR(S) (&((S)->pr_reg))
1417 #endif
1418
1419 #ifndef SET_PR_FPVALID
1420 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1421 #endif
1422
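     /*
      * The PR_REG_SIZE, PRSTATUS_SIZE, PR_REG_PTR and SET_PR_FPVALID
      * fallbacks above can be overridden by an architecture, e.g. to dump
      * a compat-sized prstatus; the defaults just use the native layout.
      */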
1423 static int fill_thread_core_info(struct elf_thread_core_info *t,
1424                                  const struct user_regset_view *view,
1425                                  long signr, size_t *total)
1426 {
1427         unsigned int i;
1428
1429         /*
1430          * NT_PRSTATUS is the one special case, because the regset data
1431          * goes into the pr_reg field inside the note contents, rather
1432          * than being the whole note contents.  We fill the rest in here.
1433          * We assume that regset 0 is NT_PRSTATUS.
1434          */
1435         fill_prstatus(&t->prstatus, t->task, signr);
1436         (void) view->regsets[0].get(t->task, &view->regsets[0],
1437                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1438                                     PR_REG_PTR(&t->prstatus), NULL);
1439
1440         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1441                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1442         *total += notesize(&t->notes[0]);
1443
1444         do_thread_regset_writeback(t->task, &view->regsets[0]);
1445
1446         /*
1447          * Each other regset might generate a note too.  For each regset
1448          * that has no core_note_type or is inactive, we leave t->notes[i]
1449          * all zero and we'll know to skip writing it later.
1450          */
1451         for (i = 1; i < view->n; ++i) {
1452                 const struct user_regset *regset = &view->regsets[i];
1453                 do_thread_regset_writeback(t->task, regset);
1454                 if (regset->core_note_type && regset->get &&
1455                     (!regset->active || regset->active(t->task, regset))) {
1456                         int ret;
1457                         size_t size = regset->n * regset->size;
1458                         void *data = kmalloc(size, GFP_KERNEL);
1459                         if (unlikely(!data))
1460                                 return 0;
1461                         ret = regset->get(t->task, regset,
1462                                           0, size, data, NULL);
1463                         if (unlikely(ret))
1464                                 kfree(data);
1465                         else {
1466                                 if (regset->core_note_type != NT_PRFPREG)
1467                                         fill_note(&t->notes[i], "LINUX",
1468                                                   regset->core_note_type,
1469                                                   size, data);
1470                                 else {
1471                                         SET_PR_FPVALID(&t->prstatus, 1);
1472                                         fill_note(&t->notes[i], "CORE",
1473                                                   NT_PRFPREG, size, data);
1474                                 }
1475                                 *total += notesize(&t->notes[i]);
1476                         }
1477                 }
1478         }
1479
1480         return 1;
1481 }
1482
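     /*
      * Collect everything needed for the note segment: one
      * elf_thread_core_info per thread on the core_state->dumper list
      * (keeping the dumping task first), plus the process-wide psinfo and
      * auxv notes, and fill in the ELF file header.
      */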
1483 static int fill_note_info(struct elfhdr *elf, int phdrs,
1484                           struct elf_note_info *info,
1485                           long signr, struct pt_regs *regs)
1486 {
1487         struct task_struct *dump_task = current;
1488         const struct user_regset_view *view = task_user_regset_view(dump_task);
1489         struct elf_thread_core_info *t;
1490         struct elf_prpsinfo *psinfo;
1491         struct core_thread *ct;
1492         unsigned int i;
1493
1494         info->size = 0;
1495         info->thread = NULL;
1496
1497         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1498         if (psinfo == NULL)
1499                 return 0;
1500
1501         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1502
1503         /*
1504          * Figure out how many notes we're going to need for each thread.
1505          */
1506         info->thread_notes = 0;
1507         for (i = 0; i < view->n; ++i)
1508                 if (view->regsets[i].core_note_type != 0)
1509                         ++info->thread_notes;
1510
1511         /*
1512          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1513          * since it is our one special case.
1514          */
1515         if (unlikely(info->thread_notes == 0) ||
1516             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1517                 WARN_ON(1);
1518                 return 0;
1519         }
1520
1521         /*
1522          * Initialize the ELF file header.
1523          */
1524         fill_elf_header(elf, phdrs,
1525                         view->e_machine, view->e_flags, view->ei_osabi);
1526
1527         /*
1528          * Allocate a structure for each thread.
1529          */
1530         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1531                 t = kzalloc(offsetof(struct elf_thread_core_info,
1532                                      notes[info->thread_notes]),
1533                             GFP_KERNEL);
1534                 if (unlikely(!t))
1535                         return 0;
1536
1537                 t->task = ct->task;
1538                 if (ct->task == dump_task || !info->thread) {
1539                         t->next = info->thread;
1540                         info->thread = t;
1541                 } else {
1542                         /*
1543                          * Make sure to keep the original task at
1544                          * the head of the list.
1545                          */
1546                         t->next = info->thread->next;
1547                         info->thread->next = t;
1548                 }
1549         }
1550
1551         /*
1552          * Now fill in each thread's information.
1553          */
1554         for (t = info->thread; t != NULL; t = t->next)
1555                 if (!fill_thread_core_info(t, view, signr, &info->size))
1556                         return 0;
1557
1558         /*
1559          * Fill in the two process-wide notes.
1560          */
1561         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1562         info->size += notesize(&info->psinfo);
1563
1564         fill_auxv_note(&info->auxv, current->mm);
1565         info->size += notesize(&info->auxv);
1566
1567         return 1;
1568 }
1569
1570 static size_t get_note_info_size(struct elf_note_info *info)
1571 {
1572         return info->size;
1573 }
1574
1575 /*
1576  * Write all the notes for each thread.  When writing the first thread, the
1577  * process-wide notes are interleaved after the first thread-specific note.
1578  */
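     /*
      * With two threads, for example, the note segment comes out as:
      *
      *   NT_PRSTATUS (thread 1), NT_PRPSINFO, NT_AUXV,
      *   <other thread 1 regset notes>,
      *   NT_PRSTATUS (thread 2), <other thread 2 regset notes>
      */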
1579 static int write_note_info(struct elf_note_info *info,
1580                            struct file *file, loff_t *foffset)
1581 {
1582         bool first = true;
1583         struct elf_thread_core_info *t = info->thread;
1584
1585         do {
1586                 int i;
1587
1588                 if (!writenote(&t->notes[0], file, foffset))
1589                         return 0;
1590
1591                 if (first && !writenote(&info->psinfo, file, foffset))
1592                         return 0;
1593                 if (first && !writenote(&info->auxv, file, foffset))
1594                         return 0;
1595
1596                 for (i = 1; i < info->thread_notes; ++i)
1597                         if (t->notes[i].data &&
1598                             !writenote(&t->notes[i], file, foffset))
1599                                 return 0;
1600
1601                 first = false;
1602                 t = t->next;
1603         } while (t);
1604
1605         return 1;
1606 }
1607
1608 static void free_note_info(struct elf_note_info *info)
1609 {
1610         struct elf_thread_core_info *threads = info->thread;
1611         while (threads) {
1612                 unsigned int i;
1613                 struct elf_thread_core_info *t = threads;
1614                 threads = t->next;
1615                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1616                 for (i = 1; i < info->thread_notes; ++i)
1617                         kfree(t->notes[i].data);
1618                 kfree(t);
1619         }
1620         kfree(info->psinfo.data);
1621 }
1622
1623 #else
1624
1625 /* Here is the structure in which the status of each thread is captured. */
1626 struct elf_thread_status
1627 {
1628         struct list_head list;
1629         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1630         elf_fpregset_t fpu;             /* NT_PRFPREG */
1631         struct task_struct *thread;
1632 #ifdef ELF_CORE_COPY_XFPREGS
1633         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1634 #endif
1635         struct memelfnote notes[3];
1636         int num_notes;
1637 };
1638
1639 /*
1640  * In order to add the specific thread information for the elf file format,
1641  * we need to keep a linked list of every thread's pr_status and then create
1642  * a single section for them in the final core file.
1643  */
1644 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1645 {
1646         int sz = 0;
1647         struct task_struct *p = t->thread;
1648         t->num_notes = 0;
1649
1650         fill_prstatus(&t->prstatus, p, signr);
1651         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1652
1653         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1654                   &(t->prstatus));
1655         t->num_notes++;
1656         sz += notesize(&t->notes[0]);
1657
1658         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1659                                                                 &t->fpu))) {
1660                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1661                           &(t->fpu));
1662                 t->num_notes++;
1663                 sz += notesize(&t->notes[1]);
1664         }
1665
1666 #ifdef ELF_CORE_COPY_XFPREGS
1667         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1668                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1669                           sizeof(t->xfpu), &t->xfpu);
1670                 t->num_notes++;
1671                 sz += notesize(&t->notes[2]);
1672         }
1673 #endif
1674         return sz;
1675 }
1676
1677 struct elf_note_info {
1678         struct memelfnote *notes;
1679         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1680         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1681         struct list_head thread_list;
1682         elf_fpregset_t *fpu;
1683 #ifdef ELF_CORE_COPY_XFPREGS
1684         elf_fpxregset_t *xfpu;
1685 #endif
1686         int thread_status_size;
1687         int numnote;
1688 };
1689
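     /*
      * Allocate the fixed pieces of the note info; on any failure the
      * labels below unwind the allocations made so far, in reverse order.
      */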
1690 static int elf_note_info_init(struct elf_note_info *info)
1691 {
1692         memset(info, 0, sizeof(*info));
1693         INIT_LIST_HEAD(&info->thread_list);
1694
1695         /* Allocate space for six ELF notes */
1696         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1697         if (!info->notes)
1698                 return 0;
1699         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1700         if (!info->psinfo)
1701                 goto notes_free;
1702         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1703         if (!info->prstatus)
1704                 goto psinfo_free;
1705         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1706         if (!info->fpu)
1707                 goto prstatus_free;
1708 #ifdef ELF_CORE_COPY_XFPREGS
1709         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1710         if (!info->xfpu)
1711                 goto fpu_free;
1712 #endif
1713         return 1;
1714 #ifdef ELF_CORE_COPY_XFPREGS
1715  fpu_free:
1716         kfree(info->fpu);
1717 #endif
1718  prstatus_free:
1719         kfree(info->prstatus);
1720  psinfo_free:
1721         kfree(info->psinfo);
1722  notes_free:
1723         kfree(info->notes);
1724         return 0;
1725 }
1726
1727 static int fill_note_info(struct elfhdr *elf, int phdrs,
1728                           struct elf_note_info *info,
1729                           long signr, struct pt_regs *regs)
1730 {
1731         struct list_head *t;
1732
1733         if (!elf_note_info_init(info))
1734                 return 0;
1735
1736         if (signr) {
1737                 struct core_thread *ct;
1738                 struct elf_thread_status *ets;
1739
1740                 for (ct = current->mm->core_state->dumper.next;
1741                                                 ct; ct = ct->next) {
1742                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1743                         if (!ets)
1744                                 return 0;
1745
1746                         ets->thread = ct->task;
1747                         list_add(&ets->list, &info->thread_list);
1748                 }
1749
1750                 list_for_each(t, &info->thread_list) {
1751                         int sz;
1752
1753                         ets = list_entry(t, struct elf_thread_status, list);
1754                         sz = elf_dump_thread_status(signr, ets);
1755                         info->thread_status_size += sz;
1756                 }
1757         }
1758         /* now collect the dump for the current task */
1759         memset(info->prstatus, 0, sizeof(*info->prstatus));
1760         fill_prstatus(info->prstatus, current, signr);
1761         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1762
1763         /* Set up header */
1764         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1765
1766         /*
1767          * Set up the notes in similar form to SVR4 core dumps made
1768          * with info from their /proc.
1769          */
1770
1771         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1772                   sizeof(*info->prstatus), info->prstatus);
1773         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1774         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1775                   sizeof(*info->psinfo), info->psinfo);
1776
1777         info->numnote = 2;
1778
1779         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1780
1781         /* Try to dump the FPU. */
1782         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1783                                                                info->fpu);
1784         if (info->prstatus->pr_fpvalid)
1785                 fill_note(info->notes + info->numnote++,
1786                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1787 #ifdef ELF_CORE_COPY_XFPREGS
1788         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1789                 fill_note(info->notes + info->numnote++,
1790                           "LINUX", ELF_CORE_XFPREG_TYPE,
1791                           sizeof(*info->xfpu), info->xfpu);
1792 #endif
1793
1794         return 1;
1795 }
1796
1797 static size_t get_note_info_size(struct elf_note_info *info)
1798 {
1799         int sz = 0;
1800         int i;
1801
1802         for (i = 0; i < info->numnote; i++)
1803                 sz += notesize(info->notes + i);
1804
1805         sz += info->thread_status_size;
1806
1807         return sz;
1808 }
1809
1810 static int write_note_info(struct elf_note_info *info,
1811                            struct file *file, loff_t *foffset)
1812 {
1813         int i;
1814         struct list_head *t;
1815
1816         for (i = 0; i < info->numnote; i++)
1817                 if (!writenote(info->notes + i, file, foffset))
1818                         return 0;
1819
1820         /* write out the thread status notes section */
1821         list_for_each(t, &info->thread_list) {
1822                 struct elf_thread_status *tmp =
1823                                 list_entry(t, struct elf_thread_status, list);
1824
1825                 for (i = 0; i < tmp->num_notes; i++)
1826                         if (!writenote(&tmp->notes[i], file, foffset))
1827                                 return 0;
1828         }
1829
1830         return 1;
1831 }
1832
1833 static void free_note_info(struct elf_note_info *info)
1834 {
1835         while (!list_empty(&info->thread_list)) {
1836                 struct list_head *tmp = info->thread_list.next;
1837                 list_del(tmp);
1838                 kfree(list_entry(tmp, struct elf_thread_status, list));
1839         }
1840
1841         kfree(info->prstatus);
1842         kfree(info->psinfo);
1843         kfree(info->notes);
1844         kfree(info->fpu);
1845 #ifdef ELF_CORE_COPY_XFPREGS
1846         kfree(info->xfpu);
1847 #endif
1848 }
1849
1850 #endif
1851
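     /*
      * The "gate" vma (e.g. the x86 vsyscall page) is not on the mm's vma
      * list, so the iteration helpers below tack it on at the end.
      */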
1852 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1853                                         struct vm_area_struct *gate_vma)
1854 {
1855         struct vm_area_struct *ret = tsk->mm->mmap;
1856
1857         if (ret)
1858                 return ret;
1859         return gate_vma;
1860 }
1861 /*
1862  * Helper function for iterating across a vma list.  It ensures that the caller
1863  * will visit `gate_vma' prior to terminating the search.
1864  */
1865 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1866                                         struct vm_area_struct *gate_vma)
1867 {
1868         struct vm_area_struct *ret;
1869
1870         ret = this_vma->vm_next;
1871         if (ret)
1872                 return ret;
1873         if (this_vma == gate_vma)
1874                 return NULL;
1875         return gate_vma;
1876 }
1877
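     /*
      * ELF extended numbering: when the real segment count does not fit in
      * the 16-bit e_phnum, e_phnum is set to PN_XNUM and the true count is
      * stored in the sh_info field of section header 0, written below.
      */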
1878 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1879                              elf_addr_t e_shoff, int segs)
1880 {
1881         elf->e_shoff = e_shoff;
1882         elf->e_shentsize = sizeof(*shdr4extnum);
1883         elf->e_shnum = 1;
1884         elf->e_shstrndx = SHN_UNDEF;
1885
1886         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1887
1888         shdr4extnum->sh_type = SHT_NULL;
1889         shdr4extnum->sh_size = elf->e_shnum;
1890         shdr4extnum->sh_link = elf->e_shstrndx;
1891         shdr4extnum->sh_info = segs;
1892 }
1893
1894 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1895                                      unsigned long mm_flags)
1896 {
1897         struct vm_area_struct *vma;
1898         size_t size = 0;
1899
1900         for (vma = first_vma(current, gate_vma); vma != NULL;
1901              vma = next_vma(vma, gate_vma))
1902                 size += vma_dump_size(vma, mm_flags);
1903         return size;
1904 }
1905
1906 /*
1907  * Actual dumper
1908  *
1909  * This is a two-pass process; first we find the offsets of the bits,
1910  * and then they are actually written out.  If we run out of core limit
1911  * we just truncate.
1912  */
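     /*
      * The resulting file layout, with all offsets computed up front:
      *
      *   ELF header
      *   program headers (one per segment, plus one for the notes)
      *   note data
      *   <padding to an ELF_EXEC_PAGESIZE boundary>
      *   segment data, one PT_LOAD per vma (including the gate vma)
      *   extra arch data; extnum section header if e_phnum == PN_XNUM
      */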
1913 static int elf_core_dump(struct coredump_params *cprm)
1914 {
1915         int has_dumped = 0;
1916         mm_segment_t fs;
1917         int segs;
1918         size_t size = 0;
1919         struct vm_area_struct *vma, *gate_vma;
1920         struct elfhdr *elf = NULL;
1921         loff_t offset = 0, dataoff, foffset;
1922         struct elf_note_info info;
1923         struct elf_phdr *phdr4note = NULL;
1924         struct elf_shdr *shdr4extnum = NULL;
1925         Elf_Half e_phnum;
1926         elf_addr_t e_shoff;
1927
1928         /*
1929          * We no longer stop all VM operations.
1930          *
1931          * This is because those processes that could possibly change map_count
1932          * or the mmap / vma pages are now blocked in do_exit on current
1933          * finishing this core dump.
1934          *
1935          * Only ptrace can touch these memory addresses, but it doesn't change
1936          * the map_count or the pages allocated. So no possibility of crashing
1937          * exists while dumping the mm->vm_next areas to the core file.
1938          */
1939   
1940         /* alloc memory for large data structures: too large to be on stack */
1941         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1942         if (!elf)
1943                 goto out;
1944         /*
1945          * The number of segs is recorded in the ELF header as a 16-bit value.
1946          * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify this.
1947          */
1948         segs = current->mm->map_count;
1949         segs += elf_core_extra_phdrs();
1950
1951         gate_vma = get_gate_vma(current->mm);
1952         if (gate_vma != NULL)
1953                 segs++;
1954
1955         /* for notes section */
1956         segs++;
1957
1958         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1959          * this, the kernel supports extended numbering. Have a look at
1960          * include/linux/elf.h for further information. */
1961         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1962
1963         /*
1964          * Collect all the non-memory information about the process for the
1965          * notes.  This also sets up the file header.
1966          */
1967         if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1968                 goto cleanup;
1969
1970         has_dumped = 1;
1971         current->flags |= PF_DUMPCORE;
1972   
1973         fs = get_fs();
1974         set_fs(KERNEL_DS);
1975
1976         offset += sizeof(*elf);                         /* Elf header */
1977         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
1978         foffset = offset;
1979
1980         /* Write notes phdr entry */
1981         {
1982                 size_t sz = get_note_info_size(&info);
1983
1984                 sz += elf_coredump_extra_notes_size();
1985
1986                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1987                 if (!phdr4note)
1988                         goto end_coredump;
1989
1990                 fill_elf_note_phdr(phdr4note, sz, offset);
1991                 offset += sz;
1992         }
1993
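             /*
              * Align the start of the segment data to ELF_EXEC_PAGESIZE so
              * the PT_LOAD data begins on the boundary that p_align advertises.
              */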
1994         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1995
1996         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1997         offset += elf_core_extra_data_size();
1998         e_shoff = offset;
1999
2000         if (e_phnum == PN_XNUM) {
2001                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2002                 if (!shdr4extnum)
2003                         goto end_coredump;
2004                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2005         }
2006
2007         offset = dataoff;
2008
2009         size += sizeof(*elf);
2010         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2011                 goto end_coredump;
2012
2013         size += sizeof(*phdr4note);
2014         if (size > cprm->limit
2015             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2016                 goto end_coredump;
2017
2018         /* Write a program header for each segment we dump */
2019         for (vma = first_vma(current, gate_vma); vma != NULL;
2020                         vma = next_vma(vma, gate_vma)) {
2021                 struct elf_phdr phdr;
2022
2023                 phdr.p_type = PT_LOAD;
2024                 phdr.p_offset = offset;
2025                 phdr.p_vaddr = vma->vm_start;
2026                 phdr.p_paddr = 0;
2027                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2028                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2029                 offset += phdr.p_filesz;
2030                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2031                 if (vma->vm_flags & VM_WRITE)
2032                         phdr.p_flags |= PF_W;
2033                 if (vma->vm_flags & VM_EXEC)
2034                         phdr.p_flags |= PF_X;
2035                 phdr.p_align = ELF_EXEC_PAGESIZE;
2036
2037                 size += sizeof(phdr);
2038                 if (size > cprm->limit
2039                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2040                         goto end_coredump;
2041         }
2042
2043         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2044                 goto end_coredump;
2045
2046         /* write out the notes section */
2047         if (!write_note_info(&info, cprm->file, &foffset))
2048                 goto end_coredump;
2049
2050         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2051                 goto end_coredump;
2052
2053         /* Align to page */
2054         if (!dump_seek(cprm->file, dataoff - foffset))
2055                 goto end_coredump;
2056
2057         for (vma = first_vma(current, gate_vma); vma != NULL;
2058                         vma = next_vma(vma, gate_vma)) {
2059                 unsigned long addr;
2060                 unsigned long end;
2061
2062                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2063
2064                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2065                         struct page *page;
2066                         int stop;
2067
2068                         page = get_dump_page(addr);
2069                         if (page) {
2070                                 void *kaddr = kmap(page);
2071                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2072                                         !dump_write(cprm->file, kaddr,
2073                                                     PAGE_SIZE);
2074                                 kunmap(page);
2075                                 page_cache_release(page);
2076                         } else {
                                     /* no dumpable page: leave a zero-filled hole */
2077                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
                             }
2078                         if (stop)
2079                                 goto end_coredump;
2080                 }
2081         }
2082
2083         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2084                 goto end_coredump;
2085
2086         if (e_phnum == PN_XNUM) {
2087                 size += sizeof(*shdr4extnum);
2088                 if (size > cprm->limit
2089                     || !dump_write(cprm->file, shdr4extnum,
2090                                    sizeof(*shdr4extnum)))
2091                         goto end_coredump;
2092         }
2093
2094 end_coredump:
2095         set_fs(fs);
2096
2097 cleanup:
2098         free_note_info(&info);
2099         kfree(shdr4extnum);
2100         kfree(phdr4note);
2101         kfree(elf);
2102 out:
2103         return has_dumped;
2104 }
2105
2106 #endif          /* CONFIG_ELF_CORE */
2107
2108 static int __init init_elf_binfmt(void)
2109 {
2110         register_binfmt(&elf_format);
2111         return 0;
2112 }
2113
2114 static void __exit exit_elf_binfmt(void)
2115 {
2116         /* Remove the ELF loader. */
2117         unregister_binfmt(&elf_format);
2118 }
2119
2120 core_initcall(init_elf_binfmt);
2121 module_exit(exit_elf_binfmt);
2122 MODULE_LICENSE("GPL");