serial: PL011: clear pending interrupts
[linux-flexiantxendom0.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
37 #include <asm/page.h>
38
39 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
40 static int load_elf_library(struct file *);
41 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
42                                 int, int, unsigned long);
43
44 /*
45  * If we don't support core dumping, then supply a NULL so we
46  * don't even try.
47  */
48 #ifdef CONFIG_ELF_CORE
49 static int elf_core_dump(struct coredump_params *cprm);
50 #else
51 #define elf_core_dump   NULL
52 #endif
53
54 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
55 #define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
56 #else
57 #define ELF_MIN_ALIGN   PAGE_SIZE
58 #endif
59
60 #ifndef ELF_CORE_EFLAGS
61 #define ELF_CORE_EFLAGS 0
62 #endif
63
64 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
65 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
66 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
67
68 static struct linux_binfmt elf_format = {
69         .module         = THIS_MODULE,
70         .load_binary    = load_elf_binary,
71         .load_shlib     = load_elf_library,
72         .core_dump      = elf_core_dump,
73         .min_coredump   = ELF_EXEC_PAGESIZE,
74 };
75
76 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 down_write(&current->mm->mmap_sem);
85                 addr = do_brk(start, end - start);
86                 up_write(&current->mm->mmap_sem);
87                 if (BAD_ADDR(addr))
88                         return addr;
89         }
90         current->mm->start_brk = current->mm->brk = end;
91         return 0;
92 }
93
94 /* We need to explicitly zero any fractional pages
95    after the data section (i.e. bss).  This would
96    contain the junk from the file that should not
97    be in memory
98  */
99 static int padzero(unsigned long elf_bss)
100 {
101         unsigned long nbyte;
102
103         nbyte = ELF_PAGEOFFSET(elf_bss);
104         if (nbyte) {
105                 nbyte = ELF_MIN_ALIGN - nbyte;
106                 if (clear_user((void __user *) elf_bss, nbyte))
107                         return -EFAULT;
108         }
109         return 0;
110 }
111
/*
 * Helpers for building the initial user stack, written so the same code
 * works whether the stack grows up (CONFIG_STACK_GROWSUP) or down.
 *
 * STACK_ADD(sp, items)    address 'items' elf_addr_t slots further along
 *                         the growth direction from sp.
 * STACK_ROUND(sp, items)  address of sp advanced by 'items' elements and
 *                         then rounded to a 16-byte boundary (up for an
 *                         upward-growing stack, down otherwise).
 * STACK_ALLOC(sp, len)    reserve 'len' bytes by modifying sp in place
 *                         and yield the start of the reserved area; sp
 *                         must be an lvalue.
 *
 * Every macro argument is parenthesised so that callers may pass
 * compound expressions (e.g. "a + b") without precedence surprises.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
        (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
126
127 #ifndef ELF_BASE_PLATFORM
128 /*
129  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
130  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
131  * will be copied to the user stack in the same manner as AT_PLATFORM.
132  */
133 #define ELF_BASE_PLATFORM NULL
134 #endif
135
/*
 * Lay out the initial user stack for a freshly exec'ed ELF image.
 *
 * Starting from bprm->p this routine pushes the platform strings (when
 * the architecture defines them) and 16 random bytes, builds the
 * auxiliary vector in current->mm->saved_auxv, and then writes argc,
 * the argv[] pointers, the envp[] pointers and a copy of the auxiliary
 * vector to the user stack.  bprm->p is updated to the final stack
 * pointer, and the mm's arg_start/arg_end/env_start/env_end markers are
 * filled in while walking the strings.
 *
 * @bprm:             exec state; supplies p, argc, envc, exec,
 *                    interp_flags and interp_data.
 * @exec:             ELF header of the image; e_phoff, e_phnum and
 *                    e_entry are exported through the aux vector.
 * @load_addr:        added to e_phoff to form AT_PHDR.
 * @interp_load_addr: value exported as AT_BASE.
 *
 * Returns 0 on success, -EFAULT when a user-space store fails or
 * find_extend_vma() returns NULL, and -EINVAL when strnlen_user()
 * reports an argument or environment string of length 0 or longer than
 * MAX_ARG_STRLEN.
 */
136 static int
137 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
138                 unsigned long load_addr, unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         elf_addr_t __user *u_base_platform;
148         elf_addr_t __user *u_rand_bytes;
149         const char *k_platform = ELF_PLATFORM;
150         const char *k_base_platform = ELF_BASE_PLATFORM;
151         unsigned char k_rand_bytes[16];
152         int items;
153         elf_addr_t *elf_info;
154         int ei_index = 0;
155         const struct cred *cred = current_cred();
156         struct vm_area_struct *vma;
157
158         /*
159          * In some cases (e.g. Hyper-Threading), we want to avoid L1
160          * evictions by the processes running on the same package. One
161          * thing we can do is to shuffle the initial stack for them.
162          */
163
164         p = arch_align_stack(p);
165
166         /*
167          * If this architecture has a platform capability string, copy it
168          * to userspace.  In some cases (Sparc), this info is impossible
169          * for userspace to get any other way, in others (i386) it is
170          * merely difficult.
171          */
172         u_platform = NULL;
173         if (k_platform) {
            /* +1 so the terminating NUL is copied as well */
174                 size_t len = strlen(k_platform) + 1;
175
176                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
177                 if (__copy_to_user(u_platform, k_platform, len))
178                         return -EFAULT;
179         }
180
181         /*
182          * If this architecture has a "base" platform capability
183          * string, copy it to userspace.
184          */
185         u_base_platform = NULL;
186         if (k_base_platform) {
187                 size_t len = strlen(k_base_platform) + 1;
188
189                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190                 if (__copy_to_user(u_base_platform, k_base_platform, len))
191                         return -EFAULT;
192         }
193
194         /*
195          * Generate 16 random bytes for userspace PRNG seeding.
196          */
197         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
198         u_rand_bytes = (elf_addr_t __user *)
199                        STACK_ALLOC(p, sizeof(k_rand_bytes));
200         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201                 return -EFAULT;
202
203         /* Create the ELF interpreter info */
            /*
             * The vector is assembled in the mm's saved_auxv array first and
             * copied to the user stack at the very end of this function.
             */
204         elf_info = (elf_addr_t *)current->mm->saved_auxv;
205         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
            /* Each entry occupies two slots: the AT_* id followed by its value. */
206 #define NEW_AUX_ENT(id, val) \
207         do { \
208                 elf_info[ei_index++] = id; \
209                 elf_info[ei_index++] = val; \
210         } while (0)
211
212 #ifdef ARCH_DLINFO
213         /* 
214          * ARCH_DLINFO must come first so PPC can do its special alignment of
215          * AUXV.
216          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
217          * ARCH_DLINFO changes
218          */
219         ARCH_DLINFO;
220 #endif
221         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
222         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
223         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
224         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
225         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
226         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
227         NEW_AUX_ENT(AT_BASE, interp_load_addr);
228         NEW_AUX_ENT(AT_FLAGS, 0);
229         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
230         NEW_AUX_ENT(AT_UID, cred->uid);
231         NEW_AUX_ENT(AT_EUID, cred->euid);
232         NEW_AUX_ENT(AT_GID, cred->gid);
233         NEW_AUX_ENT(AT_EGID, cred->egid);
234         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
235         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
236         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
237         if (k_platform) {
238                 NEW_AUX_ENT(AT_PLATFORM,
239                             (elf_addr_t)(unsigned long)u_platform);
240         }
241         if (k_base_platform) {
242                 NEW_AUX_ENT(AT_BASE_PLATFORM,
243                             (elf_addr_t)(unsigned long)u_base_platform);
244         }
245         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
246                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247         }
248 #undef NEW_AUX_ENT
249         /* AT_NULL is zero; clear the rest too */
250         memset(&elf_info[ei_index], 0,
251                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
252
253         /* And advance past the AT_NULL entry.  */
            /*
             * The memset above already zeroed those two slots; counting them
             * here makes the final copy to user space include the terminator.
             */
254         ei_index += 2;
255
256         sp = STACK_ADD(p, ei_index);
257
            /*
             * Slots besides the aux vector: argc pointers plus a terminator,
             * envc pointers plus a terminator, and one slot for argc itself.
             */
258         items = (argc + 1) + (envc + 1) + 1;
259         bprm->p = STACK_ROUND(sp, items);
260
261         /* Point sp at the lowest address on the stack */
262 #ifdef CONFIG_STACK_GROWSUP
263         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
264         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
265 #else
266         sp = (elf_addr_t __user *)bprm->p;
267 #endif
268
269
270         /*
271          * Grow the stack manually; some architectures have a limit on how
272          * far ahead a user-space access may be in order to grow the stack.
273          */
274         vma = find_extend_vma(current->mm, bprm->p);
275         if (!vma)
276                 return -EFAULT;
277
278         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
279         if (__put_user(argc, sp++))
280                 return -EFAULT;
281         argv = sp;
            /* envp[] starts right after argv[]'s terminating slot */
282         envp = argv + argc + 1;
283
284         /* Populate argv and envp */
            /*
             * p is reused as a cursor over the strings, starting at
             * mm->arg_start; each strnlen_user() result is added to step to
             * the next string.
             */
285         p = current->mm->arg_end = current->mm->arg_start;
286         while (argc-- > 0) {
287                 size_t len;
288                 if (__put_user((elf_addr_t)p, argv++))
289                         return -EFAULT;
290                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
291                 if (!len || len > MAX_ARG_STRLEN)
292                         return -EINVAL;
293                 p += len;
294         }
            /* terminate argv[] */
295         if (__put_user(0, argv))
296                 return -EFAULT;
            /* environment strings begin where the argument strings end */
297         current->mm->arg_end = current->mm->env_start = p;
298         while (envc-- > 0) {
299                 size_t len;
300                 if (__put_user((elf_addr_t)p, envp++))
301                         return -EFAULT;
302                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
303                 if (!len || len > MAX_ARG_STRLEN)
304                         return -EINVAL;
305                 p += len;
306         }
            /* terminate envp[] */
307         if (__put_user(0, envp))
308                 return -EFAULT;
309         current->mm->env_end = p;
310
311         /* Put the elf_info on the stack in the right place.  */
            /* i.e. in the slot immediately after envp[]'s terminator */
312         sp = (elf_addr_t __user *)envp + 1;
313         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
314                 return -EFAULT;
315         return 0;
316 }
317
318 static unsigned long elf_map(struct file *filep, unsigned long addr,
319                 struct elf_phdr *eppnt, int prot, int type,
320                 unsigned long total_size)
321 {
322         unsigned long map_addr;
323         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
324         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
325         addr = ELF_PAGESTART(addr);
326         size = ELF_PAGEALIGN(size);
327
328         /* mmap() will return -EINVAL if given a zero size, but a
329          * segment with zero filesize is perfectly valid */
330         if (!size)
331                 return addr;
332
333         down_write(&current->mm->mmap_sem);
334         /*
335         * total_size is the size of the ELF (interpreter) image.
336         * The _first_ mmap needs to know the full size, otherwise
337         * randomization might put this image into an overlapping
338         * position with the ELF binary image. (since size < total_size)
339         * So we first map the 'big' image - and unmap the remainder at
340         * the end. (which unmap is needed for ELF images with holes.)
341         */
342         if (total_size) {
343                 total_size = ELF_PAGEALIGN(total_size);
344                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
345                 if (!BAD_ADDR(map_addr))
346                         do_munmap(current->mm, map_addr+size, total_size-size);
347         } else
348                 map_addr = do_mmap(filep, addr, size, prot, type, off);
349
350         up_write(&current->mm->mmap_sem);
351         return(map_addr);
352 }
353
354 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
355 {
356         int i, first_idx = -1, last_idx = -1;
357
358         for (i = 0; i < nr; i++) {
359                 if (cmds[i].p_type == PT_LOAD) {
360                         last_idx = i;
361                         if (first_idx == -1)
362                                 first_idx = i;
363                 }
364         }
365         if (first_idx == -1)
366                 return 0;
367
368         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
369                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
370 }
371
372
373 /* This is much more generalized than the library routine read function,
374    so we keep this separate.  Technically the library read function
375    is only provided so that we can read a.out libraries that have
376    an ELF header */
377
378 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
379                 struct file *interpreter, unsigned long *interp_map_addr,
380                 unsigned long no_base)
381 {
382         struct elf_phdr *elf_phdata;
383         struct elf_phdr *eppnt;
384         unsigned long load_addr = 0;
385         int load_addr_set = 0;
386         unsigned long last_bss = 0, elf_bss = 0;
387         unsigned long error = ~0UL;
388         unsigned long total_size;
389         int retval, i, size;
390
391         /* First of all, some simple consistency checks */
392         if (interp_elf_ex->e_type != ET_EXEC &&
393             interp_elf_ex->e_type != ET_DYN)
394                 goto out;
395         if (!elf_check_arch(interp_elf_ex))
396                 goto out;
397         if (!interpreter->f_op || !interpreter->f_op->mmap)
398                 goto out;
399
400         /*
401          * If the size of this structure has changed, then punt, since
402          * we will be doing the wrong thing.
403          */
404         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
405                 goto out;
406         if (interp_elf_ex->e_phnum < 1 ||
407                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
408                 goto out;
409
410         /* Now read in all of the header information */
411         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
412         if (size > ELF_MIN_ALIGN)
413                 goto out;
414         elf_phdata = kmalloc(size, GFP_KERNEL);
415         if (!elf_phdata)
416                 goto out;
417
418         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
419                              (char *)elf_phdata, size);
420         error = -EIO;
421         if (retval != size) {
422                 if (retval < 0)
423                         error = retval; 
424                 goto out_close;
425         }
426
427         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
428         if (!total_size) {
429                 error = -EINVAL;
430                 goto out_close;
431         }
432
433         eppnt = elf_phdata;
434         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
435                 if (eppnt->p_type == PT_LOAD) {
436                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
437                         int elf_prot = 0;
438                         unsigned long vaddr = 0;
439                         unsigned long k, map_addr;
440
441                         if (eppnt->p_flags & PF_R)
442                                 elf_prot = PROT_READ;
443                         if (eppnt->p_flags & PF_W)
444                                 elf_prot |= PROT_WRITE;
445                         if (eppnt->p_flags & PF_X)
446                                 elf_prot |= PROT_EXEC;
447                         vaddr = eppnt->p_vaddr;
448                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
449                                 elf_type |= MAP_FIXED;
450                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
451                                 load_addr = -vaddr;
452
453                         map_addr = elf_map(interpreter, load_addr + vaddr,
454                                         eppnt, elf_prot, elf_type, total_size);
455                         total_size = 0;
456                         if (!*interp_map_addr)
457                                 *interp_map_addr = map_addr;
458                         error = map_addr;
459                         if (BAD_ADDR(map_addr))
460                                 goto out_close;
461
462                         if (!load_addr_set &&
463                             interp_elf_ex->e_type == ET_DYN) {
464                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
465                                 load_addr_set = 1;
466                         }
467
468                         /*
469                          * Check to see if the section's size will overflow the
470                          * allowed task size. Note that p_filesz must always be
471                          * <= p_memsize so it's only necessary to check p_memsz.
472                          */
473                         k = load_addr + eppnt->p_vaddr;
474                         if (BAD_ADDR(k) ||
475                             eppnt->p_filesz > eppnt->p_memsz ||
476                             eppnt->p_memsz > TASK_SIZE ||
477                             TASK_SIZE - eppnt->p_memsz < k) {
478                                 error = -ENOMEM;
479                                 goto out_close;
480                         }
481
482                         /*
483                          * Find the end of the file mapping for this phdr, and
484                          * keep track of the largest address we see for this.
485                          */
486                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
487                         if (k > elf_bss)
488                                 elf_bss = k;
489
490                         /*
491                          * Do the same thing for the memory mapping - between
492                          * elf_bss and last_bss is the bss section.
493                          */
494                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
495                         if (k > last_bss)
496                                 last_bss = k;
497                 }
498         }
499
500         if (last_bss > elf_bss) {
501                 /*
502                  * Now fill out the bss section.  First pad the last page up
503                  * to the page boundary, and then perform a mmap to make sure
504                  * that there are zero-mapped pages up to and including the
505                  * last bss page.
506                  */
507                 if (padzero(elf_bss)) {
508                         error = -EFAULT;
509                         goto out_close;
510                 }
511
512                 /* What we have mapped so far */
513                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
514
515                 /* Map the last of the bss segment */
516                 down_write(&current->mm->mmap_sem);
517                 error = do_brk(elf_bss, last_bss - elf_bss);
518                 up_write(&current->mm->mmap_sem);
519                 if (BAD_ADDR(error))
520                         goto out_close;
521         }
522
523         error = load_addr;
524
525 out_close:
526         kfree(elf_phdata);
527 out:
528         return error;
529 }
530
531 /*
532  * These are the functions used to load ELF style executables and shared
533  * libraries.  There is no binary dependent code anywhere else.
534  */
535
536 #define INTERPRETER_NONE 0
537 #define INTERPRETER_ELF 2
538
539 #ifndef STACK_RND_MASK
540 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
541 #endif
542
543 static unsigned long randomize_stack_top(unsigned long stack_top)
544 {
545         unsigned int random_variable = 0;
546
547         if ((current->flags & PF_RANDOMIZE) &&
548                 !(current->personality & ADDR_NO_RANDOMIZE)) {
549                 random_variable = get_random_int() & STACK_RND_MASK;
550                 random_variable <<= PAGE_SHIFT;
551         }
552 #ifdef CONFIG_STACK_GROWSUP
553         return PAGE_ALIGN(stack_top) + random_variable;
554 #else
555         return PAGE_ALIGN(stack_top) - random_variable;
556 #endif
557 }
558
559 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
560 {
561         struct file *interpreter = NULL; /* to shut gcc up */
562         unsigned long load_addr = 0, load_bias = 0;
563         int load_addr_set = 0;
564         char * elf_interpreter = NULL;
565         unsigned long error;
566         struct elf_phdr *elf_ppnt, *elf_phdata;
567         unsigned long elf_bss, elf_brk;
568         int retval, i;
569         unsigned int size;
570         unsigned long elf_entry;
571         unsigned long interp_load_addr = 0;
572         unsigned long start_code, end_code, start_data, end_data;
573         unsigned long reloc_func_desc __maybe_unused = 0;
574         int executable_stack = EXSTACK_DEFAULT;
575         unsigned long def_flags = 0;
576         struct {
577                 struct elfhdr elf_ex;
578                 struct elfhdr interp_elf_ex;
579         } *loc;
580
581         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
582         if (!loc) {
583                 retval = -ENOMEM;
584                 goto out_ret;
585         }
586         
587         /* Get the exec-header */
588         loc->elf_ex = *((struct elfhdr *)bprm->buf);
589
590         retval = -ENOEXEC;
591         /* First of all, some simple consistency checks */
592         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
593                 goto out;
594
595         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
596                 goto out;
597         if (!elf_check_arch(&loc->elf_ex))
598                 goto out;
599         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
600                 goto out;
601
602         /* Now read in all of the header information */
603         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
604                 goto out;
605         if (loc->elf_ex.e_phnum < 1 ||
606                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
607                 goto out;
608         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
609         retval = -ENOMEM;
610         elf_phdata = kmalloc(size, GFP_KERNEL);
611         if (!elf_phdata)
612                 goto out;
613
614         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
615                              (char *)elf_phdata, size);
616         if (retval != size) {
617                 if (retval >= 0)
618                         retval = -EIO;
619                 goto out_free_ph;
620         }
621
622         elf_ppnt = elf_phdata;
623         elf_bss = 0;
624         elf_brk = 0;
625
626         start_code = ~0UL;
627         end_code = 0;
628         start_data = 0;
629         end_data = 0;
630
631         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
632                 if (elf_ppnt->p_type == PT_INTERP) {
633                         /* This is the program interpreter used for
634                          * shared libraries - for now assume that this
635                          * is an a.out format binary
636                          */
637                         retval = -ENOEXEC;
638                         if (elf_ppnt->p_filesz > PATH_MAX || 
639                             elf_ppnt->p_filesz < 2)
640                                 goto out_free_ph;
641
642                         retval = -ENOMEM;
643                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
644                                                   GFP_KERNEL);
645                         if (!elf_interpreter)
646                                 goto out_free_ph;
647
648                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
649                                              elf_interpreter,
650                                              elf_ppnt->p_filesz);
651                         if (retval != elf_ppnt->p_filesz) {
652                                 if (retval >= 0)
653                                         retval = -EIO;
654                                 goto out_free_interp;
655                         }
656                         /* make sure path is NULL terminated */
657                         retval = -ENOEXEC;
658                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
659                                 goto out_free_interp;
660
661                         interpreter = open_exec(elf_interpreter);
662                         retval = PTR_ERR(interpreter);
663                         if (IS_ERR(interpreter))
664                                 goto out_free_interp;
665
666                         /*
667                          * If the binary is not readable then enforce
668                          * mm->dumpable = 0 regardless of the interpreter's
669                          * permissions.
670                          */
671                         would_dump(bprm, interpreter);
672
673                         retval = kernel_read(interpreter, 0, bprm->buf,
674                                              BINPRM_BUF_SIZE);
675                         if (retval != BINPRM_BUF_SIZE) {
676                                 if (retval >= 0)
677                                         retval = -EIO;
678                                 goto out_free_dentry;
679                         }
680
681                         /* Get the exec headers */
682                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
683                         break;
684                 }
685                 elf_ppnt++;
686         }
687
688         elf_ppnt = elf_phdata;
689         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
690                 if (elf_ppnt->p_type == PT_GNU_STACK) {
691                         if (elf_ppnt->p_flags & PF_X)
692                                 executable_stack = EXSTACK_ENABLE_X;
693                         else
694                                 executable_stack = EXSTACK_DISABLE_X;
695                         break;
696                 }
697
698         /* Some simple consistency checks for the interpreter */
699         if (elf_interpreter) {
700                 retval = -ELIBBAD;
701                 /* Not an ELF interpreter */
702                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
703                         goto out_free_dentry;
704                 /* Verify the interpreter has a valid arch */
705                 if (!elf_check_arch(&loc->interp_elf_ex))
706                         goto out_free_dentry;
707         }
708
709         /* Flush all traces of the currently running executable */
710         retval = flush_old_exec(bprm);
711         if (retval)
712                 goto out_free_dentry;
713
714 #ifdef CONFIG_X86_32
715         /*
716          * Turn off the CS limit completely if exec-shield disabled or
717          * NX active:
718          */
719         if (disable_nx || executable_stack != EXSTACK_DISABLE_X || (__supported_pte_mask & _PAGE_NX))
720                 arch_add_exec_range(current->mm, -1);
721 #endif
722
723         /* OK, This is the point of no return */
724         current->flags &= ~PF_FORKNOEXEC;
725         current->mm->def_flags = def_flags;
726
727         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
728            may depend on the personality.  */
729         SET_PERSONALITY(loc->elf_ex);
730         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
731                 current->personality |= READ_IMPLIES_EXEC;
732
733         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
734                 current->flags |= PF_RANDOMIZE;
735
736         setup_new_exec(bprm);
737
738         /* Do this so that we can load the interpreter, if need be.  We will
739            change some of these later */
740         current->mm->free_area_cache = current->mm->mmap_base;
741         current->mm->cached_hole_size = 0;
742         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
743                                  executable_stack);
744         if (retval < 0) {
745                 send_sig(SIGKILL, current, 0);
746                 goto out_free_dentry;
747         }
748         
749         current->mm->start_stack = bprm->p;
750
751         /* Now we do a little grungy work by mmapping the ELF image into
752            the correct location in memory. */
753         for(i = 0, elf_ppnt = elf_phdata;
754             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
755                 int elf_prot = 0, elf_flags;
756                 unsigned long k, vaddr;
757
758                 if (elf_ppnt->p_type != PT_LOAD)
759                         continue;
760
761                 if (unlikely (elf_brk > elf_bss)) {
762                         unsigned long nbyte;
763                     
764                         /* There was a PT_LOAD segment with p_memsz > p_filesz
765                            before this one. Map anonymous pages, if needed,
766                            and clear the area.  */
767                         retval = set_brk(elf_bss + load_bias,
768                                          elf_brk + load_bias);
769                         if (retval) {
770                                 send_sig(SIGKILL, current, 0);
771                                 goto out_free_dentry;
772                         }
773                         nbyte = ELF_PAGEOFFSET(elf_bss);
774                         if (nbyte) {
775                                 nbyte = ELF_MIN_ALIGN - nbyte;
776                                 if (nbyte > elf_brk - elf_bss)
777                                         nbyte = elf_brk - elf_bss;
778                                 if (clear_user((void __user *)elf_bss +
779                                                         load_bias, nbyte)) {
780                                         /*
781                                          * This bss-zeroing can fail if the ELF
782                                          * file specifies odd protections. So
783                                          * we don't check the return value
784                                          */
785                                 }
786                         }
787                 }
788
789                 if (elf_ppnt->p_flags & PF_R)
790                         elf_prot |= PROT_READ;
791                 if (elf_ppnt->p_flags & PF_W)
792                         elf_prot |= PROT_WRITE;
793                 if (elf_ppnt->p_flags & PF_X)
794                         elf_prot |= PROT_EXEC;
795
796                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
797
798                 vaddr = elf_ppnt->p_vaddr;
799                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
800                         elf_flags |= MAP_FIXED;
801                 } else if (loc->elf_ex.e_type == ET_DYN) {
802                         /* Try and get dynamic programs out of the way of the
803                          * default mmap base, as well as whatever program they
804                          * might try to exec.  This is because the brk will
805                          * follow the loader, and is not movable.  */
806 #if defined(CONFIG_X86) || defined(CONFIG_ARM)
807                         /* Memory randomization might have been switched off
808                          * in runtime via sysctl.
809                          * If that is the case, retain the original non-zero
810                          * load_bias value in order to establish proper
811                          * non-randomized mappings.
812                          */
813                         if (current->flags & PF_RANDOMIZE)
814                                 load_bias = 0;
815                         else
816                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
817 #else
818                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
819 #endif
820                 }
821
822                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
823                                 elf_prot, elf_flags, 0);
824                 if (BAD_ADDR(error)) {
825                         send_sig(SIGKILL, current, 0);
826                         retval = IS_ERR((void *)error) ?
827                                 PTR_ERR((void*)error) : -EINVAL;
828                         goto out_free_dentry;
829                 }
830
831                 if (!load_addr_set) {
832                         load_addr_set = 1;
833                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
834                         if (loc->elf_ex.e_type == ET_DYN) {
835                                 load_bias += error -
836                                              ELF_PAGESTART(load_bias + vaddr);
837                                 load_addr += load_bias;
838                                 reloc_func_desc = load_bias;
839                         }
840                 }
841                 k = elf_ppnt->p_vaddr;
842                 if (k < start_code)
843                         start_code = k;
844                 if (start_data < k)
845                         start_data = k;
846
847                 /*
848                  * Check to see if the section's size will overflow the
849                  * allowed task size. Note that p_filesz must always be
850                  * <= p_memsz so it is only necessary to check p_memsz.
851                  */
852                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
853                     elf_ppnt->p_memsz > TASK_SIZE ||
854                     TASK_SIZE - elf_ppnt->p_memsz < k) {
855                         /* set_brk can never work. Avoid overflows. */
856                         send_sig(SIGKILL, current, 0);
857                         retval = -EINVAL;
858                         goto out_free_dentry;
859                 }
860
861                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
862
863                 if (k > elf_bss)
864                         elf_bss = k;
865                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
866                         end_code = k;
867                 if (end_data < k)
868                         end_data = k;
869                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
870                 if (k > elf_brk)
871                         elf_brk = k;
872         }
873
874         loc->elf_ex.e_entry += load_bias;
875         elf_bss += load_bias;
876         elf_brk += load_bias;
877         start_code += load_bias;
878         end_code += load_bias;
879         start_data += load_bias;
880         end_data += load_bias;
881
882         /* Calling set_brk effectively mmaps the pages that we need
883          * for the bss and break sections.  We must do this before
884          * mapping in the interpreter, to make sure it doesn't wind
885          * up getting placed where the bss needs to go.
886          */
887         retval = set_brk(elf_bss, elf_brk);
888         if (retval) {
889                 send_sig(SIGKILL, current, 0);
890                 goto out_free_dentry;
891         }
892         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
893                 send_sig(SIGSEGV, current, 0);
894                 retval = -EFAULT; /* Nobody gets to see this, but.. */
895                 goto out_free_dentry;
896         }
897
898         if (elf_interpreter) {
899                 unsigned long uninitialized_var(interp_map_addr);
900
901                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
902                                             interpreter,
903                                             &interp_map_addr,
904                                             load_bias);
905                 if (!IS_ERR((void *)elf_entry)) {
906                         /*
907                          * load_elf_interp() returns relocation
908                          * adjustment
909                          */
910                         interp_load_addr = elf_entry;
911                         elf_entry += loc->interp_elf_ex.e_entry;
912                 }
913                 if (BAD_ADDR(elf_entry)) {
914                         force_sig(SIGSEGV, current);
915                         retval = IS_ERR((void *)elf_entry) ?
916                                         (int)elf_entry : -EINVAL;
917                         goto out_free_dentry;
918                 }
919                 reloc_func_desc = interp_load_addr;
920
921                 allow_write_access(interpreter);
922                 fput(interpreter);
923                 kfree(elf_interpreter);
924         } else {
925                 elf_entry = loc->elf_ex.e_entry;
926                 if (BAD_ADDR(elf_entry)) {
927                         force_sig(SIGSEGV, current);
928                         retval = -EINVAL;
929                         goto out_free_dentry;
930                 }
931         }
932
933         kfree(elf_phdata);
934
935         set_binfmt(&elf_format);
936
937 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
938         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
939         if (retval < 0) {
940                 send_sig(SIGKILL, current, 0);
941                 goto out;
942         }
943 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
944
945         install_exec_creds(bprm);
946         current->flags &= ~PF_FORKNOEXEC;
947         retval = create_elf_tables(bprm, &loc->elf_ex,
948                           load_addr, interp_load_addr);
949         if (retval < 0) {
950                 send_sig(SIGKILL, current, 0);
951                 goto out;
952         }
953         /* N.B. passed_fileno might not be initialized? */
954         current->mm->end_code = end_code;
955         current->mm->start_code = start_code;
956         current->mm->start_data = start_data;
957         current->mm->end_data = end_data;
958         current->mm->start_stack = bprm->p;
959
960 #ifdef arch_randomize_brk
961         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
962                 current->mm->brk = current->mm->start_brk =
963                         arch_randomize_brk(current->mm);
964 #ifdef CONFIG_COMPAT_BRK
965                 current->brk_randomized = 1;
966 #endif
967         }
968 #endif
969
970         if (current->personality & MMAP_PAGE_ZERO) {
971                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
972                    and some applications "depend" upon this behavior.
973                    Since we do not have the power to recompile these, we
974                    emulate the SVr4 behavior. Sigh. */
975                 down_write(&current->mm->mmap_sem);
976                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
977                                 MAP_FIXED | MAP_PRIVATE, 0);
978                 up_write(&current->mm->mmap_sem);
979         }
980
981 #ifdef ELF_PLAT_INIT
982         /*
983          * The ABI may specify that certain registers be set up in special
984          * ways (on i386 %edx is the address of a DT_FINI function, for
985          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
986          * that the e_entry field is the address of the function descriptor
987          * for the startup routine, rather than the address of the startup
988          * routine itself.  This macro performs whatever initialization to
989          * the regs structure is required as well as any relocations to the
990          * function descriptor entries when executing dynamically links apps.
991          */
992         ELF_PLAT_INIT(regs, reloc_func_desc);
993 #endif
994
995         start_thread(regs, elf_entry, bprm->p);
996         retval = 0;
997 out:
998         kfree(loc);
999 out_ret:
1000         return retval;
1001
1002         /* error cleanup */
1003 out_free_dentry:
1004         allow_write_access(interpreter);
1005         if (interpreter)
1006                 fput(interpreter);
1007 out_free_interp:
1008         kfree(elf_interpreter);
1009 out_free_ph:
1010         kfree(elf_phdata);
1011         goto out;
1012 }
1013
1014 /* This is really simpleminded and specialized - we are loading an
1015    a.out library that is given an ELF header. */
1016 static int load_elf_library(struct file *file)
1017 {
1018         struct elf_phdr *elf_phdata;
1019         struct elf_phdr *eppnt;
1020         unsigned long elf_bss, bss, len;
1021         int retval, error, i, j;
1022         struct elfhdr elf_ex;
1023
1024         error = -ENOEXEC;
1025         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1026         if (retval != sizeof(elf_ex))
1027                 goto out;
1028
1029         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1030                 goto out;
1031
1032         /* First of all, some simple consistency checks */
1033         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1034             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1035                 goto out;
1036
1037         /* Now read in all of the header information */
1038
1039         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1040         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1041
1042         error = -ENOMEM;
1043         elf_phdata = kmalloc(j, GFP_KERNEL);
1044         if (!elf_phdata)
1045                 goto out;
1046
1047         eppnt = elf_phdata;
1048         error = -ENOEXEC;
1049         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1050         if (retval != j)
1051                 goto out_free_ph;
1052
1053         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1054                 if ((eppnt + i)->p_type == PT_LOAD)
1055                         j++;
1056         if (j != 1)
1057                 goto out_free_ph;
1058
1059         while (eppnt->p_type != PT_LOAD)
1060                 eppnt++;
1061
1062         /* Now use mmap to map the library into memory. */
1063         down_write(&current->mm->mmap_sem);
1064         error = do_mmap(file,
1065                         ELF_PAGESTART(eppnt->p_vaddr),
1066                         (eppnt->p_filesz +
1067                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1068                         PROT_READ | PROT_WRITE | PROT_EXEC,
1069                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1070                         (eppnt->p_offset -
1071                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1072         up_write(&current->mm->mmap_sem);
1073         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1074                 goto out_free_ph;
1075
1076         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1077         if (padzero(elf_bss)) {
1078                 error = -EFAULT;
1079                 goto out_free_ph;
1080         }
1081
1082         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1083                             ELF_MIN_ALIGN - 1);
1084         bss = eppnt->p_memsz + eppnt->p_vaddr;
1085         if (bss > len) {
1086                 down_write(&current->mm->mmap_sem);
1087                 do_brk(len, bss - len);
1088                 up_write(&current->mm->mmap_sem);
1089         }
1090         error = 0;
1091
1092 out_free_ph:
1093         kfree(elf_phdata);
1094 out:
1095         return error;
1096 }
1097
1098 #ifdef CONFIG_ELF_CORE
1099 /*
1100  * ELF core dumper
1101  *
1102  * Modelled on fs/exec.c:aout_core_dump()
1103  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1104  */
1105
1106 /*
1107  * Decide what to dump of a segment, part, all or none.
1108  */
1109 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1110                                    unsigned long mm_flags)
1111 {
1112 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1113
1114         /* The vma can be set up to tell us the answer directly.  */
1115         if (vma->vm_flags & VM_ALWAYSDUMP)
1116                 goto whole;
1117
1118         /* Hugetlb memory check */
1119         if (vma->vm_flags & VM_HUGETLB) {
1120                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1121                         goto whole;
1122                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1123                         goto whole;
1124         }
1125
1126         /* Do not dump I/O mapped devices or special mappings */
1127         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1128                 return 0;
1129
1130         /* By default, dump shared memory if mapped from an anonymous file. */
1131         if (vma->vm_flags & VM_SHARED) {
1132                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1133                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1134                         goto whole;
1135                 return 0;
1136         }
1137
1138         /* Dump segments that have been written to.  */
1139         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1140                 goto whole;
1141         if (vma->vm_file == NULL)
1142                 return 0;
1143
1144         if (FILTER(MAPPED_PRIVATE))
1145                 goto whole;
1146
1147         /*
1148          * If this looks like the beginning of a DSO or executable mapping,
1149          * check for an ELF header.  If we find one, dump the first page to
1150          * aid in determining what was mapped here.
1151          */
1152         if (FILTER(ELF_HEADERS) &&
1153             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1154                 u32 __user *header = (u32 __user *) vma->vm_start;
1155                 u32 word;
1156                 mm_segment_t fs = get_fs();
1157                 /*
1158                  * Doing it this way gets the constant folded by GCC.
1159                  */
1160                 union {
1161                         u32 cmp;
1162                         char elfmag[SELFMAG];
1163                 } magic;
1164                 BUILD_BUG_ON(SELFMAG != sizeof word);
1165                 magic.elfmag[EI_MAG0] = ELFMAG0;
1166                 magic.elfmag[EI_MAG1] = ELFMAG1;
1167                 magic.elfmag[EI_MAG2] = ELFMAG2;
1168                 magic.elfmag[EI_MAG3] = ELFMAG3;
1169                 /*
1170                  * Switch to the user "segment" for get_user(),
1171                  * then put back what elf_core_dump() had in place.
1172                  */
1173                 set_fs(USER_DS);
1174                 if (unlikely(get_user(word, header)))
1175                         word = 0;
1176                 set_fs(fs);
1177                 if (word == magic.cmp)
1178                         return PAGE_SIZE;
1179         }
1180
1181 #undef  FILTER
1182
1183         return 0;
1184
1185 whole:
1186         return vma->vm_end - vma->vm_start;
1187 }
1188
1189 /* An ELF note in memory: the name, type and payload that writenote() emits. */
1190 struct memelfnote
1191 {
1192         const char *name;      /* NUL-terminated name; callers here pass "CORE" or "LINUX" */
1193         int type;              /* note type, written out as n_type */
1194         unsigned int datasz;   /* number of bytes at data, written out as n_descsz */
1195         void *data;            /* payload bytes; not copied, only referenced */
1196 };
1197
1198 static int notesize(struct memelfnote *en)
1199 {
1200         int sz;
1201
1202         sz = sizeof(struct elf_note);
1203         sz += roundup(strlen(en->name) + 1, 4);
1204         sz += roundup(en->datasz, 4);
1205
1206         return sz;
1207 }
1208
1209 #define DUMP_WRITE(addr, nr, foffset)   \
1210         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1211
1212 static int alignfile(struct file *file, loff_t *foffset)
1213 {
1214         static const char buf[4] = { 0, };
1215         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1216         return 1;
1217 }
1218
1219 static int writenote(struct memelfnote *men, struct file *file,
1220                         loff_t *foffset)
1221 {
1222         struct elf_note en;
1223         en.n_namesz = strlen(men->name) + 1;
1224         en.n_descsz = men->datasz;
1225         en.n_type = men->type;
1226
1227         DUMP_WRITE(&en, sizeof(en), foffset);
1228         DUMP_WRITE(men->name, en.n_namesz, foffset);
1229         if (!alignfile(file, foffset))
1230                 return 0;
1231         DUMP_WRITE(men->data, men->datasz, foffset);
1232         if (!alignfile(file, foffset))
1233                 return 0;
1234
1235         return 1;
1236 }
1237 #undef DUMP_WRITE
1238
1239 static void fill_elf_header(struct elfhdr *elf, int segs,
1240                             u16 machine, u32 flags, u8 osabi)
1241 {
1242         memset(elf, 0, sizeof(*elf));
1243
1244         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1245         elf->e_ident[EI_CLASS] = ELF_CLASS;
1246         elf->e_ident[EI_DATA] = ELF_DATA;
1247         elf->e_ident[EI_VERSION] = EV_CURRENT;
1248         elf->e_ident[EI_OSABI] = ELF_OSABI;
1249
1250         elf->e_type = ET_CORE;
1251         elf->e_machine = machine;
1252         elf->e_version = EV_CURRENT;
1253         elf->e_phoff = sizeof(struct elfhdr);
1254         elf->e_flags = flags;
1255         elf->e_ehsize = sizeof(struct elfhdr);
1256         elf->e_phentsize = sizeof(struct elf_phdr);
1257         elf->e_phnum = segs;
1258
1259         return;
1260 }
1261
1262 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1263 {
1264         phdr->p_type = PT_NOTE;
1265         phdr->p_offset = offset;
1266         phdr->p_vaddr = 0;
1267         phdr->p_paddr = 0;
1268         phdr->p_filesz = sz;
1269         phdr->p_memsz = 0;
1270         phdr->p_flags = 0;
1271         phdr->p_align = 0;
1272         return;
1273 }
1274
1275 static void fill_note(struct memelfnote *note, const char *name, int type, 
1276                 unsigned int sz, void *data)
1277 {
1278         note->name = name;
1279         note->type = type;
1280         note->datasz = sz;
1281         note->data = data;
1282         return;
1283 }
1284
1285 /*
1286  * fill up all the fields in prstatus from the given task struct, except
1287  * registers which need to be filled up separately.
1288  */
1289 static void fill_prstatus(struct elf_prstatus *prstatus,
1290                 struct task_struct *p, long signr)
1291 {
1292         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1293         prstatus->pr_sigpend = p->pending.signal.sig[0];
1294         prstatus->pr_sighold = p->blocked.sig[0];
1295         rcu_read_lock();
1296         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1297         rcu_read_unlock();
1298         prstatus->pr_pid = task_pid_vnr(p);
1299         prstatus->pr_pgrp = task_pgrp_vnr(p);
1300         prstatus->pr_sid = task_session_vnr(p);
1301         if (thread_group_leader(p)) {
1302                 struct task_cputime cputime;
1303
1304                 /*
1305                  * This is the record for the group leader.  It shows the
1306                  * group-wide total, not its individual thread total.
1307                  */
1308                 thread_group_cputime(p, &cputime);
1309                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1310                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1311         } else {
1312                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1313                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1314         }
1315         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1316         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1317 }
1318
1319 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1320                        struct mm_struct *mm)
1321 {
1322         const struct cred *cred;
1323         unsigned int i, len;
1324         
1325         /* first copy the parameters from user space */
1326         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1327
1328         len = mm->arg_end - mm->arg_start;
1329         if (len >= ELF_PRARGSZ)
1330                 len = ELF_PRARGSZ-1;
1331         if (copy_from_user(&psinfo->pr_psargs,
1332                            (const char __user *)mm->arg_start, len))
1333                 return -EFAULT;
1334         for(i = 0; i < len; i++)
1335                 if (psinfo->pr_psargs[i] == 0)
1336                         psinfo->pr_psargs[i] = ' ';
1337         psinfo->pr_psargs[len] = 0;
1338
1339         rcu_read_lock();
1340         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1341         rcu_read_unlock();
1342         psinfo->pr_pid = task_pid_vnr(p);
1343         psinfo->pr_pgrp = task_pgrp_vnr(p);
1344         psinfo->pr_sid = task_session_vnr(p);
1345
1346         i = p->state ? ffz(~p->state) + 1 : 0;
1347         psinfo->pr_state = i;
1348         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1349         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1350         psinfo->pr_nice = task_nice(p);
1351         psinfo->pr_flag = p->flags;
1352         rcu_read_lock();
1353         cred = __task_cred(p);
1354         SET_UID(psinfo->pr_uid, cred->uid);
1355         SET_GID(psinfo->pr_gid, cred->gid);
1356         rcu_read_unlock();
1357         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1358         
1359         return 0;
1360 }
1361
1362 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1363 {
1364         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1365         int i = 0;
1366         do
1367                 i += 2;
1368         while (auxv[i - 2] != AT_NULL);
1369         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1370 }
1371
1372 #ifdef CORE_DUMP_USE_REGSET
1373 #include <linux/regset.h>
1374
1375 struct elf_thread_core_info {        /* per-thread data gathered for the core file */
1376         struct elf_thread_core_info *next;   /* next thread; fill_note_info() keeps the dumping task first */
1377         struct task_struct *task;    /* the thread this entry describes */
1378         struct elf_prstatus prstatus;   /* storage that notes[0] (NT_PRSTATUS) points at */
1379         struct memelfnote notes[0];   /* trailing array; fill_note_info() allocates thread_notes entries */
1380 };
1381
1382 struct elf_note_info {               /* all notes collected for one core dump */
1383         struct elf_thread_core_info *thread;   /* head of the per-thread list */
1384         struct memelfnote psinfo;    /* process-wide NT_PRPSINFO note; its data is kmalloc()ed */
1385         struct memelfnote auxv;      /* process-wide NT_AUXV note */
1386         size_t size;                 /* sum of notesize() over every note filled in */
1387         int thread_notes;            /* number of notes[] slots allocated per thread */
1388 };
1389
1390 /*
1391  * When a regset has a writeback hook, we call it on each thread before
1392  * dumping user memory.  On register window machines, this makes sure the
1393  * user memory backing the register data is up to date before we read it.
1394  */
1395 static void do_thread_regset_writeback(struct task_struct *task,
1396                                        const struct user_regset *regset)
1397 {
1398         if (regset->writeback)
1399                 regset->writeback(task, regset, 1);
1400 }
1401
1402 static int fill_thread_core_info(struct elf_thread_core_info *t,
1403                                  const struct user_regset_view *view,
1404                                  long signr, size_t *total)
1405 {
1406         unsigned int i;
1407
1408         /*
1409          * NT_PRSTATUS is the one special case, because the regset data
1410          * goes into the pr_reg field inside the note contents, rather
1411          * than being the whole note contents.  We fill the reset in here.
1412          * We assume that regset 0 is NT_PRSTATUS.
1413          */
1414         fill_prstatus(&t->prstatus, t->task, signr);
1415         (void) view->regsets[0].get(t->task, &view->regsets[0],
1416                                     0, sizeof(t->prstatus.pr_reg),
1417                                     &t->prstatus.pr_reg, NULL);
1418
1419         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1420                   sizeof(t->prstatus), &t->prstatus);
1421         *total += notesize(&t->notes[0]);
1422
1423         do_thread_regset_writeback(t->task, &view->regsets[0]);
1424
1425         /*
1426          * Each other regset might generate a note too.  For each regset
1427          * that has no core_note_type or is inactive, we leave t->notes[i]
1428          * all zero and we'll know to skip writing it later.
1429          */
1430         for (i = 1; i < view->n; ++i) {
1431                 const struct user_regset *regset = &view->regsets[i];
1432                 do_thread_regset_writeback(t->task, regset);
1433                 if (regset->core_note_type && regset->get &&
1434                     (!regset->active || regset->active(t->task, regset))) {
1435                         int ret;
1436                         size_t size = regset->n * regset->size;
1437                         void *data = kmalloc(size, GFP_KERNEL);
1438                         if (unlikely(!data))
1439                                 return 0;
1440                         ret = regset->get(t->task, regset,
1441                                           0, size, data, NULL);
1442                         if (unlikely(ret))
1443                                 kfree(data);
1444                         else {
1445                                 if (regset->core_note_type != NT_PRFPREG)
1446                                         fill_note(&t->notes[i], "LINUX",
1447                                                   regset->core_note_type,
1448                                                   size, data);
1449                                 else {
1450                                         t->prstatus.pr_fpvalid = 1;
1451                                         fill_note(&t->notes[i], "CORE",
1452                                                   NT_PRFPREG, size, data);
1453                                 }
1454                                 *total += notesize(&t->notes[i]);
1455                         }
1456                 }
1457         }
1458
1459         return 1;
1460 }
1461
1462 static int fill_note_info(struct elfhdr *elf, int phdrs,
1463                           struct elf_note_info *info,
1464                           long signr, struct pt_regs *regs)
1465 {
1466         struct task_struct *dump_task = current;
1467         const struct user_regset_view *view = task_user_regset_view(dump_task);
1468         struct elf_thread_core_info *t;
1469         struct elf_prpsinfo *psinfo;
1470         struct core_thread *ct;
1471         unsigned int i;
1472
1473         info->size = 0;
1474         info->thread = NULL;
1475
1476         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1477         if (psinfo == NULL)
1478                 return 0;
1479
1480         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1481
1482         /*
1483          * Figure out how many notes we're going to need for each thread.
1484          */
1485         info->thread_notes = 0;
1486         for (i = 0; i < view->n; ++i)
1487                 if (view->regsets[i].core_note_type != 0)
1488                         ++info->thread_notes;
1489
1490         /*
1491          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1492          * since it is our one special case.
1493          */
1494         if (unlikely(info->thread_notes == 0) ||
1495             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1496                 WARN_ON(1);
1497                 return 0;
1498         }
1499
1500         /*
1501          * Initialize the ELF file header.
1502          */
1503         fill_elf_header(elf, phdrs,
1504                         view->e_machine, view->e_flags, view->ei_osabi);
1505
1506         /*
1507          * Allocate a structure for each thread.
1508          */
1509         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1510                 t = kzalloc(offsetof(struct elf_thread_core_info,
1511                                      notes[info->thread_notes]),
1512                             GFP_KERNEL);
1513                 if (unlikely(!t))
1514                         return 0;
1515
1516                 t->task = ct->task;
1517                 if (ct->task == dump_task || !info->thread) {
1518                         t->next = info->thread;
1519                         info->thread = t;
1520                 } else {
1521                         /*
1522                          * Make sure to keep the original task at
1523                          * the head of the list.
1524                          */
1525                         t->next = info->thread->next;
1526                         info->thread->next = t;
1527                 }
1528         }
1529
1530         /*
1531          * Now fill in each thread's information.
1532          */
1533         for (t = info->thread; t != NULL; t = t->next)
1534                 if (!fill_thread_core_info(t, view, signr, &info->size))
1535                         return 0;
1536
1537         /*
1538          * Fill in the two process-wide notes.
1539          */
1540         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1541         info->size += notesize(&info->psinfo);
1542
1543         fill_auxv_note(&info->auxv, current->mm);
1544         info->size += notesize(&info->auxv);
1545
1546         return 1;
1547 }
1548
1549 static size_t get_note_info_size(struct elf_note_info *info)
1550 {
1551         return info->size;
1552 }
1553
1554 /*
1555  * Write all the notes for each thread.  When writing the first thread, the
1556  * process-wide notes are interleaved after the first thread-specific note.
1557  */
1558 static int write_note_info(struct elf_note_info *info,
1559                            struct file *file, loff_t *foffset)
1560 {
1561         bool first = 1;
1562         struct elf_thread_core_info *t = info->thread;
1563
1564         do {
1565                 int i;
1566
1567                 if (!writenote(&t->notes[0], file, foffset))
1568                         return 0;
1569
1570                 if (first && !writenote(&info->psinfo, file, foffset))
1571                         return 0;
1572                 if (first && !writenote(&info->auxv, file, foffset))
1573                         return 0;
1574
1575                 for (i = 1; i < info->thread_notes; ++i)
1576                         if (t->notes[i].data &&
1577                             !writenote(&t->notes[i], file, foffset))
1578                                 return 0;
1579
1580                 first = 0;
1581                 t = t->next;
1582         } while (t);
1583
1584         return 1;
1585 }
1586
1587 static void free_note_info(struct elf_note_info *info)
1588 {
1589         struct elf_thread_core_info *threads = info->thread;
1590         while (threads) {
1591                 unsigned int i;
1592                 struct elf_thread_core_info *t = threads;
1593                 threads = t->next;
1594                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1595                 for (i = 1; i < info->thread_notes; ++i)
1596                         kfree(t->notes[i].data);
1597                 kfree(t);
1598         }
1599         kfree(info->psinfo.data);
1600 }
1601
1602 #else
1603
1604 /* Here is the structure in which status of each thread is captured. */
1605 struct elf_thread_status
1606 {
1607         struct list_head list;
1608         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1609         elf_fpregset_t fpu;             /* NT_PRFPREG */
1610         struct task_struct *thread;
1611 #ifdef ELF_CORE_COPY_XFPREGS
1612         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1613 #endif
1614         struct memelfnote notes[3];
1615         int num_notes;
1616 };
1617
1618 /*
1619  * In order to add the specific thread information for the elf file format,
1620  * we need to keep a linked list of every threads pr_status and then create
1621  * a single section for them in the final core file.
1622  */
1623 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1624 {
1625         int sz = 0;
1626         struct task_struct *p = t->thread;
1627         t->num_notes = 0;
1628
1629         fill_prstatus(&t->prstatus, p, signr);
1630         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1631         
1632         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1633                   &(t->prstatus));
1634         t->num_notes++;
1635         sz += notesize(&t->notes[0]);
1636
1637         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1638                                                                 &t->fpu))) {
1639                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1640                           &(t->fpu));
1641                 t->num_notes++;
1642                 sz += notesize(&t->notes[1]);
1643         }
1644
1645 #ifdef ELF_CORE_COPY_XFPREGS
1646         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1647                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1648                           sizeof(t->xfpu), &t->xfpu);
1649                 t->num_notes++;
1650                 sz += notesize(&t->notes[2]);
1651         }
1652 #endif  
1653         return sz;
1654 }
1655
1656 struct elf_note_info {
1657         struct memelfnote *notes;
1658         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1659         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1660         struct list_head thread_list;
1661         elf_fpregset_t *fpu;
1662 #ifdef ELF_CORE_COPY_XFPREGS
1663         elf_fpxregset_t *xfpu;
1664 #endif
1665         int thread_status_size;
1666         int numnote;
1667 };
1668
1669 static int elf_note_info_init(struct elf_note_info *info)
1670 {
1671         memset(info, 0, sizeof(*info));
1672         INIT_LIST_HEAD(&info->thread_list);
1673
1674         /* Allocate space for six ELF notes */
1675         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1676         if (!info->notes)
1677                 return 0;
1678         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1679         if (!info->psinfo)
1680                 goto notes_free;
1681         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1682         if (!info->prstatus)
1683                 goto psinfo_free;
1684         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1685         if (!info->fpu)
1686                 goto prstatus_free;
1687 #ifdef ELF_CORE_COPY_XFPREGS
1688         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1689         if (!info->xfpu)
1690                 goto fpu_free;
1691 #endif
1692         return 1;
1693 #ifdef ELF_CORE_COPY_XFPREGS
1694  fpu_free:
1695         kfree(info->fpu);
1696 #endif
1697  prstatus_free:
1698         kfree(info->prstatus);
1699  psinfo_free:
1700         kfree(info->psinfo);
1701  notes_free:
1702         kfree(info->notes);
1703         return 0;
1704 }
1705
1706 static int fill_note_info(struct elfhdr *elf, int phdrs,
1707                           struct elf_note_info *info,
1708                           long signr, struct pt_regs *regs)
1709 {
1710         struct list_head *t;
1711
1712         if (!elf_note_info_init(info))
1713                 return 0;
1714
1715         if (signr) {
1716                 struct core_thread *ct;
1717                 struct elf_thread_status *ets;
1718
1719                 for (ct = current->mm->core_state->dumper.next;
1720                                                 ct; ct = ct->next) {
1721                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1722                         if (!ets)
1723                                 return 0;
1724
1725                         ets->thread = ct->task;
1726                         list_add(&ets->list, &info->thread_list);
1727                 }
1728
1729                 list_for_each(t, &info->thread_list) {
1730                         int sz;
1731
1732                         ets = list_entry(t, struct elf_thread_status, list);
1733                         sz = elf_dump_thread_status(signr, ets);
1734                         info->thread_status_size += sz;
1735                 }
1736         }
1737         /* now collect the dump for the current */
1738         memset(info->prstatus, 0, sizeof(*info->prstatus));
1739         fill_prstatus(info->prstatus, current, signr);
1740         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1741
1742         /* Set up header */
1743         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1744
1745         /*
1746          * Set up the notes in similar form to SVR4 core dumps made
1747          * with info from their /proc.
1748          */
1749
1750         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1751                   sizeof(*info->prstatus), info->prstatus);
1752         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1753         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1754                   sizeof(*info->psinfo), info->psinfo);
1755
1756         info->numnote = 2;
1757
1758         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1759
1760         /* Try to dump the FPU. */
1761         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1762                                                                info->fpu);
1763         if (info->prstatus->pr_fpvalid)
1764                 fill_note(info->notes + info->numnote++,
1765                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1766 #ifdef ELF_CORE_COPY_XFPREGS
1767         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1768                 fill_note(info->notes + info->numnote++,
1769                           "LINUX", ELF_CORE_XFPREG_TYPE,
1770                           sizeof(*info->xfpu), info->xfpu);
1771 #endif
1772
1773         return 1;
1774 }
1775
1776 static size_t get_note_info_size(struct elf_note_info *info)
1777 {
1778         int sz = 0;
1779         int i;
1780
1781         for (i = 0; i < info->numnote; i++)
1782                 sz += notesize(info->notes + i);
1783
1784         sz += info->thread_status_size;
1785
1786         return sz;
1787 }
1788
1789 static int write_note_info(struct elf_note_info *info,
1790                            struct file *file, loff_t *foffset)
1791 {
1792         int i;
1793         struct list_head *t;
1794
1795         for (i = 0; i < info->numnote; i++)
1796                 if (!writenote(info->notes + i, file, foffset))
1797                         return 0;
1798
1799         /* write out the thread status notes section */
1800         list_for_each(t, &info->thread_list) {
1801                 struct elf_thread_status *tmp =
1802                                 list_entry(t, struct elf_thread_status, list);
1803
1804                 for (i = 0; i < tmp->num_notes; i++)
1805                         if (!writenote(&tmp->notes[i], file, foffset))
1806                                 return 0;
1807         }
1808
1809         return 1;
1810 }
1811
1812 static void free_note_info(struct elf_note_info *info)
1813 {
1814         while (!list_empty(&info->thread_list)) {
1815                 struct list_head *tmp = info->thread_list.next;
1816                 list_del(tmp);
1817                 kfree(list_entry(tmp, struct elf_thread_status, list));
1818         }
1819
1820         kfree(info->prstatus);
1821         kfree(info->psinfo);
1822         kfree(info->notes);
1823         kfree(info->fpu);
1824 #ifdef ELF_CORE_COPY_XFPREGS
1825         kfree(info->xfpu);
1826 #endif
1827 }
1828
1829 #endif
1830
1831 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1832                                         struct vm_area_struct *gate_vma)
1833 {
1834         struct vm_area_struct *ret = tsk->mm->mmap;
1835
1836         if (ret)
1837                 return ret;
1838         return gate_vma;
1839 }
1840 /*
1841  * Helper function for iterating across a vma list.  It ensures that the caller
1842  * will visit `gate_vma' prior to terminating the search.
1843  */
1844 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1845                                         struct vm_area_struct *gate_vma)
1846 {
1847         struct vm_area_struct *ret;
1848
1849         ret = this_vma->vm_next;
1850         if (ret)
1851                 return ret;
1852         if (this_vma == gate_vma)
1853                 return NULL;
1854         return gate_vma;
1855 }
1856
1857 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1858                              elf_addr_t e_shoff, int segs)
1859 {
1860         elf->e_shoff = e_shoff;
1861         elf->e_shentsize = sizeof(*shdr4extnum);
1862         elf->e_shnum = 1;
1863         elf->e_shstrndx = SHN_UNDEF;
1864
1865         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1866
1867         shdr4extnum->sh_type = SHT_NULL;
1868         shdr4extnum->sh_size = elf->e_shnum;
1869         shdr4extnum->sh_link = elf->e_shstrndx;
1870         shdr4extnum->sh_info = segs;
1871 }
1872
1873 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1874                                      unsigned long mm_flags)
1875 {
1876         struct vm_area_struct *vma;
1877         size_t size = 0;
1878
1879         for (vma = first_vma(current, gate_vma); vma != NULL;
1880              vma = next_vma(vma, gate_vma))
1881                 size += vma_dump_size(vma, mm_flags);
1882         return size;
1883 }
1884
1885 /*
1886  * Actual dumper
1887  *
1888  * This is a two-pass process; first we find the offsets of the bits,
1889  * and then they are actually written out.  If we run out of core limit
1890  * we just truncate.
1891  */
1892 static int elf_core_dump(struct coredump_params *cprm)
1893 {
1894         int has_dumped = 0;
1895         mm_segment_t fs;
1896         int segs;
1897         size_t size = 0;
1898         struct vm_area_struct *vma, *gate_vma;
1899         struct elfhdr *elf = NULL;
1900         loff_t offset = 0, dataoff, foffset;
1901         struct elf_note_info info;
1902         struct elf_phdr *phdr4note = NULL;
1903         struct elf_shdr *shdr4extnum = NULL;
1904         Elf_Half e_phnum;
1905         elf_addr_t e_shoff;
1906
1907         /*
1908          * We no longer stop all VM operations.
1909          * 
1910          * This is because those proceses that could possibly change map_count
1911          * or the mmap / vma pages are now blocked in do_exit on current
1912          * finishing this core dump.
1913          *
1914          * Only ptrace can touch these memory addresses, but it doesn't change
1915          * the map_count or the pages allocated. So no possibility of crashing
1916          * exists while dumping the mm->vm_next areas to the core file.
1917          */
1918   
1919         /* alloc memory for large data structures: too large to be on stack */
1920         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1921         if (!elf)
1922                 goto out;
1923         /*
1924          * The number of segs are recored into ELF header as 16bit value.
1925          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
1926          */
1927         segs = current->mm->map_count;
1928         segs += elf_core_extra_phdrs();
1929
1930         gate_vma = get_gate_vma(current->mm);
1931         if (gate_vma != NULL)
1932                 segs++;
1933
1934         /* for notes section */
1935         segs++;
1936
1937         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1938          * this, kernel supports extended numbering. Have a look at
1939          * include/linux/elf.h for further information. */
1940         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1941
1942         /*
1943          * Collect all the non-memory information about the process for the
1944          * notes.  This also sets up the file header.
1945          */
1946         if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1947                 goto cleanup;
1948
1949         has_dumped = 1;
1950         current->flags |= PF_DUMPCORE;
1951   
1952         fs = get_fs();
1953         set_fs(KERNEL_DS);
1954
1955         offset += sizeof(*elf);                         /* Elf header */
1956         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
1957         foffset = offset;
1958
1959         /* Write notes phdr entry */
1960         {
1961                 size_t sz = get_note_info_size(&info);
1962
1963                 sz += elf_coredump_extra_notes_size();
1964
1965                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1966                 if (!phdr4note)
1967                         goto end_coredump;
1968
1969                 fill_elf_note_phdr(phdr4note, sz, offset);
1970                 offset += sz;
1971         }
1972
1973         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1974
1975         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1976         offset += elf_core_extra_data_size();
1977         e_shoff = offset;
1978
1979         if (e_phnum == PN_XNUM) {
1980                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
1981                 if (!shdr4extnum)
1982                         goto end_coredump;
1983                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
1984         }
1985
1986         offset = dataoff;
1987
1988         size += sizeof(*elf);
1989         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
1990                 goto end_coredump;
1991
1992         size += sizeof(*phdr4note);
1993         if (size > cprm->limit
1994             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
1995                 goto end_coredump;
1996
1997         /* Write program headers for segments dump */
1998         for (vma = first_vma(current, gate_vma); vma != NULL;
1999                         vma = next_vma(vma, gate_vma)) {
2000                 struct elf_phdr phdr;
2001
2002                 phdr.p_type = PT_LOAD;
2003                 phdr.p_offset = offset;
2004                 phdr.p_vaddr = vma->vm_start;
2005                 phdr.p_paddr = 0;
2006                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2007                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2008                 offset += phdr.p_filesz;
2009                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2010                 if (vma->vm_flags & VM_WRITE)
2011                         phdr.p_flags |= PF_W;
2012                 if (vma->vm_flags & VM_EXEC)
2013                         phdr.p_flags |= PF_X;
2014                 phdr.p_align = ELF_EXEC_PAGESIZE;
2015
2016                 size += sizeof(phdr);
2017                 if (size > cprm->limit
2018                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2019                         goto end_coredump;
2020         }
2021
2022         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2023                 goto end_coredump;
2024
2025         /* write out the notes section */
2026         if (!write_note_info(&info, cprm->file, &foffset))
2027                 goto end_coredump;
2028
2029         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2030                 goto end_coredump;
2031
2032         /* Align to page */
2033         if (!dump_seek(cprm->file, dataoff - foffset))
2034                 goto end_coredump;
2035
2036         for (vma = first_vma(current, gate_vma); vma != NULL;
2037                         vma = next_vma(vma, gate_vma)) {
2038                 unsigned long addr;
2039                 unsigned long end;
2040
2041                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2042
2043                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2044                         struct page *page;
2045                         int stop;
2046
2047                         page = get_dump_page(addr);
2048                         if (page) {
2049                                 void *kaddr = kmap(page);
2050                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2051                                         !dump_write(cprm->file, kaddr,
2052                                                     PAGE_SIZE);
2053                                 kunmap(page);
2054                                 page_cache_release(page);
2055                         } else
2056                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2057                         if (stop)
2058                                 goto end_coredump;
2059                 }
2060         }
2061
2062         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2063                 goto end_coredump;
2064
2065         if (e_phnum == PN_XNUM) {
2066                 size += sizeof(*shdr4extnum);
2067                 if (size > cprm->limit
2068                     || !dump_write(cprm->file, shdr4extnum,
2069                                    sizeof(*shdr4extnum)))
2070                         goto end_coredump;
2071         }
2072
2073 end_coredump:
2074         set_fs(fs);
2075
2076 cleanup:
2077         free_note_info(&info);
2078         kfree(shdr4extnum);
2079         kfree(phdr4note);
2080         kfree(elf);
2081 out:
2082         return has_dumped;
2083 }
2084
2085 #endif          /* CONFIG_ELF_CORE */
2086
2087 static int __init init_elf_binfmt(void)
2088 {
2089         return register_binfmt(&elf_format);
2090 }
2091
/* Module exit hook: undo the registration done by init_elf_binfmt(). */
static void __exit exit_elf_binfmt(void)
{
        /* Remove the ELF loader. */
        unregister_binfmt(&elf_format);
}
2098 core_initcall(init_elf_binfmt);
2099 module_exit(exit_elf_binfmt);
2100 MODULE_LICENSE("GPL");