#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/namei.h>
-#include <linux/proc_fs.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
+#include <asm/exec.h>
+
+#include <trace/events/task.h>
#include "internal.h"
+#include <trace/events/sched.h>
+
int core_uses_pid;
char core_pattern[CORENAME_MAX_SIZE] = "core";
unsigned int core_pipe_limit;
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);
-int __register_binfmt(struct linux_binfmt * fmt, int insert)
+void __register_binfmt(struct linux_binfmt * fmt, int insert)
{
- if (!fmt)
- return -EINVAL;
+ BUG_ON(!fmt);
write_lock(&binfmt_lock);
insert ? list_add(&fmt->lh, &formats) :
list_add_tail(&fmt->lh, &formats);
write_unlock(&binfmt_lock);
- return 0;
}
EXPORT_SYMBOL(__register_binfmt);
return;
bprm->vma_pages = pages;
-
-#ifdef SPLIT_RSS_COUNTING
add_mm_counter(mm, MM_ANONPAGES, diff);
-#else
- spin_lock(&mm->page_table_lock);
- add_mm_counter(mm, MM_ANONPAGES, diff);
- spin_unlock(&mm->page_table_lock);
-#endif
}
static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
#ifdef CONFIG_STACK_GROWSUP
if (write) {
- ret = expand_stack_downwards(bprm->vma, pos);
+ ret = expand_downwards(bprm->vma, pos);
if (ret < 0)
return NULL;
}
* use STACK_TOP because that can depend on attributes which aren't
* configured yet.
*/
- BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
+ BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
vma->vm_end = STACK_TOP_MAX;
vma->vm_start = vma->vm_end - PAGE_SIZE;
vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
BUG_ON(new_start > new_end);
return -ENOMEM;
lru_add_drain();
- tlb = tlb_gather_mmu(mm, 0);
+ tlb_gather_mmu(&tlb, mm, 0);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
- free_pgd_range(tlb, new_end, old_end, new_end,
+ free_pgd_range(&tlb, new_end, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
} else {
/*
* have constraints on va-space that make this illegal (IA64) -
* for the others its just a little faster.
*/
- free_pgd_range(tlb, old_start, old_end, new_end,
+ free_pgd_range(&tlb, old_start, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
}
- tlb_finish_mmu(tlb, new_end, old_end);
+ tlb_finish_mmu(&tlb, new_end, old_end);
/*
* Shrink the vma to just the new range. Always succeeds.
/* Notify parent that we're no longer interested in the old VM */
tsk = current;
old_mm = current->mm;
- sync_mm_rss(tsk, old_mm);
+ sync_mm_rss(old_mm);
mm_release(tsk, old_mm);
if (old_mm) {
tsk->mm = mm;
tsk->active_mm = mm;
activate_mm(active_mm, mm);
- if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
- atomic_dec(&old_mm->oom_disable_count);
- atomic_inc(&tsk->mm->oom_disable_count);
- }
task_unlock(tsk);
arch_pick_mmap_layout(mm);
if (old_mm) {
up_read(&old_mm->mmap_sem);
BUG_ON(active_mm != old_mm);
+ setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
mm_update_next_owner(old_mm);
mmput(old_mm);
return 0;
leader->group_leader = tsk;
tsk->exit_signal = SIGCHLD;
+ leader->exit_signal = -1;
BUG_ON(leader->exit_state != EXIT_ZOMBIE);
leader->exit_state = EXIT_DEAD;
+
+ /*
+ * We are going to release_task()->ptrace_unlink() silently,
+ * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
+ * the tracer wont't block again waiting for this thread.
+ */
+ if (unlikely(leader->ptrace))
+ __wake_up_parent(leader, leader->parent);
write_unlock_irq(&tasklist_lock);
release_task(leader);
sig->notify_count = 0;
no_thread_group:
- if (current->mm)
- setmax_mm_hiwater_rss(&sig->maxrss, current->mm);
+ /* we have changed execution domain */
+ tsk->exit_signal = SIGCHLD;
exit_itimers(sig);
flush_itimer_signals();
fdt = files_fdtable(files);
if (i >= fdt->max_fds)
break;
- set = fdt->close_on_exec->fds_bits[j];
+ set = fdt->close_on_exec[j];
if (!set)
continue;
- fdt->close_on_exec->fds_bits[j] = 0;
+ fdt->close_on_exec[j] = 0;
spin_unlock(&files->file_lock);
for ( ; set ; i++,set >>= 1) {
if (set & 1) {
task_unlock(tsk);
return buf;
}
+EXPORT_SYMBOL_GPL(get_task_comm);
void set_task_comm(struct task_struct *tsk, char *buf)
{
task_lock(tsk);
+ trace_task_rename(tsk, buf);
+
/*
* Threads may access current->comm without holding
* the task lock, so write the string carefully.
perf_event_comm(tsk);
}
+static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
+{
+ int i, ch;
+
+ /* Copies the binary name from after last slash */
+ for (i = 0; (ch = *(fn++)) != '\0';) {
+ if (ch == '/')
+ i = 0; /* overwrite what we wrote */
+ else
+ if (i < len - 1)
+ tcomm[i++] = ch;
+ }
+ tcomm[i] = '\0';
+}
+
int flush_old_exec(struct linux_binprm * bprm)
{
int retval;
set_mm_exe_file(bprm->mm, bprm->file);
+ filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
/*
* Release all of the old mmap stuff
*/
bprm->mm = NULL; /* We're using it now */
- current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
+ set_fs(USER_DS);
+ current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD);
flush_thread();
current->personality &= ~bprm->per_clear;
}
EXPORT_SYMBOL(flush_old_exec);
-void setup_new_exec(struct linux_binprm * bprm)
+void would_dump(struct linux_binprm *bprm, struct file *file)
{
- int i, ch;
- const char *name;
- char tcomm[sizeof(current->comm)];
+ if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
+ bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+}
+EXPORT_SYMBOL(would_dump);
+void setup_new_exec(struct linux_binprm * bprm)
+{
arch_pick_mmap_layout(current->mm);
/* This is the point of no return */
else
set_dumpable(current->mm, suid_dumpable);
- name = bprm->filename;
-
- /* Copies the binary name from after last slash */
- for (i=0; (ch = *(name++)) != '\0';) {
- if (ch == '/')
- i = 0; /* overwrite what we wrote */
- else
- if (i < (sizeof(tcomm) - 1))
- tcomm[i++] = ch;
- }
- tcomm[i] = '\0';
- set_task_comm(current, tcomm);
+ set_task_comm(current, bprm->tcomm);
/* Set the new mm task size. We have to do that late because it may
* depend on TIF_32BIT which is only updated in flush_thread() on
if (bprm->cred->uid != current_euid() ||
bprm->cred->gid != current_egid()) {
current->pdeath_signal = 0;
- } else if (file_permission(bprm->file, MAY_READ) ||
- bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) {
- set_dumpable(current->mm, suid_dumpable);
+ } else {
+ would_dump(bprm, bprm->file);
+ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
+ set_dumpable(current->mm, suid_dumpable);
}
/*
* - the caller must hold ->cred_guard_mutex to protect against
* PTRACE_ATTACH
*/
-int check_unsafe_exec(struct linux_binprm *bprm)
+static int check_unsafe_exec(struct linux_binprm *bprm)
{
struct task_struct *p = current, *t;
unsigned n_fs;
int res = 0;
- bprm->unsafe = tracehook_unsafe_exec(p);
+ if (p->ptrace) {
+ if (p->ptrace & PT_PTRACE_CAP)
+ bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
+ else
+ bprm->unsafe |= LSM_UNSAFE_PTRACE;
+ }
n_fs = 1;
spin_lock(&p->fs->lock);
ret = -EFAULT;
goto out;
}
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
for (; offset < PAGE_SIZE && kaddr[offset];
offset++, bprm->p++)
;
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
put_arg_page(page);
if (offset == PAGE_SIZE)
unsigned int depth = bprm->recursion_depth;
int try,retval;
struct linux_binfmt *fmt;
+ pid_t old_pid, old_vpid;
retval = security_bprm_check(bprm);
if (retval)
return retval;
- /* kernel module loader fixup */
- /* so we don't try to load run modprobe in kernel space. */
- set_fs(USER_DS);
-
retval = audit_bprm(bprm);
if (retval)
return retval;
+ /* Need to fetch pid before load_binary changes it */
+ old_pid = current->pid;
+ rcu_read_lock();
+ old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
+ rcu_read_unlock();
+
retval = -ENOENT;
for (try=0; try<2; try++) {
read_lock(&binfmt_lock);
*/
bprm->recursion_depth = depth;
if (retval >= 0) {
- if (depth == 0)
- tracehook_report_exec(fmt, bprm, regs);
+ if (depth == 0) {
+ trace_sched_process_exec(current, old_pid, bprm);
+ ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
+ }
put_binfmt(fmt);
allow_write_access(bprm->file);
if (bprm->file)
}
}
read_unlock(&binfmt_lock);
+#ifdef CONFIG_MODULES
if (retval != -ENOEXEC || bprm->mm == NULL) {
break;
-#ifdef CONFIG_MODULES
} else {
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
if (printable(bprm->buf[0]) &&
printable(bprm->buf[2]) &&
printable(bprm->buf[3]))
break; /* -ENOEXEC */
+ if (try)
+ break; /* -ENOEXEC */
request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
-#endif
}
+#else
+ break;
+#endif
}
return retval;
}
struct files_struct *displaced;
bool clear_in_exec;
int retval;
+ const struct cred *cred = current_cred();
+
+ /*
+ * We move the actual failure in case of RLIMIT_NPROC excess from
+ * set*uid() to execve() because too many poorly written programs
+ * don't check setuid() return code. Here we additionally recheck
+ * whether NPROC limit is still exceeded.
+ */
+ if ((current->flags & PF_NPROC_EXCEEDED) &&
+ atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) {
+ retval = -EAGAIN;
+ goto out_ret;
+ }
+
+ /* We're below the limit (still or again), so we don't want to make
+ * further execve() calls fail. */
+ current->flags &= ~PF_NPROC_EXCEEDED;
retval = unshare_files(&displaced);
if (retval)
return ret;
}
+static void cn_escape(char *str)
+{
+ for (; *str; str++)
+ if (*str == '/')
+ *str = '!';
+}
+
+static int cn_print_exe_file(struct core_name *cn)
+{
+ struct file *exe_file;
+ char *pathbuf, *path;
+ int ret;
+
+ exe_file = get_mm_exe_file(current->mm);
+ if (!exe_file) {
+ char *commstart = cn->corename + cn->used;
+ ret = cn_printf(cn, "%s (path unknown)", current->comm);
+ cn_escape(commstart);
+ return ret;
+ }
+
+ pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
+ if (!pathbuf) {
+ ret = -ENOMEM;
+ goto put_exe_file;
+ }
+
+ path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
+ goto free_buf;
+ }
+
+ cn_escape(path);
+
+ ret = cn_printf(cn, "%s", path);
+
+free_buf:
+ kfree(pathbuf);
+put_exe_file:
+ fput(exe_file);
+ return ret;
+}
+
/* format_corename will inspect the pattern parameter, and output a
* name into corename, which must have space for at least
* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
break;
}
/* hostname */
- case 'h':
+ case 'h': {
+ char *namestart = cn->corename + cn->used;
down_read(&uts_sem);
err = cn_printf(cn, "%s",
utsname()->nodename);
up_read(&uts_sem);
+ cn_escape(namestart);
break;
+ }
/* executable */
- case 'e':
+ case 'e': {
+ char *commstart = cn->corename + cn->used;
err = cn_printf(cn, "%s", current->comm);
+ cn_escape(commstart);
+ break;
+ }
+ case 'E':
+ err = cn_print_exe_file(cn);
break;
/* core limit size */
case 'c':
t = start;
do {
+ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
if (t != current && t->mm) {
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
- struct completion *vfork_done;
int core_waiters = -EBUSY;
init_completion(&core_state->startup);
core_waiters = zap_threads(tsk, mm, core_state, exit_code);
up_write(&mm->mmap_sem);
- if (unlikely(core_waiters < 0))
- goto fail;
-
- /*
- * Make sure nobody is waiting for us to release the VM,
- * otherwise we can deadlock when we wait on each other
- */
- vfork_done = tsk->vfork_done;
- if (vfork_done) {
- tsk->vfork_done = NULL;
- complete(vfork_done);
- }
-
- if (core_waiters)
+ if (core_waiters > 0)
wait_for_completion(&core_state->startup);
-fail:
+
return core_waiters;
}
* is a special value that we use to trap recursive
* core dumps
*/
-static int umh_pipe_setup(struct subprocess_info *info)
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
{
struct file *rp, *wp;
struct fdtable *fdt;
fd_install(0, rp);
spin_lock(&cf->file_lock);
fdt = files_fdtable(cf);
- FD_SET(0, fdt->open_fds);
- FD_CLR(0, fdt->close_on_exec);
+ __set_open_fd(0, fdt);
+ __clear_close_on_exec(0, fdt);
spin_unlock(&cf->file_lock);
/* and disallow core files too */
ispipe = format_corename(&cn, signr);
- if (ispipe == -ENOMEM) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
- }
-
if (ispipe) {
int dump_count;
char **helper_argv;
+ if (ispipe < 0) {
+ printk(KERN_WARNING "format_corename failed\n");
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_corename;
+ }
+
if (cprm.limit == 1) {
/*
* Normally core limits are irrelevant to pipes, since