PR_SET_SECCOMP:
A pre-existing option for enabling strict seccomp mode (1) or
- filtering seccomp (13). Performing this call (13) will enable
- the current set of defined filters.
+ filtering seccomp (13).
Usage:
prctl(PR_SET_SECCOMP, 1); /* strict */
-----------
Changing the availability of the kernel ABI at runtime runs the risk of
-providing access to unreachable code paths in normal applications. To
-avoid the pitfalls that accompany this risk, seccomp filter inheritance
-is restricted.
-
-Filters may be inherited across a fork/clone if they have been activated
-by a call to prctl(PR_SET_SECCOMP, 13). If the process had the
-CAP_SYS_ADMIN capability when configuring the filters, they may also be
-inherited across an execve call.
-
-Inherited filters may not be modified by the child process. If child
-would like to further restrict the available system calls, it may
-perform the same calls as discussed earlier: set, clear, get, and
-finally prctl(PR_SET_SECCOMP, 13). Until the child calls
-PR_SET_SECCOMP, their filters will be ignored and only the inherited
-filter will be evaluated. After a successful PR_SET_SECCOMP call, all
-system calls performed by the child will be checked against the filters
-that were specified by itself and then against the filters supplied by
-any ancestors. Any system call used must be allowed by all of the
-filters tested. This composition of the ancestral seccomp filters and
-process-local filters guarantees that only the minimal set of system calls
-will be permitted at any point and without child processes needing to be
-aware of any prior system call filtering.
-
-(If, for instance, the child process merely copied the parent filters
-and then extended them, the child would be required to enumerate all
-existing filters to determine which needed to be dropped.)
+providing access to normally unreachable code paths in normal
+applications. To avoid the pitfalls that accompany this risk, seccomp
+filters inheritance is restricted.
+
+In general, filters can be inherited across fork/clone, but only when
+they are active (e.g., PR_SET_SECCOMP has been set to 13) and not prior
+to use. Inheriting only active filters stops a parent process from
+adding filters that may undermine the child process security or create
+unexpected behavior after an execve.
+
+For example, a parent process may add a rule to exposes a system call
+that was not normally part of the child process' filter set. When the
+child process configures its filters, it would have to check
+/proc/self/seccomp_filter to ensure nothing unexpected has been added.
+The standard inheritance behavior ensures this suboptimal situation is
+avoided.
+
+Inheritance across execve follows a subset of this behavior. In
+particular, execve can only be added to the allowed filter set by a
+process with CAP_SYS_ADMIN privileges. The result is that an
+unprivileged process can never create a seccomp filter set that can be
+inherited across execve. To further guarantee this behavior, any
+unprivileged modifications to a seccomp filter set will forcibly
+clear execve. The end result is that a privileged parent may install
+a set of seccomp filters and, at any point in the hierarchy, a child may
+make a private version of the inherited filter set with their own
+changes applied but execve blocked.
Caveats
(_tsk)->seccomp.filters = NULL; \
} while (0)
-extern void seccomp_filter_fork(struct task_struct *child,
- struct task_struct *parent);
+/* Do nothing unless seccomp filtering is active. If not, the execve boundary
+ * can not be cleanly enforced and preset filters may leak across execve calls.
+ */
+#define seccomp_filter_fork(_tsk, _orig) do { \
+ if ((_tsk)->seccomp.mode) { \
+ (_tsk)->seccomp.mode = (_orig)->seccomp.mode; \
+ mutex_lock(&(_orig)->seccomp.filters_guard); \
+ (_tsk)->seccomp.filters = \
+ get_seccomp_filters((_orig)->seccomp.filters); \
+ mutex_unlock(&(_orig)->seccomp.filters_guard); \
+ } \
+} while (0)
/* No locking is needed here because the task_struct will
* have no parallel consumers.
extern long seccomp_clear_filter(int);
extern long seccomp_get_filter(int, char *, unsigned long);
-extern long seccomp_enable_filters(void);
-
extern long prctl_set_seccomp_filter(unsigned long, unsigned long,
char __user *);
extern long prctl_get_seccomp_filter(unsigned long, unsigned long,
struct seccomp_filters { };
#define seccomp_filter_init_task(_tsk) do { } while (0)
-static inline void seccomp_filter_fork(struct task_struct *child,
- struct task_struct *parent) { }
+#define seccomp_filter_fork(_tsk, _orig) do { } while (0)
#define seccomp_filter_free_task(_tsk) do { } while (0)
static inline int seccomp_show_filters(struct seccomp_filters *filters,
#include <linux/kallsyms.h>
#include <linux/kref.h>
#include <linux/perf_event.h>
-#include <linux/pid.h>
#include <linux/prctl.h>
#include <linux/seccomp.h>
#include <linux/security.h>
* get/put helpers should be used when accessing an instance
* outside of a lifetime-guarded section. In general, this
* is only needed for handling shared filters across tasks.
- * @creator: pointer to the pid that created this filter
- * @parent: pointer to the ancestor which this filter will be composed with.
* @count: size of @event_filters
* @filter: tree of pointers to seccomp filters
*
* seccomp_filters objects should never be modified after being attached
- * to a task_struct (other than @usage).
+ * to a task_struct.
*/
struct seccomp_filters {
struct kref usage;
- struct pid *creator;
- struct seccomp_filters *parent;
struct {
uint32_t compat:1,
- enabled:1,
- __reserved:30;
+ __reserved:31;
} flags;
uint16_t count;
struct btree_head32 tree;
/*
* Make ftrace support optional
*/
+
#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PERF_EVENTS)
#include <asm/syscall.h>
kref_init(&f->usage);
if (btree_init32(&f->tree))
return ERR_PTR(-ENOMEM);
- f->creator = get_task_pid(current, PIDTYPE_PID);
return f;
}
free_event_filter(ef);
}
btree_destroy32(&filters->tree);
- put_seccomp_filters(filters->parent);
- put_pid(filters->creator);
kfree(filters);
}
}
dst->count++;
}
- dst->parent = get_seccomp_filters(src->parent);
- if (dst->creator)
- put_pid(dst->creator);
- dst->creator = get_pid(src->creator);
done:
return ret;
return orig;
}
-static int filters_created_by_parent(struct seccomp_filters *filters)
-{
- struct pid *pid;
- int ret = 0;
- if (!filters)
- return ret;
- pid = get_task_pid(current, PIDTYPE_PID);
- if (pid != filters->creator)
- ret = 1;
- put_pid(pid);
- return ret;
-}
-
/**
* seccomp_test_filters - tests 'current' against the given syscall
* @syscall: number of the system call to test
mutex_lock(¤t->seccomp.filters_guard);
/* No reference counting is done. filters_guard should protect the
- * lifetime of any existing pointer below using the task reference.
- * Parents will be protected by the held references.
+ * lifetime of any existing pointer below.
*/
filters = current->seccomp.filters;
-
- /* Inherited filters will always be enabled so skip ahead here. */
- if (filters && !filters->flags.enabled)
- filters = filters->parent;
-
- /* Without any enabled filters, no system calls will be allowed. */
if (!filters)
goto out;
- /* Only allow a system call if it is allowed in all ancestors. */
- for ( ; filters != NULL; filters = filters->parent) {
- if (filters_compat_mismatch(filters)) {
- pr_info("%s[%d]: seccomp_filter compat() mismatch.\n",
- current->comm, task_pid_nr(current));
- goto out;
- }
-
- filter = btree_lookup32(&filters->tree, syscall);
- if (!filter)
- goto out;
-
- if (IS_ALLOW_FILTER(filter) || filter_match_current(filter))
- continue;
+ if (filters_compat_mismatch(filters)) {
+ pr_info("%s[%d]: seccomp_filter compat() mismatch.\n",
+ current->comm, task_pid_nr(current));
goto out;
}
- /* If the loop terminates normally, the syscall is approved. */
+
+ filter = btree_lookup32(&filters->tree, syscall);
+ if (!filter)
+ goto out;
+
ret = 0;
+ if (IS_ALLOW_FILTER(filter))
+ goto out;
+
+ if (!filter_match_current(filter))
+ ret = -EACCES;
out:
mutex_unlock(¤t->seccomp.filters_guard);
return ret;
{
int nr;
struct event_filter *ef;
-
if (!filters)
goto out;
- for ( ; filters; filters = filters->parent) {
- seq_printf(m, "Enabled: %d\n", filters->flags.enabled);
- seq_printf(m, "Inherited: %d\n",
- filters_created_by_parent(filters));
- btree_for_each_safe32(&filters->tree, nr, ef) {
- const char *filter_string = SECCOMP_FILTER_ALLOW;
- seq_printf(m, "%d (%s): ", nr, syscall_nr_to_name(nr));
- if (!IS_ALLOW_FILTER(ef))
- filter_string = get_filter_string(ef);
- seq_printf(m, "%s\n", filter_string);
- }
- seq_printf(m, "--\n");
+ btree_for_each_safe32(&filters->tree, nr, ef) {
+ const char *filter_string = SECCOMP_FILTER_ALLOW;
+ seq_printf(m, "%d (%s): ", nr, syscall_nr_to_name(nr));
+ if (!IS_ALLOW_FILTER(ef))
+ filter_string = get_filter_string(ef);
+ seq_printf(m, "%s\n", filter_string);
}
out:
return 0;
mutex_lock(¤t->seccomp.filters_guard);
filters = current->seccomp.filters;
- if (!filters || filters_created_by_parent(filters))
+ if (!filters)
goto out;
ret = -ENOENT;
mutex_lock(¤t->seccomp.filters_guard);
orig_filters = current->seccomp.filters;
- if (!orig_filters || filters_created_by_parent(orig_filters))
+ if (!orig_filters)
goto out;
if (filters_compat_mismatch(orig_filters))
*/
long seccomp_set_filter(int syscall_nr, char *filter)
{
- struct seccomp_filters *filters = NULL, *orig_filters = NULL,
- *parent_filters = NULL;
+ struct seccomp_filters *filters = NULL, *orig_filters = NULL;
struct event_filter *ef = NULL;
long ret = -EPERM;
orig_filters = current->seccomp.filters;
- if (filters_created_by_parent(orig_filters)) {
- parent_filters = orig_filters;
- orig_filters = NULL;
- }
-
/* After the first call, compatibility mode is selected permanently. */
ret = -EACCES;
if (filters_compat_mismatch(orig_filters))
ef = btree_lookup32(&orig_filters->tree, syscall_nr);
if (!ef) {
- /* A new filter cannot be added to an active filters set. */
+ /* Don't allow DENYs to be changed when in a seccomp mode */
ret = -EACCES;
- if (current->seccomp.mode && orig_filters &&
- orig_filters->flags.enabled)
+ if (current->seccomp.mode)
goto out;
}
get_seccomp_filters(filters); /* simplify the error paths */
current->seccomp.filters = filters;
- if (parent_filters)
- filters->parent = parent_filters; /* already have a ref */
put_seccomp_filters(orig_filters); /* for the task */
out:
put_seccomp_filters(filters); /* for get or task, on err */
}
EXPORT_SYMBOL_GPL(seccomp_set_filter);
-long seccomp_enable_filters(void)
-{
- struct seccomp_filters *filters = NULL, *orig_filters = NULL;
- long ret = 0;
-
- mutex_lock(¤t->seccomp.filters_guard);
- /* Rely on the task reference */
- orig_filters = current->seccomp.filters;
- if (!orig_filters)
- goto out;
-
- /* Cannot re-enable inherited filters */
- ret = -EINVAL;
- if (filters_created_by_parent(orig_filters))
- goto out;
-
- filters = seccomp_filters_alloc();
- if (IS_ERR(filters)) {
- ret = PTR_ERR(filters);
- goto out;
- }
-
- ret = seccomp_filters_copy(filters, orig_filters);
- if (ret)
- goto out;
-
- /* Do the real work */
- filters->flags.enabled = 1;
-
- get_seccomp_filters(filters); /* simplify the error paths */
-
- current->seccomp.filters = filters;
- put_seccomp_filters(orig_filters); /* for the task */
-out:
- put_seccomp_filters(filters); /* for get or task, on err */
- mutex_unlock(¤t->seccomp.filters_guard);
- return ret;
-}
-EXPORT_SYMBOL_GPL(seccomp_enable_filters);
-
long prctl_set_seccomp_filter(unsigned long id_type,
unsigned long id,
char __user *user_filter)
kfree(buf);
return ret;
}
-
-/* seccomp_filter_fork: manages inheritance on fork
- * @child: forkee
- * @parent: forker
- * Ensures that @child inherit a seccomp_filters iff seccomp is enabled
- * and the set of filters is marked as 'enabled'.
- */
-void seccomp_filter_fork(struct task_struct *child,
- struct task_struct *parent)
-{
- if (!parent->seccomp.mode)
- return;
- child->seccomp.mode = parent->seccomp.mode;
- mutex_lock(&parent->seccomp.filters_guard);
- child->seccomp.filters = get_seccomp_filters(parent->seccomp.filters);
- mutex_unlock(&parent->seccomp.filters_guard);
- /* If @parent's filters are not active, then inherit the ancestor
- * if there is one. It's possible that it will be NULL if the seccomp
- * mode does not use seccomp_filters.
- */
- if (child->seccomp.filters && !child->seccomp.filters->flags.enabled) {
- struct seccomp_filters *enabled =
- get_seccomp_filters(child->seccomp.filters->parent);
- put_seccomp_filters(child->seccomp.filters);
- child->seccomp.filters = enabled;
- }
-}