file capabilities: add no_file_caps switch (v4)
[linux-flexiantxendom0-natty.git] / kernel / capability.c
1 /*
2  * linux/kernel/capability.c
3  *
4  * Copyright (C) 1997  Andrew Main <zefram@fysh.org>
5  *
6  * Integrated into 2.1.97+,  Andrew G. Morgan <morgan@kernel.org>
7  * 30 May 2002: Cleanup, Robert M. Love <rml@tech9.net>
8  */
9
10 #include <linux/capability.h>
11 #include <linux/mm.h>
12 #include <linux/module.h>
13 #include <linux/security.h>
14 #include <linux/syscalls.h>
15 #include <linux/pid_namespace.h>
16 #include <asm/uaccess.h>
17
18 /*
19  * This lock protects task->cap_* for all tasks, including current.
20  * Locking rule: when both are needed, acquire this lock before tasklist_lock.
21  */
22 static DEFINE_SPINLOCK(task_capability_lock);
23
24 /*
25  * Exported empty/full/initial-effective sets, leveraged for setting/resetting capabilities
26  */
27
28 const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
29 const kernel_cap_t __cap_full_set = CAP_FULL_SET;
30 const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET;
31
32 EXPORT_SYMBOL(__cap_empty_set);
33 EXPORT_SYMBOL(__cap_full_set);
34 EXPORT_SYMBOL(__cap_init_eff_set);
35
#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
/* Starts out as 1; cleared by the "no_file_caps" boot parameter handler. */
int file_caps_enabled = 1;

/*
 * Boot-parameter handler registered for "no_file_caps".
 * Clears file_caps_enabled; @str is not examined.  Always returns 1.
 */
static int __init file_caps_disable(char *str)
{
        file_caps_enabled = 0;

        return 1;
}
__setup("no_file_caps", file_caps_disable);
#endif
46
47 /*
48  * More recent versions of libcap are available from:
49  *
50  *   http://www.kernel.org/pub/linux/libs/security/linux-privs/
51  */
52
53 static void warn_legacy_capability_use(void)
54 {
55         static int warned;
56         if (!warned) {
57                 char name[sizeof(current->comm)];
58
59                 printk(KERN_INFO "warning: `%s' uses 32-bit capabilities"
60                        " (legacy support in use)\n",
61                        get_task_comm(name, current));
62                 warned = 1;
63         }
64 }
65
66 /*
67  * Version 2 capabilities worked fine, but the linux/capability.h file
68  * that accompanied their introduction encouraged their use without
69  * the necessary user-space source code changes. As such, we have
70  * created a version 3 with equivalent functionality to version 2, but
71  * with a header change to protect legacy source code from using
72  * version 2 when it wanted to use version 1. If your system has code
73  * that trips the following warning, it is using version 2 specific
74  * capabilities and may be doing so insecurely.
75  *
76  * The remedy is to either upgrade your version of libcap (to 2.10+,
77  * if the application is linked against it), or recompile your
78  * application with modern kernel headers and this warning will go
79  * away.
80  */
81
82 static void warn_deprecated_v2(void)
83 {
84         static int warned;
85
86         if (!warned) {
87                 char name[sizeof(current->comm)];
88
89                 printk(KERN_INFO "warning: `%s' uses deprecated v2"
90                        " capabilities in a way that may be insecure.\n",
91                        get_task_comm(name, current));
92                 warned = 1;
93         }
94 }
95
96 /*
97  * Version check. Return the number of u32s in each capability flag
98  * array, or a negative value on error.
99  */
100 static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
101 {
102         __u32 version;
103
104         if (get_user(version, &header->version))
105                 return -EFAULT;
106
107         switch (version) {
108         case _LINUX_CAPABILITY_VERSION_1:
109                 warn_legacy_capability_use();
110                 *tocopy = _LINUX_CAPABILITY_U32S_1;
111                 break;
112         case _LINUX_CAPABILITY_VERSION_2:
113                 warn_deprecated_v2();
114                 /*
115                  * fall through - v3 is otherwise equivalent to v2.
116                  */
117         case _LINUX_CAPABILITY_VERSION_3:
118                 *tocopy = _LINUX_CAPABILITY_U32S_3;
119                 break;
120         default:
121                 if (put_user((u32)_KERNEL_CAPABILITY_VERSION, &header->version))
122                         return -EFAULT;
123                 return -EINVAL;
124         }
125
126         return 0;
127 }
128
129 #ifndef CONFIG_SECURITY_FILE_CAPABILITIES
130
131 /*
132  * Without filesystem capability support, we nominally support one process
133  * setting the capabilities of another
134  */
135 static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
136                                      kernel_cap_t *pIp, kernel_cap_t *pPp)
137 {
138         struct task_struct *target;
139         int ret;
140
141         spin_lock(&task_capability_lock);
142         read_lock(&tasklist_lock);
143
144         if (pid && pid != task_pid_vnr(current)) {
145                 target = find_task_by_vpid(pid);
146                 if (!target) {
147                         ret = -ESRCH;
148                         goto out;
149                 }
150         } else
151                 target = current;
152
153         ret = security_capget(target, pEp, pIp, pPp);
154
155 out:
156         read_unlock(&tasklist_lock);
157         spin_unlock(&task_capability_lock);
158
159         return ret;
160 }
161
162 /*
163  * cap_set_pg - set capabilities for all processes in a given process
164  * group.  We call this holding task_capability_lock and tasklist_lock.
165  */
166 static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
167                              kernel_cap_t *inheritable,
168                              kernel_cap_t *permitted)
169 {
170         struct task_struct *g, *target;
171         int ret = -EPERM;
172         int found = 0;
173         struct pid *pgrp;
174
175         spin_lock(&task_capability_lock);
176         read_lock(&tasklist_lock);
177
178         pgrp = find_vpid(pgrp_nr);
179         do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
180                 target = g;
181                 while_each_thread(g, target) {
182                         if (!security_capset_check(target, effective,
183                                                    inheritable, permitted)) {
184                                 security_capset_set(target, effective,
185                                                     inheritable, permitted);
186                                 ret = 0;
187                         }
188                         found = 1;
189                 }
190         } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
191
192         read_unlock(&tasklist_lock);
193         spin_unlock(&task_capability_lock);
194
195         if (!found)
196                 ret = 0;
197         return ret;
198 }
199
200 /*
201  * cap_set_all - set capabilities for all processes other than init
202  * and self.  We call this holding task_capability_lock and tasklist_lock.
203  */
204 static inline int cap_set_all(kernel_cap_t *effective,
205                               kernel_cap_t *inheritable,
206                               kernel_cap_t *permitted)
207 {
208         struct task_struct *g, *target;
209         int ret = -EPERM;
210         int found = 0;
211
212         spin_lock(&task_capability_lock);
213         read_lock(&tasklist_lock);
214
215         do_each_thread(g, target) {
216                 if (target == current
217                     || is_container_init(target->group_leader))
218                         continue;
219                 found = 1;
220                 if (security_capset_check(target, effective, inheritable,
221                                           permitted))
222                         continue;
223                 ret = 0;
224                 security_capset_set(target, effective, inheritable, permitted);
225         } while_each_thread(g, target);
226
227         read_unlock(&tasklist_lock);
228         spin_unlock(&task_capability_lock);
229
230         if (!found)
231                 ret = 0;
232
233         return ret;
234 }
235
236 /*
237  * Given the target pid does not refer to the current process we
238  * need more elaborate support... (This support is not present when
239  * filesystem capabilities are configured.)
240  */
241 static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective,
242                                             kernel_cap_t *inheritable,
243                                             kernel_cap_t *permitted)
244 {
245         struct task_struct *target;
246         int ret;
247
248         if (!capable(CAP_SETPCAP))
249                 return -EPERM;
250
251         if (pid == -1)            /* all procs other than current and init */
252                 return cap_set_all(effective, inheritable, permitted);
253
254         else if (pid < 0)                    /* all procs in process group */
255                 return cap_set_pg(-pid, effective, inheritable, permitted);
256
257         /* target != current */
258         spin_lock(&task_capability_lock);
259         read_lock(&tasklist_lock);
260
261         target = find_task_by_vpid(pid);
262         if (!target)
263                 ret = -ESRCH;
264         else {
265                 ret = security_capset_check(target, effective, inheritable,
266                                             permitted);
267
268                 /* having verified that the proposed changes are legal,
269                    we now put them into effect. */
270                 if (!ret)
271                         security_capset_set(target, effective, inheritable,
272                                             permitted);
273         }
274
275         read_unlock(&tasklist_lock);
276         spin_unlock(&task_capability_lock);
277
278         return ret;
279 }
280
281 #else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */
282
283 /*
284  * If we have configured with filesystem capability support, then the
285  * only thing that can change the capabilities of the current process
286  * is the current process. As such, we can't be in this code at the
287  * same time as we are in the process of setting capabilities in this
288  * process. The net result is that we can limit our use of locks to
289  * when we are reading the caps of another process.
290  */
291 static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
292                                      kernel_cap_t *pIp, kernel_cap_t *pPp)
293 {
294         int ret;
295
296         if (pid && (pid != task_pid_vnr(current))) {
297                 struct task_struct *target;
298
299                 spin_lock(&task_capability_lock);
300                 read_lock(&tasklist_lock);
301
302                 target = find_task_by_vpid(pid);
303                 if (!target)
304                         ret = -ESRCH;
305                 else
306                         ret = security_capget(target, pEp, pIp, pPp);
307
308                 read_unlock(&tasklist_lock);
309                 spin_unlock(&task_capability_lock);
310         } else
311                 ret = security_capget(current, pEp, pIp, pPp);
312
313         return ret;
314 }
315
316 /*
317  * With filesystem capability support configured, the kernel does not
318  * permit the changing of capabilities in one process by another
319  * process. (CAP_SETPCAP has much less broad semantics when configured
320  * this way.)
321  */
322 static inline int do_sys_capset_other_tasks(pid_t pid,
323                                             kernel_cap_t *effective,
324                                             kernel_cap_t *inheritable,
325                                             kernel_cap_t *permitted)
326 {
327         return -EPERM;
328 }
329
330 #endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
331
332 /*
333  * Atomically modify the effective capabilities returning the original
334  * value. No permission check is performed here - it is assumed that the
335  * caller is permitted to set the desired effective capabilities.
336  */
337 kernel_cap_t cap_set_effective(const kernel_cap_t pE_new)
338 {
339         kernel_cap_t pE_old;
340
341         spin_lock(&task_capability_lock);
342
343         pE_old = current->cap_effective;
344         current->cap_effective = pE_new;
345
346         spin_unlock(&task_capability_lock);
347
348         return pE_old;
349 }
350
351 EXPORT_SYMBOL(cap_set_effective);
352
353 /**
354  * sys_capget - get the capabilities of a given process.
355  * @header: pointer to struct that contains capability version and
356  *      target pid data
357  * @dataptr: pointer to struct that contains the effective, permitted,
358  *      and inheritable capabilities that are returned
359  *
360  * Returns 0 on success and < 0 on error.
361  */
362 asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
363 {
364         int ret = 0;
365         pid_t pid;
366         unsigned tocopy;
367         kernel_cap_t pE, pI, pP;
368
369         ret = cap_validate_magic(header, &tocopy);
370         if (ret != 0)
371                 return ret;
372
373         if (get_user(pid, &header->pid))
374                 return -EFAULT;
375
376         if (pid < 0)
377                 return -EINVAL;
378
379         ret = cap_get_target_pid(pid, &pE, &pI, &pP);
380
381         if (!ret) {
382                 struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
383                 unsigned i;
384
385                 for (i = 0; i < tocopy; i++) {
386                         kdata[i].effective = pE.cap[i];
387                         kdata[i].permitted = pP.cap[i];
388                         kdata[i].inheritable = pI.cap[i];
389                 }
390
391                 /*
392                  * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
393                  * we silently drop the upper capabilities here. This
394                  * has the effect of making older libcap
395                  * implementations implicitly drop upper capability
396                  * bits when they perform a: capget/modify/capset
397                  * sequence.
398                  *
399                  * This behavior is considered fail-safe
400                  * behavior. Upgrading the application to a newer
401                  * version of libcap will enable access to the newer
402                  * capabilities.
403                  *
404                  * An alternative would be to return an error here
405                  * (-ERANGE), but that causes legacy applications to
406                  * unexpectidly fail; the capget/modify/capset aborts
407                  * before modification is attempted and the application
408                  * fails.
409                  */
410                 if (copy_to_user(dataptr, kdata, tocopy
411                                  * sizeof(struct __user_cap_data_struct))) {
412                         return -EFAULT;
413                 }
414         }
415
416         return ret;
417 }
418
419 /**
420  * sys_capset - set capabilities for a process or (*) a group of processes
421  * @header: pointer to struct that contains capability version and
422  *      target pid data
423  * @data: pointer to struct that contains the effective, permitted,
424  *      and inheritable capabilities
425  *
426  * Set capabilities for a given process, all processes, or all
427  * processes in a given process group.
428  *
429  * The restrictions on setting capabilities are specified as:
430  *
431  * [pid is for the 'target' task.  'current' is the calling task.]
432  *
433  * I: any raised capabilities must be a subset of the (old current) permitted
434  * P: any raised capabilities must be a subset of the (old current) permitted
435  * E: must be set to a subset of (new target) permitted
436  *
437  * Returns 0 on success and < 0 on error.
438  */
439 asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
440 {
441         struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
442         unsigned i, tocopy;
443         kernel_cap_t inheritable, permitted, effective;
444         int ret;
445         pid_t pid;
446
447         ret = cap_validate_magic(header, &tocopy);
448         if (ret != 0)
449                 return ret;
450
451         if (get_user(pid, &header->pid))
452                 return -EFAULT;
453
454         if (copy_from_user(&kdata, data, tocopy
455                            * sizeof(struct __user_cap_data_struct))) {
456                 return -EFAULT;
457         }
458
459         for (i = 0; i < tocopy; i++) {
460                 effective.cap[i] = kdata[i].effective;
461                 permitted.cap[i] = kdata[i].permitted;
462                 inheritable.cap[i] = kdata[i].inheritable;
463         }
464         while (i < _KERNEL_CAPABILITY_U32S) {
465                 effective.cap[i] = 0;
466                 permitted.cap[i] = 0;
467                 inheritable.cap[i] = 0;
468                 i++;
469         }
470
471         if (pid && (pid != task_pid_vnr(current)))
472                 ret = do_sys_capset_other_tasks(pid, &effective, &inheritable,
473                                                 &permitted);
474         else {
475                 /*
476                  * This lock is required even when filesystem
477                  * capability support is configured - it protects the
478                  * sys_capget() call from returning incorrect data in
479                  * the case that the targeted process is not the
480                  * current one.
481                  */
482                 spin_lock(&task_capability_lock);
483
484                 ret = security_capset_check(current, &effective, &inheritable,
485                                             &permitted);
486                 /*
487                  * Having verified that the proposed changes are
488                  * legal, we now put them into effect.
489                  */
490                 if (!ret)
491                         security_capset_set(current, &effective, &inheritable,
492                                             &permitted);
493                 spin_unlock(&task_capability_lock);
494         }
495
496
497         return ret;
498 }
499
500 /**
501  * capable - Determine if the current task has a superior capability in effect
502  * @cap: The capability to be tested for
503  *
504  * Return true if the current task has the given superior capability currently
505  * available for use, false if not.
506  *
507  * This sets PF_SUPERPRIV on the task if the capability is available on the
508  * assumption that it's about to be used.
509  */
510 int capable(int cap)
511 {
512         if (has_capability(current, cap)) {
513                 current->flags |= PF_SUPERPRIV;
514                 return 1;
515         }
516         return 0;
517 }
518 EXPORT_SYMBOL(capable);