- patches.suse/slab-handle-memoryless-nodes-v2a.patch: Refresh.
[linux-flexiantxendom0-3.2.10.git] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/security.h>
28 #include <linux/ctype.h>
29 #include <linux/kmemcheck.h>
30 #include <linux/fs.h>
31 #include <linux/init.h>
32 #include <linux/kernel.h>
33 #include <linux/kobject.h>
34 #include <linux/net.h>
35 #include <linux/sysrq.h>
36 #include <linux/highuid.h>
37 #include <linux/writeback.h>
38 #include <linux/ratelimit.h>
39 #include <linux/hugetlb.h>
40 #include <linux/initrd.h>
41 #include <linux/key.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/vmstat.h>
47 #include <linux/nfs_fs.h>
48 #include <linux/acpi.h>
49 #include <linux/reboot.h>
50 #include <linux/ftrace.h>
51 #include <linux/slow-work.h>
52 #include <linux/perf_event.h>
53
54 #include <asm/uaccess.h>
55 #include <asm/processor.h>
56
57 #ifdef CONFIG_X86
58 #include <asm/nmi.h>
59 #include <asm/stacktrace.h>
60 #include <asm/io.h>
61 #endif
62
63
64 #if defined(CONFIG_SYSCTL)
65
66 /* External variables not in a header file. */
67 extern int C_A_D;
68 extern int print_fatal_signals;
69 extern int sysctl_overcommit_memory;
70 extern int sysctl_overcommit_ratio;
71 extern int sysctl_panic_on_oom;
72 extern int sysctl_oom_kill_allocating_task;
73 extern int sysctl_oom_dump_tasks;
74 extern int max_threads;
75 extern int core_uses_pid;
76 extern int suid_dumpable;
77 extern char core_pattern[];
78 extern unsigned int core_pipe_limit;
79 extern int pid_max;
80 extern int min_free_kbytes;
81 extern int pid_max_min, pid_max_max;
82 extern int sysctl_drop_caches;
83 extern int percpu_pagelist_fraction;
84 extern int compat_log;
85 extern int latencytop_enabled;
86 extern int sysctl_nr_open_min, sysctl_nr_open_max;
87 #ifndef CONFIG_MMU
88 extern int sysctl_nr_trim_pages;
89 #endif
90 #ifdef CONFIG_RCU_TORTURE_TEST
91 extern int rcutorture_runnable;
92 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
93 #ifdef CONFIG_BLOCK
94 extern int blk_iopoll_enabled;
95 #endif
96
97 /* Constants used for minimum and maximum */
/*
 * The tables in this file hand range limits to their handlers as pointers
 * (.extra1 is given the lower bound, .extra2 the upper bound), so every
 * limit lives in an addressable static object instead of being a literal.
 */
98 #ifdef CONFIG_DETECT_SOFTLOCKUP
/* Bounds for the "softlockup_thresh" entry: -1 (neg_one) up to 60 (sixty). */
99 static int sixty = 60;
100 static int neg_one = -1;
101 #endif
102
/* No initialiser: static storage duration, so zero really is 0. */
103 static int zero;
/*
 * one and two carry __maybe_unused -- presumably because all of their
 * users can be configured out; confirm before dropping the annotation.
 */
104 static int __maybe_unused one = 1;
105 static int __maybe_unused two = 2;
106 static unsigned long one_ul = 1;
107 static int one_hundred = 100;
108 #ifdef CONFIG_PRINTK
/* Upper bound for the "printk_delay" entry (lower bound is zero). */
109 static int ten_thousand = 10000;
110 #endif
111
112 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
113 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
114
115 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
/*
 * In kern_table, "overflowuid" and "overflowgid" both use minolduid
 * (implicitly 0) as the lower bound and maxolduid (65535) as the upper.
 */
116 static int maxolduid = 65535;
117 static int minolduid;
118 static int min_percpu_pagelist_fract = 8;
119
/*
 * Addressable copy of NGROUPS_MAX; kern_table exposes it as the read-only
 * (mode 0444) "ngroups_max" entry.
 */
120 static int ngroups_max = NGROUPS_MAX;
121
122 #ifdef CONFIG_MODULES
123 extern char modprobe_path[];
124 extern int modules_disabled;
125 #endif
126 #ifdef CONFIG_CHR_DEV_SG
127 extern int sg_big_buff;
128 #endif
129
130 #ifdef CONFIG_SPARC
131 #include <asm/system.h>
132 #endif
133
134 #ifdef CONFIG_SPARC64
135 extern int sysctl_tsb_ratio;
136 #endif
137
138 #ifdef __hppa__
139 extern int pwrsw_enabled;
140 extern int unaligned_enabled;
141 #endif
142
143 #ifdef CONFIG_S390
144 #ifdef CONFIG_MATHEMU
145 extern int sysctl_ieee_emulation_warnings;
146 #endif
147 extern int sysctl_userprocess_debug;
148 extern int spin_retry;
149 #endif
150
151 #ifdef CONFIG_BSD_PROCESS_ACCT
152 extern int acct_parm[];
153 #endif
154
155 #ifdef CONFIG_IA64
156 extern int no_unaligned_warning;
157 extern int unaligned_dump_stack;
158 #endif
159
160 extern struct ratelimit_state printk_ratelimit_state;
161
162 #ifdef CONFIG_RT_MUTEXES
163 extern int max_lock_depth;
164 #endif
165
166 #ifdef CONFIG_PROC_SYSCTL
167 static int proc_do_cad_pid(struct ctl_table *table, int write,
168                   void __user *buffer, size_t *lenp, loff_t *ppos);
169 static int proc_taint(struct ctl_table *table, int write,
170                                void __user *buffer, size_t *lenp, loff_t *ppos);
171 #endif
172
/*
 * Statically constructed root of the sysctl tree.
 *
 * root_table_header and sysctl_table_root refer to each other in their
 * initialisers, which is why both root_table and sysctl_table_root are
 * declared (without initialiser) before root_table_header is defined.
 */
173 static struct ctl_table root_table[];
174 static struct ctl_table_root sysctl_table_root;
/*
 * Header describing root_table.  It starts with .count = 1, belongs to
 * sysctl_table_root and is placed in that root's default_set.
 *
 * .ctl_entry is initialised from the address of the default set's list
 * head, while that list head (below) is initialised from the address of
 * .ctl_entry.  Assuming LIST_HEAD_INIT has its usual <linux/list.h>
 * meaning (both links point at the argument), the two nodes form a
 * two-element ring, i.e. this header is the only entry on the default
 * set's list at build time -- TODO confirm against list.h.
 */
175 static struct ctl_table_header root_table_header = {
176         .count = 1,
177         .ctl_table = root_table,
178         .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
179         .root = &sysctl_table_root,
180         .set = &sysctl_table_root.default_set,
181 };
/*
 * The root itself: .root_list is initialised from its own address
 * (presumably an empty list), and default_set.list is linked to the
 * header above.
 */
182 static struct ctl_table_root sysctl_table_root = {
183         .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
184         .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
185 };
186
187 static struct ctl_table kern_table[];
188 static struct ctl_table vm_table[];
189 static struct ctl_table fs_table[];
190 static struct ctl_table debug_table[];
191 static struct ctl_table dev_table[];
192 extern struct ctl_table random_table[];
193 #ifdef CONFIG_INOTIFY_USER
194 extern struct ctl_table inotify_table[];
195 #endif
196 #ifdef CONFIG_EPOLL
197 extern struct ctl_table epoll_table[];
198 #endif
199
200 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
201 int sysctl_legacy_va_layout;
202 #endif
203
204 extern int prove_locking;
205 extern int lock_stat;
206
207 /* The default sysctl tables: */
208
/*
 * root_table - top level of the default sysctl tree.
 *
 * Each entry names one directory ("kernel", "vm", "fs", "debug", "dev"),
 * gives it mode 0555 (readable and searchable by everyone, writable by
 * nobody) and attaches the matching sub-table through .child.  None of
 * the entries sets .data or .proc_handler; they only group their
 * children.  The sub-tables are forward-declared earlier in this file.
 *
 * The final empty entry { } is presumably the end-of-table sentinel --
 * confirm against the code that walks ctl_table arrays.
 */
209 static struct ctl_table root_table[] = {
210         {
211                 .procname       = "kernel",
212                 .mode           = 0555,
213                 .child          = kern_table,
214         },
215         {
216                 .procname       = "vm",
217                 .mode           = 0555,
218                 .child          = vm_table,
219         },
220         {
221                 .procname       = "fs",
222                 .mode           = 0555,
223                 .child          = fs_table,
224         },
225         {
226                 .procname       = "debug",
227                 .mode           = 0555,
228                 .child          = debug_table,
229         },
230         {
231                 .procname       = "dev",
232                 .mode           = 0555,
233                 .child          = dev_table,
234         },
235 /*
236  * NOTE: do not add new entries to this table unless you have read
237  * Documentation/sysctl/ctl_unnumbered.txt
238  */
239         { }
240 };
241
/*
 * Lower/upper bounds for the scheduler tunables that kern_table only
 * provides when CONFIG_SCHED_DEBUG is set.  Each pair is passed as
 * .extra1/.extra2 to an entry handled by sched_proc_update_handler:
 *
 *   min/max_sched_granularity_ns   - "sched_min_granularity_ns" and,
 *                                    reusing the same pair,
 *                                    "sched_latency_ns"
 *   min/max_wakeup_granularity_ns  - "sched_wakeup_granularity_ns"
 *   min/max_sched_tunable_scaling  - "sched_tunable_scaling"
 *   min/max_sched_shares_ratelimit - "sched_shares_ratelimit"
 *
 * min_wakeup_granularity_ns has no initialiser; as a static object it
 * is therefore 0, matching its "0 usecs" comment.
 */
242 #ifdef CONFIG_SCHED_DEBUG
243 static int min_sched_granularity_ns = 100000;           /* 100 usecs */
244 static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
245 static int min_wakeup_granularity_ns;                   /* 0 usecs */
246 static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
/* Scaling mode range: SCHED_TUNABLESCALING_NONE .. SCHED_TUNABLESCALING_END-1. */
247 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
248 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
249 static int min_sched_shares_ratelimit = 100000; /* 100 usec */
250 static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
251 #endif
252
253 static struct ctl_table kern_table[] = {
254         {
255                 .procname       = "sched_child_runs_first",
256                 .data           = &sysctl_sched_child_runs_first,
257                 .maxlen         = sizeof(unsigned int),
258                 .mode           = 0644,
259                 .proc_handler   = proc_dointvec,
260         },
261 #ifdef CONFIG_SCHED_DEBUG
262         {
263                 .procname       = "sched_min_granularity_ns",
264                 .data           = &sysctl_sched_min_granularity,
265                 .maxlen         = sizeof(unsigned int),
266                 .mode           = 0644,
267                 .proc_handler   = sched_proc_update_handler,
268                 .extra1         = &min_sched_granularity_ns,
269                 .extra2         = &max_sched_granularity_ns,
270         },
271         {
272                 .procname       = "sched_latency_ns",
273                 .data           = &sysctl_sched_latency,
274                 .maxlen         = sizeof(unsigned int),
275                 .mode           = 0644,
276                 .proc_handler   = sched_proc_update_handler,
277                 .extra1         = &min_sched_granularity_ns,
278                 .extra2         = &max_sched_granularity_ns,
279         },
280         {
281                 .procname       = "sched_wakeup_granularity_ns",
282                 .data           = &sysctl_sched_wakeup_granularity,
283                 .maxlen         = sizeof(unsigned int),
284                 .mode           = 0644,
285                 .proc_handler   = sched_proc_update_handler,
286                 .extra1         = &min_wakeup_granularity_ns,
287                 .extra2         = &max_wakeup_granularity_ns,
288         },
289         {
290                 .procname       = "sched_shares_ratelimit",
291                 .data           = &sysctl_sched_shares_ratelimit,
292                 .maxlen         = sizeof(unsigned int),
293                 .mode           = 0644,
294                 .proc_handler   = sched_proc_update_handler,
295                 .extra1         = &min_sched_shares_ratelimit,
296                 .extra2         = &max_sched_shares_ratelimit,
297         },
298         {
299                 .procname       = "sched_tunable_scaling",
300                 .data           = &sysctl_sched_tunable_scaling,
301                 .maxlen         = sizeof(enum sched_tunable_scaling),
302                 .mode           = 0644,
303                 .proc_handler   = sched_proc_update_handler,
304                 .extra1         = &min_sched_tunable_scaling,
305                 .extra2         = &max_sched_tunable_scaling,
306         },
307         {
308                 .procname       = "sched_shares_thresh",
309                 .data           = &sysctl_sched_shares_thresh,
310                 .maxlen         = sizeof(unsigned int),
311                 .mode           = 0644,
312                 .proc_handler   = proc_dointvec_minmax,
313                 .extra1         = &zero,
314         },
315         {
316                 .procname       = "sched_migration_cost",
317                 .data           = &sysctl_sched_migration_cost,
318                 .maxlen         = sizeof(unsigned int),
319                 .mode           = 0644,
320                 .proc_handler   = proc_dointvec,
321         },
322         {
323                 .procname       = "sched_nr_migrate",
324                 .data           = &sysctl_sched_nr_migrate,
325                 .maxlen         = sizeof(unsigned int),
326                 .mode           = 0644,
327                 .proc_handler   = proc_dointvec,
328         },
329         {
330                 .procname       = "sched_time_avg",
331                 .data           = &sysctl_sched_time_avg,
332                 .maxlen         = sizeof(unsigned int),
333                 .mode           = 0644,
334                 .proc_handler   = proc_dointvec,
335         },
336         {
337                 .procname       = "timer_migration",
338                 .data           = &sysctl_timer_migration,
339                 .maxlen         = sizeof(unsigned int),
340                 .mode           = 0644,
341                 .proc_handler   = proc_dointvec_minmax,
342                 .extra1         = &zero,
343                 .extra2         = &one,
344         },
345 #endif
346         {
347                 .procname       = "sched_rt_period_us",
348                 .data           = &sysctl_sched_rt_period,
349                 .maxlen         = sizeof(unsigned int),
350                 .mode           = 0644,
351                 .proc_handler   = sched_rt_handler,
352         },
353         {
354                 .procname       = "sched_rt_runtime_us",
355                 .data           = &sysctl_sched_rt_runtime,
356                 .maxlen         = sizeof(int),
357                 .mode           = 0644,
358                 .proc_handler   = sched_rt_handler,
359         },
360         {
361                 .procname       = "sched_compat_yield",
362                 .data           = &sysctl_sched_compat_yield,
363                 .maxlen         = sizeof(unsigned int),
364                 .mode           = 0644,
365                 .proc_handler   = proc_dointvec,
366         },
367 #ifdef CONFIG_PROVE_LOCKING
368         {
369                 .procname       = "prove_locking",
370                 .data           = &prove_locking,
371                 .maxlen         = sizeof(int),
372                 .mode           = 0644,
373                 .proc_handler   = proc_dointvec,
374         },
375 #endif
376 #ifdef CONFIG_LOCK_STAT
377         {
378                 .procname       = "lock_stat",
379                 .data           = &lock_stat,
380                 .maxlen         = sizeof(int),
381                 .mode           = 0644,
382                 .proc_handler   = proc_dointvec,
383         },
384 #endif
385         {
386                 .procname       = "panic",
387                 .data           = &panic_timeout,
388                 .maxlen         = sizeof(int),
389                 .mode           = 0644,
390                 .proc_handler   = proc_dointvec,
391         },
392         {
393                 .procname       = "core_uses_pid",
394                 .data           = &core_uses_pid,
395                 .maxlen         = sizeof(int),
396                 .mode           = 0644,
397                 .proc_handler   = proc_dointvec,
398         },
399         {
400                 .procname       = "core_pattern",
401                 .data           = core_pattern,
402                 .maxlen         = CORENAME_MAX_SIZE,
403                 .mode           = 0644,
404                 .proc_handler   = proc_dostring,
405         },
406         {
407                 .procname       = "core_pipe_limit",
408                 .data           = &core_pipe_limit,
409                 .maxlen         = sizeof(unsigned int),
410                 .mode           = 0644,
411                 .proc_handler   = proc_dointvec,
412         },
413 #ifdef CONFIG_PROC_SYSCTL
414         {
415                 .procname       = "tainted",
416                 .maxlen         = sizeof(long),
417                 .mode           = 0644,
418                 .proc_handler   = proc_taint,
419         },
420 #endif
421 #ifdef CONFIG_LATENCYTOP
422         {
423                 .procname       = "latencytop",
424                 .data           = &latencytop_enabled,
425                 .maxlen         = sizeof(int),
426                 .mode           = 0644,
427                 .proc_handler   = proc_dointvec,
428         },
429 #endif
430 #ifdef CONFIG_BLK_DEV_INITRD
431         {
432                 .procname       = "real-root-dev",
433                 .data           = &real_root_dev,
434                 .maxlen         = sizeof(int),
435                 .mode           = 0644,
436                 .proc_handler   = proc_dointvec,
437         },
438 #endif
439         {
440                 .procname       = "print-fatal-signals",
441                 .data           = &print_fatal_signals,
442                 .maxlen         = sizeof(int),
443                 .mode           = 0644,
444                 .proc_handler   = proc_dointvec,
445         },
446 #ifdef CONFIG_SPARC
447         {
448                 .procname       = "reboot-cmd",
449                 .data           = reboot_command,
450                 .maxlen         = 256,
451                 .mode           = 0644,
452                 .proc_handler   = proc_dostring,
453         },
454         {
455                 .procname       = "stop-a",
456                 .data           = &stop_a_enabled,
457                 .maxlen         = sizeof (int),
458                 .mode           = 0644,
459                 .proc_handler   = proc_dointvec,
460         },
461         {
462                 .procname       = "scons-poweroff",
463                 .data           = &scons_pwroff,
464                 .maxlen         = sizeof (int),
465                 .mode           = 0644,
466                 .proc_handler   = proc_dointvec,
467         },
468 #endif
469 #ifdef CONFIG_SPARC64
470         {
471                 .procname       = "tsb-ratio",
472                 .data           = &sysctl_tsb_ratio,
473                 .maxlen         = sizeof (int),
474                 .mode           = 0644,
475                 .proc_handler   = proc_dointvec,
476         },
477 #endif
478 #ifdef __hppa__
479         {
480                 .procname       = "soft-power",
481                 .data           = &pwrsw_enabled,
482                 .maxlen         = sizeof (int),
483                 .mode           = 0644,
484                 .proc_handler   = proc_dointvec,
485         },
486         {
487                 .procname       = "unaligned-trap",
488                 .data           = &unaligned_enabled,
489                 .maxlen         = sizeof (int),
490                 .mode           = 0644,
491                 .proc_handler   = proc_dointvec,
492         },
493 #endif
494         {
495                 .procname       = "ctrl-alt-del",
496                 .data           = &C_A_D,
497                 .maxlen         = sizeof(int),
498                 .mode           = 0644,
499                 .proc_handler   = proc_dointvec,
500         },
501 #ifdef CONFIG_FUNCTION_TRACER
502         {
503                 .procname       = "ftrace_enabled",
504                 .data           = &ftrace_enabled,
505                 .maxlen         = sizeof(int),
506                 .mode           = 0644,
507                 .proc_handler   = ftrace_enable_sysctl,
508         },
509 #endif
510 #ifdef CONFIG_STACK_TRACER
511         {
512                 .procname       = "stack_tracer_enabled",
513                 .data           = &stack_tracer_enabled,
514                 .maxlen         = sizeof(int),
515                 .mode           = 0644,
516                 .proc_handler   = stack_trace_sysctl,
517         },
518 #endif
519 #ifdef CONFIG_TRACING
520         {
521                 .procname       = "ftrace_dump_on_oops",
522                 .data           = &ftrace_dump_on_oops,
523                 .maxlen         = sizeof(int),
524                 .mode           = 0644,
525                 .proc_handler   = proc_dointvec,
526         },
527 #endif
528 #ifdef CONFIG_MODULES
529         {
530                 .procname       = "modprobe",
531                 .data           = &modprobe_path,
532                 .maxlen         = KMOD_PATH_LEN,
533                 .mode           = 0644,
534                 .proc_handler   = proc_dostring,
535         },
536         {
537                 .procname       = "modules_disabled",
538                 .data           = &modules_disabled,
539                 .maxlen         = sizeof(int),
540                 .mode           = 0644,
541                 /* only handle a transition from default "0" to "1" */
542                 .proc_handler   = proc_dointvec_minmax,
543                 .extra1         = &one,
544                 .extra2         = &one,
545         },
546 #endif
547 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
548         {
549                 .procname       = "hotplug",
550                 .data           = &uevent_helper,
551                 .maxlen         = UEVENT_HELPER_PATH_LEN,
552                 .mode           = 0644,
553                 .proc_handler   = proc_dostring,
554         },
555 #endif
556 #ifdef CONFIG_CHR_DEV_SG
557         {
558                 .procname       = "sg-big-buff",
559                 .data           = &sg_big_buff,
560                 .maxlen         = sizeof (int),
561                 .mode           = 0444,
562                 .proc_handler   = proc_dointvec,
563         },
564 #endif
565 #ifdef CONFIG_BSD_PROCESS_ACCT
566         {
567                 .procname       = "acct",
568                 .data           = &acct_parm,
569                 .maxlen         = 3*sizeof(int),
570                 .mode           = 0644,
571                 .proc_handler   = proc_dointvec,
572         },
573 #endif
574 #ifdef CONFIG_MAGIC_SYSRQ
575         {
576                 .procname       = "sysrq",
577                 .data           = &__sysrq_enabled,
578                 .maxlen         = sizeof (int),
579                 .mode           = 0644,
580                 .proc_handler   = proc_dointvec,
581         },
582 #endif
583 #ifdef CONFIG_PROC_SYSCTL
584         {
585                 .procname       = "cad_pid",
586                 .data           = NULL,
587                 .maxlen         = sizeof (int),
588                 .mode           = 0600,
589                 .proc_handler   = proc_do_cad_pid,
590         },
591 #endif
592         {
593                 .procname       = "threads-max",
594                 .data           = &max_threads,
595                 .maxlen         = sizeof(int),
596                 .mode           = 0644,
597                 .proc_handler   = proc_dointvec,
598         },
599         {
600                 .procname       = "random",
601                 .mode           = 0555,
602                 .child          = random_table,
603         },
604         {
605                 .procname       = "overflowuid",
606                 .data           = &overflowuid,
607                 .maxlen         = sizeof(int),
608                 .mode           = 0644,
609                 .proc_handler   = proc_dointvec_minmax,
610                 .extra1         = &minolduid,
611                 .extra2         = &maxolduid,
612         },
613         {
614                 .procname       = "overflowgid",
615                 .data           = &overflowgid,
616                 .maxlen         = sizeof(int),
617                 .mode           = 0644,
618                 .proc_handler   = proc_dointvec_minmax,
619                 .extra1         = &minolduid,
620                 .extra2         = &maxolduid,
621         },
622 #ifdef CONFIG_S390
623 #ifdef CONFIG_MATHEMU
624         {
625                 .procname       = "ieee_emulation_warnings",
626                 .data           = &sysctl_ieee_emulation_warnings,
627                 .maxlen         = sizeof(int),
628                 .mode           = 0644,
629                 .proc_handler   = proc_dointvec,
630         },
631 #endif
632         {
633                 .procname       = "userprocess_debug",
634                 .data           = &sysctl_userprocess_debug,
635                 .maxlen         = sizeof(int),
636                 .mode           = 0644,
637                 .proc_handler   = proc_dointvec,
638         },
639 #endif
640         {
641                 .procname       = "pid_max",
642                 .data           = &pid_max,
643                 .maxlen         = sizeof (int),
644                 .mode           = 0644,
645                 .proc_handler   = proc_dointvec_minmax,
646                 .extra1         = &pid_max_min,
647                 .extra2         = &pid_max_max,
648         },
649 #if defined(CONFIG_MODULES) && defined(CONFIG_ENTERPRISE_SUPPORT)
650         {
651                 .ctl_name       = CTL_UNNUMBERED,
652                 .procname       = "unsupported",
653                 .data           = &unsupported,
654                 .maxlen         = sizeof(int),
655                 .mode           = 0644,
656                 .proc_handler   = &proc_dointvec,
657         },
658 #endif
659         {
660                 .procname       = "panic_on_oops",
661                 .data           = &panic_on_oops,
662                 .maxlen         = sizeof(int),
663                 .mode           = 0644,
664                 .proc_handler   = proc_dointvec,
665         },
666 #if defined CONFIG_PRINTK
667         {
668                 .procname       = "printk",
669                 .data           = &console_loglevel,
670                 .maxlen         = 4*sizeof(int),
671                 .mode           = 0644,
672                 .proc_handler   = proc_dointvec,
673         },
674         {
675                 .procname       = "printk_ratelimit",
676                 .data           = &printk_ratelimit_state.interval,
677                 .maxlen         = sizeof(int),
678                 .mode           = 0644,
679                 .proc_handler   = proc_dointvec_jiffies,
680         },
681         {
682                 .procname       = "printk_ratelimit_burst",
683                 .data           = &printk_ratelimit_state.burst,
684                 .maxlen         = sizeof(int),
685                 .mode           = 0644,
686                 .proc_handler   = proc_dointvec,
687         },
688         {
689                 .procname       = "printk_delay",
690                 .data           = &printk_delay_msec,
691                 .maxlen         = sizeof(int),
692                 .mode           = 0644,
693                 .proc_handler   = proc_dointvec_minmax,
694                 .extra1         = &zero,
695                 .extra2         = &ten_thousand,
696         },
697 #endif
698         {
699                 .procname       = "ngroups_max",
700                 .data           = &ngroups_max,
701                 .maxlen         = sizeof (int),
702                 .mode           = 0444,
703                 .proc_handler   = proc_dointvec,
704         },
705 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
706         {
707                 .procname       = "unknown_nmi_panic",
708                 .data           = &unknown_nmi_panic,
709                 .maxlen         = sizeof (int),
710                 .mode           = 0644,
711                 .proc_handler   = proc_dointvec,
712         },
713         {
714                 .procname       = "nmi_watchdog",
715                 .data           = &nmi_watchdog_enabled,
716                 .maxlen         = sizeof (int),
717                 .mode           = 0644,
718                 .proc_handler   = proc_nmi_enabled,
719         },
720 #endif
721 #if defined(CONFIG_X86)
722         {
723                 .procname       = "panic_on_unrecovered_nmi",
724                 .data           = &panic_on_unrecovered_nmi,
725                 .maxlen         = sizeof(int),
726                 .mode           = 0644,
727                 .proc_handler   = proc_dointvec,
728         },
729         {
730                 .procname       = "panic_on_io_nmi",
731                 .data           = &panic_on_io_nmi,
732                 .maxlen         = sizeof(int),
733                 .mode           = 0644,
734                 .proc_handler   = proc_dointvec,
735         },
736         {
737                 .procname       = "bootloader_type",
738                 .data           = &bootloader_type,
739                 .maxlen         = sizeof (int),
740                 .mode           = 0444,
741                 .proc_handler   = proc_dointvec,
742         },
743         {
744                 .procname       = "bootloader_version",
745                 .data           = &bootloader_version,
746                 .maxlen         = sizeof (int),
747                 .mode           = 0444,
748                 .proc_handler   = proc_dointvec,
749         },
750         {
751                 .procname       = "kstack_depth_to_print",
752                 .data           = &kstack_depth_to_print,
753                 .maxlen         = sizeof(int),
754                 .mode           = 0644,
755                 .proc_handler   = proc_dointvec,
756         },
757         {
758                 .procname       = "io_delay_type",
759                 .data           = &io_delay_type,
760                 .maxlen         = sizeof(int),
761                 .mode           = 0644,
762                 .proc_handler   = proc_dointvec,
763         },
764 #endif
765 #if defined(CONFIG_MMU)
766         {
767                 .procname       = "randomize_va_space",
768                 .data           = &randomize_va_space,
769                 .maxlen         = sizeof(int),
770                 .mode           = 0644,
771                 .proc_handler   = proc_dointvec,
772         },
773 #endif
774         {
775                 .procname       = "suid_dumpable",
776                 .data           = &suid_dumpable,
777                 .maxlen         = sizeof(int),
778                 .mode           = 0644,
779                 .proc_handler   = proc_dointvec,
780         },
781 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
782         {
783                 .procname       = "spin_retry",
784                 .data           = &spin_retry,
785                 .maxlen         = sizeof (int),
786                 .mode           = 0644,
787                 .proc_handler   = proc_dointvec,
788         },
789 #endif
790 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
791         {
792                 .procname       = "acpi_video_flags",
793                 .data           = &acpi_realmode_flags,
794                 .maxlen         = sizeof (unsigned long),
795                 .mode           = 0644,
796                 .proc_handler   = proc_doulongvec_minmax,
797         },
798 #endif
799 #ifdef CONFIG_IA64
800         {
801                 .procname       = "ignore-unaligned-usertrap",
802                 .data           = &no_unaligned_warning,
803                 .maxlen         = sizeof (int),
804                 .mode           = 0644,
805                 .proc_handler   = proc_dointvec,
806         },
807         {
808                 .procname       = "unaligned-dump-stack",
809                 .data           = &unaligned_dump_stack,
810                 .maxlen         = sizeof (int),
811                 .mode           = 0644,
812                 .proc_handler   = proc_dointvec,
813         },
814 #endif
815 #ifdef CONFIG_DETECT_SOFTLOCKUP
816         {
817                 .procname       = "softlockup_panic",
818                 .data           = &softlockup_panic,
819                 .maxlen         = sizeof(int),
820                 .mode           = 0644,
821                 .proc_handler   = proc_dointvec_minmax,
822                 .extra1         = &zero,
823                 .extra2         = &one,
824         },
825         {
826                 .procname       = "softlockup_thresh",
827                 .data           = &softlockup_thresh,
828                 .maxlen         = sizeof(int),
829                 .mode           = 0644,
830                 .proc_handler   = proc_dosoftlockup_thresh,
831                 .extra1         = &neg_one,
832                 .extra2         = &sixty,
833         },
834 #endif
835 #ifdef CONFIG_DETECT_HUNG_TASK
836         {
837                 .procname       = "hung_task_panic",
838                 .data           = &sysctl_hung_task_panic,
839                 .maxlen         = sizeof(int),
840                 .mode           = 0644,
841                 .proc_handler   = proc_dointvec_minmax,
842                 .extra1         = &zero,
843                 .extra2         = &one,
844         },
845         {
846                 .procname       = "hung_task_check_count",
847                 .data           = &sysctl_hung_task_check_count,
848                 .maxlen         = sizeof(unsigned long),
849                 .mode           = 0644,
850                 .proc_handler   = proc_doulongvec_minmax,
851         },
852         {
853                 .procname       = "hung_task_timeout_secs",
854                 .data           = &sysctl_hung_task_timeout_secs,
855                 .maxlen         = sizeof(unsigned long),
856                 .mode           = 0644,
857                 .proc_handler   = proc_dohung_task_timeout_secs,
858         },
859         {
860                 .procname       = "hung_task_warnings",
861                 .data           = &sysctl_hung_task_warnings,
862                 .maxlen         = sizeof(unsigned long),
863                 .mode           = 0644,
864                 .proc_handler   = proc_doulongvec_minmax,
865         },
866 #endif
867 #ifdef CONFIG_COMPAT
868         {
869                 .procname       = "compat-log",
870                 .data           = &compat_log,
871                 .maxlen         = sizeof (int),
872                 .mode           = 0644,
873                 .proc_handler   = proc_dointvec,
874         },
875 #endif
876 #ifdef CONFIG_RT_MUTEXES
877         {
878                 .procname       = "max_lock_depth",
879                 .data           = &max_lock_depth,
880                 .maxlen         = sizeof(int),
881                 .mode           = 0644,
882                 .proc_handler   = proc_dointvec,
883         },
884 #endif
885         {
886                 .procname       = "poweroff_cmd",
887                 .data           = &poweroff_cmd,
888                 .maxlen         = POWEROFF_CMD_PATH_LEN,
889                 .mode           = 0644,
890                 .proc_handler   = proc_dostring,
891         },
892 #ifdef CONFIG_KEYS
893         {
894                 .procname       = "keys",
895                 .mode           = 0555,
896                 .child          = key_sysctls,
897         },
898 #endif
899 #ifdef CONFIG_RCU_TORTURE_TEST
900         {
901                 .procname       = "rcutorture_runnable",
902                 .data           = &rcutorture_runnable,
903                 .maxlen         = sizeof(int),
904                 .mode           = 0644,
905                 .proc_handler   = proc_dointvec,
906         },
907 #endif
908 #ifdef CONFIG_SLOW_WORK
909         {
910                 .procname       = "slow-work",
911                 .mode           = 0555,
912                 .child          = slow_work_sysctls,
913         },
914 #endif
915 #ifdef CONFIG_PERF_EVENTS
916         {
917                 .procname       = "perf_event_paranoid",
918                 .data           = &sysctl_perf_event_paranoid,
919                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
920                 .mode           = 0644,
921                 .proc_handler   = proc_dointvec,
922         },
923         {
924                 .procname       = "perf_event_mlock_kb",
925                 .data           = &sysctl_perf_event_mlock,
926                 .maxlen         = sizeof(sysctl_perf_event_mlock),
927                 .mode           = 0644,
928                 .proc_handler   = proc_dointvec,
929         },
930         {
931                 .procname       = "perf_event_max_sample_rate",
932                 .data           = &sysctl_perf_event_sample_rate,
933                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
934                 .mode           = 0644,
935                 .proc_handler   = proc_dointvec,
936         },
937 #endif
938 #ifdef CONFIG_KMEMCHECK
939         {
940                 .procname       = "kmemcheck",
941                 .data           = &kmemcheck_enabled,
942                 .maxlen         = sizeof(int),
943                 .mode           = 0644,
944                 .proc_handler   = proc_dointvec,
945         },
946 #endif
947 #ifdef CONFIG_BLOCK
948         {
949                 .procname       = "blk_iopoll",
950                 .data           = &blk_iopoll_enabled,
951                 .maxlen         = sizeof(int),
952                 .mode           = 0644,
953                 .proc_handler   = proc_dointvec,
954         },
955 #endif
956 /*
957  * NOTE: do not add new entries to this table unless you have read
958  * Documentation/sysctl/ctl_unnumbered.txt
959  */
960         { }
961 };
962
963 static struct ctl_table vm_table[] = {
964         {
965                 .procname       = "overcommit_memory",
966                 .data           = &sysctl_overcommit_memory,
967                 .maxlen         = sizeof(sysctl_overcommit_memory),
968                 .mode           = 0644,
969                 .proc_handler   = proc_dointvec,
970         },
971         {
972                 .procname       = "panic_on_oom",
973                 .data           = &sysctl_panic_on_oom,
974                 .maxlen         = sizeof(sysctl_panic_on_oom),
975                 .mode           = 0644,
976                 .proc_handler   = proc_dointvec,
977         },
978         {
979                 .procname       = "oom_kill_allocating_task",
980                 .data           = &sysctl_oom_kill_allocating_task,
981                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
982                 .mode           = 0644,
983                 .proc_handler   = proc_dointvec,
984         },
985         {
986                 .procname       = "oom_dump_tasks",
987                 .data           = &sysctl_oom_dump_tasks,
988                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
989                 .mode           = 0644,
990                 .proc_handler   = proc_dointvec,
991         },
992         {
993                 .procname       = "overcommit_ratio",
994                 .data           = &sysctl_overcommit_ratio,
995                 .maxlen         = sizeof(sysctl_overcommit_ratio),
996                 .mode           = 0644,
997                 .proc_handler   = proc_dointvec,
998         },
999         {
1000                 .procname       = "page-cluster", 
1001                 .data           = &page_cluster,
1002                 .maxlen         = sizeof(int),
1003                 .mode           = 0644,
1004                 .proc_handler   = proc_dointvec,
1005         },
1006         {
1007                 .procname       = "dirty_background_ratio",
1008                 .data           = &dirty_background_ratio,
1009                 .maxlen         = sizeof(dirty_background_ratio),
1010                 .mode           = 0644,
1011                 .proc_handler   = dirty_background_ratio_handler,
1012                 .extra1         = &zero,
1013                 .extra2         = &one_hundred,
1014         },
1015         {
1016                 .procname       = "dirty_background_bytes",
1017                 .data           = &dirty_background_bytes,
1018                 .maxlen         = sizeof(dirty_background_bytes),
1019                 .mode           = 0644,
1020                 .proc_handler   = dirty_background_bytes_handler,
1021                 .extra1         = &one_ul,
1022         },
1023         {
1024                 .procname       = "dirty_ratio",
1025                 .data           = &vm_dirty_ratio,
1026                 .maxlen         = sizeof(vm_dirty_ratio),
1027                 .mode           = 0644,
1028                 .proc_handler   = dirty_ratio_handler,
1029                 .extra1         = &zero,
1030                 .extra2         = &one_hundred,
1031         },
1032         {
1033                 .procname       = "dirty_bytes",
1034                 .data           = &vm_dirty_bytes,
1035                 .maxlen         = sizeof(vm_dirty_bytes),
1036                 .mode           = 0644,
1037                 .proc_handler   = dirty_bytes_handler,
1038                 .extra1         = &dirty_bytes_min,
1039         },
1040         {
1041                 .procname       = "dirty_writeback_centisecs",
1042                 .data           = &dirty_writeback_interval,
1043                 .maxlen         = sizeof(dirty_writeback_interval),
1044                 .mode           = 0644,
1045                 .proc_handler   = dirty_writeback_centisecs_handler,
1046         },
1047         {
1048                 .procname       = "dirty_expire_centisecs",
1049                 .data           = &dirty_expire_interval,
1050                 .maxlen         = sizeof(dirty_expire_interval),
1051                 .mode           = 0644,
1052                 .proc_handler   = proc_dointvec,
1053         },
1054         {
1055                 .procname       = "nr_pdflush_threads",
1056                 .data           = &nr_pdflush_threads,
1057                 .maxlen         = sizeof nr_pdflush_threads,
1058                 .mode           = 0444 /* read-only*/,
1059                 .proc_handler   = proc_dointvec,
1060         },
1061         {
1062                 .procname       = "swappiness",
1063                 .data           = &vm_swappiness,
1064                 .maxlen         = sizeof(vm_swappiness),
1065                 .mode           = 0644,
1066                 .proc_handler   = proc_dointvec_minmax,
1067                 .extra1         = &zero,
1068                 .extra2         = &one_hundred,
1069         },
1070 #ifdef CONFIG_HUGETLB_PAGE
1071         {
1072                 .procname       = "nr_hugepages",
1073                 .data           = NULL,
1074                 .maxlen         = sizeof(unsigned long),
1075                 .mode           = 0644,
1076                 .proc_handler   = hugetlb_sysctl_handler,
1077                 .extra1         = (void *)&hugetlb_zero,
1078                 .extra2         = (void *)&hugetlb_infinity,
1079         },
1080 #ifdef CONFIG_NUMA
1081         {
1082                 .procname       = "nr_hugepages_mempolicy",
1083                 .data           = NULL,
1084                 .maxlen         = sizeof(unsigned long),
1085                 .mode           = 0644,
1086                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1087                 .extra1         = (void *)&hugetlb_zero,
1088                 .extra2         = (void *)&hugetlb_infinity,
1089         },
1090 #endif
1091          {
1092                 .procname       = "hugetlb_shm_group",
1093                 .data           = &sysctl_hugetlb_shm_group,
1094                 .maxlen         = sizeof(gid_t),
1095                 .mode           = 0644,
1096                 .proc_handler   = proc_dointvec,
1097          },
1098          {
1099                 .procname       = "hugepages_treat_as_movable",
1100                 .data           = &hugepages_treat_as_movable,
1101                 .maxlen         = sizeof(int),
1102                 .mode           = 0644,
1103                 .proc_handler   = hugetlb_treat_movable_handler,
1104         },
1105         {
1106                 .procname       = "nr_overcommit_hugepages",
1107                 .data           = NULL,
1108                 .maxlen         = sizeof(unsigned long),
1109                 .mode           = 0644,
1110                 .proc_handler   = hugetlb_overcommit_handler,
1111                 .extra1         = (void *)&hugetlb_zero,
1112                 .extra2         = (void *)&hugetlb_infinity,
1113         },
1114 #endif
1115         {
1116                 .procname       = "lowmem_reserve_ratio",
1117                 .data           = &sysctl_lowmem_reserve_ratio,
1118                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1119                 .mode           = 0644,
1120                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1121         },
1122         {
1123                 .procname       = "drop_caches",
1124                 .data           = &sysctl_drop_caches,
1125                 .maxlen         = sizeof(int),
1126                 .mode           = 0644,
1127                 .proc_handler   = drop_caches_sysctl_handler,
1128         },
1129         {
1130                 .procname       = "min_free_kbytes",
1131                 .data           = &min_free_kbytes,
1132                 .maxlen         = sizeof(min_free_kbytes),
1133                 .mode           = 0644,
1134                 .proc_handler   = min_free_kbytes_sysctl_handler,
1135                 .extra1         = &zero,
1136         },
1137         {
1138                 .procname       = "percpu_pagelist_fraction",
1139                 .data           = &percpu_pagelist_fraction,
1140                 .maxlen         = sizeof(percpu_pagelist_fraction),
1141                 .mode           = 0644,
1142                 .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1143                 .extra1         = &min_percpu_pagelist_fract,
1144         },
1145 #ifdef CONFIG_MMU
1146         {
1147                 .procname       = "max_map_count",
1148                 .data           = &sysctl_max_map_count,
1149                 .maxlen         = sizeof(sysctl_max_map_count),
1150                 .mode           = 0644,
1151                 .proc_handler   = proc_dointvec_minmax,
1152                 .extra1         = &zero,
1153         },
1154 #else
1155         {
1156                 .procname       = "nr_trim_pages",
1157                 .data           = &sysctl_nr_trim_pages,
1158                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1159                 .mode           = 0644,
1160                 .proc_handler   = proc_dointvec_minmax,
1161                 .extra1         = &zero,
1162         },
1163 #endif
1164         {
1165                 .procname       = "laptop_mode",
1166                 .data           = &laptop_mode,
1167                 .maxlen         = sizeof(laptop_mode),
1168                 .mode           = 0644,
1169                 .proc_handler   = proc_dointvec_jiffies,
1170         },
1171         {
1172                 .procname       = "block_dump",
1173                 .data           = &block_dump,
1174                 .maxlen         = sizeof(block_dump),
1175                 .mode           = 0644,
1176                 .proc_handler   = proc_dointvec,
1177                 .extra1         = &zero,
1178         },
1179         {
1180                 .procname       = "vfs_cache_pressure",
1181                 .data           = &sysctl_vfs_cache_pressure,
1182                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1183                 .mode           = 0644,
1184                 .proc_handler   = proc_dointvec,
1185                 .extra1         = &zero,
1186         },
1187 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1188         {
1189                 .procname       = "legacy_va_layout",
1190                 .data           = &sysctl_legacy_va_layout,
1191                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1192                 .mode           = 0644,
1193                 .proc_handler   = proc_dointvec,
1194                 .extra1         = &zero,
1195         },
1196 #endif
1197 #ifdef CONFIG_NUMA
1198         {
1199                 .procname       = "zone_reclaim_mode",
1200                 .data           = &zone_reclaim_mode,
1201                 .maxlen         = sizeof(zone_reclaim_mode),
1202                 .mode           = 0644,
1203                 .proc_handler   = proc_dointvec,
1204                 .extra1         = &zero,
1205         },
1206         {
1207                 .procname       = "min_unmapped_ratio",
1208                 .data           = &sysctl_min_unmapped_ratio,
1209                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1210                 .mode           = 0644,
1211                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1212                 .extra1         = &zero,
1213                 .extra2         = &one_hundred,
1214         },
1215         {
1216                 .procname       = "min_slab_ratio",
1217                 .data           = &sysctl_min_slab_ratio,
1218                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1219                 .mode           = 0644,
1220                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1221                 .extra1         = &zero,
1222                 .extra2         = &one_hundred,
1223         },
1224 #endif
1225 #ifdef CONFIG_SMP
1226         {
1227                 .procname       = "stat_interval",
1228                 .data           = &sysctl_stat_interval,
1229                 .maxlen         = sizeof(sysctl_stat_interval),
1230                 .mode           = 0644,
1231                 .proc_handler   = proc_dointvec_jiffies,
1232         },
1233 #endif
1234 #ifdef CONFIG_MMU
1235         {
1236                 .procname       = "mmap_min_addr",
1237                 .data           = &dac_mmap_min_addr,
1238                 .maxlen         = sizeof(unsigned long),
1239                 .mode           = 0644,
1240                 .proc_handler   = mmap_min_addr_handler,
1241         },
1242 #endif
1243 #ifdef CONFIG_NUMA
1244         {
1245                 .procname       = "numa_zonelist_order",
1246                 .data           = &numa_zonelist_order,
1247                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1248                 .mode           = 0644,
1249                 .proc_handler   = numa_zonelist_order_handler,
1250         },
1251 #endif
1252 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1253    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1254         {
1255                 .procname       = "vdso_enabled",
1256                 .data           = &vdso_enabled,
1257                 .maxlen         = sizeof(vdso_enabled),
1258                 .mode           = 0644,
1259                 .proc_handler   = proc_dointvec,
1260                 .extra1         = &zero,
1261         },
1262 #endif
1263 #ifdef CONFIG_HIGHMEM
1264         {
1265                 .procname       = "highmem_is_dirtyable",
1266                 .data           = &vm_highmem_is_dirtyable,
1267                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1268                 .mode           = 0644,
1269                 .proc_handler   = proc_dointvec_minmax,
1270                 .extra1         = &zero,
1271                 .extra2         = &one,
1272         },
1273 #endif
1274         {
1275                 .procname       = "scan_unevictable_pages",
1276                 .data           = &scan_unevictable_pages,
1277                 .maxlen         = sizeof(scan_unevictable_pages),
1278                 .mode           = 0644,
1279                 .proc_handler   = scan_unevictable_handler,
1280         },
1281         {
1282                 .procname       = "heap-stack-gap",
1283                 .data           = &heap_stack_gap,
1284                 .maxlen         = sizeof(int),
1285                 .mode           = 0644,
1286                 .proc_handler   = proc_dointvec,
1287         },
1288 #ifdef CONFIG_MEMORY_FAILURE
1289         {
1290                 .procname       = "memory_failure_early_kill",
1291                 .data           = &sysctl_memory_failure_early_kill,
1292                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1293                 .mode           = 0644,
1294                 .proc_handler   = proc_dointvec_minmax,
1295                 .extra1         = &zero,
1296                 .extra2         = &one,
1297         },
1298         {
1299                 .procname       = "memory_failure_recovery",
1300                 .data           = &sysctl_memory_failure_recovery,
1301                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
1302                 .mode           = 0644,
1303                 .proc_handler   = proc_dointvec_minmax,
1304                 .extra1         = &zero,
1305                 .extra2         = &one,
1306         },
1307 #endif
1308
1309 /*
1310  * NOTE: do not add new entries to this table unless you have read
1311  * Documentation/sysctl/ctl_unnumbered.txt
1312  */
1313         { }
1314 };
1315
1316 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1317 static struct ctl_table binfmt_misc_table[] = {
1318         { }
1319 };
1320 #endif
1321
1322 static struct ctl_table fs_table[] = {
1323         {
1324                 .procname       = "inode-nr",
1325                 .data           = &inodes_stat,
1326                 .maxlen         = 2*sizeof(int),
1327                 .mode           = 0444,
1328                 .proc_handler   = proc_dointvec,
1329         },
1330         {
1331                 .procname       = "inode-state",
1332                 .data           = &inodes_stat,
1333                 .maxlen         = 7*sizeof(int),
1334                 .mode           = 0444,
1335                 .proc_handler   = proc_dointvec,
1336         },
1337         {
1338                 .procname       = "file-nr",
1339                 .data           = &files_stat,
1340                 .maxlen         = 3*sizeof(int),
1341                 .mode           = 0444,
1342                 .proc_handler   = proc_nr_files,
1343         },
1344         {
1345                 .procname       = "file-max",
1346                 .data           = &files_stat.max_files,
1347                 .maxlen         = sizeof(int),
1348                 .mode           = 0644,
1349                 .proc_handler   = proc_dointvec,
1350         },
1351         {
1352                 .procname       = "nr_open",
1353                 .data           = &sysctl_nr_open,
1354                 .maxlen         = sizeof(int),
1355                 .mode           = 0644,
1356                 .proc_handler   = proc_dointvec_minmax,
1357                 .extra1         = &sysctl_nr_open_min,
1358                 .extra2         = &sysctl_nr_open_max,
1359         },
1360         {
1361                 .procname       = "dentry-state",
1362                 .data           = &dentry_stat,
1363                 .maxlen         = 6*sizeof(int),
1364                 .mode           = 0444,
1365                 .proc_handler   = proc_dointvec,
1366         },
1367         {
1368                 .procname       = "overflowuid",
1369                 .data           = &fs_overflowuid,
1370                 .maxlen         = sizeof(int),
1371                 .mode           = 0644,
1372                 .proc_handler   = proc_dointvec_minmax,
1373                 .extra1         = &minolduid,
1374                 .extra2         = &maxolduid,
1375         },
1376         {
1377                 .procname       = "overflowgid",
1378                 .data           = &fs_overflowgid,
1379                 .maxlen         = sizeof(int),
1380                 .mode           = 0644,
1381                 .proc_handler   = proc_dointvec_minmax,
1382                 .extra1         = &minolduid,
1383                 .extra2         = &maxolduid,
1384         },
1385 #ifdef CONFIG_FILE_LOCKING
1386         {
1387                 .procname       = "leases-enable",
1388                 .data           = &leases_enable,
1389                 .maxlen         = sizeof(int),
1390                 .mode           = 0644,
1391                 .proc_handler   = proc_dointvec,
1392         },
1393 #endif
1394 #ifdef CONFIG_DNOTIFY
1395         {
1396                 .procname       = "dir-notify-enable",
1397                 .data           = &dir_notify_enable,
1398                 .maxlen         = sizeof(int),
1399                 .mode           = 0644,
1400                 .proc_handler   = proc_dointvec,
1401         },
1402 #endif
1403 #ifdef CONFIG_MMU
1404 #ifdef CONFIG_FILE_LOCKING
1405         {
1406                 .procname       = "lease-break-time",
1407                 .data           = &lease_break_time,
1408                 .maxlen         = sizeof(int),
1409                 .mode           = 0644,
1410                 .proc_handler   = proc_dointvec,
1411         },
1412 #endif
1413 #ifdef CONFIG_AIO
1414         {
1415                 .procname       = "aio-nr",
1416                 .data           = &aio_nr,
1417                 .maxlen         = sizeof(aio_nr),
1418                 .mode           = 0444,
1419                 .proc_handler   = proc_doulongvec_minmax,
1420         },
1421         {
1422                 .procname       = "aio-max-nr",
1423                 .data           = &aio_max_nr,
1424                 .maxlen         = sizeof(aio_max_nr),
1425                 .mode           = 0644,
1426                 .proc_handler   = proc_doulongvec_minmax,
1427         },
1428 #endif /* CONFIG_AIO */
1429 #ifdef CONFIG_INOTIFY_USER
1430         {
1431                 .procname       = "inotify",
1432                 .mode           = 0555,
1433                 .child          = inotify_table,
1434         },
1435 #endif  
1436 #ifdef CONFIG_EPOLL
1437         {
1438                 .procname       = "epoll",
1439                 .mode           = 0555,
1440                 .child          = epoll_table,
1441         },
1442 #endif
1443 #endif
1444         {
1445                 .procname       = "suid_dumpable",
1446                 .data           = &suid_dumpable,
1447                 .maxlen         = sizeof(int),
1448                 .mode           = 0644,
1449                 .proc_handler   = proc_dointvec_minmax,
1450                 .extra1         = &zero,
1451                 .extra2         = &two,
1452         },
1453 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1454         {
1455                 .procname       = "binfmt_misc",
1456                 .mode           = 0555,
1457                 .child          = binfmt_misc_table,
1458         },
1459 #endif
1460 /*
1461  * NOTE: do not add new entries to this table unless you have read
1462  * Documentation/sysctl/ctl_unnumbered.txt
1463  */
1464         { }
1465 };
1466
1467 static struct ctl_table debug_table[] = {
1468 #if defined(CONFIG_X86) || defined(CONFIG_PPC)
1469         {
1470                 .procname       = "exception-trace",
1471                 .data           = &show_unhandled_signals,
1472                 .maxlen         = sizeof(int),
1473                 .mode           = 0644,
1474                 .proc_handler   = proc_dointvec
1475         },
1476 #endif
1477         { }
1478 };
1479
1480 static struct ctl_table dev_table[] = {
1481         { }
1482 };
1483
/*
 * Spinlock taken by the helpers below around updates to a table header's
 * ->used, ->count and ->unregistering fields, around removal of a header
 * from its list, and around walking/extending the list of sysctl roots.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1485
1486 /* called under sysctl_lock */
1487 static int use_table(struct ctl_table_header *p)
1488 {
1489         if (unlikely(p->unregistering))
1490                 return 0;
1491         p->used++;
1492         return 1;
1493 }
1494
1495 /* called under sysctl_lock */
1496 static void unuse_table(struct ctl_table_header *p)
1497 {
1498         if (!--p->used)
1499                 if (unlikely(p->unregistering))
1500                         complete(p->unregistering);
1501 }
1502
1503 /* called under sysctl_lock, will reacquire if has to wait */
1504 static void start_unregistering(struct ctl_table_header *p)
1505 {
1506         /*
1507          * if p->used is 0, nobody will ever touch that entry again;
1508          * we'll eliminate all paths to it before dropping sysctl_lock
1509          */
1510         if (unlikely(p->used)) {
1511                 struct completion wait;
1512                 init_completion(&wait);
1513                 p->unregistering = &wait;
1514                 spin_unlock(&sysctl_lock);
1515                 wait_for_completion(&wait);
1516                 spin_lock(&sysctl_lock);
1517         } else {
1518                 /* anything non-NULL; we'll never dereference it */
1519                 p->unregistering = ERR_PTR(-EINVAL);
1520         }
1521         /*
1522          * do not remove from the list until nobody holds it; walking the
1523          * list in do_sysctl() relies on that.
1524          */
1525         list_del_init(&p->ctl_entry);
1526 }
1527
1528 void sysctl_head_get(struct ctl_table_header *head)
1529 {
1530         spin_lock(&sysctl_lock);
1531         head->count++;
1532         spin_unlock(&sysctl_lock);
1533 }
1534
1535 void sysctl_head_put(struct ctl_table_header *head)
1536 {
1537         spin_lock(&sysctl_lock);
1538         if (!--head->count)
1539                 kfree(head);
1540         spin_unlock(&sysctl_lock);
1541 }
1542
1543 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1544 {
1545         if (!head)
1546                 BUG();
1547         spin_lock(&sysctl_lock);
1548         if (!use_table(head))
1549                 head = ERR_PTR(-ENOENT);
1550         spin_unlock(&sysctl_lock);
1551         return head;
1552 }
1553
1554 void sysctl_head_finish(struct ctl_table_header *head)
1555 {
1556         if (!head)
1557                 return;
1558         spin_lock(&sysctl_lock);
1559         unuse_table(head);
1560         spin_unlock(&sysctl_lock);
1561 }
1562
1563 static struct ctl_table_set *
1564 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1565 {
1566         struct ctl_table_set *set = &root->default_set;
1567         if (root->lookup)
1568                 set = root->lookup(root, namespaces);
1569         return set;
1570 }
1571
1572 static struct list_head *
1573 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1574 {
1575         struct ctl_table_set *set = lookup_header_set(root, namespaces);
1576         return &set->list;
1577 }
1578
1579 struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1580                                             struct ctl_table_header *prev)
1581 {
1582         struct ctl_table_root *root;
1583         struct list_head *header_list;
1584         struct ctl_table_header *head;
1585         struct list_head *tmp;
1586
1587         spin_lock(&sysctl_lock);
1588         if (prev) {
1589                 head = prev;
1590                 tmp = &prev->ctl_entry;
1591                 unuse_table(prev);
1592                 goto next;
1593         }
1594         tmp = &root_table_header.ctl_entry;
1595         for (;;) {
1596                 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1597
1598                 if (!use_table(head))
1599                         goto next;
1600                 spin_unlock(&sysctl_lock);
1601                 return head;
1602         next:
1603                 root = head->root;
1604                 tmp = tmp->next;
1605                 header_list = lookup_header_list(root, namespaces);
1606                 if (tmp != header_list)
1607                         continue;
1608
1609                 do {
1610                         root = list_entry(root->root_list.next,
1611                                         struct ctl_table_root, root_list);
1612                         if (root == &sysctl_table_root)
1613                                 goto out;
1614                         header_list = lookup_header_list(root, namespaces);
1615                 } while (list_empty(header_list));
1616                 tmp = header_list->next;
1617         }
1618 out:
1619         spin_unlock(&sysctl_lock);
1620         return NULL;
1621 }
1622
1623 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1624 {
1625         return __sysctl_head_next(current->nsproxy, prev);
1626 }
1627
1628 void register_sysctl_root(struct ctl_table_root *root)
1629 {
1630         spin_lock(&sysctl_lock);
1631         list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1632         spin_unlock(&sysctl_lock);
1633 }
1634
1635 /*
1636  * sysctl_perm does NOT grant the superuser all rights automatically, because
1637  * some sysctl variables are readonly even to root.
1638  */
1639
1640 static int test_perm(int mode, int op)
1641 {
1642         if (!current_euid())
1643                 mode >>= 6;
1644         else if (in_egroup_p(0))
1645                 mode >>= 3;
1646         if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1647                 return 0;
1648         return -EACCES;
1649 }
1650
1651 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1652 {
1653         int error;
1654         int mode;
1655
1656         error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1657         if (error)
1658                 return error;
1659
1660         if (root->permissions)
1661                 mode = root->permissions(root, current->nsproxy, table);
1662         else
1663                 mode = table->mode;
1664
1665         return test_perm(mode, op);
1666 }
1667
1668 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1669 {
1670         for (; table->procname; table++) {
1671                 table->parent = parent;
1672                 if (table->child)
1673                         sysctl_set_parent(table, table->child);
1674         }
1675 }
1676
1677 static __init int sysctl_init(void)
1678 {
1679         sysctl_set_parent(NULL, root_table);
1680 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1681         {
1682                 int err;
1683                 err = sysctl_check_table(current->nsproxy, root_table);
1684         }
1685 #endif
1686         return 0;
1687 }
1688
1689 core_initcall(sysctl_init);
1690
1691 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1692                                       struct ctl_table *table)
1693 {
1694         struct ctl_table *p;
1695         const char *s = branch->procname;
1696
1697         /* branch should have named subdirectory as its first element */
1698         if (!s || !branch->child)
1699                 return NULL;
1700
1701         /* ... and nothing else */
1702         if (branch[1].procname)
1703                 return NULL;
1704
1705         /* table should contain subdirectory with the same name */
1706         for (p = table; p->procname; p++) {
1707                 if (!p->child)
1708                         continue;
1709                 if (p->procname && strcmp(p->procname, s) == 0)
1710                         return p;
1711         }
1712         return NULL;
1713 }
1714
1715 /* see if attaching q to p would be an improvement */
1716 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1717 {
1718         struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1719         struct ctl_table *next;
1720         int is_better = 0;
1721         int not_in_parent = !p->attached_by;
1722
1723         while ((next = is_branch_in(by, to)) != NULL) {
1724                 if (by == q->attached_by)
1725                         is_better = 1;
1726                 if (to == p->attached_by)
1727                         not_in_parent = 1;
1728                 by = by->child;
1729                 to = next->child;
1730         }
1731
1732         if (is_better && not_in_parent) {
1733                 q->attached_by = by;
1734                 q->attached_to = to;
1735                 q->parent = p;
1736         }
1737 }
1738
1739 /**
1740  * __register_sysctl_paths - register a sysctl hierarchy
1741  * @root: List of sysctl headers to register on
1742  * @namespaces: Data to compute which lists of sysctl entries are visible
1743  * @path: The path to the directory the sysctl table is in.
1744  * @table: the top-level table structure
1745  *
1746  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1747  * array. A completely 0 filled entry terminates the table.
1748  *
1749  * The members of the &struct ctl_table structure are used as follows:
1750  *
1751  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1752  *            enter a sysctl file
1753  *
1754  * data - a pointer to data for use by proc_handler
1755  *
1756  * maxlen - the maximum size in bytes of the data
1757  *
1758  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1759  *
1760  * child - a pointer to the child sysctl table if this entry is a directory, or
1761  *         %NULL.
1762  *
1763  * proc_handler - the text handler routine (described below)
1764  *
1765  * de - for internal use by the sysctl routines
1766  *
1767  * extra1, extra2 - extra pointers usable by the proc handler routines
1768  *
1769  * Leaf nodes in the sysctl tree will be represented by a single file
1770  * under /proc; non-leaf nodes will be represented by directories.
1771  *
1772  * sysctl(2) can automatically manage read and write requests through
1773  * the sysctl table.  The data and maxlen fields of the ctl_table
1774  * struct enable minimal validation of the values being written to be
1775  * performed, and the mode field allows minimal authentication.
1776  *
1777  * There must be a proc_handler routine for any terminal nodes
1778  * mirrored under /proc/sys (non-terminals are handled by a built-in
1779  * directory handler).  Several default handlers are available to
1780  * cover common cases -
1781  *
1782  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1783  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 
1784  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1785  *
1786  * It is the handler's job to read the input buffer from user memory
1787  * and process it. The handler should return 0 on success.
1788  *
1789  * This routine returns %NULL on a failure to register, and a pointer
1790  * to the table header on success.
1791  */
1792 struct ctl_table_header *__register_sysctl_paths(
1793         struct ctl_table_root *root,
1794         struct nsproxy *namespaces,
1795         const struct ctl_path *path, struct ctl_table *table)
1796 {
1797         struct ctl_table_header *header;
1798         struct ctl_table *new, **prevp;
1799         unsigned int n, npath;
1800         struct ctl_table_set *set;
1801
1802         /* Count the path components */
1803         for (npath = 0; path[npath].procname; ++npath)
1804                 ;
1805
1806         /*
1807          * For each path component, allocate a 2-element ctl_table array.
1808          * The first array element will be filled with the sysctl entry
1809          * for this, the second will be the sentinel (procname == 0).
1810          *
1811          * We allocate everything in one go so that we don't have to
1812          * worry about freeing additional memory in unregister_sysctl_table.
1813          */
1814         header = kzalloc(sizeof(struct ctl_table_header) +
1815                          (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1816         if (!header)
1817                 return NULL;
1818
1819         new = (struct ctl_table *) (header + 1);
1820
1821         /* Now connect the dots */
1822         prevp = &header->ctl_table;
1823         for (n = 0; n < npath; ++n, ++path) {
1824                 /* Copy the procname */
1825                 new->procname = path->procname;
1826                 new->mode     = 0555;
1827
1828                 *prevp = new;
1829                 prevp = &new->child;
1830
1831                 new += 2;
1832         }
1833         *prevp = table;
1834         header->ctl_table_arg = table;
1835
1836         INIT_LIST_HEAD(&header->ctl_entry);
1837         header->used = 0;
1838         header->unregistering = NULL;
1839         header->root = root;
1840         sysctl_set_parent(NULL, header->ctl_table);
1841         header->count = 1;
1842 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1843         if (sysctl_check_table(namespaces, header->ctl_table)) {
1844                 kfree(header);
1845                 return NULL;
1846         }
1847 #endif
1848         spin_lock(&sysctl_lock);
1849         header->set = lookup_header_set(root, namespaces);
1850         header->attached_by = header->ctl_table;
1851         header->attached_to = root_table;
1852         header->parent = &root_table_header;
1853         for (set = header->set; set; set = set->parent) {
1854                 struct ctl_table_header *p;
1855                 list_for_each_entry(p, &set->list, ctl_entry) {
1856                         if (p->unregistering)
1857                                 continue;
1858                         try_attach(p, header);
1859                 }
1860         }
1861         header->parent->count++;
1862         list_add_tail(&header->ctl_entry, &header->set->list);
1863         spin_unlock(&sysctl_lock);
1864
1865         return header;
1866 }
1867
1868 /**
1869  * register_sysctl_table_path - register a sysctl table hierarchy
1870  * @path: The path to the directory the sysctl table is in.
1871  * @table: the top-level table structure
1872  *
1873  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1874  * array. A completely 0 filled entry terminates the table.
1875  *
1876  * See __register_sysctl_paths for more details.
1877  */
1878 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1879                                                 struct ctl_table *table)
1880 {
1881         return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1882                                         path, table);
1883 }
1884
1885 /**
1886  * register_sysctl_table - register a sysctl table hierarchy
1887  * @table: the top-level table structure
1888  *
1889  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1890  * array. A completely 0 filled entry terminates the table.
1891  *
1892  * See register_sysctl_paths for more details.
1893  */
1894 struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1895 {
1896         static const struct ctl_path null_path[] = { {} };
1897
1898         return register_sysctl_paths(null_path, table);
1899 }
1900
1901 /**
1902  * unregister_sysctl_table - unregister a sysctl table hierarchy
1903  * @header: the header returned from register_sysctl_table
1904  *
1905  * Unregisters the sysctl table and all children. proc entries may not
1906  * actually be removed until they are no longer used by anyone.
1907  */
1908 void unregister_sysctl_table(struct ctl_table_header * header)
1909 {
1910         might_sleep();
1911
1912         if (header == NULL)
1913                 return;
1914
1915         spin_lock(&sysctl_lock);
1916         start_unregistering(header);
1917         if (!--header->parent->count) {
1918                 WARN_ON(1);
1919                 kfree(header->parent);
1920         }
1921         if (!--header->count)
1922                 kfree(header);
1923         spin_unlock(&sysctl_lock);
1924 }
1925
1926 int sysctl_is_seen(struct ctl_table_header *p)
1927 {
1928         struct ctl_table_set *set = p->set;
1929         int res;
1930         spin_lock(&sysctl_lock);
1931         if (p->unregistering)
1932                 res = 0;
1933         else if (!set->is_seen)
1934                 res = 1;
1935         else
1936                 res = set->is_seen(set);
1937         spin_unlock(&sysctl_lock);
1938         return res;
1939 }
1940
1941 void setup_sysctl_set(struct ctl_table_set *p,
1942         struct ctl_table_set *parent,
1943         int (*is_seen)(struct ctl_table_set *))
1944 {
1945         INIT_LIST_HEAD(&p->list);
1946         p->parent = parent ? parent : &sysctl_table_root.default_set;
1947         p->is_seen = is_seen;
1948 }
1949
1950 #else /* !CONFIG_SYSCTL */
/* !CONFIG_SYSCTL stub: nothing is registered, so there is no header. */
struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
{
	return NULL;
}
1955
/* !CONFIG_SYSCTL stub: nothing is registered, so there is no header. */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
					       struct ctl_table *table)
{
	return NULL;
}
1961
/* !CONFIG_SYSCTL stub: there is never anything to unregister. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
1965
/* !CONFIG_SYSCTL stub: table sets are not used, so this does nothing. */
void setup_sysctl_set(struct ctl_table_set *p,
	struct ctl_table_set *parent,
	int (*is_seen)(struct ctl_table_set *))
{
}
1971
/* !CONFIG_SYSCTL stub: no header references exist to drop. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
1975
1976 #endif /* CONFIG_SYSCTL */
1977
1978 /*
1979  * /proc/sys support
1980  */
1981
1982 #ifdef CONFIG_PROC_SYSCTL
1983
1984 static int _proc_do_string(void* data, int maxlen, int write,
1985                            void __user *buffer,
1986                            size_t *lenp, loff_t *ppos)
1987 {
1988         size_t len;
1989         char __user *p;
1990         char c;
1991
1992         if (!data || !maxlen || !*lenp) {
1993                 *lenp = 0;
1994                 return 0;
1995         }
1996
1997         if (write) {
1998                 len = 0;
1999                 p = buffer;
2000                 while (len < *lenp) {
2001                         if (get_user(c, p++))
2002                                 return -EFAULT;
2003                         if (c == 0 || c == '\n')
2004                                 break;
2005                         len++;
2006                 }
2007                 if (len >= maxlen)
2008                         len = maxlen-1;
2009                 if(copy_from_user(data, buffer, len))
2010                         return -EFAULT;
2011                 ((char *) data)[len] = 0;
2012                 *ppos += *lenp;
2013         } else {
2014                 len = strlen(data);
2015                 if (len > maxlen)
2016                         len = maxlen;
2017
2018                 if (*ppos > len) {
2019                         *lenp = 0;
2020                         return 0;
2021                 }
2022
2023                 data += *ppos;
2024                 len  -= *ppos;
2025
2026                 if (len > *lenp)
2027                         len = *lenp;
2028                 if (len)
2029                         if(copy_to_user(buffer, data, len))
2030                                 return -EFAULT;
2031                 if (len < *lenp) {
2032                         if(put_user('\n', ((char __user *) buffer) + len))
2033                                 return -EFAULT;
2034                         len++;
2035                 }
2036                 *lenp = len;
2037                 *ppos += len;
2038         }
2039         return 0;
2040 }
2041
2042 /**
2043  * proc_dostring - read a string sysctl
2044  * @table: the sysctl table
2045  * @write: %TRUE if this is a write to the sysctl file
2046  * @buffer: the user buffer
2047  * @lenp: the size of the user buffer
2048  * @ppos: file position
2049  *
2050  * Reads/writes a string from/to the user buffer. If the kernel
2051  * buffer provided is not large enough to hold the string, the
2052  * string is truncated. The copied string is %NULL-terminated.
2053  * If the string is being read by the user process, it is copied
2054  * and a newline '\n' is added. It is truncated if the buffer is
2055  * not large enough.
2056  *
2057  * Returns 0 on success.
2058  */
2059 int proc_dostring(struct ctl_table *table, int write,
2060                   void __user *buffer, size_t *lenp, loff_t *ppos)
2061 {
2062         return _proc_do_string(table->data, table->maxlen, write,
2063                                buffer, lenp, ppos);
2064 }
2065
2066
/*
 * Default converter between the parsed (sign, magnitude) pair and an int.
 *
 * @negp:  sign flag; non-zero means negative
 * @lvalp: magnitude
 * @valp:  the int being read or written
 * @write: non-zero to store *negp/*lvalp into *valp, zero to split *valp
 *         into *negp/*lvalp
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" overflows
			 * for INT_MIN and would yield a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2085
2086 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2087                   int write, void __user *buffer,
2088                   size_t *lenp, loff_t *ppos,
2089                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
2090                               int write, void *data),
2091                   void *data)
2092 {
2093 #define TMPBUFLEN 21
2094         int *i, vleft, first = 1, neg;
2095         unsigned long lval;
2096         size_t left, len;
2097         
2098         char buf[TMPBUFLEN], *p;
2099         char __user *s = buffer;
2100         
2101         if (!tbl_data || !table->maxlen || !*lenp ||
2102             (*ppos && !write)) {
2103                 *lenp = 0;
2104                 return 0;
2105         }
2106         
2107         i = (int *) tbl_data;
2108         vleft = table->maxlen / sizeof(*i);
2109         left = *lenp;
2110
2111         if (!conv)
2112                 conv = do_proc_dointvec_conv;
2113
2114         for (; left && vleft--; i++, first=0) {
2115                 if (write) {
2116                         while (left) {
2117                                 char c;
2118                                 if (get_user(c, s))
2119                                         return -EFAULT;
2120                                 if (!isspace(c))
2121                                         break;
2122                                 left--;
2123                                 s++;
2124                         }
2125                         if (!left)
2126                                 break;
2127                         neg = 0;
2128                         len = left;
2129                         if (len > sizeof(buf) - 1)
2130                                 len = sizeof(buf) - 1;
2131                         if (copy_from_user(buf, s, len))
2132                                 return -EFAULT;
2133                         buf[len] = 0;
2134                         p = buf;
2135                         if (*p == '-' && left > 1) {
2136                                 neg = 1;
2137                                 p++;
2138                         }
2139                         if (*p < '0' || *p > '9')
2140                                 break;
2141
2142                         lval = simple_strtoul(p, &p, 0);
2143
2144                         len = p-buf;
2145                         if ((len < left) && *p && !isspace(*p))
2146                                 break;
2147                         s += len;
2148                         left -= len;
2149
2150                         if (conv(&neg, &lval, i, 1, data))
2151                                 break;
2152                 } else {
2153                         p = buf;
2154                         if (!first)
2155                                 *p++ = '\t';
2156         
2157                         if (conv(&neg, &lval, i, 0, data))
2158                                 break;
2159
2160                         sprintf(p, "%s%lu", neg ? "-" : "", lval);
2161                         len = strlen(buf);
2162                         if (len > left)
2163                                 len = left;
2164                         if(copy_to_user(s, buf, len))
2165                                 return -EFAULT;
2166                         left -= len;
2167                         s += len;
2168                 }
2169         }
2170
2171         if (!write && !first && left) {
2172                 if(put_user('\n', s))
2173                         return -EFAULT;
2174                 left--, s++;
2175         }
2176         if (write) {
2177                 while (left) {
2178                         char c;
2179                         if (get_user(c, s++))
2180                                 return -EFAULT;
2181                         if (!isspace(c))
2182                                 break;
2183                         left--;
2184                 }
2185         }
2186         if (write && first)
2187                 return -EINVAL;
2188         *lenp -= left;
2189         *ppos += *lenp;
2190         return 0;
2191 #undef TMPBUFLEN
2192 }
2193
2194 static int do_proc_dointvec(struct ctl_table *table, int write,
2195                   void __user *buffer, size_t *lenp, loff_t *ppos,
2196                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
2197                               int write, void *data),
2198                   void *data)
2199 {
2200         return __do_proc_dointvec(table->data, table, write,
2201                         buffer, lenp, ppos, conv, data);
2202 }
2203
2204 /**
2205  * proc_dointvec - read a vector of integers
2206  * @table: the sysctl table
2207  * @write: %TRUE if this is a write to the sysctl file
2208  * @buffer: the user buffer
2209  * @lenp: the size of the user buffer
2210  * @ppos: file position
2211  *
2212  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2213  * values from/to the user buffer, treated as an ASCII string. 
2214  *
2215  * Returns 0 on success.
2216  */
2217 int proc_dointvec(struct ctl_table *table, int write,
2218                      void __user *buffer, size_t *lenp, loff_t *ppos)
2219 {
2220     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2221                             NULL,NULL);
2222 }
2223
2224 /*
2225  * Taint values can only be increased
2226  * This means we can safely use a temporary.
2227  */
2228 static int proc_taint(struct ctl_table *table, int write,
2229                                void __user *buffer, size_t *lenp, loff_t *ppos)
2230 {
2231         struct ctl_table t;
2232         unsigned long tmptaint = get_taint();
2233         int err;
2234
2235         if (write && !capable(CAP_SYS_ADMIN))
2236                 return -EPERM;
2237
2238         t = *table;
2239         t.data = &tmptaint;
2240         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2241         if (err < 0)
2242                 return err;
2243
2244         if (write) {
2245                 /*
2246                  * Poor man's atomic or. Not worth adding a primitive
2247                  * to everyone's atomic.h for this
2248                  */
2249                 int i;
2250                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2251                         if ((tmptaint >> i) & 1)
2252                                 add_taint(i);
2253                 }
2254         }
2255
2256         return err;
2257 }
2258
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means no bound. */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Range-checking converter between the parsed (sign, magnitude) pair and an
 * int.
 *
 * @negp:  sign flag; non-zero means negative
 * @lvalp: magnitude
 * @valp:  the int being read or written
 * @write: non-zero to store into *valp, zero to split *valp
 * @data:  struct do_proc_dointvec_minmax_conv_param with the bounds
 *
 * On write the value is stored only if it lies within the bounds that are
 * set; otherwise *valp is left alone and -EINVAL is returned.  Reads always
 * succeed.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" overflows
			 * for INT_MIN and would yield a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2287
2288 /**
2289  * proc_dointvec_minmax - read a vector of integers with min/max values
2290  * @table: the sysctl table
2291  * @write: %TRUE if this is a write to the sysctl file
2292  * @buffer: the user buffer
2293  * @lenp: the size of the user buffer
2294  * @ppos: file position
2295  *
2296  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2297  * values from/to the user buffer, treated as an ASCII string.
2298  *
2299  * This routine will ensure the values are within the range specified by
2300  * table->extra1 (min) and table->extra2 (max).
2301  *
2302  * Returns 0 on success.
2303  */
2304 int proc_dointvec_minmax(struct ctl_table *table, int write,
2305                   void __user *buffer, size_t *lenp, loff_t *ppos)
2306 {
2307         struct do_proc_dointvec_minmax_conv_param param = {
2308                 .min = (int *) table->extra1,
2309                 .max = (int *) table->extra2,
2310         };
2311         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2312                                 do_proc_dointvec_minmax_conv, &param);
2313 }
2314
2315 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2316                                      void __user *buffer,
2317                                      size_t *lenp, loff_t *ppos,
2318                                      unsigned long convmul,
2319                                      unsigned long convdiv)
2320 {
2321 #define TMPBUFLEN 21
2322         unsigned long *i, *min, *max, val;
2323         int vleft, first=1, neg;
2324         size_t len, left;
2325         char buf[TMPBUFLEN], *p;
2326         char __user *s = buffer;
2327         
2328         if (!data || !table->maxlen || !*lenp ||
2329             (*ppos && !write)) {
2330                 *lenp = 0;
2331                 return 0;
2332         }
2333         
2334         i = (unsigned long *) data;
2335         min = (unsigned long *) table->extra1;
2336         max = (unsigned long *) table->extra2;
2337         vleft = table->maxlen / sizeof(unsigned long);
2338         left = *lenp;
2339         
2340         for (; left && vleft--; i++, min++, max++, first=0) {
2341                 if (write) {
2342                         while (left) {
2343                                 char c;
2344                                 if (get_user(c, s))
2345                                         return -EFAULT;
2346                                 if (!isspace(c))
2347                                         break;
2348                                 left--;
2349                                 s++;
2350                         }
2351                         if (!left)
2352                                 break;
2353                         neg = 0;
2354                         len = left;
2355                         if (len > TMPBUFLEN-1)
2356                                 len = TMPBUFLEN-1;
2357                         if (copy_from_user(buf, s, len))
2358                                 return -EFAULT;
2359                         buf[len] = 0;
2360                         p = buf;
2361                         if (*p == '-' && left > 1) {
2362                                 neg = 1;
2363                                 p++;
2364                         }
2365                         if (*p < '0' || *p > '9')
2366                                 break;
2367                         val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2368                         len = p-buf;
2369                         if ((len < left) && *p && !isspace(*p))
2370                                 break;
2371                         if (neg)
2372                                 val = -val;
2373                         s += len;
2374                         left -= len;
2375
2376                         if(neg)
2377                                 continue;
2378                         if ((min && val < *min) || (max && val > *max))
2379                                 continue;
2380                         *i = val;
2381                 } else {
2382                         p = buf;
2383                         if (!first)
2384                                 *p++ = '\t';
2385                         sprintf(p, "%lu", convdiv * (*i) / convmul);
2386                         len = strlen(buf);
2387                         if (len > left)
2388                                 len = left;
2389                         if(copy_to_user(s, buf, len))
2390                                 return -EFAULT;
2391                         left -= len;
2392                         s += len;
2393                 }
2394         }
2395
2396         if (!write && !first && left) {
2397                 if(put_user('\n', s))
2398                         return -EFAULT;
2399                 left--, s++;
2400         }
2401         if (write) {
2402                 while (left) {
2403                         char c;
2404                         if (get_user(c, s++))
2405                                 return -EFAULT;
2406                         if (!isspace(c))
2407                                 break;
2408                         left--;
2409                 }
2410         }
2411         if (write && first)
2412                 return -EINVAL;
2413         *lenp -= left;
2414         *ppos += *lenp;
2415         return 0;
2416 #undef TMPBUFLEN
2417 }
2418
2419 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2420                                      void __user *buffer,
2421                                      size_t *lenp, loff_t *ppos,
2422                                      unsigned long convmul,
2423                                      unsigned long convdiv)
2424 {
2425         return __do_proc_doulongvec_minmax(table->data, table, write,
2426                         buffer, lenp, ppos, convmul, convdiv);
2427 }
2428
2429 /**
2430  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2431  * @table: the sysctl table
2432  * @write: %TRUE if this is a write to the sysctl file
2433  * @buffer: the user buffer
2434  * @lenp: the size of the user buffer
2435  * @ppos: file position
2436  *
2437  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2438  * values from/to the user buffer, treated as an ASCII string.
2439  *
2440  * This routine will ensure the values are within the range specified by
2441  * table->extra1 (min) and table->extra2 (max).
2442  *
2443  * Returns 0 on success.
2444  */
2445 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2446                            void __user *buffer, size_t *lenp, loff_t *ppos)
2447 {
2448     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2449 }
2450
2451 /**
2452  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2453  * @table: the sysctl table
2454  * @write: %TRUE if this is a write to the sysctl file
2455  * @buffer: the user buffer
2456  * @lenp: the size of the user buffer
2457  * @ppos: file position
2458  *
2459  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2460  * values from/to the user buffer, treated as an ASCII string. The values
2461  * are treated as milliseconds, and converted to jiffies when they are stored.
2462  *
2463  * This routine will ensure the values are within the range specified by
2464  * table->extra1 (min) and table->extra2 (max).
2465  *
2466  * Returns 0 on success.
2467  */
2468 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2469                                       void __user *buffer,
2470                                       size_t *lenp, loff_t *ppos)
2471 {
2472     return do_proc_doulongvec_minmax(table, write, buffer,
2473                                      lenp, ppos, HZ, 1000l);
2474 }
2475
2476
/*
 * do_proc_dointvec_jiffies_conv - convert between seconds and jiffies
 * @negp: sign flag. On write, non-zero means the parsed value is negative;
 *        on read it is set to -1 for a negative table value, 0 otherwise.
 * @lvalp: magnitude in seconds (input on write, output on read)
 * @valp: the int jiffies value (output on write, input on read)
 * @write: non-zero converts seconds to jiffies, zero converts jiffies to
 *         seconds
 * @data: unused
 *
 * Returns 0 on success, or 1 when the value being written would not fit
 * in an int after multiplication by HZ.
 */
static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
					 int *valp,
					 int write, void *data)
{
	if (write) {
		/*
		 * The destination is an int, so the bound must be INT_MAX,
		 * not LONG_MAX; otherwise the product is silently truncated
		 * where long is wider than int.
		 */
		if (*lvalp > INT_MAX / HZ)
			return 1;
		*valp = *negp ? -(*lvalp * HZ) : (*lvalp * HZ);
	} else {
		int val = *valp;
		unsigned long lval;

		if (val < 0) {
			*negp = -1;
			/*
			 * Widen before negating: -val overflows for INT_MIN
			 * when evaluated as a signed int.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = lval / HZ;
	}
	return 0;
}
2499
2500 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2501                                                 int *valp,
2502                                                 int write, void *data)
2503 {
2504         if (write) {
2505                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2506                         return 1;
2507                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2508         } else {
2509                 int val = *valp;
2510                 unsigned long lval;
2511                 if (val < 0) {
2512                         *negp = -1;
2513                         lval = (unsigned long)-val;
2514                 } else {
2515                         *negp = 0;
2516                         lval = (unsigned long)val;
2517                 }
2518                 *lvalp = jiffies_to_clock_t(lval);
2519         }
2520         return 0;
2521 }
2522
/*
 * do_proc_dointvec_ms_jiffies_conv - convert between milliseconds and jiffies
 * @negp: sign flag. On write, non-zero means the parsed value is negative;
 *        on read it is set to -1 for a negative table value, 0 otherwise.
 * @lvalp: magnitude in milliseconds (input on write, output on read)
 * @valp: the int jiffies value (output on write, input on read)
 * @write: non-zero converts milliseconds to jiffies, zero converts back
 * @data: unused
 *
 * Returns 0 on success, or 1 when the converted jiffies value written
 * would not fit in an int.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		/*
		 * NOTE(review): assumes msecs_to_jiffies() returns unsigned
		 * long, as declared in <linux/jiffies.h> -- confirm.
		 */
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		/* The destination is an int: reject instead of truncating. */
		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
	} else {
		int val = *valp;
		unsigned long lval;

		if (val < 0) {
			*negp = -1;
			/*
			 * Widen before negating: -val overflows for INT_MIN
			 * when evaluated as a signed int.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2543
2544 /**
2545  * proc_dointvec_jiffies - read a vector of integers as seconds
2546  * @table: the sysctl table
2547  * @write: %TRUE if this is a write to the sysctl file
2548  * @buffer: the user buffer
2549  * @lenp: the size of the user buffer
2550  * @ppos: file position
2551  *
2552  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2553  * values from/to the user buffer, treated as an ASCII string. 
2554  * The values read are assumed to be in seconds, and are converted into
2555  * jiffies.
2556  *
2557  * Returns 0 on success.
2558  */
2559 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2560                           void __user *buffer, size_t *lenp, loff_t *ppos)
2561 {
2562     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2563                             do_proc_dointvec_jiffies_conv,NULL);
2564 }
2565
2566 /**
2567  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2568  * @table: the sysctl table
2569  * @write: %TRUE if this is a write to the sysctl file
2570  * @buffer: the user buffer
2571  * @lenp: the size of the user buffer
2572  * @ppos: pointer to the file position
2573  *
2574  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2575  * values from/to the user buffer, treated as an ASCII string. 
2576  * The values read are assumed to be in 1/USER_HZ seconds, and 
2577  * are converted into jiffies.
2578  *
2579  * Returns 0 on success.
2580  */
2581 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2582                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2583 {
2584     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2585                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2586 }
2587
2588 /**
2589  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2590  * @table: the sysctl table
2591  * @write: %TRUE if this is a write to the sysctl file
2592  * @buffer: the user buffer
2593  * @lenp: the size of the user buffer
2594  * @ppos: file position
2595  * @ppos: the current position in the file
2596  *
2597  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2598  * values from/to the user buffer, treated as an ASCII string. 
2599  * The values read are assumed to be in 1/1000 seconds, and 
2600  * are converted into jiffies.
2601  *
2602  * Returns 0 on success.
2603  */
2604 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2605                              void __user *buffer, size_t *lenp, loff_t *ppos)
2606 {
2607         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2608                                 do_proc_dointvec_ms_jiffies_conv, NULL);
2609 }
2610
2611 static int proc_do_cad_pid(struct ctl_table *table, int write,
2612                            void __user *buffer, size_t *lenp, loff_t *ppos)
2613 {
2614         struct pid *new_pid;
2615         pid_t tmp;
2616         int r;
2617
2618         tmp = pid_vnr(cad_pid);
2619
2620         r = __do_proc_dointvec(&tmp, table, write, buffer,
2621                                lenp, ppos, NULL, NULL);
2622         if (r || !write)
2623                 return r;
2624
2625         new_pid = find_get_pid(tmp);
2626         if (!new_pid)
2627                 return -ESRCH;
2628
2629         put_pid(xchg(&cad_pid, new_pid));
2630         return 0;
2631 }
2632
2633 #else /* CONFIG_PROC_FS */
2634
2635 int proc_dostring(struct ctl_table *table, int write,
2636                   void __user *buffer, size_t *lenp, loff_t *ppos)
2637 {
2638         return -ENOSYS;
2639 }
2640
2641 int proc_dointvec(struct ctl_table *table, int write,
2642                   void __user *buffer, size_t *lenp, loff_t *ppos)
2643 {
2644         return -ENOSYS;
2645 }
2646
2647 int proc_dointvec_minmax(struct ctl_table *table, int write,
2648                     void __user *buffer, size_t *lenp, loff_t *ppos)
2649 {
2650         return -ENOSYS;
2651 }
2652
2653 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2654                     void __user *buffer, size_t *lenp, loff_t *ppos)
2655 {
2656         return -ENOSYS;
2657 }
2658
2659 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2660                     void __user *buffer, size_t *lenp, loff_t *ppos)
2661 {
2662         return -ENOSYS;
2663 }
2664
2665 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2666                              void __user *buffer, size_t *lenp, loff_t *ppos)
2667 {
2668         return -ENOSYS;
2669 }
2670
2671 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2672                     void __user *buffer, size_t *lenp, loff_t *ppos)
2673 {
2674         return -ENOSYS;
2675 }
2676
2677 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2678                                       void __user *buffer,
2679                                       size_t *lenp, loff_t *ppos)
2680 {
2681     return -ENOSYS;
2682 }
2683
2684
2685 #endif /* CONFIG_PROC_FS */
2686
2687 /*
2688  * The proc_* handlers exist twice (CONFIG_PROC_FS and stub versions), so
2689  * the exports are gathered here once instead of after each definition.
2690  */
2691 EXPORT_SYMBOL(proc_dointvec);
2692 EXPORT_SYMBOL(proc_dointvec_jiffies);
2693 EXPORT_SYMBOL(proc_dointvec_minmax);
2694 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
2695 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
2696 EXPORT_SYMBOL(proc_dostring);
2697 EXPORT_SYMBOL(proc_doulongvec_minmax);
2698 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2699 EXPORT_SYMBOL(register_sysctl_table);
2700 EXPORT_SYMBOL(register_sysctl_paths);
2701 EXPORT_SYMBOL(unregister_sysctl_table);