blob: 9d9bcc928986a9854c6bf46f62b92314340bc539 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/kernel/sys.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
Paul Gortmaker9984de12011-05-23 14:51:41 -04007#include <linux/export.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -07008#include <linux/mm.h>
9#include <linux/utsname.h>
10#include <linux/mman.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070011#include <linux/reboot.h>
12#include <linux/prctl.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070013#include <linux/highuid.h>
14#include <linux/fs.h>
Paul Gortmaker74da1ff2011-05-26 12:48:41 -040015#include <linux/kmod.h>
Ingo Molnarcdd6c482009-09-21 12:02:48 +020016#include <linux/perf_event.h>
Daniel Walker3e88c552007-05-10 22:22:53 -070017#include <linux/resource.h>
Eric W. Biedermandc009d92005-06-25 14:57:52 -070018#include <linux/kernel.h>
19#include <linux/kexec.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <linux/workqueue.h>
Randy.Dunlapc59ede72006-01-11 12:17:46 -080021#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/device.h>
23#include <linux/key.h>
24#include <linux/times.h>
25#include <linux/posix-timers.h>
26#include <linux/security.h>
27#include <linux/dcookies.h>
28#include <linux/suspend.h>
29#include <linux/tty.h>
Jesper Juhl7ed20e12005-05-01 08:59:14 -070030#include <linux/signal.h>
Matt Helsley9f460802005-11-07 00:59:16 -080031#include <linux/cn_proc.h>
Andi Kleen3cfc3482006-09-26 10:52:28 +020032#include <linux/getcpu.h>
Eric Dumazet6eaeeab2007-05-10 22:22:37 -070033#include <linux/task_io_accounting_ops.h>
Andrea Arcangeli1d9d02f2007-07-15 23:41:32 -070034#include <linux/seccomp.h>
Mark Lord40477272007-10-01 01:20:10 -070035#include <linux/cpu.h>
Christoph Hellwige28cbf22010-03-10 15:21:19 -080036#include <linux/personality.h>
Paul Mackerrase3d5a272009-01-06 14:41:02 -080037#include <linux/ptrace.h>
Al Viro5ad4e532009-03-29 19:50:06 -040038#include <linux/fs_struct.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090039#include <linux/gfp.h>
Rafael J. Wysocki40dc1662011-03-15 00:43:46 +010040#include <linux/syscore_ops.h>
Andi Kleenbe274252011-08-19 16:15:10 -070041#include <linux/version.h>
42#include <linux/ctype.h>
Colin Cross8ad62c22013-06-26 17:26:01 -070043#include <linux/mm.h>
44#include <linux/mempolicy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
46#include <linux/compat.h>
47#include <linux/syscalls.h>
Keshavamurthy Anil S00d7c052005-12-12 00:37:33 -080048#include <linux/kprobes.h>
Cedric Le Goateracce2922007-07-15 23:40:59 -070049#include <linux/user_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
Seiji Aguchi04c68622011-01-12 16:59:30 -080051#include <linux/kmsg_dump.h>
Andi Kleenbe274252011-08-19 16:15:10 -070052/* Move somewhere else to avoid recompiling? */
53#include <generated/utsrelease.h>
Seiji Aguchi04c68622011-01-12 16:59:30 -080054
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <asm/uaccess.h>
56#include <asm/io.h>
57#include <asm/unistd.h>
58
/*
 * Fallback definitions for the control hooks below: any hook that has not
 * been defined by this point expands to an expression yielding -EINVAL.
 */
#if !defined(SET_UNALIGN_CTL)
# define SET_UNALIGN_CTL(a, b)	(-EINVAL)
#endif
#if !defined(GET_UNALIGN_CTL)
# define GET_UNALIGN_CTL(a, b)	(-EINVAL)
#endif
#if !defined(SET_FPEMU_CTL)
# define SET_FPEMU_CTL(a, b)	(-EINVAL)
#endif
#if !defined(GET_FPEMU_CTL)
# define GET_FPEMU_CTL(a, b)	(-EINVAL)
#endif
#if !defined(SET_FPEXC_CTL)
# define SET_FPEXC_CTL(a, b)	(-EINVAL)
#endif
#if !defined(GET_FPEXC_CTL)
# define GET_FPEXC_CTL(a, b)	(-EINVAL)
#endif
#if !defined(GET_ENDIAN)
# define GET_ENDIAN(a, b)	(-EINVAL)
#endif
#if !defined(SET_ENDIAN)
# define SET_ENDIAN(a, b)	(-EINVAL)
#endif
#if !defined(GET_TSC_CTL)
# define GET_TSC_CTL(a)		(-EINVAL)
#endif
#if !defined(SET_TSC_CTL)
# define SET_TSC_CTL(a)		(-EINVAL)
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -070089
90/*
91 * this is where the system-wide overflow UID and GID are defined, for
92 * architectures that now have 32-bit UID/GID but didn't in the past
93 */
94
95int overflowuid = DEFAULT_OVERFLOWUID;
96int overflowgid = DEFAULT_OVERFLOWGID;
97
Linus Torvalds1da177e2005-04-16 15:20:36 -070098EXPORT_SYMBOL(overflowuid);
99EXPORT_SYMBOL(overflowgid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100
101/*
102 * the same as above, but for filesystems which can only store a 16-bit
103 * UID and GID. as such, this is needed on all architectures
104 */
105
106int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
107int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
108
109EXPORT_SYMBOL(fs_overflowuid);
110EXPORT_SYMBOL(fs_overflowgid);
111
/*
 * Non-zero when ctrl-alt-del is allowed to reboot the machine.
 * Enabled by default.
 */
int C_A_D = 1;

struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);

/*
 * Optional hook: when set, it is used for preparing the system to
 * power off.
 */
void (*pm_power_off_prepare)(void);
Rafael J. Wysockibd804eb2007-07-19 01:47:40 -0700125
David Howellsc69e8d92008-11-14 10:39:19 +1100126/*
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700127 * Returns true if current's euid is same as p's uid or euid,
128 * or has CAP_SYS_NICE to p's user_ns.
129 *
130 * Called with rcu_read_lock, creds are safe
131 */
132static bool set_one_prio_perm(struct task_struct *p)
133{
134 const struct cred *cred = current_cred(), *pcred = __task_cred(p);
135
Eric W. Biederman6f2c0a92011-11-16 23:15:31 -0800136 if (pcred->user_ns == cred->user_ns &&
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700137 (pcred->uid == cred->euid ||
138 pcred->euid == cred->euid))
139 return true;
Eric W. Biederman6f2c0a92011-11-16 23:15:31 -0800140 if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700141 return true;
142 return false;
143}
144
145/*
David Howellsc69e8d92008-11-14 10:39:19 +1100146 * set the priority of a task
147 * - the caller must hold the RCU read lock
148 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149static int set_one_prio(struct task_struct *p, int niceval, int error)
150{
151 int no_nice;
152
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700153 if (!set_one_prio_perm(p)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154 error = -EPERM;
155 goto out;
156 }
Matt Mackalle43379f2005-05-01 08:59:00 -0700157 if (niceval < task_nice(p) && !can_nice(p, niceval)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158 error = -EACCES;
159 goto out;
160 }
161 no_nice = security_task_setnice(p, niceval);
162 if (no_nice) {
163 error = no_nice;
164 goto out;
165 }
166 if (error == -ESRCH)
167 error = 0;
168 set_user_nice(p, niceval);
169out:
170 return error;
171}
172
Heiko Carstens754fe8d2009-01-14 14:14:09 +0100173SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174{
175 struct task_struct *g, *p;
176 struct user_struct *user;
David Howells86a264a2008-11-14 10:39:18 +1100177 const struct cred *cred = current_cred();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 int error = -EINVAL;
Eric W. Biederman41487c62007-02-12 00:53:01 -0800179 struct pid *pgrp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180
Daniel Walker3e88c552007-05-10 22:22:53 -0700181 if (which > PRIO_USER || which < PRIO_PROCESS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 goto out;
183
184 /* normalize: avoid signed division (rounding problems) */
185 error = -ESRCH;
186 if (niceval < -20)
187 niceval = -20;
188 if (niceval > 19)
189 niceval = 19;
190
Thomas Gleixnerd4581a22009-12-10 00:52:51 +0000191 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192 read_lock(&tasklist_lock);
193 switch (which) {
194 case PRIO_PROCESS:
Eric W. Biederman41487c62007-02-12 00:53:01 -0800195 if (who)
Pavel Emelyanov228ebcb2007-10-18 23:40:16 -0700196 p = find_task_by_vpid(who);
Eric W. Biederman41487c62007-02-12 00:53:01 -0800197 else
198 p = current;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 if (p)
200 error = set_one_prio(p, niceval, error);
201 break;
202 case PRIO_PGRP:
Eric W. Biederman41487c62007-02-12 00:53:01 -0800203 if (who)
Pavel Emelyanovb4888932007-10-18 23:40:14 -0700204 pgrp = find_vpid(who);
Eric W. Biederman41487c62007-02-12 00:53:01 -0800205 else
206 pgrp = task_pgrp(current);
Ken Chen2d70b682008-08-20 14:09:17 -0700207 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 error = set_one_prio(p, niceval, error);
Ken Chen2d70b682008-08-20 14:09:17 -0700209 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210 break;
211 case PRIO_USER:
David Howellsd84f4f92008-11-14 10:39:23 +1100212 user = (struct user_struct *) cred->user;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 if (!who)
David Howells86a264a2008-11-14 10:39:18 +1100214 who = cred->uid;
215 else if ((who != cred->uid) &&
216 !(user = find_user(who)))
217 goto out_unlock; /* No processes for this user */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218
H Hartley Sweetendfc6a732009-12-14 18:00:22 -0800219 do_each_thread(g, p) {
David Howells86a264a2008-11-14 10:39:18 +1100220 if (__task_cred(p)->uid == who)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221 error = set_one_prio(p, niceval, error);
H Hartley Sweetendfc6a732009-12-14 18:00:22 -0800222 } while_each_thread(g, p);
David Howells86a264a2008-11-14 10:39:18 +1100223 if (who != cred->uid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224 free_uid(user); /* For find_user() */
225 break;
226 }
227out_unlock:
228 read_unlock(&tasklist_lock);
Thomas Gleixnerd4581a22009-12-10 00:52:51 +0000229 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230out:
231 return error;
232}
233
234/*
235 * Ugh. To avoid negative return values, "getpriority()" will
236 * not return the normal nice-value, but a negated value that
237 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
238 * to stay compatible.
239 */
Heiko Carstens754fe8d2009-01-14 14:14:09 +0100240SYSCALL_DEFINE2(getpriority, int, which, int, who)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241{
242 struct task_struct *g, *p;
243 struct user_struct *user;
David Howells86a264a2008-11-14 10:39:18 +1100244 const struct cred *cred = current_cred();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 long niceval, retval = -ESRCH;
Eric W. Biederman41487c62007-02-12 00:53:01 -0800246 struct pid *pgrp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247
Daniel Walker3e88c552007-05-10 22:22:53 -0700248 if (which > PRIO_USER || which < PRIO_PROCESS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 return -EINVAL;
250
Tetsuo Handa70118832010-02-22 12:44:16 -0800251 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 read_lock(&tasklist_lock);
253 switch (which) {
254 case PRIO_PROCESS:
Eric W. Biederman41487c62007-02-12 00:53:01 -0800255 if (who)
Pavel Emelyanov228ebcb2007-10-18 23:40:16 -0700256 p = find_task_by_vpid(who);
Eric W. Biederman41487c62007-02-12 00:53:01 -0800257 else
258 p = current;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 if (p) {
260 niceval = 20 - task_nice(p);
261 if (niceval > retval)
262 retval = niceval;
263 }
264 break;
265 case PRIO_PGRP:
Eric W. Biederman41487c62007-02-12 00:53:01 -0800266 if (who)
Pavel Emelyanovb4888932007-10-18 23:40:14 -0700267 pgrp = find_vpid(who);
Eric W. Biederman41487c62007-02-12 00:53:01 -0800268 else
269 pgrp = task_pgrp(current);
Ken Chen2d70b682008-08-20 14:09:17 -0700270 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 niceval = 20 - task_nice(p);
272 if (niceval > retval)
273 retval = niceval;
Ken Chen2d70b682008-08-20 14:09:17 -0700274 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 break;
276 case PRIO_USER:
David Howells86a264a2008-11-14 10:39:18 +1100277 user = (struct user_struct *) cred->user;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 if (!who)
David Howells86a264a2008-11-14 10:39:18 +1100279 who = cred->uid;
280 else if ((who != cred->uid) &&
281 !(user = find_user(who)))
282 goto out_unlock; /* No processes for this user */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283
H Hartley Sweetendfc6a732009-12-14 18:00:22 -0800284 do_each_thread(g, p) {
David Howells86a264a2008-11-14 10:39:18 +1100285 if (__task_cred(p)->uid == who) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 niceval = 20 - task_nice(p);
287 if (niceval > retval)
288 retval = niceval;
289 }
H Hartley Sweetendfc6a732009-12-14 18:00:22 -0800290 } while_each_thread(g, p);
David Howells86a264a2008-11-14 10:39:18 +1100291 if (who != cred->uid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 free_uid(user); /* for find_user() */
293 break;
294 }
295out_unlock:
296 read_unlock(&tasklist_lock);
Tetsuo Handa70118832010-02-22 12:44:16 -0800297 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298
299 return retval;
300}
301
Eric W. Biedermane4c94332005-09-22 21:43:45 -0700302/**
303 * emergency_restart - reboot the system
304 *
305 * Without shutting down any hardware or taking any locks
306 * reboot the system. This is called when we know we are in
307 * trouble so this is our best effort to reboot. This is
308 * safe to call in interrupt context.
309 */
Eric W. Biederman7c903472005-07-26 11:29:55 -0600310void emergency_restart(void)
311{
Seiji Aguchi04c68622011-01-12 16:59:30 -0800312 kmsg_dump(KMSG_DUMP_EMERG);
Eric W. Biederman7c903472005-07-26 11:29:55 -0600313 machine_emergency_restart();
314}
315EXPORT_SYMBOL_GPL(emergency_restart);
316
Huang Yingca195b72008-08-15 00:40:24 -0700317void kernel_restart_prepare(char *cmd)
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600318{
Alan Sterne041c682006-03-27 01:16:30 -0800319 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600320 system_state = SYSTEM_RESTART;
Kay Sieversb50fa7c2011-05-05 13:32:05 +0200321 usermodehelper_disable();
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600322 device_shutdown();
Eric W. Biedermane4c94332005-09-22 21:43:45 -0700323}
Randy Dunlap1e5d5332005-11-07 01:01:06 -0800324
325/**
Amerigo Wangc5f41752011-07-25 17:13:10 -0700326 * register_reboot_notifier - Register function to be called at reboot time
327 * @nb: Info about notifier function to be called
328 *
329 * Registers a function with the list of functions
330 * to be called at reboot time.
331 *
332 * Currently always returns zero, as blocking_notifier_chain_register()
333 * always returns zero.
334 */
335int register_reboot_notifier(struct notifier_block *nb)
336{
337 return blocking_notifier_chain_register(&reboot_notifier_list, nb);
338}
339EXPORT_SYMBOL(register_reboot_notifier);
340
341/**
342 * unregister_reboot_notifier - Unregister previously registered reboot notifier
343 * @nb: Hook to be unregistered
344 *
345 * Unregisters a previously registered reboot
346 * notifier function.
347 *
348 * Returns zero on success, or %-ENOENT on failure.
349 */
350int unregister_reboot_notifier(struct notifier_block *nb)
351{
352 return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
353}
354EXPORT_SYMBOL(unregister_reboot_notifier);
355
Robin Holtfc1cbc72013-06-12 14:04:37 -0700356/* Add backwards compatibility for stable trees. */
357#ifndef PF_NO_SETAFFINITY
358#define PF_NO_SETAFFINITY PF_THREAD_BOUND
359#endif
360
361static void migrate_to_reboot_cpu(void)
362{
363 /* The boot cpu is always logical cpu 0 */
364 int cpu = 0;
365
366 cpu_hotplug_disable();
367
368 /* Make certain the cpu I'm about to reboot on is online */
369 if (!cpu_online(cpu))
370 cpu = cpumask_first(cpu_online_mask);
371
372 /* Prevent races with other tasks migrating this task */
373 current->flags |= PF_NO_SETAFFINITY;
374
375 /* Make certain I only run on the appropriate processor */
376 set_cpus_allowed_ptr(current, cpumask_of(cpu));
377}
378
Amerigo Wangc5f41752011-07-25 17:13:10 -0700379/**
Randy Dunlap1e5d5332005-11-07 01:01:06 -0800380 * kernel_restart - reboot the system
381 * @cmd: pointer to buffer containing command to execute for restart
Randy Dunlapb8887e62005-11-07 01:01:07 -0800382 * or %NULL
Randy Dunlap1e5d5332005-11-07 01:01:06 -0800383 *
384 * Shutdown everything and perform a clean reboot.
385 * This is not safe to call in interrupt context.
386 */
Eric W. Biedermane4c94332005-09-22 21:43:45 -0700387void kernel_restart(char *cmd)
388{
389 kernel_restart_prepare(cmd);
Robin Holtfc1cbc72013-06-12 14:04:37 -0700390 migrate_to_reboot_cpu();
Huacai Chene3573b22013-04-07 02:14:14 +0000391 syscore_shutdown();
Cal Peake756184b2006-09-30 23:27:24 -0700392 if (!cmd)
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600393 printk(KERN_EMERG "Restarting system.\n");
Cal Peake756184b2006-09-30 23:27:24 -0700394 else
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600395 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
Seiji Aguchi04c68622011-01-12 16:59:30 -0800396 kmsg_dump(KMSG_DUMP_RESTART);
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600397 machine_restart(cmd);
398}
399EXPORT_SYMBOL_GPL(kernel_restart);
400
Adrian Bunk4ef72292008-02-04 22:30:06 -0800401static void kernel_shutdown_prepare(enum system_states state)
Alexey Starikovskiy729b4d42005-12-01 04:29:00 -0500402{
Alan Sterne041c682006-03-27 01:16:30 -0800403 blocking_notifier_call_chain(&reboot_notifier_list,
Alexey Starikovskiy729b4d42005-12-01 04:29:00 -0500404 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
405 system_state = state;
Kay Sieversb50fa7c2011-05-05 13:32:05 +0200406 usermodehelper_disable();
Alexey Starikovskiy729b4d42005-12-01 04:29:00 -0500407 device_shutdown();
408}
Eric W. Biedermane4c94332005-09-22 21:43:45 -0700409/**
410 * kernel_halt - halt the system
411 *
412 * Shutdown everything and perform a clean system halt.
413 */
Eric W. Biedermane4c94332005-09-22 21:43:45 -0700414void kernel_halt(void)
415{
Alexey Starikovskiy729b4d42005-12-01 04:29:00 -0500416 kernel_shutdown_prepare(SYSTEM_HALT);
Robin Holtfc1cbc72013-06-12 14:04:37 -0700417 migrate_to_reboot_cpu();
Rafael J. Wysocki40dc1662011-03-15 00:43:46 +0100418 syscore_shutdown();
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600419 printk(KERN_EMERG "System halted.\n");
Seiji Aguchi04c68622011-01-12 16:59:30 -0800420 kmsg_dump(KMSG_DUMP_HALT);
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600421 machine_halt();
422}
Alexey Starikovskiy729b4d42005-12-01 04:29:00 -0500423
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600424EXPORT_SYMBOL_GPL(kernel_halt);
425
Eric W. Biedermane4c94332005-09-22 21:43:45 -0700426/**
427 * kernel_power_off - power_off the system
428 *
429 * Shutdown everything and perform a clean system power_off.
430 */
Eric W. Biedermane4c94332005-09-22 21:43:45 -0700431void kernel_power_off(void)
432{
Alexey Starikovskiy729b4d42005-12-01 04:29:00 -0500433 kernel_shutdown_prepare(SYSTEM_POWER_OFF);
Rafael J. Wysockibd804eb2007-07-19 01:47:40 -0700434 if (pm_power_off_prepare)
435 pm_power_off_prepare();
Robin Holtfc1cbc72013-06-12 14:04:37 -0700436 migrate_to_reboot_cpu();
Rafael J. Wysocki40dc1662011-03-15 00:43:46 +0100437 syscore_shutdown();
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600438 printk(KERN_EMERG "Power down.\n");
Seiji Aguchi04c68622011-01-12 16:59:30 -0800439 kmsg_dump(KMSG_DUMP_POWEROFF);
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600440 machine_power_off();
441}
442EXPORT_SYMBOL_GPL(kernel_power_off);
Thomas Gleixner6f15fa52009-10-09 20:31:33 +0200443
444static DEFINE_MUTEX(reboot_mutex);
445
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446/*
447 * Reboot system call: for obvious reasons only root may call it,
448 * and even root needs to set up some magic numbers in the registers
449 * so that some mistake won't make this reboot the whole machine.
450 * You can also set the meaning of the ctrl-alt-del-key here.
451 *
452 * reboot doesn't sync: do that yourself before calling this.
453 */
Heiko Carstens754fe8d2009-01-14 14:14:09 +0100454SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
455 void __user *, arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456{
457 char buffer[256];
Andi Kleen3d26dcf2009-04-13 14:40:08 -0700458 int ret = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459
460 /* We only trust the superuser with rebooting the system. */
461 if (!capable(CAP_SYS_BOOT))
462 return -EPERM;
463
464 /* For safety, we require "magic" arguments. */
465 if (magic1 != LINUX_REBOOT_MAGIC1 ||
466 (magic2 != LINUX_REBOOT_MAGIC2 &&
467 magic2 != LINUX_REBOOT_MAGIC2A &&
468 magic2 != LINUX_REBOOT_MAGIC2B &&
469 magic2 != LINUX_REBOOT_MAGIC2C))
470 return -EINVAL;
471
Daniel Lezcanocf3f8922012-03-28 14:42:51 -0700472 /*
473 * If pid namespaces are enabled and the current task is in a child
474 * pid_namespace, the command is handled by reboot_pid_ns() which will
475 * call do_exit().
476 */
477 ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
478 if (ret)
479 return ret;
480
Eric W. Biederman5e382912006-01-08 01:03:46 -0800481 /* Instead of trying to make the power_off code look like
482 * halt when pm_power_off is not set do it the easy way.
483 */
484 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
485 cmd = LINUX_REBOOT_CMD_HALT;
486
Thomas Gleixner6f15fa52009-10-09 20:31:33 +0200487 mutex_lock(&reboot_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488 switch (cmd) {
489 case LINUX_REBOOT_CMD_RESTART:
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600490 kernel_restart(NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 break;
492
493 case LINUX_REBOOT_CMD_CAD_ON:
494 C_A_D = 1;
495 break;
496
497 case LINUX_REBOOT_CMD_CAD_OFF:
498 C_A_D = 0;
499 break;
500
501 case LINUX_REBOOT_CMD_HALT:
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600502 kernel_halt();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 do_exit(0);
Andi Kleen3d26dcf2009-04-13 14:40:08 -0700504 panic("cannot halt");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505
506 case LINUX_REBOOT_CMD_POWER_OFF:
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600507 kernel_power_off();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 do_exit(0);
509 break;
510
511 case LINUX_REBOOT_CMD_RESTART2:
512 if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
Thomas Gleixner6f15fa52009-10-09 20:31:33 +0200513 ret = -EFAULT;
514 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 }
516 buffer[sizeof(buffer) - 1] = '\0';
517
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600518 kernel_restart(buffer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 break;
520
Huang Ying3ab83522008-07-25 19:45:07 -0700521#ifdef CONFIG_KEXEC
Eric W. Biedermandc009d92005-06-25 14:57:52 -0700522 case LINUX_REBOOT_CMD_KEXEC:
Andi Kleen3d26dcf2009-04-13 14:40:08 -0700523 ret = kernel_kexec();
524 break;
Huang Ying3ab83522008-07-25 19:45:07 -0700525#endif
Eric W. Biederman4a00ea12005-07-26 11:24:14 -0600526
Rafael J. Wysockib0cb1a12007-07-29 23:24:36 +0200527#ifdef CONFIG_HIBERNATION
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 case LINUX_REBOOT_CMD_SW_SUSPEND:
Andi Kleen3d26dcf2009-04-13 14:40:08 -0700529 ret = hibernate();
530 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531#endif
532
533 default:
Andi Kleen3d26dcf2009-04-13 14:40:08 -0700534 ret = -EINVAL;
535 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 }
Thomas Gleixner6f15fa52009-10-09 20:31:33 +0200537 mutex_unlock(&reboot_mutex);
Andi Kleen3d26dcf2009-04-13 14:40:08 -0700538 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539}
540
David Howells65f27f32006-11-22 14:55:48 +0000541static void deferred_cad(struct work_struct *dummy)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542{
Eric W. Biedermanabcd9e52005-07-26 11:27:34 -0600543 kernel_restart(NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544}
545
546/*
547 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
548 * As it's called within an interrupt, it may NOT sync: the only choice
549 * is whether to reboot at once, or just ignore the ctrl-alt-del.
550 */
551void ctrl_alt_del(void)
552{
David Howells65f27f32006-11-22 14:55:48 +0000553 static DECLARE_WORK(cad_work, deferred_cad);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554
555 if (C_A_D)
556 schedule_work(&cad_work);
557 else
Cedric Le Goater9ec52092006-10-02 02:19:00 -0700558 kill_cad_pid(SIGINT, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559}
560
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561/*
562 * Unprivileged users may change the real gid to the effective gid
563 * or vice versa. (BSD-style)
564 *
565 * If you set the real gid at all, or set the effective gid to a value not
566 * equal to the real gid, then the saved gid is set to the new effective gid.
567 *
568 * This makes it possible for a setgid program to completely drop its
569 * privileges, which is often a useful assertion to make when you are doing
570 * a security audit over a program.
571 *
572 * The general idea is that a program which uses just setregid() will be
573 * 100% compatible with BSD. A program which uses just setgid() will be
574 * 100% compatible with POSIX with saved IDs.
575 *
576 * SMP: There are not races, the GIDs are checked only by filesystem
577 * operations (as far as semantic preservation is concerned).
578 */
Heiko Carstensae1251a2009-01-14 14:14:05 +0100579SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580{
David Howellsd84f4f92008-11-14 10:39:23 +1100581 const struct cred *old;
582 struct cred *new;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 int retval;
584
David Howellsd84f4f92008-11-14 10:39:23 +1100585 new = prepare_creds();
586 if (!new)
587 return -ENOMEM;
588 old = current_cred();
589
David Howellsd84f4f92008-11-14 10:39:23 +1100590 retval = -EPERM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 if (rgid != (gid_t) -1) {
David Howellsd84f4f92008-11-14 10:39:23 +1100592 if (old->gid == rgid ||
593 old->egid == rgid ||
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700594 nsown_capable(CAP_SETGID))
David Howellsd84f4f92008-11-14 10:39:23 +1100595 new->gid = rgid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 else
David Howellsd84f4f92008-11-14 10:39:23 +1100597 goto error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 }
599 if (egid != (gid_t) -1) {
David Howellsd84f4f92008-11-14 10:39:23 +1100600 if (old->gid == egid ||
601 old->egid == egid ||
602 old->sgid == egid ||
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700603 nsown_capable(CAP_SETGID))
David Howellsd84f4f92008-11-14 10:39:23 +1100604 new->egid = egid;
Cal Peake756184b2006-09-30 23:27:24 -0700605 else
David Howellsd84f4f92008-11-14 10:39:23 +1100606 goto error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607 }
David Howellsd84f4f92008-11-14 10:39:23 +1100608
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 if (rgid != (gid_t) -1 ||
David Howellsd84f4f92008-11-14 10:39:23 +1100610 (egid != (gid_t) -1 && egid != old->gid))
611 new->sgid = new->egid;
612 new->fsgid = new->egid;
613
614 return commit_creds(new);
615
616error:
617 abort_creds(new);
618 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619}
620
621/*
622 * setgid() is implemented like SysV w/ SAVED_IDS
623 *
624 * SMP: Same implicit races as above.
625 */
Heiko Carstensae1251a2009-01-14 14:14:05 +0100626SYSCALL_DEFINE1(setgid, gid_t, gid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627{
David Howellsd84f4f92008-11-14 10:39:23 +1100628 const struct cred *old;
629 struct cred *new;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 int retval;
631
David Howellsd84f4f92008-11-14 10:39:23 +1100632 new = prepare_creds();
633 if (!new)
634 return -ENOMEM;
635 old = current_cred();
636
David Howellsd84f4f92008-11-14 10:39:23 +1100637 retval = -EPERM;
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700638 if (nsown_capable(CAP_SETGID))
David Howellsd84f4f92008-11-14 10:39:23 +1100639 new->gid = new->egid = new->sgid = new->fsgid = gid;
640 else if (gid == old->gid || gid == old->sgid)
641 new->egid = new->fsgid = gid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 else
David Howellsd84f4f92008-11-14 10:39:23 +1100643 goto error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644
David Howellsd84f4f92008-11-14 10:39:23 +1100645 return commit_creds(new);
646
647error:
648 abort_creds(new);
649 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650}
Dhaval Giani54e99122009-02-27 15:13:54 +0530651
David Howellsd84f4f92008-11-14 10:39:23 +1100652/*
653 * change the user struct in a credentials set to match the new UID
654 */
655static int set_user(struct cred *new)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656{
657 struct user_struct *new_user;
658
Serge Hallyn18b6e042008-10-15 16:38:45 -0500659 new_user = alloc_uid(current_user_ns(), new->uid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 if (!new_user)
661 return -EAGAIN;
662
Vasiliy Kulikov72fa5992011-08-08 19:02:04 +0400663 /*
664 * We don't fail in case of NPROC limit excess here because too many
665 * poorly written programs don't check set*uid() return code, assuming
666 * it never fails if called by root. We may still enforce NPROC limit
667 * for programs doing set*uid()+execve() by harmlessly deferring the
668 * failure to the execve() stage.
669 */
Jiri Slaby78d7d402010-03-05 13:42:54 -0800670 if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
Vasiliy Kulikov72fa5992011-08-08 19:02:04 +0400671 new_user != INIT_USER)
672 current->flags |= PF_NPROC_EXCEEDED;
673 else
674 current->flags &= ~PF_NPROC_EXCEEDED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675
David Howellsd84f4f92008-11-14 10:39:23 +1100676 free_uid(new->user);
677 new->user = new_user;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678 return 0;
679}
680
681/*
682 * Unprivileged users may change the real uid to the effective uid
683 * or vice versa. (BSD-style)
684 *
685 * If you set the real uid at all, or set the effective uid to a value not
686 * equal to the real uid, then the saved uid is set to the new effective uid.
687 *
688 * This makes it possible for a setuid program to completely drop its
689 * privileges, which is often a useful assertion to make when you are doing
690 * a security audit over a program.
691 *
692 * The general idea is that a program which uses just setreuid() will be
693 * 100% compatible with BSD. A program which uses just setuid() will be
694 * 100% compatible with POSIX with saved IDs.
695 */
Heiko Carstensae1251a2009-01-14 14:14:05 +0100696SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697{
David Howellsd84f4f92008-11-14 10:39:23 +1100698 const struct cred *old;
699 struct cred *new;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700 int retval;
701
David Howellsd84f4f92008-11-14 10:39:23 +1100702 new = prepare_creds();
703 if (!new)
704 return -ENOMEM;
705 old = current_cred();
706
David Howellsd84f4f92008-11-14 10:39:23 +1100707 retval = -EPERM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 if (ruid != (uid_t) -1) {
David Howellsd84f4f92008-11-14 10:39:23 +1100709 new->uid = ruid;
710 if (old->uid != ruid &&
711 old->euid != ruid &&
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700712 !nsown_capable(CAP_SETUID))
David Howellsd84f4f92008-11-14 10:39:23 +1100713 goto error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 }
715
716 if (euid != (uid_t) -1) {
David Howellsd84f4f92008-11-14 10:39:23 +1100717 new->euid = euid;
718 if (old->uid != euid &&
719 old->euid != euid &&
720 old->suid != euid &&
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700721 !nsown_capable(CAP_SETUID))
David Howellsd84f4f92008-11-14 10:39:23 +1100722 goto error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 }
724
Dhaval Giani54e99122009-02-27 15:13:54 +0530725 if (new->uid != old->uid) {
726 retval = set_user(new);
727 if (retval < 0)
728 goto error;
729 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 if (ruid != (uid_t) -1 ||
David Howellsd84f4f92008-11-14 10:39:23 +1100731 (euid != (uid_t) -1 && euid != old->uid))
732 new->suid = new->euid;
733 new->fsuid = new->euid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734
David Howellsd84f4f92008-11-14 10:39:23 +1100735 retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
736 if (retval < 0)
737 goto error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738
David Howellsd84f4f92008-11-14 10:39:23 +1100739 return commit_creds(new);
740
741error:
742 abort_creds(new);
743 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745
746/*
747 * setuid() is implemented like SysV with SAVED_IDS
748 *
749 * Note that SAVED_ID's is deficient in that a setuid root program
750 * like sendmail, for example, cannot set its uid to be a normal
751 * user and then switch back, because if you're root, setuid() sets
752 * the saved uid too. If you don't like this, blame the bright people
753 * in the POSIX committee and/or USG. Note that the BSD-style setreuid()
754 * will allow a root program to temporarily drop privileges and be able to
755 * regain them by swapping the real and effective uid.
756 */
Heiko Carstensae1251a2009-01-14 14:14:05 +0100757SYSCALL_DEFINE1(setuid, uid_t, uid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758{
David Howellsd84f4f92008-11-14 10:39:23 +1100759 const struct cred *old;
760 struct cred *new;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 int retval;
762
David Howellsd84f4f92008-11-14 10:39:23 +1100763 new = prepare_creds();
764 if (!new)
765 return -ENOMEM;
766 old = current_cred();
767
David Howellsd84f4f92008-11-14 10:39:23 +1100768 retval = -EPERM;
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700769 if (nsown_capable(CAP_SETUID)) {
David Howellsd84f4f92008-11-14 10:39:23 +1100770 new->suid = new->uid = uid;
Dhaval Giani54e99122009-02-27 15:13:54 +0530771 if (uid != old->uid) {
772 retval = set_user(new);
773 if (retval < 0)
774 goto error;
David Howellsd84f4f92008-11-14 10:39:23 +1100775 }
776 } else if (uid != old->uid && uid != new->suid) {
777 goto error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779
David Howellsd84f4f92008-11-14 10:39:23 +1100780 new->fsuid = new->euid = uid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781
David Howellsd84f4f92008-11-14 10:39:23 +1100782 retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
783 if (retval < 0)
784 goto error;
785
786 return commit_creds(new);
787
788error:
789 abort_creds(new);
790 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791}
792
793
794/*
795 * This function implements a generic ability to update ruid, euid,
796 * and suid. This allows you to implement the 4.4 compatible seteuid().
797 */
Heiko Carstensae1251a2009-01-14 14:14:05 +0100798SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799{
David Howellsd84f4f92008-11-14 10:39:23 +1100800 const struct cred *old;
801 struct cred *new;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 int retval;
803
David Howellsd84f4f92008-11-14 10:39:23 +1100804 new = prepare_creds();
805 if (!new)
806 return -ENOMEM;
807
David Howellsd84f4f92008-11-14 10:39:23 +1100808 old = current_cred();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809
David Howellsd84f4f92008-11-14 10:39:23 +1100810 retval = -EPERM;
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700811 if (!nsown_capable(CAP_SETUID)) {
David Howellsd84f4f92008-11-14 10:39:23 +1100812 if (ruid != (uid_t) -1 && ruid != old->uid &&
813 ruid != old->euid && ruid != old->suid)
814 goto error;
815 if (euid != (uid_t) -1 && euid != old->uid &&
816 euid != old->euid && euid != old->suid)
817 goto error;
818 if (suid != (uid_t) -1 && suid != old->uid &&
819 suid != old->euid && suid != old->suid)
820 goto error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821 }
David Howellsd84f4f92008-11-14 10:39:23 +1100822
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 if (ruid != (uid_t) -1) {
David Howellsd84f4f92008-11-14 10:39:23 +1100824 new->uid = ruid;
Dhaval Giani54e99122009-02-27 15:13:54 +0530825 if (ruid != old->uid) {
826 retval = set_user(new);
827 if (retval < 0)
828 goto error;
829 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 }
David Howellsd84f4f92008-11-14 10:39:23 +1100831 if (euid != (uid_t) -1)
832 new->euid = euid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 if (suid != (uid_t) -1)
David Howellsd84f4f92008-11-14 10:39:23 +1100834 new->suid = suid;
835 new->fsuid = new->euid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836
David Howellsd84f4f92008-11-14 10:39:23 +1100837 retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
838 if (retval < 0)
839 goto error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840
David Howellsd84f4f92008-11-14 10:39:23 +1100841 return commit_creds(new);
842
843error:
844 abort_creds(new);
845 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846}
847
Heiko Carstensdbf040d2009-01-14 14:14:04 +0100848SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849{
David Howells86a264a2008-11-14 10:39:18 +1100850 const struct cred *cred = current_cred();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 int retval;
852
David Howells86a264a2008-11-14 10:39:18 +1100853 if (!(retval = put_user(cred->uid, ruid)) &&
854 !(retval = put_user(cred->euid, euid)))
David Howellsb6dff3e2008-11-14 10:39:16 +1100855 retval = put_user(cred->suid, suid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856
857 return retval;
858}
859
860/*
861 * Same as above, but for rgid, egid, sgid.
862 */
Heiko Carstensae1251a2009-01-14 14:14:05 +0100863SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864{
David Howellsd84f4f92008-11-14 10:39:23 +1100865 const struct cred *old;
866 struct cred *new;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 int retval;
868
David Howellsd84f4f92008-11-14 10:39:23 +1100869 new = prepare_creds();
870 if (!new)
871 return -ENOMEM;
872 old = current_cred();
873
David Howellsd84f4f92008-11-14 10:39:23 +1100874 retval = -EPERM;
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700875 if (!nsown_capable(CAP_SETGID)) {
David Howellsd84f4f92008-11-14 10:39:23 +1100876 if (rgid != (gid_t) -1 && rgid != old->gid &&
877 rgid != old->egid && rgid != old->sgid)
878 goto error;
879 if (egid != (gid_t) -1 && egid != old->gid &&
880 egid != old->egid && egid != old->sgid)
881 goto error;
882 if (sgid != (gid_t) -1 && sgid != old->gid &&
883 sgid != old->egid && sgid != old->sgid)
884 goto error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886
David Howellsd84f4f92008-11-14 10:39:23 +1100887 if (rgid != (gid_t) -1)
888 new->gid = rgid;
889 if (egid != (gid_t) -1)
890 new->egid = egid;
891 if (sgid != (gid_t) -1)
892 new->sgid = sgid;
893 new->fsgid = new->egid;
894
895 return commit_creds(new);
896
897error:
898 abort_creds(new);
899 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900}
901
Heiko Carstensdbf040d2009-01-14 14:14:04 +0100902SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903{
David Howells86a264a2008-11-14 10:39:18 +1100904 const struct cred *cred = current_cred();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 int retval;
906
David Howells86a264a2008-11-14 10:39:18 +1100907 if (!(retval = put_user(cred->gid, rgid)) &&
908 !(retval = put_user(cred->egid, egid)))
David Howellsb6dff3e2008-11-14 10:39:16 +1100909 retval = put_user(cred->sgid, sgid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910
911 return retval;
912}
913
914
915/*
916 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
917 * is used for "access()" and for the NFS daemon (letting nfsd stay at
918 * whatever uid it wants to). It normally shadows "euid", except when
919 * explicitly set by setfsuid() or for access..
920 */
Heiko Carstensae1251a2009-01-14 14:14:05 +0100921SYSCALL_DEFINE1(setfsuid, uid_t, uid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922{
David Howellsd84f4f92008-11-14 10:39:23 +1100923 const struct cred *old;
924 struct cred *new;
925 uid_t old_fsuid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926
David Howellsd84f4f92008-11-14 10:39:23 +1100927 new = prepare_creds();
928 if (!new)
929 return current_fsuid();
930 old = current_cred();
931 old_fsuid = old->fsuid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932
David Howellsd84f4f92008-11-14 10:39:23 +1100933 if (uid == old->uid || uid == old->euid ||
934 uid == old->suid || uid == old->fsuid ||
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700935 nsown_capable(CAP_SETUID)) {
Cal Peake756184b2006-09-30 23:27:24 -0700936 if (uid != old_fsuid) {
David Howellsd84f4f92008-11-14 10:39:23 +1100937 new->fsuid = uid;
938 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
939 goto change_okay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941 }
942
David Howellsd84f4f92008-11-14 10:39:23 +1100943 abort_creds(new);
944 return old_fsuid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945
David Howellsd84f4f92008-11-14 10:39:23 +1100946change_okay:
947 commit_creds(new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 return old_fsuid;
949}
950
951/*
John Anthony Kazos Jrf42df9e2007-05-09 08:23:08 +0200952 * Samma på svenska..
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 */
Heiko Carstensae1251a2009-01-14 14:14:05 +0100954SYSCALL_DEFINE1(setfsgid, gid_t, gid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955{
David Howellsd84f4f92008-11-14 10:39:23 +1100956 const struct cred *old;
957 struct cred *new;
958 gid_t old_fsgid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959
David Howellsd84f4f92008-11-14 10:39:23 +1100960 new = prepare_creds();
961 if (!new)
962 return current_fsgid();
963 old = current_cred();
964 old_fsgid = old->fsgid;
965
David Howellsd84f4f92008-11-14 10:39:23 +1100966 if (gid == old->gid || gid == old->egid ||
967 gid == old->sgid || gid == old->fsgid ||
Serge E. Hallynfc832ad2011-03-23 16:43:22 -0700968 nsown_capable(CAP_SETGID)) {
Cal Peake756184b2006-09-30 23:27:24 -0700969 if (gid != old_fsgid) {
David Howellsd84f4f92008-11-14 10:39:23 +1100970 new->fsgid = gid;
971 goto change_okay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973 }
David Howellsd84f4f92008-11-14 10:39:23 +1100974
David Howellsd84f4f92008-11-14 10:39:23 +1100975 abort_creds(new);
976 return old_fsgid;
977
978change_okay:
979 commit_creds(new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980 return old_fsgid;
981}
982
Frank Mayharf06febc2008-09-12 09:54:39 -0700983void do_sys_times(struct tms *tms)
984{
Hidetoshi Seto0cf55e12009-12-02 17:28:07 +0900985 cputime_t tgutime, tgstime, cutime, cstime;
Frank Mayharf06febc2008-09-12 09:54:39 -0700986
Oleg Nesterov2b5fe6d2008-11-17 15:40:08 +0100987 spin_lock_irq(&current->sighand->siglock);
Hidetoshi Seto0cf55e12009-12-02 17:28:07 +0900988 thread_group_times(current, &tgutime, &tgstime);
Frank Mayharf06febc2008-09-12 09:54:39 -0700989 cutime = current->signal->cutime;
990 cstime = current->signal->cstime;
991 spin_unlock_irq(&current->sighand->siglock);
Hidetoshi Seto0cf55e12009-12-02 17:28:07 +0900992 tms->tms_utime = cputime_to_clock_t(tgutime);
993 tms->tms_stime = cputime_to_clock_t(tgstime);
Frank Mayharf06febc2008-09-12 09:54:39 -0700994 tms->tms_cutime = cputime_to_clock_t(cutime);
995 tms->tms_cstime = cputime_to_clock_t(cstime);
996}
997
Heiko Carstens58fd3aa2009-01-14 14:14:03 +0100998SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 if (tbuf) {
1001 struct tms tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002
Frank Mayharf06febc2008-09-12 09:54:39 -07001003 do_sys_times(&tmp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
1005 return -EFAULT;
1006 }
Paul Mackerrase3d5a272009-01-06 14:41:02 -08001007 force_successful_syscall_return();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 return (long) jiffies_64_to_clock_t(get_jiffies_64());
1009}
1010
1011/*
1012 * This needs some heavy checking ...
1013 * I just haven't the stomach for it. I also don't fully
1014 * understand sessions/pgrp etc. Let somebody who does explain it.
1015 *
1016 * OK, I think I have the protection semantics right.... this is really
1017 * only important on a multi-user system anyway, to make sure one user
1018 * can't send a signal to a process owned by another. -TYT, 12/12/91
1019 *
1020 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
1021 * LBT 04.03.94
1022 */
Heiko Carstensb290ebe2009-01-14 14:14:06 +01001023SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024{
1025 struct task_struct *p;
Oleg Nesterovee0acf92006-01-08 01:03:53 -08001026 struct task_struct *group_leader = current->group_leader;
Oleg Nesterov4e021302008-02-08 04:19:08 -08001027 struct pid *pgrp;
1028 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029
1030 if (!pid)
Pavel Emelyanovb4888932007-10-18 23:40:14 -07001031 pid = task_pid_vnr(group_leader);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 if (!pgid)
1033 pgid = pid;
1034 if (pgid < 0)
1035 return -EINVAL;
Paul E. McKenney950eaac2010-08-31 17:00:18 -07001036 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037
1038 /* From this point forward we keep holding onto the tasklist lock
1039 * so that our parent does not change from under us. -DaveM
1040 */
1041 write_lock_irq(&tasklist_lock);
1042
1043 err = -ESRCH;
Oleg Nesterov4e021302008-02-08 04:19:08 -08001044 p = find_task_by_vpid(pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001045 if (!p)
1046 goto out;
1047
1048 err = -EINVAL;
1049 if (!thread_group_leader(p))
1050 goto out;
1051
Oleg Nesterov4e021302008-02-08 04:19:08 -08001052 if (same_thread_group(p->real_parent, group_leader)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 err = -EPERM;
Eric W. Biederman41487c62007-02-12 00:53:01 -08001054 if (task_session(p) != task_session(group_leader))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055 goto out;
1056 err = -EACCES;
1057 if (p->did_exec)
1058 goto out;
1059 } else {
1060 err = -ESRCH;
Oleg Nesterovee0acf92006-01-08 01:03:53 -08001061 if (p != group_leader)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 goto out;
1063 }
1064
1065 err = -EPERM;
1066 if (p->signal->leader)
1067 goto out;
1068
Oleg Nesterov4e021302008-02-08 04:19:08 -08001069 pgrp = task_pid(p);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070 if (pgid != pid) {
Pavel Emelyanovb4888932007-10-18 23:40:14 -07001071 struct task_struct *g;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072
Oleg Nesterov4e021302008-02-08 04:19:08 -08001073 pgrp = find_vpid(pgid);
1074 g = pid_task(pgrp, PIDTYPE_PGID);
Eric W. Biederman41487c62007-02-12 00:53:01 -08001075 if (!g || task_session(g) != task_session(group_leader))
Oleg Nesterovf020bc42006-12-08 02:38:02 -08001076 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077 }
1078
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 err = security_task_setpgid(p, pgid);
1080 if (err)
1081 goto out;
1082
Oleg Nesterov1b0f7ff2009-04-02 16:58:39 -07001083 if (task_pgrp(p) != pgrp)
Oleg Nesterov83beaf32008-04-30 00:54:27 -07001084 change_pid(p, PIDTYPE_PGID, pgrp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085
1086 err = 0;
1087out:
1088 /* All paths lead to here, thus we are safe. -DaveM */
1089 write_unlock_irq(&tasklist_lock);
Paul E. McKenney950eaac2010-08-31 17:00:18 -07001090 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 return err;
1092}
1093
Heiko Carstensdbf040d2009-01-14 14:14:04 +01001094SYSCALL_DEFINE1(getpgid, pid_t, pid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095{
Oleg Nesterov12a3de02008-04-30 00:54:29 -07001096 struct task_struct *p;
1097 struct pid *grp;
1098 int retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099
Oleg Nesterov12a3de02008-04-30 00:54:29 -07001100 rcu_read_lock();
1101 if (!pid)
1102 grp = task_pgrp(current);
1103 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 retval = -ESRCH;
Oleg Nesterov12a3de02008-04-30 00:54:29 -07001105 p = find_task_by_vpid(pid);
1106 if (!p)
1107 goto out;
1108 grp = task_pgrp(p);
1109 if (!grp)
1110 goto out;
1111
1112 retval = security_task_getpgid(p);
1113 if (retval)
1114 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 }
Oleg Nesterov12a3de02008-04-30 00:54:29 -07001116 retval = pid_vnr(grp);
1117out:
1118 rcu_read_unlock();
1119 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120}
1121
1122#ifdef __ARCH_WANT_SYS_GETPGRP
1123
/* getpgrp() is getpgid() applied to the calling process. */
SYSCALL_DEFINE0(getpgrp)
{
	return sys_getpgid(0);
}
1128
1129#endif
1130
Heiko Carstensdbf040d2009-01-14 14:14:04 +01001131SYSCALL_DEFINE1(getsid, pid_t, pid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132{
Oleg Nesterov1dd768c2008-04-30 00:54:28 -07001133 struct task_struct *p;
1134 struct pid *sid;
1135 int retval;
Pavel Emelyanovb4888932007-10-18 23:40:14 -07001136
Oleg Nesterov1dd768c2008-04-30 00:54:28 -07001137 rcu_read_lock();
1138 if (!pid)
1139 sid = task_session(current);
1140 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 retval = -ESRCH;
Oleg Nesterov1dd768c2008-04-30 00:54:28 -07001142 p = find_task_by_vpid(pid);
1143 if (!p)
1144 goto out;
1145 sid = task_session(p);
1146 if (!sid)
1147 goto out;
1148
1149 retval = security_task_getsid(p);
1150 if (retval)
1151 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 }
Oleg Nesterov1dd768c2008-04-30 00:54:28 -07001153 retval = pid_vnr(sid);
1154out:
1155 rcu_read_unlock();
1156 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157}
1158
Heiko Carstensb290ebe2009-01-14 14:14:06 +01001159SYSCALL_DEFINE0(setsid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160{
Oren Laadane19f2472006-01-08 01:03:58 -08001161 struct task_struct *group_leader = current->group_leader;
Oleg Nesterove4cc0a92008-02-08 04:19:09 -08001162 struct pid *sid = task_pid(group_leader);
1163 pid_t session = pid_vnr(sid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164 int err = -EPERM;
1165
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166 write_lock_irq(&tasklist_lock);
Eric W. Biederman390e2ff2006-03-31 02:31:33 -08001167 /* Fail if I am already a session leader */
1168 if (group_leader->signal->leader)
1169 goto out;
1170
Oleg Nesterov430c6232008-02-08 04:19:11 -08001171 /* Fail if a process group id already exists that equals the
1172 * proposed session id.
Eric W. Biederman390e2ff2006-03-31 02:31:33 -08001173 */
Oleg Nesterov6806aac2008-02-08 04:19:12 -08001174 if (pid_task(sid, PIDTYPE_PGID))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 goto out;
1176
Oren Laadane19f2472006-01-08 01:03:58 -08001177 group_leader->signal->leader = 1;
Oleg Nesterov8520d7c2008-02-08 04:19:09 -08001178 __set_special_pids(sid);
Peter Zijlstra24ec8392006-12-08 02:36:04 -08001179
Alan Cox9c9f4de2008-10-13 10:37:26 +01001180 proc_clear_tty(group_leader);
Peter Zijlstra24ec8392006-12-08 02:36:04 -08001181
Oleg Nesterove4cc0a92008-02-08 04:19:09 -08001182 err = session;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183out:
1184 write_unlock_irq(&tasklist_lock);
Mike Galbraith5091faa2010-11-30 14:18:03 +01001185 if (err > 0) {
Christian Borntraeger0d0df592009-10-26 16:49:34 -07001186 proc_sid_connector(group_leader);
Mike Galbraith5091faa2010-11-30 14:18:03 +01001187 sched_autogroup_create_attach(group_leader);
1188 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 return err;
1190}
1191
/* Taken around the utsname() accesses made by the syscalls in this file. */
DECLARE_RWSEM(uts_sem);

/*
 * override_architecture(name) - for a task running with the PER_LINUX32
 * personality, overwrite name->machine in the user buffer with
 * COMPAT_UTS_MACHINE.  Evaluates to non-zero when that copy fails, and
 * to 0 otherwise (always 0 when COMPAT_UTS_MACHINE is not defined).
 */
#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user((name)->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif
1202
Andi Kleenbe274252011-08-19 16:15:10 -07001203/*
1204 * Work around broken programs that cannot handle "Linux 3.0".
1205 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
1206 */
Kees Cook643fde82012-10-19 13:56:51 -07001207static int override_release(char __user *release, size_t len)
Andi Kleenbe274252011-08-19 16:15:10 -07001208{
1209 int ret = 0;
Andi Kleenbe274252011-08-19 16:15:10 -07001210
1211 if (current->personality & UNAME26) {
Kees Cook643fde82012-10-19 13:56:51 -07001212 const char *rest = UTS_RELEASE;
1213 char buf[65] = { 0 };
Andi Kleenbe274252011-08-19 16:15:10 -07001214 int ndots = 0;
1215 unsigned v;
Kees Cook643fde82012-10-19 13:56:51 -07001216 size_t copy;
Andi Kleenbe274252011-08-19 16:15:10 -07001217
1218 while (*rest) {
1219 if (*rest == '.' && ++ndots >= 3)
1220 break;
1221 if (!isdigit(*rest) && *rest != '.')
1222 break;
1223 rest++;
1224 }
1225 v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
Kees Cook36800302012-10-19 18:45:53 -07001226 copy = clamp_t(size_t, len, 1, sizeof(buf));
Kees Cook643fde82012-10-19 13:56:51 -07001227 copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
1228 ret = copy_to_user(release, buf, copy + 1);
Andi Kleenbe274252011-08-19 16:15:10 -07001229 }
1230 return ret;
1231}
1232
Heiko Carstense48fbb62009-01-14 14:14:26 +01001233SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234{
1235 int errno = 0;
1236
1237 down_read(&uts_sem);
Serge E. Hallyne9ff3992006-10-02 02:18:11 -07001238 if (copy_to_user(name, utsname(), sizeof *name))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 errno = -EFAULT;
1240 up_read(&uts_sem);
Christoph Hellwige28cbf22010-03-10 15:21:19 -08001241
Andi Kleenbe274252011-08-19 16:15:10 -07001242 if (!errno && override_release(name->release, sizeof(name->release)))
1243 errno = -EFAULT;
Christoph Hellwige28cbf22010-03-10 15:21:19 -08001244 if (!errno && override_architecture(name))
1245 errno = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 return errno;
1247}
1248
Christoph Hellwig5cacdb42010-03-10 15:21:21 -08001249#ifdef __ARCH_WANT_SYS_OLD_UNAME
1250/*
1251 * Old cruft
1252 */
1253SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
1254{
1255 int error = 0;
1256
1257 if (!name)
1258 return -EFAULT;
1259
1260 down_read(&uts_sem);
1261 if (copy_to_user(name, utsname(), sizeof(*name)))
1262 error = -EFAULT;
1263 up_read(&uts_sem);
1264
Andi Kleenbe274252011-08-19 16:15:10 -07001265 if (!error && override_release(name->release, sizeof(name->release)))
1266 error = -EFAULT;
Christoph Hellwig5cacdb42010-03-10 15:21:21 -08001267 if (!error && override_architecture(name))
1268 error = -EFAULT;
1269 return error;
1270}
1271
1272SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
1273{
1274 int error;
1275
1276 if (!name)
1277 return -EFAULT;
1278 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
1279 return -EFAULT;
1280
1281 down_read(&uts_sem);
1282 error = __copy_to_user(&name->sysname, &utsname()->sysname,
1283 __OLD_UTS_LEN);
1284 error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
1285 error |= __copy_to_user(&name->nodename, &utsname()->nodename,
1286 __OLD_UTS_LEN);
1287 error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
1288 error |= __copy_to_user(&name->release, &utsname()->release,
1289 __OLD_UTS_LEN);
1290 error |= __put_user(0, name->release + __OLD_UTS_LEN);
1291 error |= __copy_to_user(&name->version, &utsname()->version,
1292 __OLD_UTS_LEN);
1293 error |= __put_user(0, name->version + __OLD_UTS_LEN);
1294 error |= __copy_to_user(&name->machine, &utsname()->machine,
1295 __OLD_UTS_LEN);
1296 error |= __put_user(0, name->machine + __OLD_UTS_LEN);
1297 up_read(&uts_sem);
1298
1299 if (!error && override_architecture(name))
1300 error = -EFAULT;
Andi Kleenbe274252011-08-19 16:15:10 -07001301 if (!error && override_release(name->release, sizeof(name->release)))
1302 error = -EFAULT;
Christoph Hellwig5cacdb42010-03-10 15:21:21 -08001303 return error ? -EFAULT : 0;
1304}
1305#endif
1306
Heiko Carstens5a8a82b2009-01-14 14:14:25 +01001307SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308{
1309 int errno;
1310 char tmp[__NEW_UTS_LEN];
1311
Serge E. Hallynbb96a6f2011-03-23 16:43:18 -07001312 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 return -EPERM;
Serge E. Hallynfc832ad2011-03-23 16:43:22 -07001314
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315 if (len < 0 || len > __NEW_UTS_LEN)
1316 return -EINVAL;
1317 down_write(&uts_sem);
1318 errno = -EFAULT;
1319 if (!copy_from_user(tmp, name, len)) {
Andrew Morton9679e4d2008-10-15 22:01:51 -07001320 struct new_utsname *u = utsname();
1321
1322 memcpy(u->nodename, tmp, len);
1323 memset(u->nodename + len, 0, sizeof(u->nodename) - len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 errno = 0;
1325 }
Lucas De Marchif1ecf062011-11-02 13:39:22 -07001326 uts_proc_notify(UTS_PROC_HOSTNAME);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327 up_write(&uts_sem);
1328 return errno;
1329}
1330
1331#ifdef __ARCH_WANT_SYS_GETHOSTNAME
1332
Heiko Carstens5a8a82b2009-01-14 14:14:25 +01001333SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334{
1335 int i, errno;
Andrew Morton9679e4d2008-10-15 22:01:51 -07001336 struct new_utsname *u;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337
1338 if (len < 0)
1339 return -EINVAL;
1340 down_read(&uts_sem);
Andrew Morton9679e4d2008-10-15 22:01:51 -07001341 u = utsname();
1342 i = 1 + strlen(u->nodename);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 if (i > len)
1344 i = len;
1345 errno = 0;
Andrew Morton9679e4d2008-10-15 22:01:51 -07001346 if (copy_to_user(name, u->nodename, i))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347 errno = -EFAULT;
1348 up_read(&uts_sem);
1349 return errno;
1350}
1351
1352#endif
1353
1354/*
1355 * Only setdomainname; getdomainname can be implemented by calling
1356 * uname()
1357 */
Heiko Carstens5a8a82b2009-01-14 14:14:25 +01001358SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359{
1360 int errno;
1361 char tmp[__NEW_UTS_LEN];
1362
Serge E. Hallynfc832ad2011-03-23 16:43:22 -07001363 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 return -EPERM;
1365 if (len < 0 || len > __NEW_UTS_LEN)
1366 return -EINVAL;
1367
1368 down_write(&uts_sem);
1369 errno = -EFAULT;
1370 if (!copy_from_user(tmp, name, len)) {
Andrew Morton9679e4d2008-10-15 22:01:51 -07001371 struct new_utsname *u = utsname();
1372
1373 memcpy(u->domainname, tmp, len);
1374 memset(u->domainname + len, 0, sizeof(u->domainname) - len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 errno = 0;
1376 }
Lucas De Marchif1ecf062011-11-02 13:39:22 -07001377 uts_proc_notify(UTS_PROC_DOMAINNAME);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 up_write(&uts_sem);
1379 return errno;
1380}
1381
Heiko Carstense48fbb62009-01-14 14:14:26 +01001382SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383{
Jiri Slabyb9518342010-05-04 11:28:25 +02001384 struct rlimit value;
1385 int ret;
1386
1387 ret = do_prlimit(current, resource, NULL, &value);
1388 if (!ret)
1389 ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
1390
1391 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392}
1393
1394#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
1395
1396/*
1397 * Back compatibility for getrlimit. Needed for some apps.
1398 */
1399
Heiko Carstense48fbb62009-01-14 14:14:26 +01001400SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
1401 struct rlimit __user *, rlim)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402{
1403 struct rlimit x;
1404 if (resource >= RLIM_NLIMITS)
1405 return -EINVAL;
1406
1407 task_lock(current->group_leader);
1408 x = current->signal->rlim[resource];
1409 task_unlock(current->group_leader);
Cal Peake756184b2006-09-30 23:27:24 -07001410 if (x.rlim_cur > 0x7FFFFFFF)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411 x.rlim_cur = 0x7FFFFFFF;
Cal Peake756184b2006-09-30 23:27:24 -07001412 if (x.rlim_max > 0x7FFFFFFF)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413 x.rlim_max = 0x7FFFFFFF;
1414 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1415}
1416
1417#endif
1418
Jiri Slabyc022a0a2010-05-04 18:03:50 +02001419static inline bool rlim64_is_infinity(__u64 rlim64)
1420{
1421#if BITS_PER_LONG < 64
1422 return rlim64 >= ULONG_MAX;
1423#else
1424 return rlim64 == RLIM64_INFINITY;
1425#endif
1426}
1427
1428static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
1429{
1430 if (rlim->rlim_cur == RLIM_INFINITY)
1431 rlim64->rlim_cur = RLIM64_INFINITY;
1432 else
1433 rlim64->rlim_cur = rlim->rlim_cur;
1434 if (rlim->rlim_max == RLIM_INFINITY)
1435 rlim64->rlim_max = RLIM64_INFINITY;
1436 else
1437 rlim64->rlim_max = rlim->rlim_max;
1438}
1439
1440static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
1441{
1442 if (rlim64_is_infinity(rlim64->rlim_cur))
1443 rlim->rlim_cur = RLIM_INFINITY;
1444 else
1445 rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
1446 if (rlim64_is_infinity(rlim64->rlim_max))
1447 rlim->rlim_max = RLIM_INFINITY;
1448 else
1449 rlim->rlim_max = (unsigned long)rlim64->rlim_max;
1450}
1451
Jiri Slaby1c1e6182009-08-28 14:08:17 +02001452/* make sure you are allowed to change @tsk limits before calling this */
Jiri Slaby5b415352010-03-24 16:11:29 +01001453int do_prlimit(struct task_struct *tsk, unsigned int resource,
1454 struct rlimit *new_rlim, struct rlimit *old_rlim)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455{
Jiri Slaby5b415352010-03-24 16:11:29 +01001456 struct rlimit *rlim;
Jiri Slaby86f162f2009-11-14 17:37:04 +01001457 int retval = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458
1459 if (resource >= RLIM_NLIMITS)
1460 return -EINVAL;
Jiri Slaby5b415352010-03-24 16:11:29 +01001461 if (new_rlim) {
1462 if (new_rlim->rlim_cur > new_rlim->rlim_max)
1463 return -EINVAL;
1464 if (resource == RLIMIT_NOFILE &&
1465 new_rlim->rlim_max > sysctl_nr_open)
1466 return -EPERM;
1467 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468
Jiri Slaby1c1e6182009-08-28 14:08:17 +02001469 /* protect tsk->signal and tsk->sighand from disappearing */
1470 read_lock(&tasklist_lock);
1471 if (!tsk->sighand) {
1472 retval = -ESRCH;
1473 goto out;
1474 }
1475
Jiri Slaby5b415352010-03-24 16:11:29 +01001476 rlim = tsk->signal->rlim + resource;
Jiri Slaby86f162f2009-11-14 17:37:04 +01001477 task_lock(tsk->group_leader);
Jiri Slaby5b415352010-03-24 16:11:29 +01001478 if (new_rlim) {
Serge E. Hallynfc832ad2011-03-23 16:43:22 -07001479 /* Keep the capable check against init_user_ns until
1480 cgroups can contain all limits */
Jiri Slaby5b415352010-03-24 16:11:29 +01001481 if (new_rlim->rlim_max > rlim->rlim_max &&
1482 !capable(CAP_SYS_RESOURCE))
1483 retval = -EPERM;
1484 if (!retval)
1485 retval = security_task_setrlimit(tsk->group_leader,
1486 resource, new_rlim);
1487 if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
1488 /*
1489 * The caller is asking for an immediate RLIMIT_CPU
1490 * expiry. But we use the zero value to mean "it was
1491 * never set". So let's cheat and make it one second
1492 * instead
1493 */
1494 new_rlim->rlim_cur = 1;
1495 }
Tom Alsberg9926e4c2007-05-08 00:30:31 -07001496 }
Jiri Slaby5b415352010-03-24 16:11:29 +01001497 if (!retval) {
1498 if (old_rlim)
1499 *old_rlim = *rlim;
1500 if (new_rlim)
1501 *rlim = *new_rlim;
1502 }
Jiri Slaby7855c352009-08-26 23:45:34 +02001503 task_unlock(tsk->group_leader);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504
Andrew Mortond3561f72006-03-24 03:18:36 -08001505 /*
1506 * RLIMIT_CPU handling. Note that the kernel fails to return an error
1507 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a
1508 * very long-standing error, and fixing it now risks breakage of
1509 * applications, so we live with it
1510 */
Jiri Slaby5b415352010-03-24 16:11:29 +01001511 if (!retval && new_rlim && resource == RLIMIT_CPU &&
1512 new_rlim->rlim_cur != RLIM_INFINITY)
1513 update_rlimit_cpu(tsk, new_rlim->rlim_cur);
Andrew Mortonec9e16b2006-03-24 03:18:34 -08001514out:
Jiri Slaby1c1e6182009-08-28 14:08:17 +02001515 read_unlock(&tasklist_lock);
Oleg Nesterov2fb9d262009-09-03 19:21:45 +02001516 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517}
1518
Jiri Slabyc022a0a2010-05-04 18:03:50 +02001519/* rcu lock must be held */
1520static int check_prlimit_permission(struct task_struct *task)
1521{
1522 const struct cred *cred = current_cred(), *tcred;
1523
Serge E. Hallynfc832ad2011-03-23 16:43:22 -07001524 if (current == task)
1525 return 0;
Jiri Slabyc022a0a2010-05-04 18:03:50 +02001526
Serge E. Hallynfc832ad2011-03-23 16:43:22 -07001527 tcred = __task_cred(task);
Eric W. Biederman6f2c0a92011-11-16 23:15:31 -08001528 if (cred->user_ns == tcred->user_ns &&
Serge E. Hallynfc832ad2011-03-23 16:43:22 -07001529 (cred->uid == tcred->euid &&
1530 cred->uid == tcred->suid &&
1531 cred->uid == tcred->uid &&
1532 cred->gid == tcred->egid &&
1533 cred->gid == tcred->sgid &&
1534 cred->gid == tcred->gid))
1535 return 0;
Eric W. Biederman6f2c0a92011-11-16 23:15:31 -08001536 if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
Serge E. Hallynfc832ad2011-03-23 16:43:22 -07001537 return 0;
1538
1539 return -EPERM;
Jiri Slabyc022a0a2010-05-04 18:03:50 +02001540}
1541
1542SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
1543 const struct rlimit64 __user *, new_rlim,
1544 struct rlimit64 __user *, old_rlim)
1545{
1546 struct rlimit64 old64, new64;
1547 struct rlimit old, new;
1548 struct task_struct *tsk;
1549 int ret;
1550
1551 if (new_rlim) {
1552 if (copy_from_user(&new64, new_rlim, sizeof(new64)))
1553 return -EFAULT;
1554 rlim64_to_rlim(&new64, &new);
1555 }
1556
1557 rcu_read_lock();
1558 tsk = pid ? find_task_by_vpid(pid) : current;
1559 if (!tsk) {
1560 rcu_read_unlock();
1561 return -ESRCH;
1562 }
1563 ret = check_prlimit_permission(tsk);
1564 if (ret) {
1565 rcu_read_unlock();
1566 return ret;
1567 }
1568 get_task_struct(tsk);
1569 rcu_read_unlock();
1570
1571 ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
1572 old_rlim ? &old : NULL);
1573
1574 if (!ret && old_rlim) {
1575 rlim_to_rlim64(&old, &old64);
1576 if (copy_to_user(old_rlim, &old64, sizeof(old64)))
1577 ret = -EFAULT;
1578 }
1579
1580 put_task_struct(tsk);
1581 return ret;
1582}
1583
Jiri Slaby7855c352009-08-26 23:45:34 +02001584SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1585{
1586 struct rlimit new_rlim;
1587
1588 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1589 return -EFAULT;
Jiri Slaby5b415352010-03-24 16:11:29 +01001590 return do_prlimit(current, resource, &new_rlim, NULL);
Jiri Slaby7855c352009-08-26 23:45:34 +02001591}
1592
/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*.  After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this.  It will make moving the rest of the information
 * a lot simpler!  (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters.  But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums.  We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * in the following cases: current multithreaded, non-current single
 * threaded, and non-current multithreaded.  Thread traversal is now safe
 * with the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields. If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting. So we should place a read memory barrier when we avoid the lock.
 * On the writer side, write memory barrier is implied in __exit_signal
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields. But we don't do this yet to keep things simple.
 *
 */
1625
Frank Mayharf06febc2008-09-12 09:54:39 -07001626static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
Sripathi Kodi679c9cd2008-04-29 00:58:42 -07001627{
Sripathi Kodi679c9cd2008-04-29 00:58:42 -07001628 r->ru_nvcsw += t->nvcsw;
1629 r->ru_nivcsw += t->nivcsw;
1630 r->ru_minflt += t->min_flt;
1631 r->ru_majflt += t->maj_flt;
1632 r->ru_inblock += task_io_get_inblock(t);
1633 r->ru_oublock += task_io_get_oublock(t);
1634}
1635
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1637{
1638 struct task_struct *t;
1639 unsigned long flags;
Hidetoshi Seto0cf55e12009-12-02 17:28:07 +09001640 cputime_t tgutime, tgstime, utime, stime;
Jiri Pirko1f102062009-09-22 16:44:10 -07001641 unsigned long maxrss = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642
1643 memset((char *) r, 0, sizeof *r);
Martin Schwidefsky64861632011-12-15 14:56:09 +01001644 utime = stime = 0;
Oleg Nesterov0f59cc42006-01-08 01:05:15 -08001645
Sripathi Kodi679c9cd2008-04-29 00:58:42 -07001646 if (who == RUSAGE_THREAD) {
Hidetoshi Setod180c5b2009-11-26 14:48:30 +09001647 task_times(current, &utime, &stime);
Frank Mayharf06febc2008-09-12 09:54:39 -07001648 accumulate_thread_rusage(p, r);
Jiri Pirko1f102062009-09-22 16:44:10 -07001649 maxrss = p->signal->maxrss;
Sripathi Kodi679c9cd2008-04-29 00:58:42 -07001650 goto out;
1651 }
1652
Oleg Nesterovd6cf7232008-04-30 00:52:38 -07001653 if (!lock_task_sighand(p, &flags))
Ravikiran G Thirumalaide047c12006-06-22 14:47:26 -07001654 return;
Ravikiran G Thirumalai2dd0ebc2006-03-23 03:00:13 -08001655
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 switch (who) {
Oleg Nesterov0f59cc42006-01-08 01:05:15 -08001657 case RUSAGE_BOTH:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658 case RUSAGE_CHILDREN:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659 utime = p->signal->cutime;
1660 stime = p->signal->cstime;
1661 r->ru_nvcsw = p->signal->cnvcsw;
1662 r->ru_nivcsw = p->signal->cnivcsw;
1663 r->ru_minflt = p->signal->cmin_flt;
1664 r->ru_majflt = p->signal->cmaj_flt;
Eric Dumazet6eaeeab2007-05-10 22:22:37 -07001665 r->ru_inblock = p->signal->cinblock;
1666 r->ru_oublock = p->signal->coublock;
Jiri Pirko1f102062009-09-22 16:44:10 -07001667 maxrss = p->signal->cmaxrss;
Oleg Nesterov0f59cc42006-01-08 01:05:15 -08001668
1669 if (who == RUSAGE_CHILDREN)
1670 break;
1671
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 case RUSAGE_SELF:
Hidetoshi Seto0cf55e12009-12-02 17:28:07 +09001673 thread_group_times(p, &tgutime, &tgstime);
Martin Schwidefsky64861632011-12-15 14:56:09 +01001674 utime += tgutime;
1675 stime += tgstime;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 r->ru_nvcsw += p->signal->nvcsw;
1677 r->ru_nivcsw += p->signal->nivcsw;
1678 r->ru_minflt += p->signal->min_flt;
1679 r->ru_majflt += p->signal->maj_flt;
Eric Dumazet6eaeeab2007-05-10 22:22:37 -07001680 r->ru_inblock += p->signal->inblock;
1681 r->ru_oublock += p->signal->oublock;
Jiri Pirko1f102062009-09-22 16:44:10 -07001682 if (maxrss < p->signal->maxrss)
1683 maxrss = p->signal->maxrss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 t = p;
1685 do {
Frank Mayharf06febc2008-09-12 09:54:39 -07001686 accumulate_thread_rusage(t, r);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687 t = next_thread(t);
1688 } while (t != p);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689 break;
Oleg Nesterov0f59cc42006-01-08 01:05:15 -08001690
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 default:
1692 BUG();
1693 }
Ravikiran G Thirumalaide047c12006-06-22 14:47:26 -07001694 unlock_task_sighand(p, &flags);
Ravikiran G Thirumalaide047c12006-06-22 14:47:26 -07001695
Sripathi Kodi679c9cd2008-04-29 00:58:42 -07001696out:
Oleg Nesterov0f59cc42006-01-08 01:05:15 -08001697 cputime_to_timeval(utime, &r->ru_utime);
1698 cputime_to_timeval(stime, &r->ru_stime);
Jiri Pirko1f102062009-09-22 16:44:10 -07001699
1700 if (who != RUSAGE_CHILDREN) {
1701 struct mm_struct *mm = get_task_mm(p);
1702 if (mm) {
1703 setmax_mm_hiwater_rss(&maxrss, mm);
1704 mmput(mm);
1705 }
1706 }
1707 r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708}
1709
1710int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1711{
1712 struct rusage r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 k_getrusage(p, who, &r);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1715}
1716
Heiko Carstense48fbb62009-01-14 14:14:26 +01001717SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718{
Sripathi Kodi679c9cd2008-04-29 00:58:42 -07001719 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
1720 who != RUSAGE_THREAD)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 return -EINVAL;
1722 return getrusage(current, who, ru);
1723}
1724
Heiko Carstense48fbb62009-01-14 14:14:26 +01001725SYSCALL_DEFINE1(umask, int, mask)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726{
1727 mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1728 return mask;
1729}
Serge E. Hallyn3b7391d2008-02-04 22:29:45 -08001730
Cyrill Gorcunov028ee4b2012-01-12 17:20:55 -08001731#ifdef CONFIG_CHECKPOINT_RESTORE
1732static int prctl_set_mm(int opt, unsigned long addr,
1733 unsigned long arg4, unsigned long arg5)
1734{
1735 unsigned long rlim = rlimit(RLIMIT_DATA);
1736 unsigned long vm_req_flags;
1737 unsigned long vm_bad_flags;
1738 struct vm_area_struct *vma;
1739 int error = 0;
1740 struct mm_struct *mm = current->mm;
1741
1742 if (arg4 | arg5)
1743 return -EINVAL;
1744
Cyrill Gorcunov79f07132012-03-15 15:17:10 -07001745 if (!capable(CAP_SYS_RESOURCE))
Cyrill Gorcunov028ee4b2012-01-12 17:20:55 -08001746 return -EPERM;
1747
1748 if (addr >= TASK_SIZE)
1749 return -EINVAL;
1750
1751 down_read(&mm->mmap_sem);
1752 vma = find_vma(mm, addr);
1753
1754 if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
1755 /* It must be existing VMA */
1756 if (!vma || vma->vm_start > addr)
1757 goto out;
1758 }
1759
1760 error = -EINVAL;
1761 switch (opt) {
1762 case PR_SET_MM_START_CODE:
1763 case PR_SET_MM_END_CODE:
1764 vm_req_flags = VM_READ | VM_EXEC;
1765 vm_bad_flags = VM_WRITE | VM_MAYSHARE;
1766
1767 if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
1768 (vma->vm_flags & vm_bad_flags))
1769 goto out;
1770
1771 if (opt == PR_SET_MM_START_CODE)
1772 mm->start_code = addr;
1773 else
1774 mm->end_code = addr;
1775 break;
1776
1777 case PR_SET_MM_START_DATA:
1778 case PR_SET_MM_END_DATA:
1779 vm_req_flags = VM_READ | VM_WRITE;
1780 vm_bad_flags = VM_EXEC | VM_MAYSHARE;
1781
1782 if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
1783 (vma->vm_flags & vm_bad_flags))
1784 goto out;
1785
1786 if (opt == PR_SET_MM_START_DATA)
1787 mm->start_data = addr;
1788 else
1789 mm->end_data = addr;
1790 break;
1791
1792 case PR_SET_MM_START_STACK:
1793
1794#ifdef CONFIG_STACK_GROWSUP
1795 vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
1796#else
1797 vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
1798#endif
1799 if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
1800 goto out;
1801
1802 mm->start_stack = addr;
1803 break;
1804
1805 case PR_SET_MM_START_BRK:
1806 if (addr <= mm->end_data)
1807 goto out;
1808
1809 if (rlim < RLIM_INFINITY &&
1810 (mm->brk - addr) +
1811 (mm->end_data - mm->start_data) > rlim)
1812 goto out;
1813
1814 mm->start_brk = addr;
1815 break;
1816
1817 case PR_SET_MM_BRK:
1818 if (addr <= mm->end_data)
1819 goto out;
1820
1821 if (rlim < RLIM_INFINITY &&
1822 (addr - mm->start_brk) +
1823 (mm->end_data - mm->start_data) > rlim)
1824 goto out;
1825
1826 mm->brk = addr;
1827 break;
1828
1829 default:
1830 error = -EINVAL;
1831 goto out;
1832 }
1833
1834 error = 0;
1835
1836out:
1837 up_read(&mm->mmap_sem);
1838
1839 return error;
1840}
1841#else /* CONFIG_CHECKPOINT_RESTORE */
1842static int prctl_set_mm(int opt, unsigned long addr,
1843 unsigned long arg4, unsigned long arg5)
1844{
1845 return -EINVAL;
1846}
1847#endif
1848
Colin Cross8ad62c22013-06-26 17:26:01 -07001849
1850static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
1851 struct vm_area_struct **prev,
1852 unsigned long start, unsigned long end,
1853 const char __user *name_addr)
1854{
1855 struct mm_struct * mm = vma->vm_mm;
1856 int error = 0;
1857 pgoff_t pgoff;
1858
1859 if (name_addr == vma_get_anon_name(vma)) {
1860 *prev = vma;
1861 goto out;
1862 }
1863
1864 pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
1865 *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
1866 vma->vm_file, pgoff, vma_policy(vma),
1867 name_addr);
1868 if (*prev) {
1869 vma = *prev;
1870 goto success;
1871 }
1872
1873 *prev = vma;
1874
1875 if (start != vma->vm_start) {
1876 error = split_vma(mm, vma, start, 1);
1877 if (error)
1878 goto out;
1879 }
1880
1881 if (end != vma->vm_end) {
1882 error = split_vma(mm, vma, end, 0);
1883 if (error)
1884 goto out;
1885 }
1886
1887success:
1888 if (!vma->vm_file)
1889 vma->shared.anon_name = name_addr;
1890
1891out:
1892 if (error == -ENOMEM)
1893 error = -EAGAIN;
1894 return error;
1895}
1896
1897static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
1898 unsigned long arg)
1899{
1900 unsigned long tmp;
1901 struct vm_area_struct * vma, *prev;
1902 int unmapped_error = 0;
1903 int error = -EINVAL;
1904
1905 /*
1906 * If the interval [start,end) covers some unmapped address
1907 * ranges, just ignore them, but return -ENOMEM at the end.
1908 * - this matches the handling in madvise.
1909 */
1910 vma = find_vma_prev(current->mm, start, &prev);
1911 if (vma && start > vma->vm_start)
1912 prev = vma;
1913
1914 for (;;) {
1915 /* Still start < end. */
1916 error = -ENOMEM;
1917 if (!vma)
1918 return error;
1919
1920 /* Here start < (end|vma->vm_end). */
1921 if (start < vma->vm_start) {
1922 unmapped_error = -ENOMEM;
1923 start = vma->vm_start;
1924 if (start >= end)
1925 return error;
1926 }
1927
1928 /* Here vma->vm_start <= start < (end|vma->vm_end) */
1929 tmp = vma->vm_end;
1930 if (end < tmp)
1931 tmp = end;
1932
1933 /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
1934 error = prctl_update_vma_anon_name(vma, &prev, start, end,
1935 (const char __user *)arg);
1936 if (error)
1937 return error;
1938 start = tmp;
1939 if (prev && start < prev->vm_end)
1940 start = prev->vm_end;
1941 error = unmapped_error;
1942 if (start >= end)
1943 return error;
1944 if (prev)
1945 vma = prev->vm_next;
1946 else /* madvise_remove dropped mmap_sem */
1947 vma = find_vma(current->mm, start);
1948 }
1949}
1950
1951static int prctl_set_vma(unsigned long opt, unsigned long start,
1952 unsigned long len_in, unsigned long arg)
1953{
1954 struct mm_struct *mm = current->mm;
1955 int error;
1956 unsigned long len;
1957 unsigned long end;
1958
1959 if (start & ~PAGE_MASK)
1960 return -EINVAL;
1961 len = (len_in + ~PAGE_MASK) & PAGE_MASK;
1962
1963 /* Check to see whether len was rounded up from small -ve to zero */
1964 if (len_in && !len)
1965 return -EINVAL;
1966
1967 end = start + len;
1968 if (end < start)
1969 return -EINVAL;
1970
1971 if (end == start)
1972 return 0;
1973
1974 down_write(&mm->mmap_sem);
1975
1976 switch (opt) {
1977 case PR_SET_VMA_ANON_NAME:
1978 error = prctl_set_vma_anon_name(start, end, arg);
1979 break;
1980 default:
1981 error = -EINVAL;
1982 }
1983
1984 up_write(&mm->mmap_sem);
1985
1986 return error;
1987}
1988
Heiko Carstensc4ea37c2009-01-14 14:14:28 +01001989SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
1990 unsigned long, arg4, unsigned long, arg5)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991{
David Howellsb6dff3e2008-11-14 10:39:16 +11001992 struct task_struct *me = current;
1993 unsigned char comm[sizeof(me->comm)];
1994 long error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995
David Howellsd84f4f92008-11-14 10:39:23 +11001996 error = security_task_prctl(option, arg2, arg3, arg4, arg5);
1997 if (error != -ENOSYS)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998 return error;
1999
David Howellsd84f4f92008-11-14 10:39:23 +11002000 error = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001 switch (option) {
2002 case PR_SET_PDEATHSIG:
Jesper Juhl0730ded2005-09-06 15:17:37 -07002003 if (!valid_signal(arg2)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004 error = -EINVAL;
2005 break;
2006 }
David Howellsb6dff3e2008-11-14 10:39:16 +11002007 me->pdeath_signal = arg2;
2008 error = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009 break;
2010 case PR_GET_PDEATHSIG:
David Howellsb6dff3e2008-11-14 10:39:16 +11002011 error = put_user(me->pdeath_signal, (int __user *)arg2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002012 break;
2013 case PR_GET_DUMPABLE:
David Howellsb6dff3e2008-11-14 10:39:16 +11002014 error = get_dumpable(me->mm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015 break;
2016 case PR_SET_DUMPABLE:
Marcel Holtmannabf75a52006-07-12 13:12:00 +02002017 if (arg2 < 0 || arg2 > 1) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018 error = -EINVAL;
2019 break;
2020 }
David Howellsb6dff3e2008-11-14 10:39:16 +11002021 set_dumpable(me->mm, arg2);
2022 error = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023 break;
2024
2025 case PR_SET_UNALIGN:
David Howellsb6dff3e2008-11-14 10:39:16 +11002026 error = SET_UNALIGN_CTL(me, arg2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027 break;
2028 case PR_GET_UNALIGN:
David Howellsb6dff3e2008-11-14 10:39:16 +11002029 error = GET_UNALIGN_CTL(me, arg2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030 break;
2031 case PR_SET_FPEMU:
David Howellsb6dff3e2008-11-14 10:39:16 +11002032 error = SET_FPEMU_CTL(me, arg2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033 break;
2034 case PR_GET_FPEMU:
David Howellsb6dff3e2008-11-14 10:39:16 +11002035 error = GET_FPEMU_CTL(me, arg2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036 break;
2037 case PR_SET_FPEXC:
David Howellsb6dff3e2008-11-14 10:39:16 +11002038 error = SET_FPEXC_CTL(me, arg2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039 break;
2040 case PR_GET_FPEXC:
David Howellsb6dff3e2008-11-14 10:39:16 +11002041 error = GET_FPEXC_CTL(me, arg2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042 break;
2043 case PR_GET_TIMING:
2044 error = PR_TIMING_STATISTICAL;
2045 break;
2046 case PR_SET_TIMING:
Shi Weihua7b266552008-05-23 13:04:59 -07002047 if (arg2 != PR_TIMING_STATISTICAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048 error = -EINVAL;
David Howellsb6dff3e2008-11-14 10:39:16 +11002049 else
2050 error = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 break;
2052
David Howellsb6dff3e2008-11-14 10:39:16 +11002053 case PR_SET_NAME:
2054 comm[sizeof(me->comm)-1] = 0;
2055 if (strncpy_from_user(comm, (char __user *)arg2,
2056 sizeof(me->comm) - 1) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057 return -EFAULT;
David Howellsb6dff3e2008-11-14 10:39:16 +11002058 set_task_comm(me, comm);
Vladimir Zapolskiyf786ecb2011-09-21 09:26:44 +00002059 proc_comm_connector(me);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060 return 0;
David Howellsb6dff3e2008-11-14 10:39:16 +11002061 case PR_GET_NAME:
2062 get_task_comm(comm, me);
2063 if (copy_to_user((char __user *)arg2, comm,
2064 sizeof(comm)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065 return -EFAULT;
2066 return 0;
Anton Blanchard651d7652006-06-07 16:10:19 +10002067 case PR_GET_ENDIAN:
David Howellsb6dff3e2008-11-14 10:39:16 +11002068 error = GET_ENDIAN(me, arg2);
Anton Blanchard651d7652006-06-07 16:10:19 +10002069 break;
2070 case PR_SET_ENDIAN:
David Howellsb6dff3e2008-11-14 10:39:16 +11002071 error = SET_ENDIAN(me, arg2);
Anton Blanchard651d7652006-06-07 16:10:19 +10002072 break;
2073
Andrea Arcangeli1d9d02f2007-07-15 23:41:32 -07002074 case PR_GET_SECCOMP:
2075 error = prctl_get_seccomp();
2076 break;
2077 case PR_SET_SECCOMP:
Will Drewry980e9202012-02-09 11:50:58 -06002078 error = prctl_set_seccomp(arg2, (char __user *)arg3);
Andrea Arcangeli1d9d02f2007-07-15 23:41:32 -07002079 break;
Erik Bosman8fb402b2008-04-11 18:54:17 +02002080 case PR_GET_TSC:
2081 error = GET_TSC_CTL(arg2);
2082 break;
2083 case PR_SET_TSC:
2084 error = SET_TSC_CTL(arg2);
2085 break;
Ingo Molnarcdd6c482009-09-21 12:02:48 +02002086 case PR_TASK_PERF_EVENTS_DISABLE:
2087 error = perf_event_task_disable();
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +01002088 break;
Ingo Molnarcdd6c482009-09-21 12:02:48 +02002089 case PR_TASK_PERF_EVENTS_ENABLE:
2090 error = perf_event_task_enable();
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +01002091 break;
Arjan van de Ven69766752008-09-01 15:52:40 -07002092 case PR_GET_TIMERSLACK:
2093 error = current->timer_slack_ns;
2094 break;
2095 case PR_SET_TIMERSLACK:
2096 if (arg2 <= 0)
2097 current->timer_slack_ns =
2098 current->default_timer_slack_ns;
2099 else
2100 current->timer_slack_ns = arg2;
David Howellsb6dff3e2008-11-14 10:39:16 +11002101 error = 0;
Arjan van de Ven69766752008-09-01 15:52:40 -07002102 break;
Andi Kleen4db96cf2009-09-16 11:50:14 +02002103 case PR_MCE_KILL:
2104 if (arg4 | arg5)
2105 return -EINVAL;
2106 switch (arg2) {
Andi Kleen1087e9b2009-10-04 02:20:11 +02002107 case PR_MCE_KILL_CLEAR:
Andi Kleen4db96cf2009-09-16 11:50:14 +02002108 if (arg3 != 0)
2109 return -EINVAL;
2110 current->flags &= ~PF_MCE_PROCESS;
2111 break;
Andi Kleen1087e9b2009-10-04 02:20:11 +02002112 case PR_MCE_KILL_SET:
Andi Kleen4db96cf2009-09-16 11:50:14 +02002113 current->flags |= PF_MCE_PROCESS;
Andi Kleen1087e9b2009-10-04 02:20:11 +02002114 if (arg3 == PR_MCE_KILL_EARLY)
Andi Kleen4db96cf2009-09-16 11:50:14 +02002115 current->flags |= PF_MCE_EARLY;
Andi Kleen1087e9b2009-10-04 02:20:11 +02002116 else if (arg3 == PR_MCE_KILL_LATE)
Andi Kleen4db96cf2009-09-16 11:50:14 +02002117 current->flags &= ~PF_MCE_EARLY;
Andi Kleen1087e9b2009-10-04 02:20:11 +02002118 else if (arg3 == PR_MCE_KILL_DEFAULT)
2119 current->flags &=
2120 ~(PF_MCE_EARLY|PF_MCE_PROCESS);
2121 else
2122 return -EINVAL;
Andi Kleen4db96cf2009-09-16 11:50:14 +02002123 break;
2124 default:
2125 return -EINVAL;
2126 }
2127 error = 0;
2128 break;
Andi Kleen1087e9b2009-10-04 02:20:11 +02002129 case PR_MCE_KILL_GET:
2130 if (arg2 | arg3 | arg4 | arg5)
2131 return -EINVAL;
2132 if (current->flags & PF_MCE_PROCESS)
2133 error = (current->flags & PF_MCE_EARLY) ?
2134 PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
2135 else
2136 error = PR_MCE_KILL_DEFAULT;
2137 break;
Cyrill Gorcunov028ee4b2012-01-12 17:20:55 -08002138 case PR_SET_MM:
2139 error = prctl_set_mm(arg2, arg3, arg4, arg5);
2140 break;
Lennart Poetteringebec18a2012-03-23 15:01:54 -07002141 case PR_SET_CHILD_SUBREAPER:
2142 me->signal->is_child_subreaper = !!arg2;
2143 error = 0;
2144 break;
2145 case PR_GET_CHILD_SUBREAPER:
2146 error = put_user(me->signal->is_child_subreaper,
2147 (int __user *) arg2);
2148 break;
Colin Cross8ad62c22013-06-26 17:26:01 -07002149 case PR_SET_VMA:
2150 error = prctl_set_vma(arg2, arg3, arg4, arg5);
2151 break;
Andy Lutomirski9bf75df2012-04-12 16:47:50 -05002152 case PR_SET_NO_NEW_PRIVS:
2153 if (arg2 != 1 || arg3 || arg4 || arg5)
2154 return -EINVAL;
2155
Kees Cook5eab1302014-05-21 15:23:46 -07002156 task_set_no_new_privs(current);
Andy Lutomirski9bf75df2012-04-12 16:47:50 -05002157 break;
2158 case PR_GET_NO_NEW_PRIVS:
2159 if (arg2 || arg3 || arg4 || arg5)
2160 return -EINVAL;
Kees Cook5eab1302014-05-21 15:23:46 -07002161 return task_no_new_privs(current) ? 1 : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162 default:
2163 error = -EINVAL;
2164 break;
2165 }
2166 return error;
2167}
Andi Kleen3cfc3482006-09-26 10:52:28 +02002168
Heiko Carstens836f92a2009-01-14 14:14:33 +01002169SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
2170 struct getcpu_cache __user *, unused)
Andi Kleen3cfc3482006-09-26 10:52:28 +02002171{
2172 int err = 0;
2173 int cpu = raw_smp_processor_id();
2174 if (cpup)
2175 err |= put_user(cpu, cpup);
2176 if (nodep)
2177 err |= put_user(cpu_to_node(cpu), nodep);
Andi Kleen3cfc3482006-09-26 10:52:28 +02002178 return err ? -EFAULT : 0;
2179}
Jeremy Fitzhardinge10a0a8d2007-07-17 18:37:02 -07002180
2181char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
2182
Neil Hormana06a4dc2010-05-26 14:42:58 -07002183static void argv_cleanup(struct subprocess_info *info)
Jeremy Fitzhardinge10a0a8d2007-07-17 18:37:02 -07002184{
Neil Hormana06a4dc2010-05-26 14:42:58 -07002185 argv_free(info->argv);
Jeremy Fitzhardinge10a0a8d2007-07-17 18:37:02 -07002186}
2187
2188/**
2189 * orderly_poweroff - Trigger an orderly system poweroff
2190 * @force: force poweroff if command execution fails
2191 *
2192 * This may be called from any context to trigger a system shutdown.
2193 * If the orderly shutdown fails, it will force an immediate shutdown.
2194 */
2195int orderly_poweroff(bool force)
2196{
2197 int argc;
2198 char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
2199 static char *envp[] = {
2200 "HOME=/",
2201 "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
2202 NULL
2203 };
2204 int ret = -ENOMEM;
2205 struct subprocess_info *info;
2206
2207 if (argv == NULL) {
2208 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
2209 __func__, poweroff_cmd);
2210 goto out;
2211 }
2212
KOSAKI Motohiroac331d12008-07-25 01:45:38 -07002213 info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
Jeremy Fitzhardinge10a0a8d2007-07-17 18:37:02 -07002214 if (info == NULL) {
2215 argv_free(argv);
2216 goto out;
2217 }
2218
Neil Hormana06a4dc2010-05-26 14:42:58 -07002219 call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);
Jeremy Fitzhardinge10a0a8d2007-07-17 18:37:02 -07002220
Jeremy Fitzhardinge86313c42007-07-17 18:37:03 -07002221 ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
Jeremy Fitzhardinge10a0a8d2007-07-17 18:37:02 -07002222
2223 out:
2224 if (ret && force) {
2225 printk(KERN_WARNING "Failed to start orderly shutdown: "
2226 "forcing the issue\n");
2227
2228 /* I guess this should try to kick off some daemon to
2229 sync and poweroff asap. Or not even bother syncing
2230 if we're doing an emergency shutdown? */
2231 emergency_sync();
2232 kernel_power_off();
2233 }
2234
2235 return ret;
2236}
2237EXPORT_SYMBOL_GPL(orderly_poweroff);