/*
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation, version 2 of the
 *  License.
 */

#include <linux/export.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
#include <linux/highuid.h>
#include <linux/cred.h>
#include <linux/securebits.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/projid.h>

static struct kmem_cache *user_ns_cachep __read_mostly;

static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *map);

/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 */
int create_user_ns(struct cred *new)
{
	struct user_namespace *ns, *parent_ns = new->user_ns;
	kuid_t owner = new->euid;
	kgid_t group = new->egid;

	/* The creator needs a mapping in the parent user namespace
	 * or else we won't be able to reasonably tell userspace who
	 * created a user_namespace.
	 */
	if (!kuid_has_mapping(parent_ns, owner) ||
	    !kgid_has_mapping(parent_ns, group))
		return -EPERM;

	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
	if (!ns)
		return -ENOMEM;

	kref_init(&ns->kref);
	ns->parent = parent_ns;
	ns->owner = owner;
	ns->group = group;

	/* Start with the same capabilities as init, but these capabilities
	 * are useless for doing anything outside the new namespace because
	 * they are bound to the new user namespace.
	 */
	new->securebits = SECUREBITS_DEFAULT;
	new->cap_inheritable = CAP_EMPTY_SET;
	new->cap_permitted = CAP_FULL_SET;
	new->cap_effective = CAP_FULL_SET;
	new->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
	key_put(new->request_key_auth);
	new->request_key_auth = NULL;
#endif
	/* tgcred will be cleared in our caller because CLONE_THREAD won't be set */

	/* Leave the reference to our user_ns with the new cred. */
	new->user_ns = ns;

	return 0;
}
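
/*
 * Illustrative note (not from this file): userspace typically reaches
 * create_user_ns() via clone(2) with CLONE_NEWUSER, which makes copy_creds()
 * call this function for the child task.  A hypothetical glibc-style caller,
 * where child_fn and stack_top are placeholder names:
 *
 *	clone(child_fn, stack_top, CLONE_NEWUSER | SIGCHLD, NULL);
 */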

void free_user_ns(struct kref *kref)
{
	struct user_namespace *parent, *ns =
		container_of(kref, struct user_namespace, kref);

	parent = ns->parent;
	kmem_cache_free(user_ns_cachep, ns);
	put_user_ns(parent);
}
EXPORT_SYMBOL(free_user_ns);

static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
{
	unsigned idx, extents;
	u32 first, last, id2;

	id2 = id + count - 1;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_read_barrier_depends();
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last &&
		    (id2 >= first && id2 <= last))
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].lower_first;
	else
		id = (u32) -1;

	return id;
}

static u32 map_id_down(struct uid_gid_map *map, u32 id)
{
	unsigned idx, extents;
	u32 first, last;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_read_barrier_depends();
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last)
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].lower_first;
	else
		id = (u32) -1;

	return id;
}

static u32 map_id_up(struct uid_gid_map *map, u32 id)
{
	unsigned idx, extents;
	u32 first, last;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_read_barrier_depends();
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].lower_first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last)
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].first;
	else
		id = (u32) -1;

	return id;
}
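
/*
 * Worked example (illustrative values only): with a single extent
 * { .first = 1000, .lower_first = 0, .count = 500 }, map_id_down(map, 1250)
 * returns 250 (1250 - 1000 + 0), map_id_up(map, 250) returns 1250, and an
 * id outside [1000, 1499] makes map_id_down() return (u32) -1.
 */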

/**
 *	make_kuid - Map a user-namespace uid pair into a kuid.
 *	@ns:  User namespace that the uid is in
 *	@uid: User identifier
 *
 *	Maps a user-namespace uid pair into a kernel internal kuid,
 *	and returns that kuid.
 *
 *	When there is no mapping defined for the user-namespace uid
 *	pair INVALID_UID is returned.  Callers are expected to test
 *	for and handle INVALID_UID being returned.  INVALID_UID
 *	may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
	/* Map the uid to a global kernel uid */
	return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);
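
/*
 * Illustrative use (hypothetical caller, not from this file): a uid_t
 * supplied by userspace is converted before being stored or compared
 * inside the kernel:
 *
 *	kuid_t kuid = make_kuid(current_user_ns(), uid);
 *	if (!uid_valid(kuid))
 *		return -EINVAL;
 */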

/**
 *	from_kuid - Create a uid from a kuid user-namespace pair.
 *	@targ: The user namespace we want a uid in.
 *	@kuid: The kernel internal uid to start with.
 *
 *	Map @kuid into the user-namespace specified by @targ and
 *	return the resulting uid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
	/* Map the uid from a global kernel uid */
	return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);

/**
 *	from_kuid_munged - Create a uid from a kuid user-namespace pair.
 *	@targ: The user namespace we want a uid in.
 *	@kuid: The kernel internal uid to start with.
 *
 *	Map @kuid into the user-namespace specified by @targ and
 *	return the resulting uid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kuid, from_kuid_munged never fails and always
 *	returns a valid uid.  This makes from_kuid_munged appropriate
 *	for use in syscalls like stat and getuid where failing the
 *	system call and failing to provide a valid uid are not
 *	options.
 *
 *	If @kuid has no mapping in @targ overflowuid is returned.
 */
uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
{
	uid_t uid;
	uid = from_kuid(targ, kuid);

	if (uid == (uid_t) -1)
		uid = overflowuid;
	return uid;
}
EXPORT_SYMBOL(from_kuid_munged);

/**
 *	make_kgid - Map a user-namespace gid pair into a kgid.
 *	@ns:  User namespace that the gid is in
 *	@gid: Group identifier
 *
 *	Maps a user-namespace gid pair into a kernel internal kgid,
 *	and returns that kgid.
 *
 *	When there is no mapping defined for the user-namespace gid
 *	pair INVALID_GID is returned.  Callers are expected to test
 *	for and handle INVALID_GID being returned.  INVALID_GID may be
 *	tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
	/* Map the gid to a global kernel gid */
	return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);

/**
 *	from_kgid - Create a gid from a kgid user-namespace pair.
 *	@targ: The user namespace we want a gid in.
 *	@kgid: The kernel internal gid to start with.
 *
 *	Map @kgid into the user-namespace specified by @targ and
 *	return the resulting gid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kgid has no mapping in @targ (gid_t)-1 is returned.
 */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
	/* Map the gid from a global kernel gid */
	return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);

/**
 *	from_kgid_munged - Create a gid from a kgid user-namespace pair.
 *	@targ: The user namespace we want a gid in.
 *	@kgid: The kernel internal gid to start with.
 *
 *	Map @kgid into the user-namespace specified by @targ and
 *	return the resulting gid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kgid, from_kgid_munged never fails and always
 *	returns a valid gid.  This makes from_kgid_munged appropriate
 *	for use in syscalls like stat and getgid where failing the
 *	system call and failing to provide a valid gid are not options.
 *
 *	If @kgid has no mapping in @targ overflowgid is returned.
 */
gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
{
	gid_t gid;
	gid = from_kgid(targ, kgid);

	if (gid == (gid_t) -1)
		gid = overflowgid;
	return gid;
}
EXPORT_SYMBOL(from_kgid_munged);

/**
 *	make_kprojid - Map a user-namespace projid pair into a kprojid.
 *	@ns:  User namespace that the projid is in
 *	@projid: Project identifier
 *
 *	Maps a user-namespace projid pair into a kernel internal kprojid,
 *	and returns that kprojid.
 *
 *	When there is no mapping defined for the user-namespace projid
 *	pair INVALID_PROJID is returned.  Callers are expected to test
 *	for and handle INVALID_PROJID being returned.  INVALID_PROJID
 *	may be tested for using projid_valid().
 */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
	/* Map the projid to a global kernel projid */
	return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);

/**
 *	from_kprojid - Create a projid from a kprojid user-namespace pair.
 *	@targ: The user namespace we want a projid in.
 *	@kprojid: The kernel internal project identifier to start with.
 *
 *	Map @kprojid into the user-namespace specified by @targ and
 *	return the resulting projid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 */
projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
	/* Map the projid from a global kernel projid */
	return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
}
EXPORT_SYMBOL(from_kprojid);

/**
 *	from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
 *	@targ: The user namespace we want a projid in.
 *	@kprojid: The kernel internal projid to start with.
 *
 *	Map @kprojid into the user-namespace specified by @targ and
 *	return the resulting projid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kprojid, from_kprojid_munged never fails and always
 *	returns a valid projid.  This makes from_kprojid_munged
 *	appropriate for use in syscalls like stat where failing the
 *	system call and failing to provide a valid projid are not
 *	options.
 *
 *	If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
 */
projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
{
	projid_t projid;
	projid = from_kprojid(targ, kprojid);

	if (projid == (projid_t) -1)
		projid = OVERFLOW_PROJID;
	return projid;
}
EXPORT_SYMBOL(from_kprojid_munged);


static int uid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	uid_t lower;

	lower_ns = current_user_ns();
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static int gid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	gid_t lower;

	lower_ns = current_user_ns();
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static int projid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	projid_t lower;

	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map)
{
	struct uid_gid_extent *extent = NULL;
	loff_t pos = *ppos;

	if (pos < map->nr_extents)
		extent = &map->extent[pos];

	return extent;
}

static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->uid_map);
}

static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->gid_map);
}

static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->projid_map);
}

static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return seq->op->start(seq, pos);
}

static void m_stop(struct seq_file *seq, void *v)
{
	return;
}

struct seq_operations proc_uid_seq_operations = {
	.start = uid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = uid_m_show,
};

struct seq_operations proc_gid_seq_operations = {
	.start = gid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = gid_m_show,
};

struct seq_operations proc_projid_seq_operations = {
	.start = projid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = projid_m_show,
};

static DEFINE_MUTEX(id_map_mutex);

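/*
 * Sketch of the accepted input (values are illustrative): each line written
 * to /proc/<pid>/uid_map, gid_map or projid_map has the form
 *
 *	ID-inside-ns  ID-outside-ns  length
 *
 * so writing "0 100000 65536" to uid_map maps ids 0-65535 inside the
 * namespace onto ids 100000-165535 in the parent namespace.  Only one
 * successful write is allowed per map.
 */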
static ssize_t map_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos,
			 int cap_setid,
			 struct uid_gid_map *map,
			 struct uid_gid_map *parent_map)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct uid_gid_map new_map;
	unsigned idx;
	struct uid_gid_extent *extent, *last = NULL;
	unsigned long page = 0;
	char *kbuf, *pos, *next_line;
	ssize_t ret = -EINVAL;

	/*
	 * The id_map_mutex serializes all writes to any given map.
	 *
	 * Any map is only ever written once.
	 *
	 * An id map fits within 1 cache line on most architectures.
	 *
	 * On read nothing needs to be done unless you are on an
	 * architecture with a crazy cache coherency model like alpha.
	 *
	 * There is a one time data dependency between reading the
	 * count of the extents and the values of the extents.  The
	 * desired behavior is to see the values of the extents that
	 * were written before the count of the extents.
	 *
	 * To achieve this smp_wmb() is used to guarantee the write
	 * order and smp_read_barrier_depends() guarantees that we
	 * don't have crazy architectures returning stale data.
	 */
	mutex_lock(&id_map_mutex);

	ret = -EPERM;
	/* Only allow one successful write to the map */
	if (map->nr_extents != 0)
		goto out;

	/* Require the appropriate privilege (CAP_SETUID or CAP_SETGID)
	 * over the user namespace in order to set the id mapping.
	 */
	if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid))
		goto out;

	/* Get a buffer */
	ret = -ENOMEM;
	page = __get_free_page(GFP_TEMPORARY);
	kbuf = (char *) page;
	if (!page)
		goto out;

	/* Only allow <= page size writes at the beginning of the file */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		goto out;

	/* Slurp in the user data */
	ret = -EFAULT;
	if (copy_from_user(kbuf, buf, count))
		goto out;
	kbuf[count] = '\0';

	/* Parse the user data */
	ret = -EINVAL;
	pos = kbuf;
	new_map.nr_extents = 0;
	for (; pos; pos = next_line) {
		extent = &new_map.extent[new_map.nr_extents];

		/* Find the end of line and ensure I don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			if (*next_line == '\0')
				next_line = NULL;
		}

		pos = skip_spaces(pos);
		extent->first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->lower_first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->count = simple_strtoul(pos, &pos, 10);
		if (*pos && !isspace(*pos))
			goto out;

		/* Verify there is no trailing junk on the line */
		pos = skip_spaces(pos);
		if (*pos != '\0')
			goto out;

		/* Verify we have been given valid starting values */
		if ((extent->first == (u32) -1) ||
		    (extent->lower_first == (u32) -1))
			goto out;

		/* Verify count is not zero and does not cause the extent to wrap */
		if ((extent->first + extent->count) <= extent->first)
			goto out;
		if ((extent->lower_first + extent->count) <= extent->lower_first)
			goto out;

		/* For now only accept extents that are strictly in order */
		if (last &&
		    (((last->first + last->count) > extent->first) ||
		     ((last->lower_first + last->count) > extent->lower_first)))
			goto out;

		new_map.nr_extents++;
		last = extent;

		/* Fail if the file contains too many extents */
		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
		    (next_line != NULL))
			goto out;
	}
	/* Be very certain the new map actually exists */
	if (new_map.nr_extents == 0)
		goto out;

	ret = -EPERM;
	/* Validate that the writer is allowed to use the ids being mapped to. */
	if (!new_idmap_permitted(ns, cap_setid, &new_map))
		goto out;

	/* Map the lower ids from the parent user namespace to the
	 * kernel global id space.
	 */
	for (idx = 0; idx < new_map.nr_extents; idx++) {
		u32 lower_first;
		extent = &new_map.extent[idx];

		lower_first = map_id_range_down(parent_map,
						extent->lower_first,
						extent->count);

		/* Fail if we can not map the specified extent to
		 * the kernel global id space.
		 */
		if (lower_first == (u32) -1)
			goto out;

		extent->lower_first = lower_first;
	}

	/* Install the map */
	memcpy(map->extent, new_map.extent,
		new_map.nr_extents*sizeof(new_map.extent[0]));
	smp_wmb();
	map->nr_extents = new_map.nr_extents;

	*ppos = count;
	ret = count;
out:
	mutex_unlock(&id_map_mutex);
	if (page)
		free_page(page);
	return ret;
}

ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;

	if (!ns->parent)
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETUID,
			 &ns->uid_map, &ns->parent->uid_map);
}

ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;

	if (!ns->parent)
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETGID,
			 &ns->gid_map, &ns->parent->gid_map);
}

ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);

	if (!ns->parent)
		return -EPERM;

	if ((seq_ns != ns) && (seq_ns != ns->parent))
		return -EPERM;

	/* Anyone can set any valid project id; no capability is needed. */
	return map_write(file, buf, size, ppos, -1,
			 &ns->projid_map, &ns->parent->projid_map);
}

static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *new_map)
{
	/* Allow anyone to set a mapping that doesn't require privilege */
	if (!cap_valid(cap_setid))
		return true;

	/* Allow the specified ids if we have the appropriate capability
	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
	 */
	if (ns_capable(ns->parent, cap_setid))
		return true;

	return false;
}

static __init int user_namespaces_init(void)
{
	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
	return 0;
}
module_init(user_namespaces_init);