proc: Usable inode numbers for the namespace file descriptors.
Assign a unique proc inode to each namespace, and use that
inode number to ensure we only allocate at most one proc
inode for every namespace in proc.
A single proc inode per namespace allows userspace to test
to see if two processes are in the same namespace.
This has been a long requested feature and only blocked because
a naive implementation would put the id in a global space and
would ultimately require having a namespace for the names of
namespaces, making migration and certain virtualization tricks
impossible.
We still don't have per superblock inode numbers for proc, which
appears necessary for application unaware checkpoint/restart and
migrations (if the application is using namespace file descriptors)
but that is now allowd by the design if it becomes important.
I have preallocated the ipc and uts initial proc inode numbers so
their structures can be statically initialized.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
(cherry picked from commit 98f842e675f96ffac96e6c50315790912b2812be)
diff --git a/kernel/pid.c b/kernel/pid.c
index 9f08dfa..c2010d1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -78,6 +78,7 @@
.last_pid = 0,
.level = 0,
.child_reaper = &init_task,
+ .proc_inum = PROC_PID_INIT_INO,
};
EXPORT_SYMBOL_GPL(init_pid_ns);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 57bc1fd..4f15bef 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -88,6 +88,10 @@
if (ns->pid_cachep == NULL)
goto out_free_map;
+ err = proc_alloc_inum(&ns->proc_inum);
+ if (err)
+ goto out_free_map;
+
kref_init(&ns->kref);
ns->level = level;
ns->parent = get_pid_ns(parent_pid_ns);
@@ -118,6 +122,7 @@
{
int i;
+ proc_free_inum(ns->proc_inum);
for (i = 0; i < PIDMAP_ENTRIES; i++)
kfree(ns->pidmap[i].page);
kmem_cache_free(pid_ns_cachep, ns);
diff --git a/kernel/user.c b/kernel/user.c
index 71dd236..9013a4f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
/*
* userns count is 1 for root user, 1 for init_uts_ns,
@@ -26,6 +27,7 @@
.refcount = ATOMIC_INIT(3),
},
.creator = &root_user,
+ .proc_inum = PROC_USER_INIT_INO,
};
EXPORT_SYMBOL_GPL(init_user_ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 3b906e9..c14b7b9 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -27,11 +27,18 @@
struct user_namespace *ns;
struct user_struct *root_user;
int n;
+ int ret;
ns = kmem_cache_alloc(user_ns_cachep, GFP_KERNEL);
if (!ns)
return -ENOMEM;
+ ret = proc_alloc_inum(&ns->proc_inum);
+ if (ret) {
+ kmem_cache_free(user_ns_cachep, ns);
+ return ret;
+ }
+
kref_init(&ns->kref);
for (n = 0; n < UIDHASH_SZ; ++n)
@@ -73,6 +80,7 @@
struct user_namespace *ns =
container_of(work, struct user_namespace, destroyer);
free_uid(ns->creator);
+ proc_free_inum(ns->proc_inum);
kmem_cache_free(user_ns_cachep, ns);
}
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 405caf9..ce3d44b 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -36,11 +36,18 @@
struct uts_namespace *old_ns)
{
struct uts_namespace *ns;
+ int err;
ns = create_uts_ns();
if (!ns)
return ERR_PTR(-ENOMEM);
+ err = proc_alloc_inum(&ns->proc_inum);
+ if (err) {
+ kfree(ns);
+ return ERR_PTR(err);
+ }
+
down_read(&uts_sem);
memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
@@ -78,6 +85,7 @@
ns = container_of(kref, struct uts_namespace, kref);
put_user_ns(ns->user_ns);
+ proc_free_inum(ns->proc_inum);
kfree(ns);
}
@@ -110,11 +118,18 @@
return 0;
}
+static unsigned int utsns_inum(void *vp)
+{
+ struct uts_namespace *ns = vp;
+
+ return ns->proc_inum;
+}
+
const struct proc_ns_operations utsns_operations = {
.name = "uts",
.type = CLONE_NEWUTS,
.get = utsns_get,
.put = utsns_put,
.install = utsns_install,
+ .inum = utsns_inum,
};
-