From 520d9eabce18edfef76a60b7b839d54facafe1f9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Dec 2012 18:06:40 -0800 Subject: [PATCH 1/4] Fix cap_capable to only allow owners in the parent user namespace to have caps. Andy Lutomirski pointed out that the current behavior of allowing the owner of a user namespace to have all caps when that owner is not in a parent user namespace is wrong. Add a test to ensure the owner of a user namespace is in the parent of the user namespace to fix this bug. Thankfully this bug did not apply to the initial user namespace, keeping the mischief that can be caused by this bug quite small. This is bug was introduced in v3.5 by commit 783291e6900 "Simplify the user_namespace by making userns->creator a kuid." But did not matter until the permisions required to create a user namespace were relaxed allowing a user namespace to be created inside of a user namespace. The bug made it possible for the owner of a user namespace to be present in a child user namespace. Since the owner of a user nameapce is granted all capabilities it became possible for users in a grandchild user namespace to have all privilges over their parent user namspace. Reorder the checks in cap_capable. This should make the common case faster and make it clear that nothing magic happens in the initial user namespace. The reordering is safe because cred->user_ns can only be in targ_ns or targ_ns->parent but not both. Add a comment a the top of the loop to make the logic of the code clear. Add a distinct variable ns that changes as we walk up the user namespace hierarchy to make it clear which variable is changing. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- security/commoncap.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/security/commoncap.c b/security/commoncap.c index 6dbae4650ab..7ee08c756d6 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -76,24 +76,33 @@ int cap_netlink_send(struct sock *sk, struct sk_buff *skb) int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, int cap, int audit) { - for (;;) { - /* The owner of the user namespace has all caps. */ - if (targ_ns != &init_user_ns && uid_eq(targ_ns->owner, cred->euid)) - return 0; + struct user_namespace *ns = targ_ns; + /* See if cred has the capability in the target user namespace + * by examining the target user namespace and all of the target + * user namespace's parents. + */ + for (;;) { /* Do we have the necessary capabilities? */ - if (targ_ns == cred->user_ns) + if (ns == cred->user_ns) return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; /* Have we tried all of the parent namespaces? */ - if (targ_ns == &init_user_ns) + if (ns == &init_user_ns) return -EPERM; + /* + * The owner of the user namespace in the parent of the + * user namespace has all caps. + */ + if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid)) + return 0; + /* - *If you have a capability in a parent user ns, then you have + * If you have a capability in a parent user ns, then you have * it over all children user namespaces as well. */ - targ_ns = targ_ns->parent; + ns = ns->parent; } /* We never get here */ From 5e4a08476b50fa39210fca82e03325cc46b9c235 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 14 Dec 2012 07:55:36 -0800 Subject: [PATCH 2/4] userns: Require CAP_SYS_ADMIN for most uses of setns. Andy Lutomirski found a nasty little bug in the permissions of setns. With unprivileged user namespaces it became possible to create new namespaces without privilege. However the setns calls were relaxed to only require CAP_SYS_ADMIN in the user nameapce of the targed namespace. Which made the following nasty sequence possible. pid = clone(CLONE_NEWUSER | CLONE_NEWNS); if (pid == 0) { /* child */ system("mount --bind /home/me/passwd /etc/passwd"); } else if (pid != 0) { /* parent */ char path[PATH_MAX]; snprintf(path, sizeof(path), "/proc/%u/ns/mnt"); fd = open(path, O_RDONLY); setns(fd, 0); system("su -"); } Prevent this possibility by requiring CAP_SYS_ADMIN in the current user namespace when joing all but the user namespace. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 3 ++- ipc/namespace.c | 3 ++- kernel/pid_namespace.c | 3 ++- kernel/utsname.c | 3 ++- net/core/net_namespace.c | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index c1bbe86f492..398a50ff243 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2781,7 +2781,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns) struct path root; if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || - !nsown_capable(CAP_SYS_CHROOT)) + !nsown_capable(CAP_SYS_CHROOT) || + !nsown_capable(CAP_SYS_ADMIN)) return -EPERM; if (fs->users != 1) diff --git a/ipc/namespace.c b/ipc/namespace.c index cf3386a51de..7c1fa451b0b 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -170,7 +170,8 @@ static void ipcns_put(void *ns) static int ipcns_install(struct nsproxy *nsproxy, void *new) { struct ipc_namespace *ns = new; - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || + !nsown_capable(CAP_SYS_ADMIN)) return -EPERM; /* Ditch state from the old ipc namespace */ diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 560da0dab23..fdbd0cdf271 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -325,7 +325,8 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns) struct pid_namespace *active = task_active_pid_ns(current); struct pid_namespace *ancestor, *new = ns; - if (!ns_capable(new->user_ns, CAP_SYS_ADMIN)) + if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || + !nsown_capable(CAP_SYS_ADMIN)) return -EPERM; /* diff --git a/kernel/utsname.c b/kernel/utsname.c index f6336d51d64..08b197e8c48 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -113,7 +113,8 @@ static int utsns_install(struct nsproxy *nsproxy, void *new) { struct uts_namespace *ns = new; - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || + !nsown_capable(CAP_SYS_ADMIN)) return -EPERM; get_uts_ns(ns); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2e9a3132b8d..8acce01b6da 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -649,7 +649,8 @@ static int netns_install(struct nsproxy *nsproxy, void *ns) { struct net *net = ns; - if (!ns_capable(net->user_ns, CAP_SYS_ADMIN)) + if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || + !nsown_capable(CAP_SYS_ADMIN)) return -EPERM; put_net(nsproxy->net_ns); From aa6d054e5ce94720797ca260392a74dbced56412 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 14 Dec 2012 08:50:54 -0800 Subject: [PATCH 3/4] userns: Add a more complete capability subset test to commit_creds When unsharing a user namespace we reduce our credentials to just what can be done in that user namespace. This is a subset of the credentials we previously had. Teach commit_creds to recognize this is a subset of the credentials we have had before and don't clear the dumpability flag. This allows an unprivileged program to do: unshare(CLONE_NEWUSER); fd = open("/proc/self/uid_map", O_RDWR); Where previously opening the uid_map writable would fail because the the task had been made non-dumpable. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- kernel/cred.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/kernel/cred.c b/kernel/cred.c index 48cea3da6d0..709d521903f 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -455,6 +455,31 @@ error_put: return ret; } +static bool cred_cap_issubset(const struct cred *set, const struct cred *subset) +{ + const struct user_namespace *set_ns = set->user_ns; + const struct user_namespace *subset_ns = subset->user_ns; + + /* If the two credentials are in the same user namespace see if + * the capabilities of subset are a subset of set. + */ + if (set_ns == subset_ns) + return cap_issubset(subset->cap_permitted, set->cap_permitted); + + /* The credentials are in a different user namespaces + * therefore one is a subset of the other only if a set is an + * ancestor of subset and set->euid is owner of subset or one + * of subsets ancestors. + */ + for (;subset_ns != &init_user_ns; subset_ns = subset_ns->parent) { + if ((set_ns == subset_ns->parent) && + uid_eq(subset_ns->owner, set->euid)) + return true; + } + + return false; +} + /** * commit_creds - Install new credentials upon the current task * @new: The credentials to be assigned @@ -493,7 +518,7 @@ int commit_creds(struct cred *new) !gid_eq(old->egid, new->egid) || !uid_eq(old->fsuid, new->fsuid) || !gid_eq(old->fsgid, new->fsgid) || - !cap_issubset(new->cap_permitted, old->cap_permitted)) { + !cred_cap_issubset(old, new)) { if (task->mm) set_dumpable(task->mm, suid_dumpable); task->pdeath_signal = 0; From 5155040ed349950e16c093ba8e65ad534994df2a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 9 Dec 2012 17:19:52 -0800 Subject: [PATCH 4/4] userns: Fix typo in description of the limitation of userns_install Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index f5975ccf934..2b042c42fbc 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -799,7 +799,7 @@ static int userns_install(struct nsproxy *nsproxy, void *ns) if (user_ns == current_user_ns()) return -EINVAL; - /* Threaded many not enter a different user namespace */ + /* Threaded processes may not enter a different user namespace */ if (atomic_read(¤t->mm->mm_users) > 1) return -EINVAL;