Blob Blame History Raw
From: David Herrmann <dh.herrmann@gmail.com>
Date: Wed, 22 Apr 2015 13:14:24 +0200
Subject: [PATCH] kdbus: translate capabilities between namespaces

Right now, we always drop capability-items if we cross user-namespaces.
However, the kernel _does_ support capability translation, as defined in
./security/commoncap.c cap_capable().

This patch adds capability translation support just like cap_capable()
does. This way, a message sent from a task into a child user-namespace of
its own will retain the capability-item and thus keep the parent
privileged inside of the user-namespace of its children.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
---
 ipc/kdbus/metadata.c | 126 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 84 insertions(+), 42 deletions(-)

diff --git a/ipc/kdbus/metadata.c b/ipc/kdbus/metadata.c
index b908b6314a00..7949c8d3ed64 100644
--- a/ipc/kdbus/metadata.c
+++ b/ipc/kdbus/metadata.c
@@ -63,8 +63,7 @@
  * @root_path:		Root-FS path
  * @cmdline:		Command-line
  * @cgroup:		Full cgroup path
- * @caps:		Capabilities
- * @caps_namespace:	User-namespace of @caps
+ * @cred:		Credentials
  * @seclabel:		Seclabel
  * @audit_loginuid:	Audit login-UID
  * @audit_sessionid:	Audit session-ID
@@ -104,14 +103,7 @@ struct kdbus_meta_proc {
 	char *cgroup;
 
 	/* KDBUS_ITEM_CAPS */
-	struct caps {
-		/* binary compatible to kdbus_caps */
-		u32 last_cap;
-		struct {
-			u32 caps[_KERNEL_CAPABILITY_U32S];
-		} set[4];
-	} caps;
-	struct user_namespace *caps_namespace;
+	const struct cred *cred;
 
 	/* KDBUS_ITEM_SECLABEL */
 	char *seclabel;
@@ -149,6 +141,14 @@ struct kdbus_meta_conn {
 	char *conn_description;
 };
 
+/* fixed size equivalent of "kdbus_caps" */
+struct kdbus_meta_caps {
+	u32 last_cap;
+	struct {
+		u32 caps[_KERNEL_CAPABILITY_U32S];
+	} set[4];
+};
+
 /**
  * kdbus_meta_proc_new() - Create process metadata object
  *
@@ -175,7 +175,8 @@ static void kdbus_meta_proc_free(struct kref *kref)
 
 	path_put(&mp->exe_path);
 	path_put(&mp->root_path);
-	put_user_ns(mp->caps_namespace);
+	if (mp->cred)
+		put_cred(mp->cred);
 	put_pid(mp->ppid);
 	put_pid(mp->tgid);
 	put_pid(mp->pid);
@@ -354,25 +355,7 @@ static int kdbus_meta_proc_collect_cgroup(struct kdbus_meta_proc *mp)
 
 static void kdbus_meta_proc_collect_caps(struct kdbus_meta_proc *mp)
 {
-	const struct cred *c = current_cred();
-	int i;
-
-	/* ABI: "last_cap" equals /proc/sys/kernel/cap_last_cap */
-	mp->caps.last_cap = CAP_LAST_CAP;
-	mp->caps_namespace = get_user_ns(current_user_ns());
-
-	CAP_FOR_EACH_U32(i) {
-		mp->caps.set[0].caps[i] = c->cap_inheritable.cap[i];
-		mp->caps.set[1].caps[i] = c->cap_permitted.cap[i];
-		mp->caps.set[2].caps[i] = c->cap_effective.cap[i];
-		mp->caps.set[3].caps[i] = c->cap_bset.cap[i];
-	}
-
-	/* clear unused bits */
-	for (i = 0; i < 4; i++)
-		mp->caps.set[i].caps[CAP_TO_INDEX(CAP_LAST_CAP)] &=
-						CAP_LAST_U32_VALID_MASK;
-
+	mp->cred = get_current_cred();
 	mp->valid |= KDBUS_ATTACH_CAPS;
 }
 
@@ -880,7 +863,7 @@ int kdbus_meta_export_prepare(struct kdbus_meta_proc *mp,
 		size += KDBUS_ITEM_SIZE(strlen(mp->cgroup) + 1);
 
 	if (mp && (*mask & KDBUS_ATTACH_CAPS))
-		size += KDBUS_ITEM_SIZE(sizeof(mp->caps));
+		size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_meta_caps));
 
 	if (mp && (*mask & KDBUS_ATTACH_SECLABEL))
 		size += KDBUS_ITEM_SIZE(strlen(mp->seclabel) + 1);
@@ -917,6 +900,69 @@ static int kdbus_meta_push_kvec(struct kvec *kvec,
 	return 2 + !!kdbus_kvec_pad(kvec++, size);
 }
 
+static void kdbus_meta_export_caps(struct kdbus_meta_caps *out,
+				   struct kdbus_meta_proc *mp)
+{
+	struct user_namespace *iter;
+	const struct cred *cred = mp->cred;
+	bool parent = false, owner = false;
+	int i;
+
+	/*
+	 * This translates the capability sets of 'cred' into the current
+	 * user-namespace. If the current user-namespace is the user-namespace
+	 * of 'cred' or a descendant of it, the sets are copied verbatim. If
+	 * not, the sets are cleared.
+	 * There's one exception: If the euid of 'cred' owns the namespace
+	 * directly below the user-namespace of 'cred' on the path up from the
+	 * current user-namespace, all sets but the inheritable one are filled.
+	 * This means, the user who created a user-namespace always has all
+	 * capabilities in any child namespaces. Note that this is based on the
+	 * uid of the namespace creator, not the task hierarchy.
+	 */
+	for (iter = current_user_ns(); iter; iter = iter->parent) {
+		if (iter == cred->user_ns) {
+			parent = true;
+			break;
+		}
+
+		if (iter == &init_user_ns)
+			break;
+
+		if ((iter->parent == cred->user_ns) &&
+		    uid_eq(iter->owner, cred->euid)) {
+			owner = true;
+			break;
+		}
+	}
+
+	out->last_cap = CAP_LAST_CAP;
+
+	CAP_FOR_EACH_U32(i) {
+		if (parent) {
+			out->set[0].caps[i] = cred->cap_inheritable.cap[i];
+			out->set[1].caps[i] = cred->cap_permitted.cap[i];
+			out->set[2].caps[i] = cred->cap_effective.cap[i];
+			out->set[3].caps[i] = cred->cap_bset.cap[i];
+		} else if (owner) {
+			out->set[0].caps[i] = 0U;
+			out->set[1].caps[i] = ~0U;
+			out->set[2].caps[i] = ~0U;
+			out->set[3].caps[i] = ~0U;
+		} else {
+			out->set[0].caps[i] = 0U;
+			out->set[1].caps[i] = 0U;
+			out->set[2].caps[i] = 0U;
+			out->set[3].caps[i] = 0U;
+		}
+	}
+
+	/* clear unused bits */
+	for (i = 0; i < 4; i++)
+		out->set[i].caps[CAP_TO_INDEX(CAP_LAST_CAP)] &=
+					CAP_LAST_U32_VALID_MASK;
+}
+
 /* This is equivalent to from_kuid_munged(), but maps INVALID_UID to itself */
 static uid_t kdbus_from_kuid_keep(kuid_t uid)
 {
@@ -975,14 +1021,6 @@ int kdbus_meta_export(struct kdbus_meta_proc *mp,
 
 	hdr = &item_hdr[0];
 
-	/*
-	 * TODO: We currently have no sane way of translating a set of caps
-	 * between different user namespaces. Until that changes, we have
-	 * to drop such items.
-	 */
-	if (mp && mp->caps_namespace != user_ns)
-		mask &= ~KDBUS_ATTACH_CAPS;
-
 	if (mask == 0) {
 		*real_size = 0;
 		return 0;
@@ -1088,10 +1126,14 @@ int kdbus_meta_export(struct kdbus_meta_proc *mp,
 					    KDBUS_ITEM_CGROUP, mp->cgroup,
 					    strlen(mp->cgroup) + 1, &size);
 
-	if (mp && (mask & KDBUS_ATTACH_CAPS))
+	if (mp && (mask & KDBUS_ATTACH_CAPS)) {
+		struct kdbus_meta_caps caps = {};
+
+		kdbus_meta_export_caps(&caps, mp);
 		cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-					    KDBUS_ITEM_CAPS, &mp->caps,
-					    sizeof(mp->caps), &size);
+					    KDBUS_ITEM_CAPS, &caps,
+					    sizeof(caps), &size);
+	}
 
 	if (mp && (mask & KDBUS_ATTACH_SECLABEL))
 		cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,