You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
218 lines
11 KiB
218 lines
11 KiB
From 44d19e4beb486969b25c07f7efdee2e8b8bcf986 Mon Sep 17 00:00:00 2001
|
|
From: Lennart Poettering <lennart@poettering.net>
|
|
Date: Fri, 16 Nov 2018 20:19:07 +0100
|
|
Subject: [PATCH] core: when Delegate=yes is set for a unit, run ExecStartPre=
|
|
and friends in a subcgroup of the unit
|
|
|
|
Otherwise we might conflict with the "no-processes-in-inner-cgroup" rule
|
|
of cgroupsv2. Consider nspawn starting up and initializing its cgroup
|
|
hierarchy with "supervisor/" and "payload/" as subcgroup, with itself
|
|
moved into the former and the payload into the latter. Now, if an
|
|
ExecStartPre= is run right after, it cannot be placed in the main cgroup,
|
|
because that is now an inner cgroup with populated children.
|
|
|
|
Hence, let's run these helpers in another sub-cgroup .control/ below it.
|
|
|
|
This is somewhat ugly since it weakens the clear separation of
|
|
ownership, but given that this is an explicit contract and a double opt-in, it should be acceptable.
|
|
|
|
Fixes: #10482
|
|
(cherry picked from commit 78f93209fc7f61f15b12d7a5f74d712bd020b249)
|
|
|
|
Resolves: #2215925
|
|
---
|
|
src/core/execute.c | 67 +++++++++++++++++++++++++++++++++++++++-------
|
|
src/core/execute.h | 9 ++++---
|
|
src/core/service.c | 13 ++++-----
|
|
3 files changed, 70 insertions(+), 19 deletions(-)
|
|
|
|
diff --git a/src/core/execute.c b/src/core/execute.c
|
|
index b1d8dceb32..7e186c948c 100644
|
|
--- a/src/core/execute.c
|
|
+++ b/src/core/execute.c
|
|
@@ -2834,6 +2834,37 @@ bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
|
|
return c->cpu_affinity_from_numa;
|
|
}
|
|
|
|
+static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
|
|
+ bool using_subcgroup;
|
|
+ char *p;
|
|
+
|
|
+ assert(params);
|
|
+ assert(ret);
|
|
+
|
|
+ if (!params->cgroup_path)
|
|
+ return -EINVAL;
|
|
+
|
|
+ /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
|
|
+ * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
|
|
+ * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
|
|
+ * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
|
|
+ * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
|
|
+ * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
|
|
+ * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
|
|
+ * flag, which is only passed for the former statements, not for the latter. */
|
|
+
|
|
+ using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
|
|
+ if (using_subcgroup)
|
|
+ p = strjoin(params->cgroup_path, "/.control");
|
|
+ else
|
|
+ p = strdup(params->cgroup_path);
|
|
+ if (!p)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ *ret = p;
|
|
+ return using_subcgroup;
|
|
+}
|
|
+
|
|
static int exec_child(
|
|
Unit *unit,
|
|
const ExecCommand *command,
|
|
@@ -3055,10 +3086,18 @@ static int exec_child(
|
|
}
|
|
|
|
if (params->cgroup_path) {
|
|
- r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
|
|
+ _cleanup_free_ char *p = NULL;
|
|
+
|
|
+ r = exec_parameters_get_cgroup_path(params, &p);
|
|
if (r < 0) {
|
|
*exit_status = EXIT_CGROUP;
|
|
- return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
|
|
+ return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
|
|
+ }
|
|
+
|
|
+ r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
|
|
+ if (r < 0) {
|
|
+ *exit_status = EXIT_CGROUP;
|
|
+ return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
|
|
}
|
|
}
|
|
|
|
@@ -3659,6 +3698,7 @@ int exec_spawn(Unit *unit,
|
|
DynamicCreds *dcreds,
|
|
pid_t *ret) {
|
|
|
|
+ _cleanup_free_ char *subcgroup_path = NULL;
|
|
_cleanup_strv_free_ char **files_env = NULL;
|
|
int *fds = NULL;
|
|
size_t n_storage_fds = 0, n_socket_fds = 0;
|
|
@@ -3716,6 +3756,17 @@ int exec_spawn(Unit *unit,
|
|
LOG_UNIT_ID(unit),
|
|
LOG_UNIT_INVOCATION_ID(unit));
|
|
|
|
+ if (params->cgroup_path) {
|
|
+ r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
|
|
+ if (r < 0)
|
|
+ return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
|
|
+ if (r > 0) { /* We are using a child cgroup */
|
|
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
|
|
+ if (r < 0)
|
|
+ return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
|
|
+ }
|
|
+ }
|
|
+
|
|
pid = fork();
|
|
if (pid < 0)
|
|
return log_unit_error_errno(unit, errno, "Failed to fork: %m");
|
|
@@ -3754,13 +3805,11 @@ int exec_spawn(Unit *unit,
|
|
|
|
log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
|
|
|
|
- /* We add the new process to the cgroup both in the child (so
|
|
- * that we can be sure that no user code is ever executed
|
|
- * outside of the cgroup) and in the parent (so that we can be
|
|
- * sure that when we kill the cgroup the process will be
|
|
- * killed too). */
|
|
- if (params->cgroup_path)
|
|
- (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
|
|
+ /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
|
|
+ * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
|
|
+ * process will be killed too). */
|
|
+ if (subcgroup_path)
|
|
+ (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
|
|
|
|
exec_status_start(&command->exec_status, pid);
|
|
|
|
diff --git a/src/core/execute.h b/src/core/execute.h
|
|
index 62c6229621..f5bc180ece 100644
|
|
--- a/src/core/execute.h
|
|
+++ b/src/core/execute.h
|
|
@@ -289,12 +289,13 @@ typedef enum ExecFlags {
|
|
EXEC_CHOWN_DIRECTORIES = 1 << 5, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
|
|
EXEC_NSS_BYPASS_BUS = 1 << 6, /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */
|
|
EXEC_CGROUP_DELEGATE = 1 << 7,
|
|
+ EXEC_IS_CONTROL = 1 << 8,
|
|
+ EXEC_CONTROL_CGROUP = 1 << 9, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL is set, too */
|
|
|
|
/* The following are not used by execute.c, but by consumers internally */
|
|
- EXEC_PASS_FDS = 1 << 8,
|
|
- EXEC_IS_CONTROL = 1 << 9,
|
|
- EXEC_SETENV_RESULT = 1 << 10,
|
|
- EXEC_SET_WATCHDOG = 1 << 11,
|
|
+ EXEC_PASS_FDS = 1 << 10,
|
|
+ EXEC_SETENV_RESULT = 1 << 11,
|
|
+ EXEC_SET_WATCHDOG = 1 << 12,
|
|
} ExecFlags;
|
|
|
|
struct ExecParameters {
|
|
diff --git a/src/core/service.c b/src/core/service.c
|
|
index e05d0e0514..0423f2c73e 100644
|
|
--- a/src/core/service.c
|
|
+++ b/src/core/service.c
|
|
@@ -1429,7 +1429,7 @@ static int service_spawn(
|
|
assert(c);
|
|
assert(_pid);
|
|
|
|
- r = unit_prepare_exec(UNIT(s));
|
|
+ r = unit_prepare_exec(UNIT(s)); /* This realizes the cgroup, among other things */
|
|
if (r < 0)
|
|
return r;
|
|
|
|
@@ -1798,7 +1798,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
|
|
r = service_spawn(s,
|
|
s->control_command,
|
|
s->timeout_stop_usec,
|
|
- EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
|
|
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
|
|
&s->control_pid);
|
|
if (r < 0)
|
|
goto fail;
|
|
@@ -1913,7 +1913,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
|
|
r = service_spawn(s,
|
|
s->control_command,
|
|
s->timeout_stop_usec,
|
|
- EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
|
|
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
|
|
&s->control_pid);
|
|
if (r < 0)
|
|
goto fail;
|
|
@@ -1991,7 +1991,7 @@ static void service_enter_start_post(Service *s) {
|
|
r = service_spawn(s,
|
|
s->control_command,
|
|
s->timeout_start_usec,
|
|
- EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
|
|
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
|
|
&s->control_pid);
|
|
if (r < 0)
|
|
goto fail;
|
|
@@ -2269,7 +2269,7 @@ static void service_enter_reload(Service *s) {
|
|
r = service_spawn(s,
|
|
s->control_command,
|
|
s->timeout_start_usec,
|
|
- EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
|
|
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
|
|
&s->control_pid);
|
|
if (r < 0)
|
|
goto fail;
|
|
@@ -2309,7 +2309,8 @@ static void service_run_next_control(Service *s) {
|
|
timeout,
|
|
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|
|
|
(IN_SET(s->control_command_id, SERVICE_EXEC_CONDITION, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)|
|
|
- (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0),
|
|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0)|
|
|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_START_POST, SERVICE_EXEC_RELOAD, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_CONTROL_CGROUP : 0),
|
|
&s->control_pid);
|
|
if (r < 0)
|
|
goto fail;
|