Skip to content

Commit

Permalink
graceful node shutdown when manager destroyed
Browse files Browse the repository at this point in the history
Added graceful node_shutdown function that is called before node_unref
which goes over outstanding requests and cancels them.

This fixes a memory leak when bluechi-controller service is stopped
via bluechi-controller itself

Signed-off-by: Mark Kemel <[email protected]>
  • Loading branch information
mkemel committed Nov 9, 2023
1 parent f65493e commit 016adde
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 3 deletions.
2 changes: 2 additions & 0 deletions src/manager/manager.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,11 @@ void manager_unref(Manager *manager) {
Node *node = NULL;
Node *next_node = NULL;
LIST_FOREACH_SAFE(nodes, node, next_node, manager->nodes) {
node_shutdown(node);
node_unref(node);
}
LIST_FOREACH_SAFE(nodes, node, next_node, manager->anonymous_nodes) {
node_shutdown(node);
node_unref(node);
}

Expand Down
47 changes: 47 additions & 0 deletions src/manager/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ Node *node_new(Manager *manager, const char *name) {
}
}

node->is_shutdown = false;

return steal_pointer(&node);
}

Expand Down Expand Up @@ -206,6 +208,15 @@ void node_unref(Node *node) {
free(node);
}

void node_shutdown(Node *node) {
AgentRequest *req = NULL;
AgentRequest *next_req = NULL;
node->is_shutdown = true;
LIST_FOREACH_SAFE(outstanding_requests, req, next_req, node->outstanding_requests) {
agent_request_cancel(req);
}
}

bool node_export(Node *node) {
Manager *manager = node->manager;

Expand Down Expand Up @@ -1070,6 +1081,7 @@ int node_create_request(
req->cb = cb;
req->userdata = userdata;
req->free_userdata = free_userdata;
req->is_cancelled = false;
LIST_APPEND(outstanding_requests, node->outstanding_requests, req);

*ret = req;
Expand All @@ -1078,10 +1090,24 @@ int node_create_request(

static int agent_request_callback(sd_bus_message *m, void *userdata, UNUSED sd_bus_error *ret_error) {
_cleanup_agent_request_ AgentRequest *req = userdata;
if (req->is_cancelled) {
bc_log_debugf("Response received to a cancelled request for node %s. Dropping message.",
req->node->name);
return 0;
}

return req->cb(req, m, ret_error);
}

int agent_request_cancel(AgentRequest *r) {
_cleanup_agent_request_ AgentRequest *req = r;
req->is_cancelled = true;
_cleanup_sd_bus_message_ sd_bus_message *m = NULL;
sd_bus_message_new_method_errorf(req->message, &m, SD_BUS_ERROR_FAILED, "Request cancelled");

return req->cb(req, m, NULL);
}

int agent_request_start(AgentRequest *req) {
Node *node = req->node;

Expand Down Expand Up @@ -1156,6 +1182,11 @@ static int method_list_units_callback(AgentRequest *req, sd_bus_message *m, UNUS
static int node_method_list_units(sd_bus_message *m, void *userdata, UNUSED sd_bus_error *ret_error) {
Node *node = userdata;

if (node->is_shutdown) {
return sd_bus_reply_method_errorf(
m, SD_BUS_ERROR_FAILED, "Request not allowed: node is in shutdown state");
}

_cleanup_agent_request_ AgentRequest *agent_req = node_request_list_units(
node,
method_list_units_callback,
Expand Down Expand Up @@ -1199,6 +1230,11 @@ static int node_method_set_unit_properties(sd_bus_message *m, void *userdata, UN
const char *unit = NULL;
int runtime = 0;

if (node->is_shutdown) {
return sd_bus_reply_method_errorf(
m, SD_BUS_ERROR_FAILED, "Request not allowed: node is in shutdown state");
}

int r = sd_bus_message_read(m, "sb", &unit, &runtime);
if (r < 0) {
return sd_bus_reply_method_errorf(
Expand Down Expand Up @@ -1282,6 +1318,12 @@ static int node_method_passthrough_to_agent_callback(

static int node_method_passthrough_to_agent(sd_bus_message *m, void *userdata, UNUSED sd_bus_error *ret_error) {
Node *node = userdata;

if (node->is_shutdown) {
return sd_bus_reply_method_errorf(
m, SD_BUS_ERROR_FAILED, "Request not allowed: node is in shutdown state");
}

_cleanup_agent_request_ AgentRequest *req = NULL;
int r = node_create_request(
&req,
Expand Down Expand Up @@ -1382,6 +1424,11 @@ static int node_run_unit_lifecycle_method(
const char *mode = NULL;
uint64_t start_time = get_time_micros();

if (node->is_shutdown) {
return sd_bus_reply_method_errorf(
m, SD_BUS_ERROR_FAILED, "Request not allowed: node is in shutdown state");
}

int r = sd_bus_message_read(m, "ss", &unit, &mode);
if (r < 0) {
return sd_bus_reply_method_errorf(
Expand Down
7 changes: 7 additions & 0 deletions src/manager/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ struct AgentRequest {
free_func_t free_userdata;
agent_request_response_t cb;

bool is_cancelled;

LIST_FIELDS(AgentRequest, outstanding_requests);
};

Expand All @@ -29,6 +31,8 @@ void agent_request_unref(AgentRequest *req);

int agent_request_start(AgentRequest *req);

int agent_request_cancel(AgentRequest *r);

struct Node {
int ref_count;

Expand All @@ -54,11 +58,14 @@ struct Node {

struct hashmap *unit_subscriptions;
time_t last_seen;

bool is_shutdown;
};

Node *node_new(Manager *manager, const char *name);
Node *node_ref(Node *node);
void node_unref(Node *node);
void node_shutdown(Node *node);

const char *node_get_status(Node *node);

Expand Down
6 changes: 3 additions & 3 deletions tests/bluechi_test/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,11 @@ def wait_for_unit_state_to_be(
return False

def enable_valgrind(self) -> None:
self.exec_run(f"sed -i '/ExecStart=/c\\ExecStart=/usr/bin/valgrind --leak-check=yes "
self.exec_run(f"sed -i '/ExecStart=/c\\ExecStart=/usr/bin/valgrind -s --leak-check=yes "
f"--log-file={valgrind_log_path_controller} /usr/libexec/bluechi-controller' "
f"/usr/lib/systemd/system/bluechi-controller.service")
self.exec_run(f"sed -i '/ExecStart=/c\\ExecStart=/usr/bin/valgrind --leak-check=yes "
f"--log-file={valgrind_log_path_controller} /usr/libexec/bluechi-agent' "
self.exec_run(f"sed -i '/ExecStart=/c\\ExecStart=/usr/bin/valgrind -s --leak-check=yes "
f"--log-file={valgrind_log_path_agent} /usr/libexec/bluechi-agent' "
f"/usr/lib/systemd/system/bluechi-agent.service")
self.exec_run("mkdir -p /var/log/valgrind")
self.exec_run("systemctl daemon-reload")
Expand Down

0 comments on commit 016adde

Please sign in to comment.