From fc8959efebc662f4050700cc1ed2798750b15b73 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 6 Nov 2020 13:32:53 +0100 Subject: [PATCH] resolved: instead of closing DNS UDP transaction fds right-away, add them to a socket "graveyard" The "socket graveyard" shall contain sockets we have sent a question out of, but not received a reply. If we'd close thus sockets immediately when we are not interested anymore, we'd trigger ICMP port unreachable messages once we after all *do* get a reply. Let's avoid that, by leaving the fds open for a bit longer, until a timeout is reached or a reply datagram received. Fixes: #17421 (cherry picked from commit 80710ade03d971a8877fde8ce9d42eb2b07f4c47) Resolves: #2156751 --- src/resolve/meson.build | 2 + src/resolve/resolved-dns-transaction.c | 44 ++++++-- src/resolve/resolved-manager.c | 2 + src/resolve/resolved-manager.h | 5 + src/resolve/resolved-socket-graveyard.c | 133 ++++++++++++++++++++++++ src/resolve/resolved-socket-graveyard.h | 18 ++++ 6 files changed, 194 insertions(+), 10 deletions(-) create mode 100644 src/resolve/resolved-socket-graveyard.c create mode 100644 src/resolve/resolved-socket-graveyard.h diff --git a/src/resolve/meson.build b/src/resolve/meson.build index 15f3835d55..a975e58242 100644 --- a/src/resolve/meson.build +++ b/src/resolve/meson.build @@ -63,6 +63,8 @@ systemd_resolved_sources = files(''' resolved-dns-stub.c resolved-etc-hosts.h resolved-etc-hosts.c + resolved-socket-graveyard.c + resolved-socket-graveyard.h '''.split()) resolvectl_sources = files(''' diff --git a/src/resolve/resolved-dns-transaction.c b/src/resolve/resolved-dns-transaction.c index c60b8215a6..95aea21134 100644 --- a/src/resolve/resolved-dns-transaction.c +++ b/src/resolve/resolved-dns-transaction.c @@ -48,7 +48,14 @@ static void dns_transaction_flush_dnssec_transactions(DnsTransaction *t) { } } -static void dns_transaction_close_connection(DnsTransaction *t) { +static void dns_transaction_close_connection( + DnsTransaction *t, + bool use_graveyard) { /* Set use_graveyard = false when you know the connection is already + * dead, for example because you got a connection error back from the + * kernel. In that case there's no point in keeping the fd around, + * hence don't. */ + int r; + assert(t); if (t->stream) { @@ -62,6 +69,20 @@ static void dns_transaction_close_connection(DnsTransaction *t) { } t->dns_udp_event_source = sd_event_source_unref(t->dns_udp_event_source); + + /* If we have an UDP socket where we sent a packet, but never received one, then add it to the socket + * graveyard, instead of closing it right away. That way it will stick around for a moment longer, + * and the reply we might still get from the server will be eaten up instead of resulting in an ICMP + * port unreachable error message. */ + + if (use_graveyard && t->dns_udp_fd >= 0 && t->sent && !t->received) { + r = manager_add_socket_to_graveyard(t->scope->manager, t->dns_udp_fd); + if (r < 0) + log_debug_errno(r, "Failed to add UDP socket to graveyard, closing immediately: %m"); + else + TAKE_FD(t->dns_udp_fd); + } + t->dns_udp_fd = safe_close(t->dns_udp_fd); } @@ -81,7 +102,7 @@ DnsTransaction* dns_transaction_free(DnsTransaction *t) { log_debug("Freeing transaction %" PRIu16 ".", t->id); - dns_transaction_close_connection(t); + dns_transaction_close_connection(t, true); dns_transaction_stop_timeout(t); dns_packet_unref(t->sent); @@ -341,7 +362,7 @@ void dns_transaction_complete(DnsTransaction *t, DnsTransactionState state) { t->state = state; - dns_transaction_close_connection(t); + dns_transaction_close_connection(t, true); dns_transaction_stop_timeout(t); /* Notify all queries that are interested, but make sure the @@ -455,7 +476,7 @@ static int dns_transaction_maybe_restart(DnsTransaction *t) { static void on_transaction_stream_error(DnsTransaction *t, int error) { assert(t); - dns_transaction_close_connection(t); + dns_transaction_close_connection(t, true); if (ERRNO_IS_DISCONNECT(error)) { if (t->scope->protocol == DNS_PROTOCOL_LLMNR) { @@ -478,7 +499,7 @@ static int dns_transaction_on_stream_packet(DnsTransaction *t, DnsPacket *p) { assert(t); assert(p); - dns_transaction_close_connection(t); + dns_transaction_close_connection(t, true); if (dns_packet_validate_reply(p) <= 0) { log_debug("Invalid TCP reply packet."); @@ -583,7 +604,7 @@ static int dns_transaction_emit_tcp(DnsTransaction *t) { assert(t); - dns_transaction_close_connection(t); + dns_transaction_close_connection(t, true); switch (t->scope->protocol) { @@ -692,7 +713,7 @@ static int dns_transaction_emit_tcp(DnsTransaction *t) { r = dns_stream_write_packet(t->stream, t->sent); if (r < 0) { - dns_transaction_close_connection(t); + dns_transaction_close_connection(t, /* use_graveyard= */ false); return r; } @@ -1196,7 +1217,7 @@ void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) { if (r > 0) { /* There are DNSSEC transactions pending now. Update the state accordingly. */ t->state = DNS_TRANSACTION_VALIDATING; - dns_transaction_close_connection(t); + dns_transaction_close_connection(t, true); dns_transaction_stop_timeout(t); return; } @@ -1277,7 +1298,10 @@ static int dns_transaction_emit_udp(DnsTransaction *t) { if (r > 0 || t->dns_udp_fd < 0) { /* Server changed, or no connection yet. */ int fd; - dns_transaction_close_connection(t); + dns_transaction_close_connection(t, true); + + /* Before we allocate a new UDP socket, let's process the graveyard a bit to free some fds */ + manager_socket_graveyard_process(t->scope->manager); fd = dns_scope_socket_udp(t->scope, t->server, 53); if (fd < 0) @@ -1297,7 +1321,7 @@ static int dns_transaction_emit_udp(DnsTransaction *t) { if (r < 0) return r; } else - dns_transaction_close_connection(t); + dns_transaction_close_connection(t, true); r = dns_scope_emit_udp(t->scope, t->dns_udp_fd, t->sent); if (r < 0) diff --git a/src/resolve/resolved-manager.c b/src/resolve/resolved-manager.c index 5583d63527..5feb92a676 100644 --- a/src/resolve/resolved-manager.c +++ b/src/resolve/resolved-manager.c @@ -683,6 +683,8 @@ Manager *manager_free(Manager *m) { manager_mdns_stop(m); manager_dns_stub_stop(m); + manager_socket_graveyard_clear(m); + sd_bus_slot_unref(m->prepare_for_sleep_slot); sd_bus_unref(m->bus); diff --git a/src/resolve/resolved-manager.h b/src/resolve/resolved-manager.h index d94b2888ab..80119bfcb3 100644 --- a/src/resolve/resolved-manager.h +++ b/src/resolve/resolved-manager.h @@ -20,6 +20,7 @@ typedef struct Manager Manager; #include "resolved-dns-stream.h" #include "resolved-dns-trust-anchor.h" #include "resolved-link.h" +#include "resolved-socket-graveyard.h" #define MANAGER_SEARCH_DOMAINS_MAX 32 #define MANAGER_DNS_SERVERS_MAX 32 @@ -130,6 +131,10 @@ struct Manager { sd_event_source *dns_stub_tcp_event_source; Hashmap *polkit_registry; + + LIST_HEAD(SocketGraveyard, socket_graveyard); + SocketGraveyard *socket_graveyard_oldest; + size_t n_socket_graveyard; }; /* Manager */ diff --git a/src/resolve/resolved-socket-graveyard.c b/src/resolve/resolved-socket-graveyard.c new file mode 100644 index 0000000000..067cb666d4 --- /dev/null +++ b/src/resolve/resolved-socket-graveyard.c @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "resolved-socket-graveyard.h" + +#define SOCKET_GRAVEYARD_USEC (5 * USEC_PER_SEC) +#define SOCKET_GRAVEYARD_MAX 100 + +/* This implements a socket "graveyard" for UDP sockets. If a socket fd is added to the graveyard it is kept + * open for a couple of more seconds, expecting one reply. Once the reply is received the fd is closed + * immediately, or if none is received it is closed after the timeout. Why all this? So that if we contact a + * DNS server, and it doesn't reply instantly, and we lose interest in the response and thus close the fd, we + * don't end up sending back an ICMP error once the server responds but we aren't listening anymore. (See + * https://github.com/systemd/systemd/issues/17421 for further information.) + * + * Note that we don't allocate any timer event source to clear up the graveyard once the socket's timeout is + * reached. Instead we operate lazily: we close old entries when adding a new fd to the graveyard, or + * whenever any code runs manager_socket_graveyard_process() — which the DNS transaction code does right + * before allocating a new UDP socket. */ + +static SocketGraveyard* socket_graveyard_free(SocketGraveyard *g) { + if (!g) + return NULL; + + if (g->manager) { + assert(g->manager->n_socket_graveyard > 0); + g->manager->n_socket_graveyard--; + + if (g->manager->socket_graveyard_oldest == g) + g->manager->socket_graveyard_oldest = g->graveyard_prev; + + LIST_REMOVE(graveyard, g->manager->socket_graveyard, g); + + assert((g->manager->n_socket_graveyard > 0) == !!g->manager->socket_graveyard); + assert((g->manager->n_socket_graveyard > 0) == !!g->manager->socket_graveyard_oldest); + } + + if (g->io_event_source) { + log_debug("Closing graveyard socket fd %i", sd_event_source_get_io_fd(g->io_event_source)); + sd_event_source_unref(g->io_event_source); + } + + return mfree(g); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(SocketGraveyard*, socket_graveyard_free); + +void manager_socket_graveyard_process(Manager *m) { + usec_t n = USEC_INFINITY; + + assert(m); + + while (m->socket_graveyard_oldest) { + SocketGraveyard *g = m->socket_graveyard_oldest; + + if (n == USEC_INFINITY) + assert_se(sd_event_now(m->event, clock_boottime_or_monotonic(), &n) >= 0); + + if (g->deadline > n) + break; + + socket_graveyard_free(g); + } +} + +void manager_socket_graveyard_clear(Manager *m) { + assert(m); + + while (m->socket_graveyard) + socket_graveyard_free(m->socket_graveyard); +} + +static int on_io_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + SocketGraveyard *g = userdata; + + assert(g); + + /* An IO event happened on the graveyard fd. We don't actually care which event that is, and we don't + * read any incoming packet off the socket. We just close the fd, that's enough to not trigger the + * ICMP unreachable port event */ + + socket_graveyard_free(g); + return 0; +} + +static void manager_socket_graveyard_make_room(Manager *m) { + assert(m); + + while (m->n_socket_graveyard >= SOCKET_GRAVEYARD_MAX) + socket_graveyard_free(m->socket_graveyard_oldest); +} + +int manager_add_socket_to_graveyard(Manager *m, int fd) { + _cleanup_(socket_graveyard_freep) SocketGraveyard *g = NULL; + int r; + + assert(m); + assert(fd >= 0); + + manager_socket_graveyard_process(m); + manager_socket_graveyard_make_room(m); + + g = new(SocketGraveyard, 1); + if (!g) + return log_oom(); + + *g = (SocketGraveyard) { + .manager = m, + }; + + LIST_PREPEND(graveyard, m->socket_graveyard, g); + if (!m->socket_graveyard_oldest) + m->socket_graveyard_oldest = g; + + m->n_socket_graveyard++; + + assert_se(sd_event_now(m->event, clock_boottime_or_monotonic(), &g->deadline) >= 0); + g->deadline += SOCKET_GRAVEYARD_USEC; + + r = sd_event_add_io(m->event, &g->io_event_source, fd, EPOLLIN, on_io_event, g); + if (r < 0) + return log_error_errno(r, "Failed to create graveyard IO source: %m"); + + r = sd_event_source_set_io_fd_own(g->io_event_source, true); + if (r < 0) + return log_error_errno(r, "Failed to enable graveyard IO source fd ownership: %m"); + + (void) sd_event_source_set_description(g->io_event_source, "graveyard"); + + log_debug("Added socket %i to graveyard", fd); + + TAKE_PTR(g); + return 0; +} diff --git a/src/resolve/resolved-socket-graveyard.h b/src/resolve/resolved-socket-graveyard.h new file mode 100644 index 0000000000..9b13bb0482 --- /dev/null +++ b/src/resolve/resolved-socket-graveyard.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +typedef struct SocketGraveyard SocketGraveyard; + +#include "resolved-manager.h" + +struct SocketGraveyard { + Manager *manager; + usec_t deadline; + sd_event_source *io_event_source; + LIST_FIELDS(SocketGraveyard, graveyard); +}; + +void manager_socket_graveyard_process(Manager *m); +void manager_socket_graveyard_clear(Manager *m); + +int manager_add_socket_to_graveyard(Manager *m, int fd);