parent
2b068456f8
commit
ee32dc2838
@ -0,0 +1,192 @@
|
|||||||
|
From ed920ea2ae1cc1214b42b82a5149758dbec941a5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= <ondrej@isc.org>
|
||||||
|
Date: Tue, 30 May 2023 08:46:17 +0200
|
||||||
|
Subject: [PATCH] Improve RBT overmem cache cleaning
|
||||||
|
|
||||||
|
When cache memory usage is over the configured cache size (overmem) and
|
||||||
|
we are cleaning unused entries, it might not be enough to clean just two
|
||||||
|
entries if the entries to be expired are smaller than the newly added
|
||||||
|
rdata. This could be abused by an attacker to cause a remote Denial of
|
||||||
|
Service by possibly running out of the operating system memory.
|
||||||
|
|
||||||
|
Currently, the addrdataset() tries to do a single TTL-based cleaning
|
||||||
|
considering the serve-stale TTL and then optionally moves to overmem
|
||||||
|
cleaning if we are in that condition. Then the overmem_purge() tries to
|
||||||
|
do another single TTL-based cleaning from the TTL heap and then continue
|
||||||
|
with LRU-based cleaning up to 2 entries cleaned.
|
||||||
|
|
||||||
|
Squash the TTL-cleaning mechanism into a single call from addrdataset(),
|
||||||
|
but ignore the serve-stale TTL if we are currently overmem.
|
||||||
|
|
||||||
|
Then instead of having a fixed number of entries to clean, pass the size
|
||||||
|
of newly added rdatasetheader to the overmem_purge() function and
|
||||||
|
clean up at least the size of the newly added data. This prevents the
|
||||||
|
cache going over the configured memory limit (`max-cache-size`).
|
||||||
|
|
||||||
|
Additionally, refactor the overmem_purge() function to reduce for-loop
|
||||||
|
nesting for readability.
|
||||||
|
|
||||||
|
(cherry picked from commit f1d9e9ee3859976f403914d20ad2a10855343702)
|
||||||
|
---
|
||||||
|
lib/dns/rbtdb.c | 105 ++++++++++++++++++++++++++++++------------------
|
||||||
|
1 file changed, 65 insertions(+), 40 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c
|
||||||
|
index 51178cc877..75f97f5550 100644
|
||||||
|
--- a/lib/dns/rbtdb.c
|
||||||
|
+++ b/lib/dns/rbtdb.c
|
||||||
|
@@ -599,7 +599,7 @@ static void
|
||||||
|
expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
|
||||||
|
expire_t reason);
|
||||||
|
static void
|
||||||
|
-overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, isc_stdtime_t now,
|
||||||
|
+overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize,
|
||||||
|
bool tree_locked);
|
||||||
|
static isc_result_t
|
||||||
|
resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader);
|
||||||
|
@@ -6802,6 +6802,16 @@ cleanup:
|
||||||
|
|
||||||
|
static dns_dbmethods_t zone_methods;
|
||||||
|
|
||||||
|
+static size_t
|
||||||
|
+rdataset_size(rdatasetheader_t *header) {
|
||||||
|
+ if (!NONEXISTENT(header)) {
|
||||||
|
+ return (dns_rdataslab_size((unsigned char *)header,
|
||||||
|
+ sizeof(*header)));
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return (sizeof(*header));
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static isc_result_t
|
||||||
|
addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
||||||
|
isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
|
||||||
|
@@ -6965,7 +6975,8 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cache_is_overmem) {
|
||||||
|
- overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
|
||||||
|
+ overmem_purge(rbtdb, rbtnode->locknum, rdataset_size(newheader),
|
||||||
|
+ tree_locked);
|
||||||
|
}
|
||||||
|
|
||||||
|
NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
|
||||||
|
@@ -6984,10 +6995,18 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
||||||
|
}
|
||||||
|
|
||||||
|
header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
|
||||||
|
- if (header != NULL && header->rdh_ttl + rbtdb->serve_stale_ttl <
|
||||||
|
- now - RBTDB_VIRTUAL)
|
||||||
|
- {
|
||||||
|
- expire_header(rbtdb, header, tree_locked, expire_ttl);
|
||||||
|
+ if (header != NULL) {
|
||||||
|
+ dns_ttl_t rdh_ttl = header->rdh_ttl;
|
||||||
|
+
|
||||||
|
+ /* Only account for stale TTL if cache is not overmem */
|
||||||
|
+ if (!cache_is_overmem) {
|
||||||
|
+ rdh_ttl += rbtdb->serve_stale_ttl;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (rdh_ttl < now - RBTDB_VIRTUAL) {
|
||||||
|
+ expire_header(rbtdb, header, tree_locked,
|
||||||
|
+ expire_ttl);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -10531,52 +10550,58 @@ update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now) {
|
||||||
|
ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static size_t
|
||||||
|
+expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, size_t purgesize,
|
||||||
|
+ bool tree_locked) {
|
||||||
|
+ rdatasetheader_t *header, *header_prev;
|
||||||
|
+ size_t purged = 0;
|
||||||
|
+
|
||||||
|
+ for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
|
||||||
|
+ header != NULL && purged <= purgesize; header = header_prev)
|
||||||
|
+ {
|
||||||
|
+ header_prev = ISC_LIST_PREV(header, link);
|
||||||
|
+ /*
|
||||||
|
+ * Unlink the entry at this point to avoid checking it
|
||||||
|
+ * again even if it's currently used someone else and
|
||||||
|
+ * cannot be purged at this moment. This entry won't be
|
||||||
|
+ * referenced any more (so unlinking is safe) since the
|
||||||
|
+ * TTL was reset to 0.
|
||||||
|
+ */
|
||||||
|
+ ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, link);
|
||||||
|
+ size_t header_size = rdataset_size(header);
|
||||||
|
+ expire_header(rbtdb, header, tree_locked, expire_lru);
|
||||||
|
+ purged += header_size;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return (purged);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/*%
|
||||||
|
- * Purge some expired and/or stale (i.e. unused for some period) cache entries
|
||||||
|
- * under an overmem condition. To recover from this condition quickly, up to
|
||||||
|
- * 2 entries will be purged. This process is triggered while adding a new
|
||||||
|
- * entry, and we specifically avoid purging entries in the same LRU bucket as
|
||||||
|
- * the one to which the new entry will belong. Otherwise, we might purge
|
||||||
|
- * entries of the same name of different RR types while adding RRsets from a
|
||||||
|
- * single response (consider the case where we're adding A and AAAA glue records
|
||||||
|
- * of the same NS name).
|
||||||
|
+ * Purge some stale (i.e. unused for some period - LRU based cleaning) cache
|
||||||
|
+ * entries under the overmem condition. To recover from this condition quickly,
|
||||||
|
+ * we cleanup entries up to the size of newly added rdata (passed as purgesize).
|
||||||
|
+ *
|
||||||
|
+ * This process is triggered while adding a new entry, and we specifically avoid
|
||||||
|
+ * purging entries in the same LRU bucket as the one to which the new entry will
|
||||||
|
+ * belong. Otherwise, we might purge entries of the same name of different RR
|
||||||
|
+ * types while adding RRsets from a single response (consider the case where
|
||||||
|
+ * we're adding A and AAAA glue records of the same NS name).
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
-overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, isc_stdtime_t now,
|
||||||
|
+overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize,
|
||||||
|
bool tree_locked) {
|
||||||
|
- rdatasetheader_t *header, *header_prev;
|
||||||
|
unsigned int locknum;
|
||||||
|
- int purgecount = 2;
|
||||||
|
+ size_t purged = 0;
|
||||||
|
|
||||||
|
for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
|
||||||
|
- locknum != locknum_start && purgecount > 0;
|
||||||
|
+ locknum != locknum_start && purged <= purgesize;
|
||||||
|
locknum = (locknum + 1) % rbtdb->node_lock_count)
|
||||||
|
{
|
||||||
|
NODE_LOCK(&rbtdb->node_locks[locknum].lock,
|
||||||
|
isc_rwlocktype_write);
|
||||||
|
|
||||||
|
- header = isc_heap_element(rbtdb->heaps[locknum], 1);
|
||||||
|
- if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) {
|
||||||
|
- expire_header(rbtdb, header, tree_locked, expire_ttl);
|
||||||
|
- purgecount--;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
|
||||||
|
- header != NULL && purgecount > 0; header = header_prev)
|
||||||
|
- {
|
||||||
|
- header_prev = ISC_LIST_PREV(header, link);
|
||||||
|
- /*
|
||||||
|
- * Unlink the entry at this point to avoid checking it
|
||||||
|
- * again even if it's currently used someone else and
|
||||||
|
- * cannot be purged at this moment. This entry won't be
|
||||||
|
- * referenced any more (so unlinking is safe) since the
|
||||||
|
- * TTL was reset to 0.
|
||||||
|
- */
|
||||||
|
- ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
|
||||||
|
- link);
|
||||||
|
- expire_header(rbtdb, header, tree_locked, expire_lru);
|
||||||
|
- purgecount--;
|
||||||
|
- }
|
||||||
|
+ purged += expire_lru_headers(rbtdb, locknum, purgesize - purged,
|
||||||
|
+ tree_locked);
|
||||||
|
|
||||||
|
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
|
||||||
|
isc_rwlocktype_write);
|
||||||
|
--
|
||||||
|
2.40.1
|
||||||
|
|
@ -0,0 +1,37 @@
|
|||||||
|
From e73ecbf039c3b2cd33dd2926691a8a346c9ca574 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Matthijs Mekking <matthijs@isc.org>
|
||||||
|
Date: Tue, 2 Aug 2022 14:21:40 +0200
|
||||||
|
Subject: [PATCH 1/3] Don't enable serve-stale on duplicate queries
|
||||||
|
|
||||||
|
When checking if we should enable serve-stale, add an early out case
|
||||||
|
when the result is an error signalling a duplicate query or a query
|
||||||
|
that would be dropped.
|
||||||
|
|
||||||
|
(cherry picked from commit 059a4c2f4d9d3cff371842f43208d021509314fa)
|
||||||
|
(cherry picked from commit dd7dde5743715dc0dec2defbb92b1a8637977bf9)
|
||||||
|
---
|
||||||
|
lib/ns/query.c | 8 ++++++++
|
||||||
|
1 file changed, 8 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/lib/ns/query.c b/lib/ns/query.c
|
||||||
|
index 4f61374e2c..c169e22bf4 100644
|
||||||
|
--- a/lib/ns/query.c
|
||||||
|
+++ b/lib/ns/query.c
|
||||||
|
@@ -7205,6 +7205,14 @@ query_usestale(query_ctx_t *qctx, isc_result_t result) {
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (result == DNS_R_DUPLICATE || result == DNS_R_DROP) {
|
||||||
|
+ /*
|
||||||
|
+ * Don't enable serve-stale if the result signals a duplicate
|
||||||
|
+ * query or query that is being dropped.
|
||||||
|
+ */
|
||||||
|
+ return (false);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
qctx_clean(qctx);
|
||||||
|
qctx_freedata(qctx);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.40.1
|
||||||
|
|
@ -0,0 +1,72 @@
|
|||||||
|
From 589c06568e3036bfe713d42b53c8e88005ce17e4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Evan Hunt <each@isc.org>
|
||||||
|
Date: Thu, 25 May 2023 23:53:50 -0700
|
||||||
|
Subject: [PATCH 2/3] Stale answer lookups could loop when over recursion quota
|
||||||
|
|
||||||
|
When a query was aborted because of the recursion quota being exceeded,
|
||||||
|
but triggered a stale answer response and a stale data refresh query,
|
||||||
|
it could cause named to loop back where we are iterating and following
|
||||||
|
a delegation. Having no good answer in cache, we would fall back to
|
||||||
|
using serve-stale again, use the stale data, try to refresh the RRset,
|
||||||
|
and loop back again, without ever terminating until crashing due to
|
||||||
|
stack overflow.
|
||||||
|
|
||||||
|
This happens because in the functions 'query_notfound()' and
|
||||||
|
'query_delegation_recurse()', we check whether we can fall back to
|
||||||
|
serving stale data. We shouldn't do so if we are already refreshing
|
||||||
|
an RRset due to having prioritized stale data in cache.
|
||||||
|
|
||||||
|
In other words, we need to add an extra check to 'query_usestale()' to
|
||||||
|
disallow serving stale data if we are currently refreshing a stale
|
||||||
|
RRset.
|
||||||
|
|
||||||
|
As an additional mitigation to prevent looping, we now use the result
|
||||||
|
code ISC_R_ALREADYRUNNING rather than ISC_R_FAILURE when a recursion
|
||||||
|
loop is encountered, and we check for that condition in
|
||||||
|
'query_usestale()' as well.
|
||||||
|
|
||||||
|
(cherry picked from commit 0101e28f91fb36b6a16a0049d3b3e2b7846f23f0)
|
||||||
|
---
|
||||||
|
lib/ns/query.c | 17 ++++++++++++++---
|
||||||
|
1 file changed, 14 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/ns/query.c b/lib/ns/query.c
|
||||||
|
index c169e22bf4..1eb662ea4d 100644
|
||||||
|
--- a/lib/ns/query.c
|
||||||
|
+++ b/lib/ns/query.c
|
||||||
|
@@ -6229,7 +6229,7 @@ ns_query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qname,
|
||||||
|
if (recparam_match(&client->query.recparam, qtype, qname, qdomain)) {
|
||||||
|
ns_client_log(client, NS_LOGCATEGORY_CLIENT, NS_LOGMODULE_QUERY,
|
||||||
|
ISC_LOG_INFO, "recursion loop detected");
|
||||||
|
- return (ISC_R_FAILURE);
|
||||||
|
+ return (ISC_R_ALREADYRUNNING);
|
||||||
|
}
|
||||||
|
|
||||||
|
recparam_update(&client->query.recparam, qtype, qname, qdomain);
|
||||||
|
@@ -7205,10 +7205,21 @@ query_usestale(query_ctx_t *qctx, isc_result_t result) {
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (result == DNS_R_DUPLICATE || result == DNS_R_DROP) {
|
||||||
|
+ if (qctx->refresh_rrset) {
|
||||||
|
+ /*
|
||||||
|
+ * This is a refreshing query, we have already prioritized
|
||||||
|
+ * stale data, so don't enable serve-stale again.
|
||||||
|
+ */
|
||||||
|
+ return (false);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (result == DNS_R_DUPLICATE || result == DNS_R_DROP ||
|
||||||
|
+ result == ISC_R_ALREADYRUNNING)
|
||||||
|
+ {
|
||||||
|
/*
|
||||||
|
* Don't enable serve-stale if the result signals a duplicate
|
||||||
|
- * query or query that is being dropped.
|
||||||
|
+ * query or a query that is being dropped or can't proceed
|
||||||
|
+ * because of a recursion loop.
|
||||||
|
*/
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.40.1
|
||||||
|
|
@ -0,0 +1,60 @@
|
|||||||
|
From c20e9d30bae58d3120aa7c6a0e5dcae0e7e93dbd Mon Sep 17 00:00:00 2001
|
||||||
|
From: Matthijs Mekking <matthijs@isc.org>
|
||||||
|
Date: Thu, 1 Jun 2023 10:03:48 +0200
|
||||||
|
Subject: [PATCH 3/3] Fix serve-stale hang at shutdown
|
||||||
|
|
||||||
|
The 'refresh_rrset' variable is used to determine if we can detach from
|
||||||
|
the client. This can cause a hang on shutdown. To fix this, move setting
|
||||||
|
of the 'nodetach' variable up to where 'refresh_rrset' is set (in
|
||||||
|
query_lookup(), and thus not in ns_query_done()), and set it to false
|
||||||
|
when actually refreshing the RRset, so that when this lookup is
|
||||||
|
completed, the client will be detached.
|
||||||
|
|
||||||
|
(cherry picked from commit c003c5bc3c68f3e513654b6689e1f60280d14844)
|
||||||
|
---
|
||||||
|
lib/ns/query.c | 13 ++++++++-----
|
||||||
|
1 file changed, 8 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/ns/query.c b/lib/ns/query.c
|
||||||
|
index 1eb662ea4d..4fe3e30f45 100644
|
||||||
|
--- a/lib/ns/query.c
|
||||||
|
+++ b/lib/ns/query.c
|
||||||
|
@@ -5644,6 +5644,7 @@ query_refresh_rrset(query_ctx_t *orig_qctx) {
|
||||||
|
qctx.client->query.dboptions &= ~(DNS_DBFIND_STALETIMEOUT |
|
||||||
|
DNS_DBFIND_STALEOK |
|
||||||
|
DNS_DBFIND_STALEENABLED);
|
||||||
|
+ qctx.client->nodetach = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We'll need some resources...
|
||||||
|
@@ -5868,7 +5869,14 @@ query_lookup(query_ctx_t *qctx) {
|
||||||
|
"%s stale answer used, an attempt to "
|
||||||
|
"refresh the RRset will still be made",
|
||||||
|
namebuf);
|
||||||
|
+
|
||||||
|
qctx->refresh_rrset = STALE(qctx->rdataset);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * If we are refreshing the RRSet, we must not
|
||||||
|
+ * detach from the client in query_send().
|
||||||
|
+ */
|
||||||
|
+ qctx->client->nodetach = qctx->refresh_rrset;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
@@ -11469,12 +11477,7 @@ ns_query_done(query_ctx_t *qctx) {
|
||||||
|
/*
|
||||||
|
* Client may have been detached after query_send(), so
|
||||||
|
* we test and store the flag state here, for safety.
|
||||||
|
- * If we are refreshing the RRSet, we must not detach from the client
|
||||||
|
- * in the query_send(), so we need to override the flag.
|
||||||
|
*/
|
||||||
|
- if (qctx->refresh_rrset) {
|
||||||
|
- qctx->client->nodetach = true;
|
||||||
|
- }
|
||||||
|
nodetach = qctx->client->nodetach;
|
||||||
|
query_send(qctx->client);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.40.1
|
||||||
|
|
Loading…
Reference in new issue