From f3aa755ba5ae5148dd0567357f8c538072e2eabc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= <ondrej@isc.org>
Date: Tue, 30 May 2023 08:46:17 +0200
Subject: [PATCH] Improve RBT overmem cache cleaning

When cache memory usage is over the configured cache size (overmem) and
we are cleaning unused entries, it might not be enough to clean just two
entries if the entries to be expired are smaller than the newly added
rdata. This could be abused by an attacker to cause a remote Denial of
Service by eventually exhausting the operating system's memory.
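For illustration: if the two purged entries are only a few hundred bytes
each while the newly added rdataset is several kilobytes, each such
addition still grows the cache despite the cleaning.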

Currently, addrdataset() tries to do a single TTL-based cleaning, taking
the serve-stale TTL into account, and then optionally moves on to
overmem cleaning if we are in that condition. overmem_purge() then tries
another single TTL-based cleaning from the TTL heap and continues with
LRU-based cleaning, purging at most two entries.

Squash the TTL-cleaning mechanism into a single call from addrdataset(),
but ignore the serve-stale TTL if we are currently overmem.

Then, instead of cleaning a fixed number of entries, pass the size of the
newly added rdatasetheader to the overmem_purge() function and clean up
at least the size of the newly added data. This prevents the cache from
growing past the configured memory limit (`max-cache-size`).
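
As a rough sketch of that behaviour (a standalone toy model, not the
patch's code: "struct entry" and purge_at_least() are invented for this
example), the purge loop keeps freeing LRU entries until at least
purgesize bytes have been reclaimed:

/*
 * Toy model: free entries from an LRU list until at least `purgesize`
 * bytes have been reclaimed, mirroring the "clean at least the size of
 * the newly added data" rule described above.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	size_t size;	    /* analogous to rdataset_size() in the patch */
	struct entry *next; /* next candidate for purging */
};

static size_t
purge_at_least(struct entry **list, size_t purgesize) {
	size_t purged = 0;

	/* Keep purging while entries remain and not enough was freed. */
	while (*list != NULL && purged <= purgesize) {
		struct entry *victim = *list;
		*list = victim->next;
		purged += victim->size;
		free(victim);
	}

	return (purged);
}

int
main(void) {
	struct entry *list = NULL;

	/* Five cache entries of 100 bytes each. */
	for (int i = 0; i < 5; i++) {
		struct entry *e = malloc(sizeof(*e));
		e->size = 100;
		e->next = list;
		list = e;
	}

	/* A 250-byte rdataset arrives while overmem: reclaim > 250 bytes. */
	printf("reclaimed %zu bytes\n", purge_at_least(&list, 250));
	return (0);
}

The real implementation below does this accounting per node-lock bucket
in expire_lru_headers(), using rdataset_size() to measure each purged
entry.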

Additionally, refactor the overmem_purge() function to reduce for-loop
nesting for readability.
---
 lib/dns/rbtdb.c | 109 +++++++++++++++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 42 deletions(-)

diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c
index 11203e4..cc40eae 100644
--- a/lib/dns/rbtdb.c
+++ b/lib/dns/rbtdb.c
@@ -834,7 +834,7 @@ static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
			  bool tree_locked, expire_t reason);
 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
-			  isc_stdtime_t now, bool tree_locked);
+			  size_t purgesize, bool tree_locked);
 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
				  rdatasetheader_t *newheader);
 static void resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
@@ -6937,6 +6937,16 @@ addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
 
 static dns_dbmethods_t zone_methods;
 
+static size_t
+rdataset_size(rdatasetheader_t *header) {
+	if (!NONEXISTENT(header)) {
+		return (dns_rdataslab_size((unsigned char *)header,
+					   sizeof(*header)));
+	}
+
+	return (sizeof(*header));
+}
+
 static isc_result_t
 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
	    isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
@@ -7091,7 +7101,8 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
	}
 
	if (cache_is_overmem)
-		overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
+		overmem_purge(rbtdb, rbtnode->locknum, rdataset_size(newheader),
+			      tree_locked);
 
	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
		  isc_rwlocktype_write);
@@ -7106,9 +7117,19 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
		cleanup_dead_nodes(rbtdb, rbtnode->locknum);
 
	header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
-	if (header && header->rdh_ttl < now - RBTDB_VIRTUAL)
-		expire_header(rbtdb, header, tree_locked,
-			      expire_ttl);
+	if (header != NULL) {
+		dns_ttl_t rdh_ttl = header->rdh_ttl;
+
+		/* Only account for stale TTL if cache is not overmem */
+		if (!cache_is_overmem) {
+			rdh_ttl += rbtdb->serve_stale_ttl;
+		}
+
+		if (rdh_ttl < now - RBTDB_VIRTUAL) {
+			expire_header(rbtdb, header, tree_locked,
+				      expire_ttl);
+		}
+	}
 
	/*
	 * If we've been holding a write lock on the tree just for
@@ -10643,54 +10664,58 @@ update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
	ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
 }
 
+static size_t
+expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, size_t purgesize,
+		   bool tree_locked) {
+	rdatasetheader_t *header, *header_prev;
+	size_t purged = 0;
+
+	for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
+	     header != NULL && purged <= purgesize; header = header_prev)
+	{
+		header_prev = ISC_LIST_PREV(header, link);
+		/*
+		 * Unlink the entry at this point to avoid checking it
+		 * again even if it's currently used someone else and
+		 * cannot be purged at this moment. This entry won't be
+		 * referenced any more (so unlinking is safe) since the
+		 * TTL was reset to 0.
+		 */
+		ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, link);
+		size_t header_size = rdataset_size(header);
+		expire_header(rbtdb, header, tree_locked, expire_lru);
+		purged += header_size;
+	}
+
+	return (purged);
+}
+
 /*%
- * Purge some expired and/or stale (i.e. unused for some period) cache entries
- * under an overmem condition. To recover from this condition quickly, up to
- * 2 entries will be purged. This process is triggered while adding a new
- * entry, and we specifically avoid purging entries in the same LRU bucket as
- * the one to which the new entry will belong. Otherwise, we might purge
- * entries of the same name of different RR types while adding RRsets from a
- * single response (consider the case where we're adding A and AAAA glue records
- * of the same NS name).
+ * Purge some stale (i.e. unused for some period - LRU based cleaning) cache
+ * entries under the overmem condition. To recover from this condition quickly,
+ * we cleanup entries up to the size of newly added rdata (passed as purgesize).
+ *
+ * This process is triggered while adding a new entry, and we specifically avoid
+ * purging entries in the same LRU bucket as the one to which the new entry will
+ * belong. Otherwise, we might purge entries of the same name of different RR
+ * types while adding RRsets from a single response (consider the case where
+ * we're adding A and AAAA glue records of the same NS name).
  */
 static void
-overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
-	      isc_stdtime_t now, bool tree_locked)
+overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize,
+	      bool tree_locked)
 {
-	rdatasetheader_t *header, *header_prev;
	unsigned int locknum;
-	int purgecount = 2;
+	size_t purged = 0;
 
	for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
-	     locknum != locknum_start && purgecount > 0;
+	     locknum != locknum_start && purged <= purgesize;
	     locknum = (locknum + 1) % rbtdb->node_lock_count) {
		NODE_LOCK(&rbtdb->node_locks[locknum].lock,
			  isc_rwlocktype_write);
 
-		header = isc_heap_element(rbtdb->heaps[locknum], 1);
-		if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) {
-			expire_header(rbtdb, header, tree_locked,
-				      expire_ttl);
-			purgecount--;
-		}
-
-		for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
-		     header != NULL && purgecount > 0;
-		     header = header_prev) {
-			header_prev = ISC_LIST_PREV(header, link);
-			/*
-			 * Unlink the entry at this point to avoid checking it
-			 * again even if it's currently used someone else and
-			 * cannot be purged at this moment. This entry won't be
-			 * referenced any more (so unlinking is safe) since the
-			 * TTL was reset to 0.
-			 */
-			ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
-					link);
-			expire_header(rbtdb, header, tree_locked,
-				      expire_lru);
-			purgecount--;
-		}
+		purged += expire_lru_headers(rbtdb, locknum, purgesize - purged,
+					     tree_locked);
 
		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
			    isc_rwlocktype_write);
-- 
2.40.1