parent
f5a58ed0fe
commit
b5c8a0a948
@ -0,0 +1,193 @@
|
||||
From f3aa755ba5ae5148dd0567357f8c538072e2eabc Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= <ondrej@isc.org>
|
||||
Date: Tue, 30 May 2023 08:46:17 +0200
|
||||
Subject: [PATCH] Improve RBT overmem cache cleaning
|
||||
|
||||
When cache memory usage is over the configured cache size (overmem) and
|
||||
we are cleaning unused entries, it might not be enough to clean just two
|
||||
entries if the entries to be expired are smaller than the newly added
|
||||
rdata. This could be abused by an attacker to cause a remote Denial of
|
||||
Service by possibly running out of the operating system memory.
|
||||
|
||||
Currently, the addrdataset() tries to do a single TTL-based cleaning
|
||||
considering the serve-stale TTL and then optionally moves to overmem
|
||||
cleaning if we are in that condition. Then the overmem_purge() tries to
|
||||
do another single TTL based cleaning from the TTL heap and then continue
|
||||
with LRU-based cleaning up to 2 entries cleaned.
|
||||
|
||||
Squash the TTL-cleaning mechanism into single call from addrdataset(),
|
||||
but ignore the serve-stale TTL if we are currently overmem.
|
||||
|
||||
Then instead of having a fixed number of entries to clean, pass the size
|
||||
of newly added rdatasetheader to the overmem_purge() function and
|
||||
cleanup at least the size of the newly added data. This prevents the
|
||||
cache going over the configured memory limit (`max-cache-size`).
|
||||
|
||||
Additionally, refactor the overmem_purge() function to reduce for-loop
|
||||
nesting for readability.
|
||||
---
|
||||
lib/dns/rbtdb.c | 109 +++++++++++++++++++++++++++++-------------------
|
||||
1 file changed, 67 insertions(+), 42 deletions(-)
|
||||
|
||||
diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c
|
||||
index 11203e4..cc40eae 100644
|
||||
--- a/lib/dns/rbtdb.c
|
||||
+++ b/lib/dns/rbtdb.c
|
||||
@@ -834,7 +834,7 @@ static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
|
||||
static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
|
||||
bool tree_locked, expire_t reason);
|
||||
static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
|
||||
- isc_stdtime_t now, bool tree_locked);
|
||||
+ size_t purgesize, bool tree_locked);
|
||||
static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
|
||||
rdatasetheader_t *newheader);
|
||||
static void resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
|
||||
@@ -6937,6 +6937,16 @@ addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
|
||||
|
||||
static dns_dbmethods_t zone_methods;
|
||||
|
||||
+static size_t
|
||||
+rdataset_size(rdatasetheader_t *header) {
|
||||
+ if (!NONEXISTENT(header)) {
|
||||
+ return (dns_rdataslab_size((unsigned char *)header,
|
||||
+ sizeof(*header)));
|
||||
+ }
|
||||
+
|
||||
+ return (sizeof(*header));
|
||||
+}
|
||||
+
|
||||
static isc_result_t
|
||||
addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
||||
isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
|
||||
@@ -7091,7 +7101,8 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
||||
}
|
||||
|
||||
if (cache_is_overmem)
|
||||
- overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
|
||||
+ overmem_purge(rbtdb, rbtnode->locknum, rdataset_size(newheader),
|
||||
+ tree_locked);
|
||||
|
||||
NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
|
||||
isc_rwlocktype_write);
|
||||
@@ -7106,9 +7117,19 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
||||
cleanup_dead_nodes(rbtdb, rbtnode->locknum);
|
||||
|
||||
header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
|
||||
- if (header && header->rdh_ttl < now - RBTDB_VIRTUAL)
|
||||
- expire_header(rbtdb, header, tree_locked,
|
||||
- expire_ttl);
|
||||
+ if (header != NULL) {
|
||||
+ dns_ttl_t rdh_ttl = header->rdh_ttl;
|
||||
+
|
||||
+ /* Only account for stale TTL if cache is not overmem */
|
||||
+ if (!cache_is_overmem) {
|
||||
+ rdh_ttl += rbtdb->serve_stale_ttl;
|
||||
+ }
|
||||
+
|
||||
+ if (rdh_ttl < now - RBTDB_VIRTUAL) {
|
||||
+ expire_header(rbtdb, header, tree_locked,
|
||||
+ expire_ttl);
|
||||
+ }
|
||||
+ }
|
||||
|
||||
/*
|
||||
* If we've been holding a write lock on the tree just for
|
||||
@@ -10643,54 +10664,58 @@ update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
|
||||
ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
|
||||
}
|
||||
|
||||
+static size_t
|
||||
+expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, size_t purgesize,
|
||||
+ bool tree_locked) {
|
||||
+ rdatasetheader_t *header, *header_prev;
|
||||
+ size_t purged = 0;
|
||||
+
|
||||
+ for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
|
||||
+ header != NULL && purged <= purgesize; header = header_prev)
|
||||
+ {
|
||||
+ header_prev = ISC_LIST_PREV(header, link);
|
||||
+ /*
|
||||
+ * Unlink the entry at this point to avoid checking it
|
||||
+ * again even if it's currently used someone else and
|
||||
+ * cannot be purged at this moment. This entry won't be
|
||||
+ * referenced any more (so unlinking is safe) since the
|
||||
+ * TTL was reset to 0.
|
||||
+ */
|
||||
+ ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, link);
|
||||
+ size_t header_size = rdataset_size(header);
|
||||
+ expire_header(rbtdb, header, tree_locked, expire_lru);
|
||||
+ purged += header_size;
|
||||
+ }
|
||||
+
|
||||
+ return (purged);
|
||||
+}
|
||||
+
|
||||
/*%
|
||||
- * Purge some expired and/or stale (i.e. unused for some period) cache entries
|
||||
- * under an overmem condition. To recover from this condition quickly, up to
|
||||
- * 2 entries will be purged. This process is triggered while adding a new
|
||||
- * entry, and we specifically avoid purging entries in the same LRU bucket as
|
||||
- * the one to which the new entry will belong. Otherwise, we might purge
|
||||
- * entries of the same name of different RR types while adding RRsets from a
|
||||
- * single response (consider the case where we're adding A and AAAA glue records
|
||||
- * of the same NS name).
|
||||
+ * Purge some stale (i.e. unused for some period - LRU based cleaning) cache
|
||||
+ * entries under the overmem condition. To recover from this condition quickly,
|
||||
+ * we cleanup entries up to the size of newly added rdata (passed as purgesize).
|
||||
+ *
|
||||
+ * This process is triggered while adding a new entry, and we specifically avoid
|
||||
+ * purging entries in the same LRU bucket as the one to which the new entry will
|
||||
+ * belong. Otherwise, we might purge entries of the same name of different RR
|
||||
+ * types while adding RRsets from a single response (consider the case where
|
||||
+ * we're adding A and AAAA glue records of the same NS name).
|
||||
*/
|
||||
static void
|
||||
-overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
|
||||
- isc_stdtime_t now, bool tree_locked)
|
||||
+overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize,
|
||||
+ bool tree_locked)
|
||||
{
|
||||
- rdatasetheader_t *header, *header_prev;
|
||||
unsigned int locknum;
|
||||
- int purgecount = 2;
|
||||
+ size_t purged = 0;
|
||||
|
||||
for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
|
||||
- locknum != locknum_start && purgecount > 0;
|
||||
+ locknum != locknum_start && purged <= purgesize;
|
||||
locknum = (locknum + 1) % rbtdb->node_lock_count) {
|
||||
NODE_LOCK(&rbtdb->node_locks[locknum].lock,
|
||||
isc_rwlocktype_write);
|
||||
|
||||
- header = isc_heap_element(rbtdb->heaps[locknum], 1);
|
||||
- if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) {
|
||||
- expire_header(rbtdb, header, tree_locked,
|
||||
- expire_ttl);
|
||||
- purgecount--;
|
||||
- }
|
||||
-
|
||||
- for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
|
||||
- header != NULL && purgecount > 0;
|
||||
- header = header_prev) {
|
||||
- header_prev = ISC_LIST_PREV(header, link);
|
||||
- /*
|
||||
- * Unlink the entry at this point to avoid checking it
|
||||
- * again even if it's currently used someone else and
|
||||
- * cannot be purged at this moment. This entry won't be
|
||||
- * referenced any more (so unlinking is safe) since the
|
||||
- * TTL was reset to 0.
|
||||
- */
|
||||
- ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
|
||||
- link);
|
||||
- expire_header(rbtdb, header, tree_locked,
|
||||
- expire_lru);
|
||||
- purgecount--;
|
||||
- }
|
||||
+ purged += expire_lru_headers(rbtdb, locknum, purgesize - purged,
|
||||
+ tree_locked);
|
||||
|
||||
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
|
||||
isc_rwlocktype_write);
|
||||
--
|
||||
2.40.1
|
||||
|
Loading…
Reference in new issue