You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
284 lines
9.2 KiB
284 lines
9.2 KiB
9 months ago
|
From 6e08fef24d7ba491228a4083ea0f0e33253a1043 Mon Sep 17 00:00:00 2001
|
||
|
From: Petr Mensik <pemensik@redhat.com>
|
||
|
Date: Mon, 12 Feb 2024 20:48:57 +0100
|
||
|
Subject: [PATCH] Specific recursive query patterns may lead to an
|
||
|
out-of-memory condition
|
||
|
|
||
|
6319. [security] Query patterns that continuously triggered cache
|
||
|
database maintenance could exhaust all available memory
|
||
|
on the host running named. (CVE-2023-6516) [GL #4383]
|
||
|
|
||
|
Resolves: CVE-2023-6516
|
||
|
---
|
||
|
lib/dns/include/dns/rbt.h | 6 ++
|
||
|
lib/dns/mapapi | 2 +-
|
||
|
lib/dns/rbt.c | 1 +
|
||
|
lib/dns/rbtdb.c | 149 +++++++++++++++++++++++++-------------
|
||
|
4 files changed, 107 insertions(+), 51 deletions(-)
|
||
|
|
||
|
diff --git a/lib/dns/include/dns/rbt.h b/lib/dns/include/dns/rbt.h
|
||
|
index b67e602..69655b0 100644
|
||
|
--- a/lib/dns/include/dns/rbt.h
|
||
|
+++ b/lib/dns/include/dns/rbt.h
|
||
|
@@ -164,6 +164,12 @@ struct dns_rbtnode {
|
||
|
uint16_t locknum; /* note that this is not in the bitfield */
|
||
|
isc_refcount_t references;
|
||
|
/*@}*/
|
||
|
+
|
||
|
+ /*%
|
||
|
+ * This linked list is used to store nodes from which tree pruning can
|
||
|
+ * be started.
|
||
|
+ */
|
||
|
+ ISC_LINK(dns_rbtnode_t) prunelink;
|
||
|
};
|
||
|
|
||
|
typedef isc_result_t (*dns_rbtfindcallback_t)(dns_rbtnode_t *node,
|
||
|
diff --git a/lib/dns/mapapi b/lib/dns/mapapi
|
||
|
index 1b502d3..a46e190 100644
|
||
|
--- a/lib/dns/mapapi
|
||
|
+++ b/lib/dns/mapapi
|
||
|
@@ -13,4 +13,4 @@
|
||
|
# Whenever releasing a new major release of BIND9, set this value
|
||
|
# back to 1.0 when releasing the first alpha. Map files are *never*
|
||
|
# compatible across major releases.
|
||
|
-MAPAPI=3.0
|
||
|
+MAPAPI=4.0
|
||
|
diff --git a/lib/dns/rbt.c b/lib/dns/rbt.c
|
||
|
index 7f2c2d2..a220368 100644
|
||
|
--- a/lib/dns/rbt.c
|
||
|
+++ b/lib/dns/rbt.c
|
||
|
@@ -2283,6 +2283,7 @@ create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep) {
|
||
|
HASHVAL(node) = 0;
|
||
|
|
||
|
ISC_LINK_INIT(node, deadlink);
|
||
|
+ ISC_LINK_INIT(node, prunelink);
|
||
|
|
||
|
LOCKNUM(node) = 0;
|
||
|
WILD(node) = 0;
|
||
|
diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c
|
||
|
index 75f97f5..2707507 100644
|
||
|
--- a/lib/dns/rbtdb.c
|
||
|
+++ b/lib/dns/rbtdb.c
|
||
|
@@ -515,6 +515,10 @@ struct dns_rbtdb {
|
||
|
*/
|
||
|
rbtnodelist_t *deadnodes;
|
||
|
|
||
|
+ /* List of nodes from which recursive tree pruning can be started from.
|
||
|
+ * Locked by tree_lock. */
|
||
|
+ rbtnodelist_t prunenodes;
|
||
|
+
|
||
|
/*
|
||
|
* Heaps. These are used for TTL based expiry in a cache,
|
||
|
* or for zone resigning in a zone DB. hmctx is the memory
|
||
|
@@ -1060,6 +1064,7 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) {
|
||
|
unsigned int i;
|
||
|
isc_result_t result;
|
||
|
char buf[DNS_NAME_FORMATSIZE];
|
||
|
+ dns_rbtnode_t *node = NULL;
|
||
|
dns_rbt_t **treep;
|
||
|
isc_time_t start;
|
||
|
dns_dbonupdatelistener_t *listener, *listener_next;
|
||
|
@@ -1086,8 +1091,6 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) {
|
||
|
* the overhead of unlinking all nodes here should be negligible.
|
||
|
*/
|
||
|
for (i = 0; i < rbtdb->node_lock_count; i++) {
|
||
|
- dns_rbtnode_t *node;
|
||
|
-
|
||
|
node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
|
||
|
while (node != NULL) {
|
||
|
ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
|
||
|
@@ -1095,6 +1098,12 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) {
|
||
|
}
|
||
|
}
|
||
|
|
||
|
+ node = ISC_LIST_HEAD(rbtdb->prunenodes);
|
||
|
+ while (node != NULL) {
|
||
|
+ ISC_LIST_UNLINK(rbtdb->prunenodes, node, prunelink);
|
||
|
+ node = ISC_LIST_HEAD(rbtdb->prunenodes);
|
||
|
+ }
|
||
|
+
|
||
|
if (event == NULL) {
|
||
|
rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
|
||
|
}
|
||
|
@@ -1934,19 +1943,32 @@ is_leaf(dns_rbtnode_t *node) {
|
||
|
node->left == NULL && node->right == NULL);
|
||
|
}
|
||
|
|
||
|
+/*%
|
||
|
+ * The tree lock must be held when this function is called as it reads and
|
||
|
+ * updates rbtdb->prunenodes.
|
||
|
+ */
|
||
|
static inline void
|
||
|
send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
|
||
|
isc_rwlocktype_t locktype) {
|
||
|
- isc_event_t *ev;
|
||
|
- dns_db_t *db;
|
||
|
+ bool pruning_queued = (ISC_LIST_HEAD(rbtdb->prunenodes) != NULL);
|
||
|
+
|
||
|
+ INSIST(locktype == isc_rwlocktype_write);
|
||
|
|
||
|
- ev = isc_event_allocate(rbtdb->common.mctx, NULL, DNS_EVENT_RBTPRUNE,
|
||
|
- prune_tree, node, sizeof(isc_event_t));
|
||
|
new_reference(rbtdb, node, locktype);
|
||
|
- db = NULL;
|
||
|
- attach((dns_db_t *)rbtdb, &db);
|
||
|
- ev->ev_sender = db;
|
||
|
- isc_task_send(rbtdb->task, &ev);
|
||
|
+ INSIST(!ISC_LINK_LINKED(node, prunelink));
|
||
|
+ ISC_LIST_APPEND(rbtdb->prunenodes, node, prunelink);
|
||
|
+
|
||
|
+ if (!pruning_queued) {
|
||
|
+ isc_event_t *ev = NULL;
|
||
|
+ dns_db_t *db = NULL;
|
||
|
+
|
||
|
+ attach((dns_db_t *)rbtdb, &db);
|
||
|
+
|
||
|
+ ev = isc_event_allocate(rbtdb->common.mctx, NULL,
|
||
|
+ DNS_EVENT_RBTPRUNE, prune_tree, db,
|
||
|
+ sizeof(isc_event_t));
|
||
|
+ isc_task_send(rbtdb->task, &ev);
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
/*%
|
||
|
@@ -2220,17 +2242,26 @@ restore_locks:
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
- * Prune the tree by recursively cleaning-up single leaves. In the worst
|
||
|
- * case, the number of iteration is the number of tree levels, which is at
|
||
|
- * most the maximum number of domain name labels, i.e, 127. In practice, this
|
||
|
- * should be much smaller (only a few times), and even the worst case would be
|
||
|
- * acceptable for a single event.
|
||
|
+ * Prune the tree by recursively cleaning up single leaves. Go through all
|
||
|
+ * nodes stored in the rbtdb->prunenodes list; for each of them, in the worst
|
||
|
+ * case, it will be necessary to traverse a number of tree levels equal to the
|
||
|
+ * maximum legal number of domain name labels (127); in practice, the number of
|
||
|
+ * tree levels to traverse will virtually always be much smaller (a few levels
|
||
|
+ * at most). While holding the tree lock throughout this entire operation is
|
||
|
+ * less than ideal, so is splitting the latter up by queueing a separate
|
||
|
+ * prune_tree() run for each node to start pruning from (as queueing requires
|
||
|
+ * allocating memory and can therefore potentially be exploited to exhaust
|
||
|
+ * available memory). Also note that actually freeing up the memory used by
|
||
|
+ * RBTDB nodes (which is what this function does) is essential to keeping cache
|
||
|
+ * memory use in check, so since the tree lock needs to be acquired anyway,
|
||
|
+ * freeing as many nodes as possible before the tree lock gets released is
|
||
|
+ * prudent.
|
||
|
*/
|
||
|
static void
|
||
|
prune_tree(isc_task_t *task, isc_event_t *event) {
|
||
|
- dns_rbtdb_t *rbtdb = event->ev_sender;
|
||
|
- dns_rbtnode_t *node = event->ev_arg;
|
||
|
- dns_rbtnode_t *parent;
|
||
|
+ dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)event->ev_arg;
|
||
|
+ dns_rbtnode_t *node = NULL;
|
||
|
+ dns_rbtnode_t *parent = NULL;
|
||
|
unsigned int locknum;
|
||
|
|
||
|
UNUSED(task);
|
||
|
@@ -2238,44 +2269,60 @@ prune_tree(isc_task_t *task, isc_event_t *event) {
|
||
|
isc_event_free(&event);
|
||
|
|
||
|
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
|
||
|
- locknum = node->locknum;
|
||
|
- NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
|
||
|
- do {
|
||
|
- parent = node->parent;
|
||
|
- decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
|
||
|
- isc_rwlocktype_write, true);
|
||
|
|
||
|
- if (parent != NULL && parent->down == NULL) {
|
||
|
- /*
|
||
|
- * node was the only down child of the parent and has
|
||
|
- * just been removed. We'll then need to examine the
|
||
|
- * parent. Keep the lock if possible; otherwise,
|
||
|
- * release the old lock and acquire one for the parent.
|
||
|
- */
|
||
|
- if (parent->locknum != locknum) {
|
||
|
- NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
|
||
|
- isc_rwlocktype_write);
|
||
|
- locknum = parent->locknum;
|
||
|
- NODE_LOCK(&rbtdb->node_locks[locknum].lock,
|
||
|
- isc_rwlocktype_write);
|
||
|
+ while ((node = ISC_LIST_HEAD(rbtdb->prunenodes)) != NULL) {
|
||
|
+ locknum = node->locknum;
|
||
|
+ NODE_LOCK(&rbtdb->node_locks[locknum].lock,
|
||
|
+ isc_rwlocktype_write);
|
||
|
+ do {
|
||
|
+ if (ISC_LINK_LINKED(node, prunelink)) {
|
||
|
+ ISC_LIST_UNLINK(rbtdb->prunenodes, node,
|
||
|
+ prunelink);
|
||
|
}
|
||
|
|
||
|
- /*
|
||
|
- * We need to gain a reference to the node before
|
||
|
- * decrementing it in the next iteration.
|
||
|
- */
|
||
|
- if (ISC_LINK_LINKED(parent, deadlink)) {
|
||
|
- ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
|
||
|
+ parent = node->parent;
|
||
|
+ decrement_reference(rbtdb, node, 0,
|
||
|
+ isc_rwlocktype_write,
|
||
|
+ isc_rwlocktype_write, true);
|
||
|
+
|
||
|
+ if (parent != NULL && parent->down == NULL) {
|
||
|
+ /*
|
||
|
+ * node was the only down child of the parent
|
||
|
+ * and has just been removed. We'll then need
|
||
|
+ * to examine the parent. Keep the lock if
|
||
|
+ * possible; otherwise, release the old lock and
|
||
|
+ * acquire one for the parent.
|
||
|
+ */
|
||
|
+ if (parent->locknum != locknum) {
|
||
|
+ NODE_UNLOCK(
|
||
|
+ &rbtdb->node_locks[locknum].lock,
|
||
|
+ isc_rwlocktype_write);
|
||
|
+ locknum = parent->locknum;
|
||
|
+ NODE_LOCK(
|
||
|
+ &rbtdb->node_locks[locknum].lock,
|
||
|
+ isc_rwlocktype_write);
|
||
|
+ }
|
||
|
+
|
||
|
+ /*
|
||
|
+ * We need to gain a reference to the node
|
||
|
+ * before decrementing it in the next iteration.
|
||
|
+ */
|
||
|
+ if (ISC_LINK_LINKED(parent, deadlink)) {
|
||
|
+ ISC_LIST_UNLINK(
|
||
|
+ rbtdb->deadnodes[locknum],
|
||
|
parent, deadlink);
|
||
|
+ }
|
||
|
+ new_reference(rbtdb, parent,
|
||
|
+ isc_rwlocktype_write);
|
||
|
+ } else {
|
||
|
+ parent = NULL;
|
||
|
}
|
||
|
- new_reference(rbtdb, parent, isc_rwlocktype_write);
|
||
|
- } else {
|
||
|
- parent = NULL;
|
||
|
- }
|
||
|
|
||
|
- node = parent;
|
||
|
- } while (node != NULL);
|
||
|
- NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
|
||
|
+ node = parent;
|
||
|
+ } while (node != NULL);
|
||
|
+ NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
|
||
|
+ isc_rwlocktype_write);
|
||
|
+ }
|
||
|
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
|
||
|
|
||
|
detach((dns_db_t **)&rbtdb);
|
||
|
@@ -8726,6 +8773,8 @@ dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
|
||
|
ISC_LIST_INIT(rbtdb->deadnodes[i]);
|
||
|
}
|
||
|
|
||
|
+ ISC_LIST_INIT(rbtdb->prunenodes);
|
||
|
+
|
||
|
rbtdb->active = rbtdb->node_lock_count;
|
||
|
|
||
|
for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
|
||
|
--
|
||
|
2.43.0
|
||
|
|