You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
268 lines
8.4 KiB
268 lines
8.4 KiB
8 months ago
|
From 2cf7651f0b1b0123dc5568ebad00ac84a9b3c348 Mon Sep 17 00:00:00 2001
|
||
|
From: Donald Sharp <sharpd@nvidia.com>
|
||
|
Date: Wed, 2 Feb 2022 13:28:42 -0500
|
||
|
Subject: [PATCH] zebra: Make netlink buffer reads resizeable when needed
|
||
|
|
||
|
Currently when the kernel sends netlink messages to FRR
|
||
|
the buffers to receive this data are of fixed length.
|
||
|
The kernel, with certain configurations, will send
|
||
|
netlink messages that are larger than this fixed length.
|
||
|
This leads to situations where, on startup, zebra gets
|
||
|
really confused about the state of the kernel. Effectively
|
||
|
the current algorithm is this:
|
||
|
|
||
|
read up to buffer in size
|
||
|
while (data to parse)
|
||
|
get netlink message header, look at size
|
||
|
parse if you can
|
||
|
|
||
|
The problem is that there is a 32k buffer we read.
|
||
|
We get the first message that is, say, 1k in size,
|
||
|
subtract that 1k, leaving 31k to parse. We then
|
||
|
get the next header and notice that the length
|
||
|
of the message is 33k. Which is obviously larger
|
||
|
than what we read in. FRR has no recovery mechanism
|
||
|
nor is there a way to know, a priori, the maximum
|
||
|
size the kernel will send us.
|
||
|
|
||
|
Modify FRR to look at the kernel message and see if the
|
||
|
buffer is large enough, if not, make it large enough to
|
||
|
read in the message.
|
||
|
|
||
|
This code has to be per netlink socket because of the usage
|
||
|
of pthreads. So add to `struct nlsock` the buffer and current
|
||
|
buffer length, growing it as necessary.
|
||
|
|
||
|
Fixes: #10404
|
||
|
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
|
||
|
---
|
||
|
zebra/kernel_netlink.c | 68 +++++++++++++++++++++++++-----------------
|
||
|
zebra/kernel_netlink.h | 2 +-
|
||
|
zebra/zebra_dplane.c | 4 +++
|
||
|
zebra/zebra_ns.h | 3 ++
|
||
|
4 files changed, 49 insertions(+), 28 deletions(-)
|
||
|
|
||
|
diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h
|
||
|
index ae88f3372b1c..9421ea1c611a 100644
|
||
|
--- a/zebra/kernel_netlink.h
|
||
|
+++ b/zebra/kernel_netlink.h
|
||
|
@@ -96,7 +96,7 @@ extern const char *nl_family_to_str(uint8_t family);
|
||
|
extern const char *nl_rttype_to_str(uint8_t rttype);
|
||
|
|
||
|
extern int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
|
||
|
- const struct nlsock *nl,
|
||
|
+ struct nlsock *nl,
|
||
|
const struct zebra_dplane_info *dp_info,
|
||
|
int count, int startup);
|
||
|
extern int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns, int startup);
|
||
|
diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h
|
||
|
index 0519e1d5b33d..7a0ffbc1ee6f 100644
|
||
|
--- a/zebra/zebra_ns.h
|
||
|
+++ b/zebra/zebra_ns.h
|
||
|
@@ -39,6 +39,9 @@ struct nlsock {
|
||
|
int seq;
|
||
|
struct sockaddr_nl snl;
|
||
|
char name[64];
|
||
|
+
|
||
|
+ uint8_t *buf;
|
||
|
+ size_t buflen;
|
||
|
};
|
||
|
#endif
|
||
|
|
||
|
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
|
||
|
index b8eaeb1..14a40a9 100644
|
||
|
--- a/zebra/kernel_netlink.c
|
||
|
+++ b/zebra/kernel_netlink.c
|
||
|
@@ -90,8 +90,6 @@
|
||
|
*/
|
||
|
#define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
|
||
|
|
||
|
-#define NL_BATCH_RX_BUFSIZE NL_RCV_PKT_BUF_SIZE
|
||
|
-
|
||
|
static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
|
||
|
{RTM_DELROUTE, "RTM_DELROUTE"},
|
||
|
{RTM_GETROUTE, "RTM_GETROUTE"},
|
||
|
@@ -164,8 +162,6 @@ DEFINE_MTYPE_STATIC(ZEBRA, NL_BUF, "Zebra Netlink buffers")
|
||
|
size_t nl_batch_tx_bufsize;
|
||
|
char *nl_batch_tx_buf;
|
||
|
|
||
|
-char nl_batch_rx_buf[NL_BATCH_RX_BUFSIZE];
|
||
|
-
|
||
|
_Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE;
|
||
|
_Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
|
||
|
|
||
|
@@ -322,6 +318,9 @@ static int netlink_socket(struct nlsock *nl, unsigned long groups,
|
||
|
|
||
|
nl->snl = snl;
|
||
|
nl->sock = sock;
|
||
|
+ nl->buflen = NL_RCV_PKT_BUF_SIZE;
|
||
|
+ nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen);
|
||
|
+
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
@@ -729,19 +728,29 @@ static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf,
|
||
|
*
|
||
|
* Returns -1 on error, 0 if read would block or the number of bytes received.
|
||
|
*/
|
||
|
-static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
|
||
|
- void *buf, size_t buflen)
|
||
|
+static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
|
||
|
{
|
||
|
struct iovec iov;
|
||
|
int status;
|
||
|
|
||
|
- iov.iov_base = buf;
|
||
|
- iov.iov_len = buflen;
|
||
|
- msg.msg_iov = &iov;
|
||
|
- msg.msg_iovlen = 1;
|
||
|
+ iov.iov_base = nl->buf;
|
||
|
+ iov.iov_len = nl->buflen;
|
||
|
+ msg->msg_iov = &iov;
|
||
|
+ msg->msg_iovlen = 1;
|
||
|
|
||
|
do {
|
||
|
- status = recvmsg(nl->sock, &msg, 0);
|
||
|
+ int bytes;
|
||
|
+
|
||
|
+ bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC);
|
||
|
+
|
||
|
+ if (bytes >= 0 && (size_t)bytes > nl->buflen) {
|
||
|
+ nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes);
|
||
|
+ nl->buflen = bytes;
|
||
|
+ iov.iov_base = nl->buf;
|
||
|
+ iov.iov_len = nl->buflen;
|
||
|
+ }
|
||
|
+
|
||
|
+ status = recvmsg(nl->sock, msg, 0);
|
||
|
} while (status == -1 && errno == EINTR);
|
||
|
|
||
|
if (status == -1) {
|
||
|
@@ -761,10 +770,10 @@ static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
- if (msg.msg_namelen != sizeof(struct sockaddr_nl)) {
|
||
|
+ if (msg->msg_namelen != sizeof(struct sockaddr_nl)) {
|
||
|
flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
|
||
|
"%s sender address length error: length %d", nl->name,
|
||
|
- msg.msg_namelen);
|
||
|
+ msg->msg_namelen);
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
@@ -873,8 +882,7 @@ static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h,
|
||
|
* the filter.
|
||
|
*/
|
||
|
int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
|
||
|
- const struct nlsock *nl,
|
||
|
- const struct zebra_dplane_info *zns,
|
||
|
+ struct nlsock *nl, const struct zebra_dplane_info *zns,
|
||
|
int count, int startup)
|
||
|
{
|
||
|
int status;
|
||
|
@@ -883,7 +891,6 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
|
||
|
int read_in = 0;
|
||
|
|
||
|
while (1) {
|
||
|
- char buf[NL_RCV_PKT_BUF_SIZE];
|
||
|
struct sockaddr_nl snl;
|
||
|
struct msghdr msg = {.msg_name = (void *)&snl,
|
||
|
.msg_namelen = sizeof(snl)};
|
||
|
@@ -892,14 +899,14 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
|
||
|
if (count && read_in >= count)
|
||
|
return 0;
|
||
|
|
||
|
- status = netlink_recv_msg(nl, msg, buf, sizeof(buf));
|
||
|
+ status = netlink_recv_msg(nl, &msg);
|
||
|
if (status == -1)
|
||
|
return -1;
|
||
|
else if (status == 0)
|
||
|
break;
|
||
|
|
||
|
read_in++;
|
||
|
- for (h = (struct nlmsghdr *)buf;
|
||
|
+ for (h = (struct nlmsghdr *)nl->buf;
|
||
|
(status >= 0 && NLMSG_OK(h, (unsigned int)status));
|
||
|
h = NLMSG_NEXT(h, status)) {
|
||
|
/* Finish of reading. */
|
||
|
@@ -976,10 +983,10 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
|
||
|
*/
|
||
|
static int
|
||
|
netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
|
||
|
- struct nlmsghdr *n, const struct zebra_dplane_info *dp_info,
|
||
|
+ struct nlmsghdr *n, struct zebra_dplane_info *dp_info,
|
||
|
int startup)
|
||
|
{
|
||
|
- const struct nlsock *nl;
|
||
|
+ struct nlsock *nl;
|
||
|
|
||
|
nl = &(dp_info->nls);
|
||
|
n->nlmsg_seq = nl->seq;
|
||
|
@@ -1067,12 +1074,11 @@ static int nl_batch_read_resp(struct nl_batch *bth)
|
||
|
* message at a time.
|
||
|
*/
|
||
|
while (true) {
|
||
|
- status = netlink_recv_msg(nl, msg, nl_batch_rx_buf,
|
||
|
- sizeof(nl_batch_rx_buf));
|
||
|
+ status = netlink_recv_msg(nl, &msg);
|
||
|
if (status == -1 || status == 0)
|
||
|
return status;
|
||
|
|
||
|
- h = (struct nlmsghdr *)nl_batch_rx_buf;
|
||
|
+ h = (struct nlmsghdr *)nl->buf;
|
||
|
ignore_msg = false;
|
||
|
seq = h->nlmsg_seq;
|
||
|
/*
|
||
|
@@ -1506,11 +1512,15 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
|
||
|
if (zns->netlink.sock >= 0) {
|
||
|
close(zns->netlink.sock);
|
||
|
zns->netlink.sock = -1;
|
||
|
+ XFREE(MTYPE_NL_BUF, zns->netlink.buf);
|
||
|
+ zns->netlink.buflen = 0;
|
||
|
}
|
||
|
|
||
|
if (zns->netlink_cmd.sock >= 0) {
|
||
|
close(zns->netlink_cmd.sock);
|
||
|
zns->netlink_cmd.sock = -1;
|
||
|
+ XFREE(MTYPE_NL_BUF, zns->netlink_cmd.buf);
|
||
|
+ zns->netlink_cmd.buflen = 0;
|
||
|
}
|
||
|
|
||
|
/* During zebra shutdown, we need to leave the dataplane socket
|
||
|
@@ -1520,6 +1530,8 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
|
||
|
if (zns->netlink_dplane.sock >= 0) {
|
||
|
close(zns->netlink_dplane.sock);
|
||
|
zns->netlink_dplane.sock = -1;
|
||
|
+ XFREE(MTYPE_NL_BUF, zns->netlink_dplane.buf);
|
||
|
+ zns->netlink_dplane.buflen = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
|
||
|
index 14a40a9..2b566d4 100644
|
||
|
--- a/zebra/kernel_netlink.c
|
||
|
+++ b/zebra/kernel_netlink.c
|
||
|
@@ -779,7 +779,7 @@ static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
|
||
|
|
||
|
if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
|
||
|
zlog_debug("%s: << netlink message dump [recv]", __func__);
|
||
|
- zlog_hexdump(buf, status);
|
||
|
+ zlog_hexdump(nl->buf, status);
|
||
|
}
|
||
|
|
||
|
return status;
|
||
|
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
|
||
|
index 2b566d4..0564a6b 100644
|
||
|
--- a/zebra/kernel_netlink.c
|
||
|
+++ b/zebra/kernel_netlink.c
|
||
|
@@ -1060,7 +1060,7 @@ static int nl_batch_read_resp(struct nl_batch *bth)
|
||
|
struct sockaddr_nl snl;
|
||
|
struct msghdr msg = {};
|
||
|
int status, seq;
|
||
|
- const struct nlsock *nl;
|
||
|
+ struct nlsock *nl;
|
||
|
struct zebra_dplane_ctx *ctx;
|
||
|
bool ignore_msg;
|
||
|
|