parent
eeb5f2af45
commit
e55f5f3e10
@ -0,0 +1,267 @@
|
||||
From 2cf7651f0b1b0123dc5568ebad00ac84a9b3c348 Mon Sep 17 00:00:00 2001
|
||||
From: Donald Sharp <sharpd@nvidia.com>
|
||||
Date: Wed, 2 Feb 2022 13:28:42 -0500
|
||||
Subject: [PATCH] zebra: Make netlink buffer reads resizeable when needed
|
||||
|
||||
Currently when the kernel sends netlink messages to FRR
|
||||
the buffers to receive this data are of fixed length.
|
||||
The kernel, with certain configurations, will send
|
||||
netlink messages that are larger than this fixed length.
|
||||
This leads to situations where, on startup, zebra gets
|
||||
really confused about the state of the kernel. Effectively
|
||||
the current algorithm is this:
|
||||
|
||||
read up to buffer in size
|
||||
while (data to parse)
|
||||
get netlink message header, look at size
|
||||
parse if you can
|
||||
|
||||
The problem is that there is a 32k buffer we read.
|
||||
We get the first message that is, say, 1k in size,
|
||||
subtract that 1k, leaving 31k to parse. We then
|
||||
get the next header and notice that the length
|
||||
of the message is 33k, which is obviously larger
|
||||
than what we read in. FRR has no recovery mechanism
|
||||
nor is there a way to know, a priori, the maximum
|
||||
size the kernel will send us.
|
||||
|
||||
Modify FRR to look at the kernel message and see if the
|
||||
buffer is large enough; if not, make it large enough to
|
||||
read in the message.
|
||||
|
||||
This code has to be per netlink socket because of the usage
|
||||
of pthreads. So add to `struct nlsock` the buffer and current
|
||||
buffer length, growing it as necessary.
|
||||
|
||||
Fixes: #10404
|
||||
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
|
||||
---
|
||||
zebra/kernel_netlink.c | 68 +++++++++++++++++++++++++-----------------
|
||||
zebra/kernel_netlink.h | 2 +-
|
||||
zebra/zebra_dplane.c | 4 +++
|
||||
zebra/zebra_ns.h | 3 ++
|
||||
4 files changed, 49 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h
|
||||
index ae88f3372b1c..9421ea1c611a 100644
|
||||
--- a/zebra/kernel_netlink.h
|
||||
+++ b/zebra/kernel_netlink.h
|
||||
@@ -96,7 +96,7 @@ extern const char *nl_family_to_str(uint8_t family);
|
||||
extern const char *nl_rttype_to_str(uint8_t rttype);
|
||||
|
||||
extern int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
|
||||
- const struct nlsock *nl,
|
||||
+ struct nlsock *nl,
|
||||
const struct zebra_dplane_info *dp_info,
|
||||
int count, int startup);
|
||||
extern int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns, int startup);
|
||||
diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h
|
||||
index 0519e1d5b33d..7a0ffbc1ee6f 100644
|
||||
--- a/zebra/zebra_ns.h
|
||||
+++ b/zebra/zebra_ns.h
|
||||
@@ -39,6 +39,9 @@ struct nlsock {
|
||||
int seq;
|
||||
struct sockaddr_nl snl;
|
||||
char name[64];
|
||||
+
|
||||
+ uint8_t *buf;
|
||||
+ size_t buflen;
|
||||
};
|
||||
#endif
|
||||
|
||||
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
|
||||
index b8eaeb1..14a40a9 100644
|
||||
--- a/zebra/kernel_netlink.c
|
||||
+++ b/zebra/kernel_netlink.c
|
||||
@@ -90,8 +90,6 @@
|
||||
*/
|
||||
#define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
|
||||
|
||||
-#define NL_BATCH_RX_BUFSIZE NL_RCV_PKT_BUF_SIZE
|
||||
-
|
||||
static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
|
||||
{RTM_DELROUTE, "RTM_DELROUTE"},
|
||||
{RTM_GETROUTE, "RTM_GETROUTE"},
|
||||
@@ -164,8 +162,6 @@ DEFINE_MTYPE_STATIC(ZEBRA, NL_BUF, "Zebra Netlink buffers")
|
||||
size_t nl_batch_tx_bufsize;
|
||||
char *nl_batch_tx_buf;
|
||||
|
||||
-char nl_batch_rx_buf[NL_BATCH_RX_BUFSIZE];
|
||||
-
|
||||
_Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE;
|
||||
_Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
|
||||
|
||||
@@ -322,6 +318,9 @@ static int netlink_socket(struct nlsock *nl, unsigned long groups,
|
||||
|
||||
nl->snl = snl;
|
||||
nl->sock = sock;
|
||||
+ nl->buflen = NL_RCV_PKT_BUF_SIZE;
|
||||
+ nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -729,19 +728,29 @@ static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf,
|
||||
*
|
||||
* Returns -1 on error, 0 if read would block or the number of bytes received.
|
||||
*/
|
||||
-static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
|
||||
- void *buf, size_t buflen)
|
||||
+static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
|
||||
{
|
||||
struct iovec iov;
|
||||
int status;
|
||||
|
||||
- iov.iov_base = buf;
|
||||
- iov.iov_len = buflen;
|
||||
- msg.msg_iov = &iov;
|
||||
- msg.msg_iovlen = 1;
|
||||
+ iov.iov_base = nl->buf;
|
||||
+ iov.iov_len = nl->buflen;
|
||||
+ msg->msg_iov = &iov;
|
||||
+ msg->msg_iovlen = 1;
|
||||
|
||||
do {
|
||||
- status = recvmsg(nl->sock, &msg, 0);
|
||||
+ int bytes;
|
||||
+
|
||||
+ bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC);
|
||||
+
|
||||
+ if (bytes >= 0 && (size_t)bytes > nl->buflen) {
|
||||
+ nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes);
|
||||
+ nl->buflen = bytes;
|
||||
+ iov.iov_base = nl->buf;
|
||||
+ iov.iov_len = nl->buflen;
|
||||
+ }
|
||||
+
|
||||
+ status = recvmsg(nl->sock, msg, 0);
|
||||
} while (status == -1 && errno == EINTR);
|
||||
|
||||
if (status == -1) {
|
||||
@@ -761,10 +770,10 @@ static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
|
||||
return -1;
|
||||
}
|
||||
|
||||
- if (msg.msg_namelen != sizeof(struct sockaddr_nl)) {
|
||||
+ if (msg->msg_namelen != sizeof(struct sockaddr_nl)) {
|
||||
flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
|
||||
"%s sender address length error: length %d", nl->name,
|
||||
- msg.msg_namelen);
|
||||
+ msg->msg_namelen);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -873,8 +882,7 @@ static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h,
|
||||
* the filter.
|
||||
*/
|
||||
int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
|
||||
- const struct nlsock *nl,
|
||||
- const struct zebra_dplane_info *zns,
|
||||
+ struct nlsock *nl, const struct zebra_dplane_info *zns,
|
||||
int count, int startup)
|
||||
{
|
||||
int status;
|
||||
@@ -883,7 +891,6 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
|
||||
int read_in = 0;
|
||||
|
||||
while (1) {
|
||||
- char buf[NL_RCV_PKT_BUF_SIZE];
|
||||
struct sockaddr_nl snl;
|
||||
struct msghdr msg = {.msg_name = (void *)&snl,
|
||||
.msg_namelen = sizeof(snl)};
|
||||
@@ -892,14 +899,14 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
|
||||
if (count && read_in >= count)
|
||||
return 0;
|
||||
|
||||
- status = netlink_recv_msg(nl, msg, buf, sizeof(buf));
|
||||
+ status = netlink_recv_msg(nl, &msg);
|
||||
if (status == -1)
|
||||
return -1;
|
||||
else if (status == 0)
|
||||
break;
|
||||
|
||||
read_in++;
|
||||
- for (h = (struct nlmsghdr *)buf;
|
||||
+ for (h = (struct nlmsghdr *)nl->buf;
|
||||
(status >= 0 && NLMSG_OK(h, (unsigned int)status));
|
||||
h = NLMSG_NEXT(h, status)) {
|
||||
/* Finish of reading. */
|
||||
@@ -976,10 +983,10 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
|
||||
*/
|
||||
static int
|
||||
netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
|
||||
- struct nlmsghdr *n, const struct zebra_dplane_info *dp_info,
|
||||
+ struct nlmsghdr *n, struct zebra_dplane_info *dp_info,
|
||||
int startup)
|
||||
{
|
||||
- const struct nlsock *nl;
|
||||
+ struct nlsock *nl;
|
||||
|
||||
nl = &(dp_info->nls);
|
||||
n->nlmsg_seq = nl->seq;
|
||||
@@ -1067,12 +1074,11 @@ static int nl_batch_read_resp(struct nl_batch *bth)
|
||||
* message at a time.
|
||||
*/
|
||||
while (true) {
|
||||
- status = netlink_recv_msg(nl, msg, nl_batch_rx_buf,
|
||||
- sizeof(nl_batch_rx_buf));
|
||||
+ status = netlink_recv_msg(nl, &msg);
|
||||
if (status == -1 || status == 0)
|
||||
return status;
|
||||
|
||||
- h = (struct nlmsghdr *)nl_batch_rx_buf;
|
||||
+ h = (struct nlmsghdr *)nl->buf;
|
||||
ignore_msg = false;
|
||||
seq = h->nlmsg_seq;
|
||||
/*
|
||||
@@ -1506,11 +1512,15 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
|
||||
if (zns->netlink.sock >= 0) {
|
||||
close(zns->netlink.sock);
|
||||
zns->netlink.sock = -1;
|
||||
+ XFREE(MTYPE_NL_BUF, zns->netlink.buf);
|
||||
+ zns->netlink.buflen = 0;
|
||||
}
|
||||
|
||||
if (zns->netlink_cmd.sock >= 0) {
|
||||
close(zns->netlink_cmd.sock);
|
||||
zns->netlink_cmd.sock = -1;
|
||||
+ XFREE(MTYPE_NL_BUF, zns->netlink_cmd.buf);
|
||||
+ zns->netlink_cmd.buflen = 0;
|
||||
}
|
||||
|
||||
/* During zebra shutdown, we need to leave the dataplane socket
|
||||
@@ -1520,6 +1530,8 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
|
||||
if (zns->netlink_dplane.sock >= 0) {
|
||||
close(zns->netlink_dplane.sock);
|
||||
zns->netlink_dplane.sock = -1;
|
||||
+ XFREE(MTYPE_NL_BUF, zns->netlink_dplane.buf);
|
||||
+ zns->netlink_dplane.buflen = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
|
||||
index 14a40a9..2b566d4 100644
|
||||
--- a/zebra/kernel_netlink.c
|
||||
+++ b/zebra/kernel_netlink.c
|
||||
@@ -779,7 +779,7 @@ static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
|
||||
|
||||
if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
|
||||
zlog_debug("%s: << netlink message dump [recv]", __func__);
|
||||
- zlog_hexdump(buf, status);
|
||||
+ zlog_hexdump(nl->buf, status);
|
||||
}
|
||||
|
||||
return status;
|
||||
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
|
||||
index 2b566d4..0564a6b 100644
|
||||
--- a/zebra/kernel_netlink.c
|
||||
+++ b/zebra/kernel_netlink.c
|
||||
@@ -1060,7 +1060,7 @@ static int nl_batch_read_resp(struct nl_batch *bth)
|
||||
struct sockaddr_nl snl;
|
||||
struct msghdr msg = {};
|
||||
int status, seq;
|
||||
- const struct nlsock *nl;
|
||||
+ struct nlsock *nl;
|
||||
struct zebra_dplane_ctx *ctx;
|
||||
bool ignore_msg;
|
||||
|
Loading…
Reference in new issue