From b37f8c15ca6ee079541b0c02ee77ce9d392b18fc Mon Sep 17 00:00:00 2001
Message-Id: <b37f8c15ca6ee079541b0c02ee77ce9d392b18fc.1538494812.git.lorenzo.bianconi@redhat.com>
In-Reply-To: <cover.1538494812.git.lorenzo.bianconi@redhat.com>
References: <cover.1538494812.git.lorenzo.bianconi@redhat.com>
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Thu, 20 Sep 2018 16:46:02 +0200
Subject: [PATCH] OVN: add CT_LB action to ovn-trace

Add the CT_LB action to the ovn-trace utility in order to fix the
following ovn-trace error, reported whenever a load-balancer rule is
added to the OVN configuration:

ct_next(ct_state=est|trk /* default (use --ct to customize) */) {
    *** ct_lb action not implemented;
};

Add the '--lb-dst' option in order to specify the IP address to use
from the VIP pool. If --lb-dst is not provided, the destination IP
will be randomly chosen.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
---
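Note: the destination-selection loop in execute_ct_lb() below picks a
uniformly random eligible backend in a single pass using single-item
reservoir sampling: the n-th matching candidate replaces the current pick
with probability 1/n, which is what "if (!random_range(++n))" expresses
(OVS's random_range(n) returns a uniform value in [0, n)).  A minimal
standalone sketch of the same idea, using plain rand() and an illustrative
is_even() filter rather than OVS types:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    /* Returns the index of one eligible element chosen uniformly at
     * random, or -1 if none is eligible.  The k-th eligible element
     * seen replaces the current pick with probability 1/k, which
     * yields a uniform choice after one pass (single-item reservoir
     * sampling).  rand() % k is slightly biased; fine for a sketch. */
    static int
    pick_random_eligible(const int *vals, int n, bool (*eligible)(int))
    {
        int pick = -1;
        int seen = 0;

        for (int i = 0; i < n; i++) {
            if (eligible(vals[i]) && rand() % ++seen == 0) {
                pick = i;
            }
        }
        return pick;
    }

    static bool
    is_even(int x)
    {
        return x % 2 == 0;
    }

    int
    main(void)
    {
        int backends[] = { 3, 8, 14, 7, 22 };

        srand((unsigned) time(NULL));
        printf("picked index %d\n",
               pick_random_eligible(backends, 5, is_even));
        return 0;
    }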
 ovn/utilities/ovn-trace.8.xml | 18 ++++++-
 ovn/utilities/ovn-trace.c     | 98 +++++++++++++++++++++++++++++++++--
 2 files changed, 111 insertions(+), 5 deletions(-)

--- a/ovn/utilities/ovn-trace.8.xml
+++ b/ovn/utilities/ovn-trace.8.xml
@@ -253,9 +253,17 @@
       <code>ct_snat</code>) action.
     </dd>

-    <dt><code>ct_lb</code></dt>
+    <dt><code>ct_lb;</code></dt>
+    <dt><code>ct_lb(<var>ip</var></code>[<code>:<var>port</var></code>]...<code>);</code></dt>
     <dd>
-      Not yet implemented; currently implemented as a no-op.
+      Forks the pipeline.  In one fork, sets <code>ip4.dst</code> (or
+      <code>ip6.dst</code>) to one of the load-balancer addresses and the
+      destination port to its associated port, if any, and sets
+      <code>ct.dnat</code> to 1.  With one or more arguments, gives preference
+      to the address specified on <code>--lb-dst</code>, if any; without
+      arguments, uses the address and port specified on <code>--lb-dst</code>.
+      In the other fork, the pipeline continues without change after the
+      <code>ct_lb</code> action.
     </dd>

     <dt><code>ct_commit</code></dt>
@@ -424,6 +432,12 @@
       </p>
     </dd>

+    <dt><code>--lb-dst=</code><var>ip</var>[<code>:<var>port</var></code>]</dt>
+    <dd>
+      Sets the IP from the VIP pool to use as the destination of the packet.
+      <code>--lb-dst</code> is not available in daemon mode.
+    </dd>
+
     <dt><code>--friendly-names</code></dt>
     <dt><code>--no-friendly-names</code></dt>
     <dd>
--- a/ovn/utilities/ovn-trace.c
+++ b/ovn/utilities/ovn-trace.c
@@ -46,6 +46,7 @@
 #include "stream.h"
 #include "unixctl.h"
 #include "util.h"
+#include "random.h"

 VLOG_DEFINE_THIS_MODULE(ovntrace);

@@ -77,6 +78,9 @@ static uint32_t *ct_states;
 static size_t n_ct_states;
 static size_t ct_state_idx;

+/* --lb-dst: load balancer destination info. */
+static struct ovnact_ct_lb_dst lb_dst;
+
 /* --friendly-names, --no-friendly-names: Whether to substitute human-friendly
  * port and datapath names for the awkward UUIDs typically used in the actual
  * logical flows. */
@@ -187,6 +191,24 @@ parse_ct_option(const char *state_s_)
 }

 static void
+parse_lb_option(const char *s)
+{
+    struct sockaddr_storage ss;
+    if (!inet_parse_active(s, 0, &ss)) {
+        ovs_fatal(0, "%s: bad address", s);
+    }
+
+    lb_dst.family = ss.ss_family;
+    struct in6_addr a = ss_get_address(&ss);
+    if (ss.ss_family == AF_INET) {
+        lb_dst.ipv4 = in6_addr_get_mapped_ipv4(&a);
+    } else {
+        lb_dst.ipv6 = a;
+    }
+    lb_dst.port = ss_get_port(&ss);
+}
+
+static void
 parse_options(int argc, char *argv[])
 {
     enum {
@@ -202,7 +224,8 @@ parse_options(int argc, char *argv[])
         OPT_NO_FRIENDLY_NAMES,
         DAEMON_OPTION_ENUMS,
         SSL_OPTION_ENUMS,
-        VLOG_OPTION_ENUMS
+        VLOG_OPTION_ENUMS,
+        OPT_LB_DST
     };
     static const struct option long_options[] = {
         {"db", required_argument, NULL, OPT_DB},
@@ -217,6 +240,7 @@ parse_options(int argc, char *argv[])
         {"no-friendly-names", no_argument, NULL, OPT_NO_FRIENDLY_NAMES},
         {"help", no_argument, NULL, 'h'},
         {"version", no_argument, NULL, 'V'},
+        {"lb-dst", required_argument, NULL, OPT_LB_DST},
         DAEMON_LONG_OPTIONS,
         VLOG_LONG_OPTIONS,
         STREAM_SSL_LONG_OPTIONS,
@@ -274,6 +298,10 @@ parse_options(int argc, char *argv[])
             use_friendly_names = false;
             break;

+        case OPT_LB_DST:
+            parse_lb_option(optarg);
+            break;
+
         case 'h':
             usage();

@@ -1823,6 +1851,71 @@ execute_ct_nat(const struct ovnact_ct_na
 }

 static void
+execute_ct_lb(const struct ovnact_ct_lb *ct_lb,
+              const struct ovntrace_datapath *dp, struct flow *uflow,
+              enum ovnact_pipeline pipeline, struct ovs_list *super)
+{
+    struct flow ct_lb_flow = *uflow;
+
+    int family = (ct_lb_flow.dl_type == htons(ETH_TYPE_IP) ? AF_INET
+                  : ct_lb_flow.dl_type == htons(ETH_TYPE_IPV6) ? AF_INET6
+                  : AF_UNSPEC);
+    if (family != AF_UNSPEC) {
+        const struct ovnact_ct_lb_dst *dst = NULL;
+        if (ct_lb->n_dsts) {
+            /* For ct_lb with addresses, choose one of the addresses. */
+            int n = 0;
+            for (int i = 0; i < ct_lb->n_dsts; i++) {
+                const struct ovnact_ct_lb_dst *d = &ct_lb->dsts[i];
+                if (d->family != family) {
+                    continue;
+                }
+
+                /* Check for the destination specified by --lb-dst, if any. */
+                if (lb_dst.family == family
+                    && (family == AF_INET
+                        ? d->ipv4 == lb_dst.ipv4
+                        : ipv6_addr_equals(&d->ipv6, &lb_dst.ipv6))) {
+                    lb_dst.family = AF_UNSPEC;
+                    dst = d;
+                    break;
+                }
+
+                /* Select a random destination as a fallback. */
+                if (!random_range(++n)) {
+                    dst = d;
+                }
+            }
+
+            if (!dst) {
+                ovntrace_node_append(super, OVNTRACE_NODE_ERROR,
+                                     "*** no load balancing destination "
+                                     "(use --lb-dst)");
+            }
+        } else if (lb_dst.family == family) {
+            /* For ct_lb without addresses, use user-specified address. */
+            dst = &lb_dst;
+        }
+
+        if (dst) {
+            if (family == AF_INET6) {
+                ct_lb_flow.ipv6_dst = dst->ipv6;
+            } else {
+                ct_lb_flow.nw_dst = dst->ipv4;
+            }
+            if (dst->port) {
+                ct_lb_flow.tp_dst = htons(dst->port);
+            }
+            ct_lb_flow.ct_state |= CS_DST_NAT;
+        }
+    }
+
+    struct ovntrace_node *node = ovntrace_node_append(
+        super, OVNTRACE_NODE_TRANSFORMATION, "ct_lb");
+    trace__(dp, &ct_lb_flow, ct_lb->ltable, pipeline, &node->subs);
+}
+
+static void
 execute_log(const struct ovnact_log *log, struct flow *uflow,
             struct ovs_list *super)
 {
@@ -1910,8 +2003,7 @@ trace_actions(const struct ovnact *ovnac
         break;

     case OVNACT_CT_LB:
-        ovntrace_node_append(super, OVNTRACE_NODE_ERROR,
-                             "*** ct_lb action not implemented");
+        execute_ct_lb(ovnact_get_CT_LB(a), dp, uflow, pipeline, super);
         break;

     case OVNACT_CT_CLEAR:

From 769b50349f28c5f9e4bff102bc61dadcb9b99c37 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@ovn.org>
Date: Tue, 25 Sep 2018 15:14:13 -0700
Subject: [PATCH] dpif: Remove support for multiple queues per port.

Commit 69c51582ff78 ("dpif-netlink: don't allocate per thread netlink
sockets") removed dpif-netlink support for multiple queues per port.
No remaining dpif provider supports multiple queues per port, so
remove infrastructure for the feature.

CC: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
---
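Note: after this simplification, dpif_port_get_pid() remains a thin wrapper
around an optional provider hook: a dpif_class implementation may leave
->port_get_pid NULL, and the wrapper then supplies the documented default
of 0.  A minimal sketch of this "optional vtable entry" pattern under
illustrative names (struct provider and fixed_pid() are not OVS symbols):

    #include <stdint.h>
    #include <stdio.h>

    /* Vtable entry that implementations may leave NULL. */
    struct provider {
        uint32_t (*port_get_pid)(uint32_t port_no); /* NULL => always 0. */
    };

    static uint32_t
    fixed_pid(uint32_t port_no)
    {
        return 1000 + port_no;
    }

    /* The wrapper supplies the documented default when the hook is
     * NULL, so callers never have to test the pointer themselves. */
    static uint32_t
    port_get_pid(const struct provider *p, uint32_t port_no)
    {
        return p->port_get_pid ? p->port_get_pid(port_no) : 0;
    }

    int
    main(void)
    {
        struct provider with_hook = { .port_get_pid = fixed_pid };
        struct provider without_hook = { .port_get_pid = NULL };

        printf("%u %u\n", port_get_pid(&with_hook, 7),
               port_get_pid(&without_hook, 7));   /* 1007 0 */
        return 0;
    }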
 lib/dpif-netlink.c            |  9 ++++-----
 lib/dpif-provider.h           | 14 ++------------
 lib/dpif.c                    | 15 +++------------
 lib/dpif.h                    | 15 +--------------
 ofproto/ofproto-dpif-upcall.c |  7 +++----
 ofproto/ofproto-dpif-xlate.c  |  6 ++----
 6 files changed, 15 insertions(+), 51 deletions(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 4736d21d4..21315033c 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -234,7 +234,7 @@ static bool ovs_tunnels_out_of_tree = true;
 static int dpif_netlink_init(void);
 static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
 static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
-                                          odp_port_t port_no, uint32_t hash);
+                                          odp_port_t port_no);
 static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
 static int dpif_netlink_refresh_channels(struct dpif_netlink *,
                                          uint32_t n_handlers);
@@ -991,7 +991,7 @@ dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,

 static uint32_t
 dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
-                            odp_port_t port_no, uint32_t hash OVS_UNUSED)
+                            odp_port_t port_no)
     OVS_REQ_RDLOCK(dpif->upcall_lock)
 {
     uint32_t port_idx = odp_to_u32(port_no);
@@ -1015,14 +1015,13 @@ dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
 }

 static uint32_t
-dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
-                          uint32_t hash)
+dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
 {
     const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
     uint32_t ret;

     fat_rwlock_rdlock(&dpif->upcall_lock);
-    ret = dpif_netlink_port_get_pid__(dpif, port_no, hash);
+    ret = dpif_netlink_port_get_pid__(dpif, port_no);
     fat_rwlock_unlock(&dpif->upcall_lock);

     return ret;
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index debdafc42..eb3ee50a6 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -191,16 +191,7 @@ struct dpif_class {

     /* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE
      * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in
-     * flows whose packets arrived on port 'port_no'.  In the case where the
-     * provider allocates multiple Netlink PIDs to a single port, it may use
-     * 'hash' to spread load among them.  The caller need not use a particular
-     * hash function; a 5-tuple hash is suitable.
-     *
-     * (The datapath implementation might use some different hash function for
-     * distributing packets received via flow misses among PIDs.  This means
-     * that packets received via flow misses might be reordered relative to
-     * packets received via userspace actions.  This is not ordinarily a
-     * problem.)
+     * flows whose packets arrived on port 'port_no'.
      *
      * A 'port_no' of UINT32_MAX should be treated as a special case.  The
      * implementation should return a reserved PID, not allocated to any port,
@@ -212,8 +203,7 @@ struct dpif_class {
      *
      * A dpif provider that doesn't have meaningful Netlink PIDs can use NULL
      * for this function.  This is equivalent to always returning 0. */
-    uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no,
-                             uint32_t hash);
+    uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no);

     /* Attempts to begin dumping the ports in a dpif.  On success, returns 0
      * and initializes '*statep' with any data needed for iteration.  On
diff --git a/lib/dpif.c b/lib/dpif.c
index 85cf9000e..4697a4dcd 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -737,16 +737,7 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname,

 /* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE
  * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in
- * flows whose packets arrived on port 'port_no'.  In the case where the
- * provider allocates multiple Netlink PIDs to a single port, it may use
- * 'hash' to spread load among them.  The caller need not use a particular
- * hash function; a 5-tuple hash is suitable.
- *
- * (The datapath implementation might use some different hash function for
- * distributing packets received via flow misses among PIDs.  This means
- * that packets received via flow misses might be reordered relative to
- * packets received via userspace actions.  This is not ordinarily a
- * problem.)
+ * flows whose packets arrived on port 'port_no'.
  *
  * A 'port_no' of ODPP_NONE is a special case: it returns a reserved PID, not
  * allocated to any port, that the client may use for special purposes.
@@ -757,10 +748,10 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
  * update all of the flows that it installed that contain
  * OVS_ACTION_ATTR_USERSPACE actions. */
 uint32_t
-dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no, uint32_t hash)
+dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no)
 {
     return (dpif->dpif_class->port_get_pid
-            ? (dpif->dpif_class->port_get_pid)(dpif, port_no, hash)
+            ? (dpif->dpif_class->port_get_pid)(dpif, port_no)
             : 0);
 }

diff --git a/lib/dpif.h b/lib/dpif.h
index 8fdfe5f00..1a35cc410 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -274,18 +274,6 @@
  *
  *    - Upcalls that specify the "special" Netlink PID are queued separately.
  *
- * Multiple threads may want to read upcalls simultaneously from a single
- * datapath.  To support multiple threads well, one extends the above preferred
- * behavior:
- *
- *    - Each port has multiple PIDs.  The datapath distributes "miss" upcalls
- *      across the PIDs, ensuring that a given flow is mapped in a stable way
- *      to a single PID.
- *
- *    - For "action" upcalls, the thread can specify its own Netlink PID or
- *      other threads' Netlink PID of the same port for offloading purpose
- *      (e.g. in a "round robin" manner).
- *
  *
  * Packet Format
  * =============
@@ -470,8 +458,7 @@ int dpif_port_query_by_name(const struct dpif *, const char *devname,
                             struct dpif_port *);
 int dpif_port_get_name(struct dpif *, odp_port_t port_no,
                        char *name, size_t name_size);
-uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no,
-                           uint32_t hash);
+uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no);

 struct dpif_port_dump {
     const struct dpif *dpif;
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 62222079f..0cc964a7f 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -1021,7 +1021,6 @@ classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata,
  * initialized with at least 128 bytes of space. */
 static void
 compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
-                  const struct flow *flow,
                   odp_port_t odp_in_port, ofp_port_t ofp_in_port,
                   struct ofpbuf *buf, uint32_t meter_id,
                   struct uuid *ofproto_uuid)
@@ -1038,7 +1037,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
     port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
            ? ODPP_NONE
            : odp_in_port;
-    pid = dpif_port_get_pid(udpif->dpif, port, flow_hash_5tuple(flow, 0));
+    pid = dpif_port_get_pid(udpif->dpif, port);

     size_t offset;
     size_t ac_offset;
@@ -1196,7 +1195,7 @@ upcall_xlate(struct udpif *udpif, struct upcall *upcall,
                         odp_actions->data, odp_actions->size);
     } else {
         /* upcall->put_actions already initialized by upcall_receive(). */
-        compose_slow_path(udpif, &upcall->xout, upcall->flow,
+        compose_slow_path(udpif, &upcall->xout,
                           upcall->flow->in_port.odp_port, upcall->ofp_in_port,
                           &upcall->put_actions,
                           upcall->ofproto->up.slowpath_meter_id,
@@ -2155,7 +2154,7 @@ revalidate_ukey__(struct udpif *udpif, const struct udpif_key *ukey,
         goto exit;
     }

-    compose_slow_path(udpif, xoutp, &ctx.flow, ctx.flow.in_port.odp_port,
+    compose_slow_path(udpif, xoutp, ctx.flow.in_port.odp_port,
                       ofp_in_port, odp_actions,
                       ofproto->up.slowpath_meter_id, &ofproto->uuid);
 }
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 6949595ba..f11f60468 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -3084,8 +3084,7 @@ compose_sample_action(struct xlate_ctx *ctx,

     odp_port_t odp_port = ofp_port_to_odp_port(
         ctx->xbridge, ctx->xin->flow.in_port.ofp_port);
-    uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port,
-                                     flow_hash_5tuple(&ctx->xin->flow, 0));
+    uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port);
     size_t cookie_offset = odp_put_userspace_action(pid, cookie,
                                                     sizeof *cookie,
                                                     tunnel_out_port,
@@ -4638,8 +4637,7 @@ put_controller_user_action(struct xlate_ctx *ctx,

     odp_port_t odp_port = ofp_port_to_odp_port(ctx->xbridge,
                                                ctx->xin->flow.in_port.ofp_port);
-    uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port,
-                                     flow_hash_5tuple(&ctx->xin->flow, 0));
+    uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port);
     odp_put_userspace_action(pid, &cookie, sizeof cookie, ODPP_NONE,
                              false, ctx->odp_actions);
 }
--
2.17.1


From 57ce73db12f6d3e980c0b285015c998183f26c8d Mon Sep 17 00:00:00 2001
From: Kevin Traynor <ktraynor@redhat.com>
Date: Fri, 31 Aug 2018 09:47:55 +0100
Subject: [PATCH] dpif-netdev: Add round-robin based rxq to pmd assignment.

Prior to OVS 2.9, automatic assignment of Rxqs to PMDs
(i.e. CPUs) was done by round-robin.

That was changed in OVS 2.9 to ordering the Rxqs based on
their measured processing cycles. This was to assign the
busiest Rxqs to different PMDs, improving aggregate
throughput.

For the most part the new scheme should be better, but
there could be situations where a user prefers a simple
round-robin scheme because Rxqs from a single port are
more likely to be spread across multiple PMDs, and/or
traffic is very bursty/unpredictable.

Add a 'pmd-rxq-assign' config option to allow a user to select
round-robin based assignment.

Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Ian Stokes <ian.stokes@intel.com>
---
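Note: the scheduling change hinges on rr_numa_get_pmd() (in the diff below)
walking the PMD list in one of two ways: a wrap-around walk for
'roundrobin' (1,2,3,1,2,3,...) and an up/down walk for 'cycles'
(1,2,3,3,2,1,...), which reverses direction at either end so the busiest
queues land on different cores.  A self-contained sketch of the two walks;
struct walker is illustrative, not the OVS rr_numa structure:

    #include <stdbool.h>
    #include <stdio.h>

    struct walker {
        int n;     /* Number of cores. */
        int cur;   /* Index to return on the next call. */
        bool inc;  /* Currently walking upward? */
    };

    /* Returns the next core index.  With 'updown' true the walk
     * reverses at either end (0,1,2,2,1,0,0,...); otherwise it wraps
     * around (0,1,2,0,1,2,...). */
    static int
    next_core(struct walker *w, bool updown)
    {
        int idx = w->cur;

        if (w->inc) {
            if (w->cur == w->n - 1) {      /* Reached the last core. */
                if (updown) {
                    w->inc = false;        /* Repeat it, then go down. */
                } else {
                    w->cur = 0;            /* Plain wrap-around. */
                }
            } else {
                w->cur++;
            }
        } else if (w->cur == 0) {          /* Reached the first core. */
            w->inc = true;
        } else {
            w->cur--;
        }
        return idx;
    }

    int
    main(void)
    {
        struct walker w = { .n = 3, .cur = 0, .inc = true };

        for (int i = 0; i < 8; i++) {
            printf("%d ", next_core(&w, true)); /* 0 1 2 2 1 0 0 1 */
        }
        putchar('\n');
        return 0;
    }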
 Documentation/topics/dpdk/pmd.rst | 33 +++++++++++++---
 NEWS                              |  4 +-
 lib/dpif-netdev.c                 | 83 +++++++++++++++++++++++++++----------
 tests/pmd.at                      | 12 +++++-
 vswitchd/vswitch.xml              | 24 +++++++++++
 5 files changed, 126 insertions(+), 30 deletions(-)

diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst
index 5f0671e..dd9172d 100644
--- a/Documentation/topics/dpdk/pmd.rst
+++ b/Documentation/topics/dpdk/pmd.rst
@@ -113,10 +113,15 @@ means that this thread will only poll the *pinned* Rx queues.

 If ``pmd-rxq-affinity`` is not set for Rx queues, they will be assigned to PMDs
-(cores) automatically. Where known, the processing cycles that have been stored
-for each Rx queue will be used to assign Rx queue to PMDs based on a round
-robin of the sorted Rx queues. For example, take the following example, where
-there are five Rx queues and three cores - 3, 7, and 8 - available and the
-measured usage of core cycles per Rx queue over the last interval is seen to
-be:
+(cores) automatically.
+
+The algorithm used to automatically assign Rxqs to PMDs can be set by::
+
+    $ ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=<assignment>
+
+By default, ``cycles`` assignment is used where the Rxqs will be ordered by
+their measured processing cycles, and then be evenly assigned in descending
+order to PMDs based on an up/down walk of the PMDs. For example, where there
+are five Rx queues and three cores - 3, 7, and 8 - available and the measured
+usage of core cycles per Rx queue over the last interval is seen to be:

 - Queue #0: 30%
@@ -132,4 +137,20 @@ The Rx queues will be assigned to the cores in the following order::
     Core 8: Q3 (60%) | Q0 (30%)

+Alternatively, ``roundrobin`` assignment can be used, where the Rxqs are
+assigned to PMDs in a round-robined fashion. This algorithm was used by
+default prior to OVS 2.9. For example, given the following ports and queues:
+
+- Port #0 Queue #0 (P0Q0)
+- Port #0 Queue #1 (P0Q1)
+- Port #1 Queue #0 (P1Q0)
+- Port #1 Queue #1 (P1Q1)
+- Port #1 Queue #2 (P1Q2)
+
+The Rx queues may be assigned to the cores in the following order::
+
+    Core 3: P0Q0 | P1Q1
+    Core 7: P0Q1 | P1Q2
+    Core 8: P1Q0 |
+
 To see the current measured usage history of PMD core cycles for each Rx
 queue::
diff --git a/NEWS b/NEWS
index 04de807..87da271 100644
--- a/NEWS
+++ b/NEWS
@@ -43,6 +43,8 @@
      * Allow init to fail and record DPDK status/version in OVS database.
      * Add experimental flow hardware offload support
      * Support both shared and per port mempools for DPDK devices.
+     * Add option for simple round-robin based Rxq to PMD assignment.
+       It can be set with pmd-rxq-assign.
    - Userspace datapath:
      * Commands ovs-appctl dpif-netdev/pmd-*-show can now work on a single PMD
      * Detailed PMD performance metrics available with new command

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 52b5bc2..466d5ac 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -342,4 +342,6 @@ struct dp_netdev {
     struct id_pool *tx_qid_pool;
     struct ovs_mutex tx_qid_pool_mutex;
+    /* Use measured cycles for rxq to pmd assignment. */
+    bool pmd_rxq_assign_cyc;

     /* Protects the access of the 'struct dp_netdev_pmd_thread'
@@ -1493,4 +1495,5 @@ create_dp_netdev(const char *name, const struct dpif_class *class,

     cmap_init(&dp->poll_threads);
+    dp->pmd_rxq_assign_cyc = true;

     ovs_mutex_init(&dp->tx_qid_pool_mutex);
@@ -3717,4 +3720,6 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
     struct dp_netdev *dp = get_dp_netdev(dpif);
     const char *cmask = smap_get(other_config, "pmd-cpu-mask");
+    const char *pmd_rxq_assign = smap_get_def(other_config, "pmd-rxq-assign",
+                                              "cycles");
     unsigned long long insert_prob =
         smap_get_ullong(other_config, "emc-insert-inv-prob",
@@ -3779,4 +3784,18 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
         }
     }
+
+    bool pmd_rxq_assign_cyc = !strcmp(pmd_rxq_assign, "cycles");
+    if (!pmd_rxq_assign_cyc && strcmp(pmd_rxq_assign, "roundrobin")) {
+        VLOG_WARN("Unsupported Rxq to PMD assignment mode in pmd-rxq-assign. "
+                  "Defaulting to 'cycles'.");
+        pmd_rxq_assign_cyc = true;
+        pmd_rxq_assign = "cycles";
+    }
+    if (dp->pmd_rxq_assign_cyc != pmd_rxq_assign_cyc) {
+        dp->pmd_rxq_assign_cyc = pmd_rxq_assign_cyc;
+        VLOG_INFO("Rxq to PMD assignment mode changed to: \'%s\'.",
+                  pmd_rxq_assign);
+        dp_netdev_request_reconfigure(dp);
+    }
     return 0;
 }
@@ -4249,8 +4268,16 @@ rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
 }

-/* Returns the next pmd from the numa node in
- * incrementing or decrementing order. */
+/*
+ * Returns the next pmd from the numa node.
+ *
+ * If 'updown' is 'true' it will alternate between selecting the next pmd in
+ * either an up or down walk, switching between up/down when the first or last
+ * core is reached. e.g. 1,2,3,3,2,1,1,2...
+ *
+ * If 'updown' is 'false' it will select the next pmd wrapping around when last
+ * core reached. e.g. 1,2,3,1,2,3,1,2...
+ */
 static struct dp_netdev_pmd_thread *
-rr_numa_get_pmd(struct rr_numa *numa)
+rr_numa_get_pmd(struct rr_numa *numa, bool updown)
 {
     int numa_idx = numa->cur_index;
@@ -4260,5 +4287,9 @@ rr_numa_get_pmd(struct rr_numa *numa)
         if (numa->cur_index == numa->n_pmds-1) {
             /* Reached the last pmd. */
-            numa->idx_inc = false;
+            if (updown) {
+                numa->idx_inc = false;
+            } else {
+                numa->cur_index = 0;
+            }
         } else {
             numa->cur_index++;
@@ -4323,7 +4354,4 @@ compare_rxq_cycles(const void *a, const void *b)
  * pmds to unpinned queues.
  *
- * If 'pinned' is false queues will be sorted by processing cycles they are
- * consuming and then assigned to pmds in round robin order.
- *
  * The function doesn't touch the pmd threads, it just stores the assignment
  * in the 'pmd' member of each rxq. */
@@ -4338,4 +4366,5 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
     struct rr_numa *numa = NULL;
     int numa_id;
+    bool assign_cyc = dp->pmd_rxq_assign_cyc;

     HMAP_FOR_EACH (port, node, &dp->ports) {
@@ -4368,10 +4397,13 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
                 rxqs = xrealloc(rxqs, sizeof *rxqs * (n_rxqs + 1));
             }
-            /* Sum the queue intervals and store the cycle history. */
-            for (unsigned i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
-                cycle_hist += dp_netdev_rxq_get_intrvl_cycles(q, i);
-            }
-            dp_netdev_rxq_set_cycles(q, RXQ_CYCLES_PROC_HIST, cycle_hist);

+            if (assign_cyc) {
+                /* Sum the queue intervals and store the cycle history. */
+                for (unsigned i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
+                    cycle_hist += dp_netdev_rxq_get_intrvl_cycles(q, i);
+                }
+                dp_netdev_rxq_set_cycles(q, RXQ_CYCLES_PROC_HIST,
+                                         cycle_hist);
+            }
             /* Store the queue. */
             rxqs[n_rxqs++] = q;
@@ -4380,5 +4412,5 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
     }

-    if (n_rxqs > 1) {
+    if (n_rxqs > 1 && assign_cyc) {
         /* Sort the queues in order of the processing cycles
          * they consumed during their last pmd interval. */
@@ -4404,5 +4436,5 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
                 continue;
             }
-            rxqs[i]->pmd = rr_numa_get_pmd(non_local_numa);
+            rxqs[i]->pmd = rr_numa_get_pmd(non_local_numa, assign_cyc);
             VLOG_WARN("There's no available (non-isolated) pmd thread "
                       "on numa node %d. Queue %d on port \'%s\' will "
@@ -4413,11 +4445,20 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
                       rxqs[i]->pmd->core_id, rxqs[i]->pmd->numa_id);
         } else {
-            rxqs[i]->pmd = rr_numa_get_pmd(numa);
-            VLOG_INFO("Core %d on numa node %d assigned port \'%s\' "
-                      "rx queue %d (measured processing cycles %"PRIu64").",
-                      rxqs[i]->pmd->core_id, numa_id,
-                      netdev_rxq_get_name(rxqs[i]->rx),
-                      netdev_rxq_get_queue_id(rxqs[i]->rx),
-                      dp_netdev_rxq_get_cycles(rxqs[i], RXQ_CYCLES_PROC_HIST));
+            rxqs[i]->pmd = rr_numa_get_pmd(numa, assign_cyc);
+            if (assign_cyc) {
+                VLOG_INFO("Core %d on numa node %d assigned port \'%s\' "
+                          "rx queue %d "
+                          "(measured processing cycles %"PRIu64").",
+                          rxqs[i]->pmd->core_id, numa_id,
+                          netdev_rxq_get_name(rxqs[i]->rx),
+                          netdev_rxq_get_queue_id(rxqs[i]->rx),
+                          dp_netdev_rxq_get_cycles(rxqs[i],
+                                                   RXQ_CYCLES_PROC_HIST));
+            } else {
+                VLOG_INFO("Core %d on numa node %d assigned port \'%s\' "
+                          "rx queue %d.", rxqs[i]->pmd->core_id, numa_id,
+                          netdev_rxq_get_name(rxqs[i]->rx),
+                          netdev_rxq_get_queue_id(rxqs[i]->rx));
+            }
         }
     }
diff --git a/tests/pmd.at b/tests/pmd.at
index 4cae6c8..1f952f3 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -62,5 +62,6 @@ m4_define([CHECK_PMD_THREADS_CREATED], [

 m4_define([SED_NUMA_CORE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/"])
-m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/1 2 5 6/<group>/;s/0 3 4 7/<group>/"])
+m4_define([SED_NUMA_CORE_QUEUE_CYC_PATTERN], ["s/1 2 5 6/<group>/;s/0 3 4 7/<group>/"])
+m4_define([SED_NUMA_CORE_QUEUE_PQ_PATTERN], ["s/1 3 5 7/<group>/;s/0 2 4 6/<group>/"])
 m4_define([DUMMY_NUMA], [--dummy-numa="0,0,0,0"])

@@ -146,9 +147,16 @@ pmd thread numa_id <cleared> core_id <cleared>:
 ])

+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=cycles])
 TMP=$(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])
 AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x3])
 CHECK_PMD_THREADS_CREATED([2], [], [+$TMP])

-AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed ':a;/AVAIL$/{N;s/\n//;ba;}' | parse_pmd_rxq_show_group | sed SED_NUMA_CORE_QUEUE_PATTERN], [0], [dnl
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed ':a;/AVAIL$/{N;s/\n//;ba;}' | parse_pmd_rxq_show_group | sed SED_NUMA_CORE_QUEUE_CYC_PATTERN], [0], [dnl
+port: p0 queue-id: <group>
+port: p0 queue-id: <group>
+])
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=roundrobin])
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed ':a;/AVAIL$/{N;s/\n//;ba;}' | parse_pmd_rxq_show_group | sed SED_NUMA_CORE_QUEUE_PQ_PATTERN], [0], [dnl
 port: p0 queue-id: <group>
 port: p0 queue-id: <group>
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index e318151..91d132d 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -433,4 +433,28 @@
       </column>

+      <column name="other_config" key="pmd-rxq-assign"
+              type='{"type": "string",
+                     "enum": ["set", ["cycles", "roundrobin"]]}'>
+        <p>
+          Specifies how RX queues will be automatically assigned to CPU cores.
+          Options:
+          <dl>
+            <dt><code>cycles</code></dt>
+            <dd>Rxqs will be sorted by order of measured processing cycles
+                before being assigned to CPU cores.</dd>
+            <dt><code>roundrobin</code></dt>
+            <dd>Rxqs will be round-robined across CPU cores.</dd>
+          </dl>
+        </p>
+        <p>
+          The default value is <code>cycles</code>.
+        </p>
+        <p>
+          Changing this value will affect an automatic re-assignment of Rxqs to
+          CPUs. Note: Rxqs mapped to CPU cores with
+          <code>pmd-rxq-affinity</code> are unaffected.
+        </p>
+      </column>
+
       <column name="other_config" key="n-handler-threads"
               type='{"type": "integer", "minInteger": 1}'>
--
1.8.3.1


From 4c91bc3bf8c6005db5795fe51632c1feedc4719e Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Tue, 18 Sep 2018 14:56:37 +0200
Subject: [PATCH v2] dpif-netlink: don't allocate per thread netlink sockets

When using the kernel datapath, OVS allocates a pool of sockets to handle
netlink events. The number of sockets is: ports * n-handler-threads, where
n-handler-threads is user configurable and defaults to 3/4 * number of cores.

This is because vswitchd starts n-handler-threads threads, each one with a
netlink socket for every port of the switch. Every thread then starts
listening for events on its set of sockets with epoll().

On setups with lots of CPUs and ports, the number of sockets easily hits
the process file descriptor limit, and ovs-vswitchd will exit with -EMFILE.

Change the number of allocated sockets to just one per port by moving
the socket array from a per-handler structure to a per-datapath one,
and let all the handlers share the same sockets by using the EPOLLEXCLUSIVE
epoll flag, which avoids duplicate events, on systems that support it.

The patch was tested on a 56 core machine running Linux 4.18 and the latest
Open vSwitch. A bridge was created with 2000+ ports, some of them being
veth interfaces with the peer outside the bridge. The latency of the upcall
is measured by setting a single 'action=controller,local' OpenFlow rule to
force all the packets going to the slow path and then to the local port.
A tool[1] injects some packets to the veth outside the bridge, and measures
the delay until the packet is captured on the local port. The rx timestamp
is taken from the socket ancillary data in the attribute SO_TIMESTAMPNS, to
avoid having the scheduler delay in the measured time.

The first test measures the average latency for an upcall generated from
a single port. To measure it, 100k packets, one every msec, are sent to a
single port and the latencies are measured.

The second test is meant to check latency fairness among ports, namely
whether latency is equal between ports or some ports have lower priority.
The previous test is repeated for every port, then the average of the
average latencies and the standard deviation between averages are measured.

The third test serves to measure responsiveness under load. Heavy traffic
is sent through all ports, and latency and packet loss are measured
on a single idle port.

The fourth test is all about fairness. Heavy traffic is injected in all
ports but one, and latency and packet loss are measured on the single
idle port.

This is the test setup:

# nproc
56
# ovs-vsctl show |grep -c Port
2223
# ovs-ofctl dump-flows ovs_upc_br
cookie=0x0, duration=4.827s, table=0, n_packets=0, n_bytes=0, actions=CONTROLLER:65535,LOCAL
# uname -a
Linux fc28 4.18.7-200.fc28.x86_64 #1 SMP Mon Sep 10 15:44:45 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux

And these are the results of the tests:

                                        Stock OVS             Patched
netlink sockets
in use by vswitchd
lsof -p $(pidof ovs-vswitchd) \
|grep -c GENERIC                            91187                2227

Test 1
one port latency
min/avg/max/mdev (us)           2.7/6.6/238.7/1.8   1.6/6.8/160.6/1.7

Test 2
all port
avg latency/mdev (us)                   6.51/0.97           6.86/0.17

Test 3
single port latency
under load
avg/mdev (us)                             7.5/5.9             3.8/4.8
packet loss                                  95 %                62 %

Test 4
idle port latency
under load
min/avg/max/mdev (us)           0.8/1.5/210.5/0.9   1.0/2.1/344.5/1.2
packet loss                                  94 %                 4 %

CPU and RAM usage seem not to be affected; the resource usage of vswitchd
idle with 2000+ ports is unchanged:

# ps u $(pidof ovs-vswitchd)
USER       PID %CPU %MEM     VSZ    RSS TTY    STAT START  TIME COMMAND
openvsw+  5430 54.3  0.3 4263964 510968 pts/1  RLl+ 16:20  0:50 ovs-vswitchd

Additionally, to check that vswitchd is thread safe with this patch, the
following test was run for circa 48 hours: on a 56 core machine, a
bridge with kernel datapath is filled with 2200 dummy interfaces and 22
veth, then 22 traffic generators are run in parallel, piping traffic into
the veth peers outside the bridge.
To generate as many upcalls as possible, all packets were forced to the
slowpath with an OpenFlow rule like 'action=controller,local' and packet
size was set to 64 bytes. Also, to avoid overflowing the FDB early and
slowing down the upcall processing, generated MAC addresses were restricted
to a small interval. vswitchd ran without problems for 48+ hours,
obviously with all the handler threads at almost 99% CPU usage.

[1] https://github.com/teknoraver/network-tools/blob/master/weed.c

Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
v1 -> v2:
- define EPOLLEXCLUSIVE on systems with older kernel headers
- explain the thread safety test in the commit message

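Note on the kernel facility this relies on: EPOLLEXCLUSIVE (Linux 4.5+)
makes the kernel wake up only a subset (typically one) of the epoll
instances watching the same file descriptor, instead of all of them, so
every handler thread can watch the single per-port socket without
thundering-herd wakeups.  A minimal sketch of registering a shared fd this
way; the fallback #define mirrors the one added by the patch, and
watch_exclusive() is an illustrative helper, not an OVS function:

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/epoll.h>
    #include <unistd.h>

    /* Older kernel headers may lack the flag; the value is fixed ABI. */
    #ifndef EPOLLEXCLUSIVE
    #define EPOLLEXCLUSIVE (1u << 28)
    #endif

    /* Adds 'fd' to a per-thread epoll instance so that an event on
     * 'fd' does not wake every epoll_wait()er sharing it. */
    static int
    watch_exclusive(int epoll_fd, int fd, uint32_t port_idx)
    {
        struct epoll_event event = {
            .events = EPOLLIN | EPOLLEXCLUSIVE,
            .data.u32 = port_idx,
        };

        return epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &event);
    }

    int
    main(void)
    {
        int fds[2];
        if (pipe(fds)) {
            return 1;
        }

        /* Two epoll instances share the read end, as two handler
         * threads would share one per-port netlink socket. */
        int ep1 = epoll_create1(0);
        int ep2 = epoll_create1(0);
        if (ep1 < 0 || ep2 < 0
            || watch_exclusive(ep1, fds[0], 0)
            || watch_exclusive(ep2, fds[0], 0)) {
            return 1;
        }
        printf("shared fd registered exclusively on both instances\n");
        return 0;
    }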
 lib/dpif-netlink.c | 311 ++++++++++++---------------------------------
 1 file changed, 82 insertions(+), 229 deletions(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index e6d5a6ec5..bb565ffee 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -78,6 +78,10 @@ enum { MAX_PORTS = USHRT_MAX };
 #define FLOW_DUMP_MAX_BATCH 50
 #define OPERATE_MAX_OPS 50

+#ifndef EPOLLEXCLUSIVE
+#define EPOLLEXCLUSIVE (1u << 28)
+#endif
+
 struct dpif_netlink_dp {
     /* Generic Netlink header. */
     uint8_t cmd;
@@ -170,7 +174,6 @@ struct dpif_windows_vport_sock {
 #endif

 struct dpif_handler {
-    struct dpif_channel *channels;/* Array of channels for each handler. */
     struct epoll_event *epoll_events;
    int epoll_fd;                 /* epoll fd that includes channel socks. */
     int n_events;                 /* Num events returned by epoll_wait(). */
@@ -193,6 +196,7 @@ struct dpif_netlink {
     struct fat_rwlock upcall_lock;
     struct dpif_handler *handlers;
     uint32_t n_handlers;           /* Num of upcall handlers. */
+    struct dpif_channel *channels; /* Array of channels for each port. */
     int uc_array_size;             /* Size of 'handler->channels' and */
                                    /* 'handler->epoll_events'. */

@@ -331,43 +335,6 @@ open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
     return 0;
 }

-/* Destroys the netlink sockets pointed by the elements in 'socksp'
- * and frees the 'socksp'.  */
-static void
-vport_del_socksp__(struct nl_sock **socksp, uint32_t n_socks)
-{
-    size_t i;
-
-    for (i = 0; i < n_socks; i++) {
-        nl_sock_destroy(socksp[i]);
-    }
-
-    free(socksp);
-}
-
-/* Creates an array of netlink sockets.  Returns an array of the
- * corresponding pointers.  Records the error in 'error'. */
-static struct nl_sock **
-vport_create_socksp__(uint32_t n_socks, int *error)
-{
-    struct nl_sock **socksp = xzalloc(n_socks * sizeof *socksp);
-    size_t i;
-
-    for (i = 0; i < n_socks; i++) {
-        *error = nl_sock_create(NETLINK_GENERIC, &socksp[i]);
-        if (*error) {
-            goto error;
-        }
-    }
-
-    return socksp;
-
-error:
-    vport_del_socksp__(socksp, n_socks);
-
-    return NULL;
-}
-
 #ifdef _WIN32
 static void
 vport_delete_sock_pool(struct dpif_handler *handler)
@@ -422,129 +389,34 @@ error:
     vport_delete_sock_pool(handler);
     return error;
 }
-
-/* Returns an array pointers to netlink sockets.  The sockets are picked from a
- * pool. Records the error in 'error'. */
-static struct nl_sock **
-vport_create_socksp_windows(struct dpif_netlink *dpif, int *error)
-    OVS_REQ_WRLOCK(dpif->upcall_lock)
-{
-    uint32_t n_socks = dpif->n_handlers;
-    struct nl_sock **socksp;
-    size_t i;
-
-    ovs_assert(n_socks <= 1);
-    socksp = xzalloc(n_socks * sizeof *socksp);
-
-    /* Pick netlink sockets to use in a round-robin fashion from each
-     * handler's pool of sockets. */
-    for (i = 0; i < n_socks; i++) {
-        struct dpif_handler *handler = &dpif->handlers[i];
-        struct dpif_windows_vport_sock *sock_pool = handler->vport_sock_pool;
-        size_t index = handler->last_used_pool_idx;
-
-        /* A pool of sockets is allocated when the handler is initialized. */
-        if (sock_pool == NULL) {
-            free(socksp);
-            *error = EINVAL;
-            return NULL;
-        }
-
-        ovs_assert(index < VPORT_SOCK_POOL_SIZE);
-        socksp[i] = sock_pool[index].nl_sock;
-        socksp[i] = sock_pool[index].nl_sock;
-        ovs_assert(socksp[i]);
-        index = (index == VPORT_SOCK_POOL_SIZE - 1) ? 0 : index + 1;
-        handler->last_used_pool_idx = index;
-    }
-
-    return socksp;
-}
-
-static void
-vport_del_socksp_windows(struct dpif_netlink *dpif, struct nl_sock **socksp)
-{
-    free(socksp);
-}
 #endif /* _WIN32 */

-static struct nl_sock **
-vport_create_socksp(struct dpif_netlink *dpif, int *error)
-{
-#ifdef _WIN32
-    return vport_create_socksp_windows(dpif, error);
-#else
-    return vport_create_socksp__(dpif->n_handlers, error);
-#endif
-}
-
-static void
-vport_del_socksp(struct dpif_netlink *dpif, struct nl_sock **socksp)
-{
-#ifdef _WIN32
-    vport_del_socksp_windows(dpif, socksp);
-#else
-    vport_del_socksp__(socksp, dpif->n_handlers);
-#endif
-}
-
-/* Given the array of pointers to netlink sockets 'socksp', returns
- * the array of corresponding pids. If the 'socksp' is NULL, returns
- * a single-element array of value 0. */
-static uint32_t *
-vport_socksp_to_pids(struct nl_sock **socksp, uint32_t n_socks)
-{
-    uint32_t *pids;
-
-    if (!socksp) {
-        pids = xzalloc(sizeof *pids);
-    } else {
-        size_t i;
-
-        pids = xzalloc(n_socks * sizeof *pids);
-        for (i = 0; i < n_socks; i++) {
-            pids[i] = nl_sock_pid(socksp[i]);
-        }
-    }
-
-    return pids;
-}
-
-/* Given the port number 'port_idx', extracts the pids of netlink sockets
- * associated to the port and assigns it to 'upcall_pids'. */
+/* Given the port number 'port_idx', extracts the pid of netlink socket
+ * associated to the port and assigns it to 'upcall_pid'. */
 static bool
-vport_get_pids(struct dpif_netlink *dpif, uint32_t port_idx,
-               uint32_t **upcall_pids)
+vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
+              uint32_t *upcall_pid)
 {
-    uint32_t *pids;
-    size_t i;
-
     /* Since the nl_sock can only be assigned in either all
-     * or none "dpif->handlers" channels, the following check
+     * or none "dpif" channels, the following check
      * would suffice. */
-    if (!dpif->handlers[0].channels[port_idx].sock) {
+    if (!dpif->channels[port_idx].sock) {
         return false;
     }
     ovs_assert(!WINDOWS || dpif->n_handlers <= 1);

-    pids = xzalloc(dpif->n_handlers * sizeof *pids);
-
-    for (i = 0; i < dpif->n_handlers; i++) {
-        pids[i] = nl_sock_pid(dpif->handlers[i].channels[port_idx].sock);
-    }
-
-    *upcall_pids = pids;
+    *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);

     return true;
 }

 static int
-vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
-                   struct nl_sock **socksp)
+vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
+                  struct nl_sock *socksp)
 {
     struct epoll_event event;
     uint32_t port_idx = odp_to_u32(port_no);
-    size_t i, j;
+    size_t i;
     int error;

     if (dpif->handlers == NULL) {
@@ -553,7 +425,7 @@ vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,

     /* We assume that the datapath densely chooses port numbers, which can
      * therefore be used as an index into 'channels' and 'epoll_events' of
-     * 'dpif->handler'. */
+     * 'dpif'. */
     if (port_idx >= dpif->uc_array_size) {
         uint32_t new_size = port_idx + 1;

@@ -563,15 +435,15 @@ vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
             return EFBIG;
         }

-        for (i = 0; i < dpif->n_handlers; i++) {
-            struct dpif_handler *handler = &dpif->handlers[i];
+        dpif->channels = xrealloc(dpif->channels,
+                                  new_size * sizeof *dpif->channels);

-            handler->channels = xrealloc(handler->channels,
-                                         new_size * sizeof *handler->channels);
+        for (i = dpif->uc_array_size; i < new_size; i++) {
+            dpif->channels[i].sock = NULL;
+        }

-            for (j = dpif->uc_array_size; j < new_size; j++) {
-                handler->channels[j].sock = NULL;
-            }
+        for (i = 0; i < dpif->n_handlers; i++) {
+            struct dpif_handler *handler = &dpif->handlers[i];

             handler->epoll_events = xrealloc(handler->epoll_events,
                 new_size * sizeof *handler->epoll_events);
@@ -581,33 +453,33 @@ vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
     }

     memset(&event, 0, sizeof event);
-    event.events = EPOLLIN;
+    event.events = EPOLLIN | EPOLLEXCLUSIVE;
     event.data.u32 = port_idx;

     for (i = 0; i < dpif->n_handlers; i++) {
         struct dpif_handler *handler = &dpif->handlers[i];

 #ifndef _WIN32
-        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp[i]),
+        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp),
                       &event) < 0) {
             error = errno;
             goto error;
         }
 #endif
-        dpif->handlers[i].channels[port_idx].sock = socksp[i];
-        dpif->handlers[i].channels[port_idx].last_poll = LLONG_MIN;
     }
+    dpif->channels[port_idx].sock = socksp;
+    dpif->channels[port_idx].last_poll = LLONG_MIN;

     return 0;

 error:
-    for (j = 0; j < i; j++) {
 #ifndef _WIN32
-        epoll_ctl(dpif->handlers[j].epoll_fd, EPOLL_CTL_DEL,
-                  nl_sock_fd(socksp[j]), NULL);
-#endif
-        dpif->handlers[j].channels[port_idx].sock = NULL;
+    while (i--) {
+        epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
+                  nl_sock_fd(socksp), NULL);
     }
+#endif
+    dpif->channels[port_idx].sock = NULL;

     return error;
 }
@@ -618,14 +490,8 @@ vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
     uint32_t port_idx = odp_to_u32(port_no);
     size_t i;

-    if (!dpif->handlers || port_idx >= dpif->uc_array_size) {
-        return;
-    }
-
-    /* Since the sock can only be assigned in either all or none
-     * of "dpif->handlers" channels, the following check would
-     * suffice. */
-    if (!dpif->handlers[0].channels[port_idx].sock) {
+    if (!dpif->handlers || port_idx >= dpif->uc_array_size
+        || !dpif->channels[port_idx].sock) {
         return;
     }

@@ -633,12 +499,14 @@ vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
         struct dpif_handler *handler = &dpif->handlers[i];
 #ifndef _WIN32
         epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
-                  nl_sock_fd(handler->channels[port_idx].sock), NULL);
-        nl_sock_destroy(handler->channels[port_idx].sock);
+                  nl_sock_fd(dpif->channels[port_idx].sock), NULL);
 #endif
-        handler->channels[port_idx].sock = NULL;
         handler->event_offset = handler->n_events = 0;
     }
+#ifndef _WIN32
+    nl_sock_destroy(dpif->channels[port_idx].sock);
+#endif
+    dpif->channels[port_idx].sock = NULL;
 }

 static void
@@ -655,10 +523,7 @@ destroy_all_channels(struct dpif_netlink *dpif)
         struct dpif_netlink_vport vport_request;
         uint32_t upcall_pids = 0;

-        /* Since the sock can only be assigned in either all or none
-         * of "dpif->handlers" channels, the following check would
-         * suffice. */
-        if (!dpif->handlers[0].channels[i].sock) {
+        if (!dpif->channels[i].sock) {
             continue;
         }

@@ -679,11 +544,11 @@ destroy_all_channels(struct dpif_netlink *dpif)

         dpif_netlink_handler_uninit(handler);
         free(handler->epoll_events);
-        free(handler->channels);
     }
-
+    free(dpif->channels);
     free(dpif->handlers);
     dpif->handlers = NULL;
+    dpif->channels = NULL;
     dpif->n_handlers = 0;
     dpif->uc_array_size = 0;
 }
@@ -846,13 +711,12 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
 {
     struct dpif_netlink_vport request, reply;
     struct ofpbuf *buf;
-    struct nl_sock **socksp = NULL;
-    uint32_t *upcall_pids;
+    struct nl_sock *socksp = NULL;
+    uint32_t upcall_pids;
     int error = 0;

     if (dpif->handlers) {
-        socksp = vport_create_socksp(dpif, &error);
-        if (!socksp) {
+        if (nl_sock_create(NETLINK_GENERIC, &socksp)) {
             return error;
         }
     }
@@ -864,9 +728,9 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
     request.name = name;

     request.port_no = *port_nop;
-    upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
-    request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
-    request.upcall_pids = upcall_pids;
+    upcall_pids = nl_sock_pid(socksp);
+    request.n_upcall_pids = 1;
+    request.upcall_pids = &upcall_pids;

     if (options) {
         request.options = options->data;
@@ -882,31 +746,27 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
                       dpif_name(&dpif->dpif), *port_nop);
         }

-        vport_del_socksp(dpif, socksp);
+        nl_sock_destroy(socksp);
         goto exit;
     }

-    if (socksp) {
-        error = vport_add_channels(dpif, *port_nop, socksp);
-        if (error) {
-            VLOG_INFO("%s: could not add channel for port %s",
-                      dpif_name(&dpif->dpif), name);
-
-            /* Delete the port. */
-            dpif_netlink_vport_init(&request);
-            request.cmd = OVS_VPORT_CMD_DEL;
-            request.dp_ifindex = dpif->dp_ifindex;
-            request.port_no = *port_nop;
-            dpif_netlink_vport_transact(&request, NULL, NULL);
-            vport_del_socksp(dpif, socksp);
-            goto exit;
-        }
+    error = vport_add_channel(dpif, *port_nop, socksp);
+    if (error) {
+        VLOG_INFO("%s: could not add channel for port %s",
+                  dpif_name(&dpif->dpif), name);
+
+        /* Delete the port. */
+        dpif_netlink_vport_init(&request);
+        request.cmd = OVS_VPORT_CMD_DEL;
+        request.dp_ifindex = dpif->dp_ifindex;
+        request.port_no = *port_nop;
+        dpif_netlink_vport_transact(&request, NULL, NULL);
+        nl_sock_destroy(socksp);
+        goto exit;
     }
-    free(socksp);

 exit:
     ofpbuf_delete(buf);
-    free(upcall_pids);

     return error;
 }
@@ -1131,7 +991,7 @@ dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,

 static uint32_t
 dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
-                            odp_port_t port_no, uint32_t hash)
+                            odp_port_t port_no, uint32_t hash OVS_UNUSED)
     OVS_REQ_RDLOCK(dpif->upcall_lock)
 {
     uint32_t port_idx = odp_to_u32(port_no);
@@ -1141,14 +1001,13 @@ dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
         /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
          * channel, since it is not heavily loaded. */
         uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
-        struct dpif_handler *h = &dpif->handlers[hash % dpif->n_handlers];

         /* Needs to check in case the socket pointer is changed in between
          * the holding of upcall_lock.  A known case happens when the main
          * thread deletes the vport while the handler thread is handling
          * the upcall from that port. */
-        if (h->channels[idx].sock) {
-            pid = nl_sock_pid(h->channels[idx].sock);
+        if (dpif->channels[idx].sock) {
+            pid = nl_sock_pid(dpif->channels[idx].sock);
         }
     }

@@ -2382,42 +2241,40 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
     dpif_netlink_port_dump_start__(dpif, &dump);
     while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
         uint32_t port_no = odp_to_u32(vport.port_no);
-        uint32_t *upcall_pids = NULL;
+        uint32_t upcall_pid;
         int error;

         if (port_no >= dpif->uc_array_size
-            || !vport_get_pids(dpif, port_no, &upcall_pids)) {
-            struct nl_sock **socksp = vport_create_socksp(dpif, &error);
+            || !vport_get_pid(dpif, port_no, &upcall_pid)) {
+            struct nl_sock *socksp;

-            if (!socksp) {
|
|
||||||
+ if (nl_sock_create(NETLINK_GENERIC, &socksp)) {
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
- error = vport_add_channels(dpif, vport.port_no, socksp);
|
|
||||||
+ error = vport_add_channel(dpif, vport.port_no, socksp);
|
|
||||||
if (error) {
|
|
||||||
VLOG_INFO("%s: could not add channels for port %s",
|
|
||||||
dpif_name(&dpif->dpif), vport.name);
|
|
||||||
- vport_del_socksp(dpif, socksp);
|
|
||||||
+ nl_sock_destroy(socksp);
|
|
||||||
retval = error;
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
- upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
|
|
||||||
- free(socksp);
|
|
||||||
+ upcall_pid = nl_sock_pid(socksp);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Configure the vport to deliver misses to 'sock'. */
|
|
||||||
if (vport.upcall_pids[0] == 0
|
|
||||||
- || vport.n_upcall_pids != dpif->n_handlers
|
|
||||||
- || memcmp(upcall_pids, vport.upcall_pids, n_handlers * sizeof
|
|
||||||
- *upcall_pids)) {
|
|
||||||
+ || vport.n_upcall_pids != 1
|
|
||||||
+ || upcall_pid != vport.upcall_pids[0]) {
|
|
||||||
struct dpif_netlink_vport vport_request;
|
|
||||||
|
|
||||||
dpif_netlink_vport_init(&vport_request);
|
|
||||||
vport_request.cmd = OVS_VPORT_CMD_SET;
|
|
||||||
vport_request.dp_ifindex = dpif->dp_ifindex;
|
|
||||||
vport_request.port_no = vport.port_no;
|
|
||||||
- vport_request.n_upcall_pids = dpif->n_handlers;
|
|
||||||
- vport_request.upcall_pids = upcall_pids;
|
|
||||||
+ vport_request.n_upcall_pids = 1;
|
|
||||||
+ vport_request.upcall_pids = &upcall_pid;
|
|
||||||
error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
|
|
||||||
if (error) {
|
|
||||||
VLOG_WARN_RL(&error_rl,
|
|
||||||
@@ -2438,11 +2295,9 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
|
|
||||||
if (port_no < keep_channels_nbits) {
|
|
||||||
bitmap_set1(keep_channels, port_no);
|
|
||||||
}
|
|
||||||
- free(upcall_pids);
|
|
||||||
continue;
|
|
||||||
|
|
||||||
error:
|
|
||||||
- free(upcall_pids);
|
|
||||||
vport_del_channels(dpif, vport.port_no);
|
|
||||||
}
|
|
||||||
nl_dump_done(&dump);
|
|
||||||
@@ -2701,7 +2556,7 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
|
|
||||||
|
|
||||||
while (handler->event_offset < handler->n_events) {
|
|
||||||
int idx = handler->epoll_events[handler->event_offset].data.u32;
|
|
||||||
- struct dpif_channel *ch = &dpif->handlers[handler_id].channels[idx];
|
|
||||||
+ struct dpif_channel *ch = &dpif->channels[idx];
|
|
||||||
|
|
||||||
handler->event_offset++;
|
|
||||||
|
|
||||||
@@ -2803,16 +2658,14 @@ dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
|
|
||||||
OVS_REQ_WRLOCK(dpif->upcall_lock)
|
|
||||||
{
|
|
||||||
if (dpif->handlers) {
|
|
||||||
- size_t i, j;
|
|
||||||
+ size_t i;
|
|
||||||
|
|
||||||
+ if (!dpif->channels[0].sock) {
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
for (i = 0; i < dpif->uc_array_size; i++ ) {
|
|
||||||
- if (!dpif->handlers[0].channels[i].sock) {
|
|
||||||
- continue;
|
|
||||||
- }
|
|
||||||
|
|
||||||
- for (j = 0; j < dpif->n_handlers; j++) {
|
|
||||||
- nl_sock_drain(dpif->handlers[j].channels[i].sock);
|
|
||||||
- }
|
|
||||||
+ nl_sock_drain(dpif->channels[i].sock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--
|
|
||||||
2.17.1
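
Taken together, the hunks above collapse the old per-handler handler->channels[] arrays into a single per-datapath dpif->channels[] array holding one Netlink upcall socket per vport. A minimal sketch of the resulting lookup follows; the stand-in types are simplified assumptions for illustration, not the real definitions from lib/dpif-netlink.c.

#include <stddef.h>
#include <stdint.h>

struct nl_sock;                     /* opaque Netlink socket handle */

struct dpif_channel_sketch {
    struct nl_sock *sock;           /* one upcall socket per vport */
};

struct dpif_netlink_sketch {
    struct dpif_channel_sketch *channels;  /* indexed by port number and
                                            * shared by every handler
                                            * thread after this patch */
    size_t uc_array_size;                  /* allocated channel slots */
};

/* After the patch, every handler thread polls the same per-port socket,
 * so the lookup needs neither a handler index nor a hash. */
static struct nl_sock *
port_upcall_sock(const struct dpif_netlink_sketch *dpif, uint32_t port_idx)
{
    if (!dpif->channels || port_idx >= dpif->uc_array_size) {
        return NULL;
    }
    return dpif->channels[port_idx].sock;
}

Sharing one socket per port among all handlers is what lets the patch drop vport_create_socksp() and the per-thread upcall_pids arrays in favor of a single nl_sock_create()/nl_sock_pid() pair per port.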

@@ -0,0 +1,179 @@
From 71981938b2db070c130ec717aab141cd9c0fa860 Mon Sep 17 00:00:00 2001
From: Numan Siddique <nusiddiq@redhat.com>
Date: Tue, 6 Nov 2018 11:59:38 +0530
Subject: [PATCH] ovn-nbctl: Fix the ovn-nbctl test "LBs - daemon" which fails
 during rpm build

When 'make check' is called by the mock rpm build (which disables networking),
the test "ovn-nbctl: LBs - daemon" fails when it runs the command
"ovn-nbctl lb-add lb0 30.0.0.1a 192.168.10.10:80,192.168.10.20:80". ovn-nbctl
extracts the vip by calling the socket util function 'inet_parse_active()',
and this function blocks when the libunbound function ub_resolve() is called
further down. ub_resolve() is a blocking function with no timeout, and all
the ovs/ovn utilities use this function.

As reported by Timothy Redaelli, the issue can also be reproduced by running
the commands below:

$ sudo unshare -mn -- sh -c 'ip addr add dev lo 127.0.0.1 && \
  mount --bind /dev/null /etc/resolv.conf && runuser $SUDO_USER'
$ make sandbox SANDBOXFLAGS="--ovn"
$ ovn-nbctl -vsocket_util:off lb-add lb0 30.0.0.1a \
  192.168.10.10:80,192.168.10.20:80

To address this issue, this patch adds a new bool argument 'resolve_host' to
the function inet_parse_active() to resolve the host only if it is 'true'.

ovn-nbctl/ovn-northd will pass 'false' when they call this function to parse
the load balancer values.

Reported-by: Timothy Redaelli <tredaelli@redhat.com>
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1641672
Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
---
 lib/socket-util.c            | 7 ++++---
 lib/socket-util.h            | 2 +-
 lib/stream.c                 | 2 +-
 ofproto/ofproto-dpif-sflow.c | 2 +-
 ovn/northd/ovn-northd.c      | 2 +-
 ovn/utilities/ovn-nbctl.c    | 6 +++---
 ovsdb/raft-private.c         | 2 +-
 7 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/lib/socket-util.c b/lib/socket-util.c
index 504f4cd59..5f82e89c1 100644
--- a/lib/socket-util.c
+++ b/lib/socket-util.c
@@ -518,12 +518,13 @@ exit:
  * is optional and defaults to 'default_port' (use 0 to make the kernel choose
  * an available port, although this isn't usually appropriate for active
  * connections). If 'default_port' is negative, then <port> is required.
+ * It resolves the host if 'resolve_host' is true.
  *
  * On success, returns true and stores the parsed remote address into '*ss'.
  * On failure, logs an error, stores zeros into '*ss', and returns false. */
 bool
 inet_parse_active(const char *target_, int default_port,
-                  struct sockaddr_storage *ss)
+                  struct sockaddr_storage *ss, bool resolve_host)
 {
     char *target = xstrdup(target_);
     char *port, *host;
@@ -538,7 +539,7 @@ inet_parse_active(const char *target_, int default_port,
         ok = false;
     } else {
         ok = parse_sockaddr_components(ss, host, port, default_port,
-                                       target_, true);
+                                       target_, resolve_host);
     }
     if (!ok) {
         memset(ss, 0, sizeof *ss);
@@ -575,7 +576,7 @@ inet_open_active(int style, const char *target, int default_port,
     int error;
 
     /* Parse. */
-    if (!inet_parse_active(target, default_port, &ss)) {
+    if (!inet_parse_active(target, default_port, &ss, true)) {
         error = EAFNOSUPPORT;
         goto exit;
     }
diff --git a/lib/socket-util.h b/lib/socket-util.h
index 6d386304d..a65433d90 100644
--- a/lib/socket-util.h
+++ b/lib/socket-util.h
@@ -49,7 +49,7 @@ ovs_be32 guess_netmask(ovs_be32 ip);
 void inet_parse_host_port_tokens(char *s, char **hostp, char **portp);
 void inet_parse_port_host_tokens(char *s, char **portp, char **hostp);
 bool inet_parse_active(const char *target, int default_port,
-                       struct sockaddr_storage *ssp);
+                       struct sockaddr_storage *ssp, bool resolve_host);
 int inet_open_active(int style, const char *target, int default_port,
                      struct sockaddr_storage *ssp, int *fdp, uint8_t dscp);
 
diff --git a/lib/stream.c b/lib/stream.c
index 4e15fe0c8..c4dabda39 100644
--- a/lib/stream.c
+++ b/lib/stream.c
@@ -751,7 +751,7 @@ stream_parse_target_with_default_port(const char *target, int default_port,
                                       struct sockaddr_storage *ss)
 {
     return ((!strncmp(target, "tcp:", 4) || !strncmp(target, "ssl:", 4))
-            && inet_parse_active(target + 4, default_port, ss));
+            && inet_parse_active(target + 4, default_port, ss, true));
 }
 
 /* Attempts to guess the content type of a stream whose first few bytes were
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index 62a09b5d1..7da31753c 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -468,7 +468,7 @@ sflow_choose_agent_address(const char *agent_device,
     const char *target;
     SSET_FOR_EACH (target, targets) {
         struct sockaddr_storage ss;
-        if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &ss)) {
+        if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &ss, true)) {
             /* sFlow only supports target in default routing table with
              * packet mark zero.
              */
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 5e61708be..d59fc45ca 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -3204,7 +3204,7 @@ ip_address_and_port_from_lb_key(const char *key, char **ip_address,
                                 uint16_t *port, int *addr_family)
 {
     struct sockaddr_storage ss;
-    if (!inet_parse_active(key, 0, &ss)) {
+    if (!inet_parse_active(key, 0, &ss, false)) {
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
         VLOG_WARN_RL(&rl, "bad ip address or port for load balancer key %s",
                      key);
diff --git a/ovn/utilities/ovn-nbctl.c b/ovn/utilities/ovn-nbctl.c
index 42aac2251..09bbcf76a 100644
--- a/ovn/utilities/ovn-nbctl.c
+++ b/ovn/utilities/ovn-nbctl.c
@@ -2553,7 +2553,7 @@ nbctl_lb_add(struct ctl_context *ctx)
     }
 
     struct sockaddr_storage ss_vip;
-    if (!inet_parse_active(lb_vip, 0, &ss_vip)) {
+    if (!inet_parse_active(lb_vip, 0, &ss_vip, false)) {
         ctl_error(ctx, "%s: should be an IP address (or an IP address "
                   "and a port number with : as a separator).", lb_vip);
         return;
@@ -2583,7 +2583,7 @@ nbctl_lb_add(struct ctl_context *ctx)
         struct sockaddr_storage ss_dst;
 
         if (lb_vip_port) {
-            if (!inet_parse_active(token, -1, &ss_dst)) {
+            if (!inet_parse_active(token, -1, &ss_dst, false)) {
                 ctl_error(ctx, "%s: should be an IP address and a port "
                           "number with : as a separator.", token);
                 goto out;
@@ -2702,7 +2702,7 @@ lb_info_add_smap(const struct nbrec_load_balancer *lb,
         const struct smap_node *node = nodes[i];
 
         struct sockaddr_storage ss;
-        if (!inet_parse_active(node->key, 0, &ss)) {
+        if (!inet_parse_active(node->key, 0, &ss, false)) {
             continue;
         }
 
diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c
index 07996e35b..e5e2c29cf 100644
--- a/ovsdb/raft-private.c
+++ b/ovsdb/raft-private.c
@@ -33,7 +33,7 @@ raft_address_validate(const char *address)
         return NULL;
     } else if (!strncmp(address, "ssl:", 4) || !strncmp(address, "tcp:", 4)) {
         struct sockaddr_storage ss;
-        if (!inet_parse_active(address + 4, -1, &ss)) {
+        if (!inet_parse_active(address + 4, -1, &ss, true)) {
             return ovsdb_error(NULL, "%s: syntax error in address", address);
         }
         return NULL;
--
2.19.1
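
The new 'resolve_host' flag is easiest to see at a call site. Below is a hedged usage sketch assuming only the post-patch prototype from lib/socket-util.h shown above; parse_lb_vip() is a hypothetical helper written for illustration, not code from the patch.

#include <stdbool.h>
#include <stdio.h>
#include <sys/socket.h>
#include "socket-util.h"    /* post-patch inet_parse_active() prototype */

static bool
parse_lb_vip(const char *vip)
{
    struct sockaddr_storage ss;

    /* Passing 'false' skips the libunbound lookup that can block
     * indefinitely when networking is unavailable, which is exactly
     * what ovn-nbctl and ovn-northd want for load-balancer keys. */
    if (!inet_parse_active(vip, 0, &ss, false)) {
        fprintf(stderr, "%s: not a valid IP[:port]\n", vip);
        return false;
    }
    return true;
}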

@@ -1,70 +0,0 @@
From 72bb6af9f31f3d6a000a7f22f9a82939119f63af Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@ovn.org>
Date: Wed, 5 Sep 2018 16:51:09 -0700
Subject: [PATCH] ovn.at: Skip ACL rate-limiting test on slow/overloaded
 systems.

In the ACL rate-limiting test, we send three sets of 100 packets. One of
the sets drops packets at a rate of 10 per second, one at a rate of 5
per second, and one not at all. On my setup, it takes roughly 0.67
seconds to send those 300 packets, but we have reports of it taking over
15 seconds on other systems. The test was intended to allow some
flexibility in run-time, but it's very difficult to design a mechanism
that can handle all possibilities.

To prevent false test failures, this patch changes the test to check
the duration count of the meter, and if it's greater than nine seconds,
just skip the test.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Reported-by: Thomas Goirand <zigo@debian.org>
---
 tests/ovn.at | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/tests/ovn.at b/tests/ovn.at
index 031b6ada0..61b4f8924 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -6500,15 +6500,25 @@ for i in `seq 1 100`; do
     ovs-appctl netdev-dummy/receive lp1 'in_port(1),eth(src=f0:00:00:00:00:01,dst=f0:00:00:00:00:02),eth_type(0x0800),ipv4(src=192.168.1.2,dst=192.168.1.3,proto=6,tos=0,ttl=64,frag=no),tcp(src=7777,dst=82)'
 done
 
+# The rate at which packets are sent is highly system-dependent, so we
+# can't count on precise drop counts. To work around that, we just
+# check that exactly 100 "http-acl3" actions were logged and that there
+# were more "http-acl1" actions than "http-acl2" ones.
+OVS_WAIT_UNTIL([ test 100 = $(grep -c 'http-acl3' hv/ovn-controller.log) ])
+
+# On particularly slow or overloaded systems, the transmission rate may
+# be lower than the configured meter rate. To prevent false test
+# failures, we check the duration count of the meter, and if it's
+# greater than nine seconds, just skip the test.
+d_secs=$(as hv ovs-ofctl -O OpenFlow13 meter-stats br-int | grep "meter:1" | sed 's/.* duration:\([[0-9]]\{1,\}\)\.[[0-9]]\+s .*/\1/')
+
+echo "Meter duration: $d_secs"
+AT_SKIP_IF([test $d_secs -gt 9])
+
 # Print some information that may help debugging.
 as hv ovs-appctl -t ovn-controller meter-table-list
 as hv ovs-ofctl -O OpenFlow13 meter-stats br-int
 
-# The userspace meter implementation doesn't precisely enforce counts,
-# so we just check that exactly 100 "http-acl3" actions were logged and
-# that there were more "http-acl1" actions than "http-acl2" ones.
-OVS_WAIT_UNTIL([ test 100 = $(grep -c 'http-acl3' hv/ovn-controller.log) ])
-
 n_acl1=$(grep -c 'http-acl1' hv/ovn-controller.log)
 n_acl2=$(grep -c 'http-acl2' hv/ovn-controller.log)
 n_acl3=$(grep -c 'http-acl3' hv/ovn-controller.log)
@@ -6516,7 +6526,6 @@ n_acl3=$(grep -c 'http-acl3' hv/ovn-controller.log)
 AT_CHECK([ test $n_acl3 -gt $n_acl1 ], [0], [])
 AT_CHECK([ test $n_acl1 -gt $n_acl2 ], [0], [])
 
-
 OVN_CLEANUP([hv])
 AT_CLEANUP

--
2.17.1

@@ -1,38 +0,0 @@
From 949758946767ff79b4c3eb5eca755c6cf21643e3 Mon Sep 17 00:00:00 2001
From: Timothy Redaelli <tredaelli@redhat.com>
Date: Sun, 9 Sep 2018 14:20:02 +0200
Subject: [PATCH] ovs-save: Don't always include the default flow during
 restore

Currently the default flow (actions=NORMAL) is still present in the flow
table after the flow table is restored, even when the default flow had been
removed.

This commit changes the behaviour of the "ovs-save save-flows" command to use
"replace-flows" instead of "add-flows" to restore the flows. This is needed
so that the restored flow table always matches the table exactly as it was
saved.

Reported-by: Flavio Leitner <fbl@sysclose.org>
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1626096
Signed-off-by: Timothy Redaelli <tredaelli@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Gurucharan Shetty <guru@ovn.org>
---
 utilities/ovs-save | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/ovs-save b/utilities/ovs-save
index ea8fb6a45..2294583d6 100755
--- a/utilities/ovs-save
+++ b/utilities/ovs-save
@@ -121,7 +121,7 @@ save_flows () {
             cnt++;printf "{class="$1",type="$2",len="$3"}->"$4}'
         echo "'"
 
-        printf "%s" "ovs-ofctl -O $ofp_version add-flows ${bridge} " \
+        printf "%s" "ovs-ofctl -O $ofp_version replace-flows ${bridge} " \
             "\"$workdir/$bridge.flows.dump\""
 
         # If possible, use OpenFlow 1.4 atomic bundle transaction to add flows
--
2.17.1

@@ -1,41 +0,0 @@
Date: Sun, 2 Sep 2018 09:30:43 -0700
From: Ben Pfaff <blp@ovn.org>
To: dev@openvswitch.org
Cc: Ben Pfaff <blp@ovn.org>
Subject: [ovs-dev] [PATCH] ofproto-dpif-xlate: Fix translation of groups with
 no buckets.
Message-Id: <20180902163043.11210-1-blp@ovn.org>
List-Id: <ovs-dev.openvswitch.org>
X-Bogosity: Unsure, tests=bogofilter, spamicity=0.500000, version=1.2.4

A group can have no buckets, in which case ovs_list_back() assert-fails.
This fixes the problem.

Found by OFTest.

Fixes: a04e58881e25 ("ofproto-dpif-xlate: Simplify translation for groups.")
Signed-off-by: Ben Pfaff <blp@ovn.org>
---
 ofproto/ofproto-dpif-xlate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index e26f6c8f554a..507e14dd0d00 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -4488,7 +4488,7 @@ xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group,
                     bool is_last_action)
 {
     if (group->up.type == OFPGT11_ALL || group->up.type == OFPGT11_INDIRECT) {
-        struct ovs_list *last_bucket = ovs_list_back(&group->up.buckets);
+        struct ovs_list *last_bucket = group->up.buckets.prev;
         struct ofputil_bucket *bucket;
         LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
             bool is_last_bucket = &bucket->list_node == last_bucket;
--
2.16.1

_______________________________________________
dev mailing list
dev@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
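
The one-line fix works because OVS lists are circular with a sentinel head: on an empty bucket list, 'buckets.prev' is the head itself, so the LIST_FOR_EACH loop runs zero times and the last-bucket comparison is never reached, whereas ovs_list_back() asserts that the list is non-empty before returning. A standalone sketch follows, using a minimal stand-in for struct ovs_list (assumed layout for illustration, not the real OVS definition).

#include <stdbool.h>

/* Minimal stand-in: a circular doubly linked list with a sentinel
 * head, as assumed from the patch above. */
struct ovs_list_sketch {
    struct ovs_list_sketch *prev, *next;
};

static void
list_init(struct ovs_list_sketch *list)
{
    list->prev = list->next = list;     /* empty list points at itself */
}

/* Mirrors the patched loop: with zero buckets, 'last_bucket' is the
 * sentinel itself, the loop body never executes, and nothing can
 * assert-fail, unlike an ovs_list_back()-style accessor that insists
 * the list be non-empty before returning 'prev'. */
static void
walk_buckets_empty_safe(void)
{
    struct ovs_list_sketch buckets;
    list_init(&buckets);

    struct ovs_list_sketch *last_bucket = buckets.prev;  /* == &buckets */

    for (struct ovs_list_sketch *node = buckets.next; node != &buckets;
         node = node->next) {
        bool is_last_bucket = (node == last_bucket);
        (void) is_last_bucket;          /* never reached when empty */
    }
}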