diff --git a/.gitignore b/.gitignore
index a7384fa..4d0e77b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,4 @@
/dpdk-17.11.tar.xz
/openvswitch-2.9.1.tar.gz
/openvswitch-2.9.2.tar.gz
+/openvswitch-2.10.0.tar.gz
diff --git a/0001-Add-ovs.compat-module-to-python-package.patch b/0001-Add-ovs.compat-module-to-python-package.patch
deleted file mode 100644
index b0b0733..0000000
--- a/0001-Add-ovs.compat-module-to-python-package.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From f18adea51cac4f40c50d59d7c001264a8ce83cb3 Mon Sep 17 00:00:00 2001
-From: Terry Wilson
-Date: Fri, 31 Aug 2018 13:40:54 -0500
-Subject: [PATCH] Add ovs.compat module to python package
-
-Signed-off-by: Terry Wilson
-Signed-off-by: Ben Pfaff
-Acked-by: Timothy Redaelli
-(cherry picked from commit 2360464d629de3acacabd960ffc02fbb5081028d)
-Signed-off-by: Ben Pfaff
----
- python/setup.py | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/python/setup.py b/python/setup.py
-index 0e86834ea..b52657df3 100644
---- a/python/setup.py
-+++ b/python/setup.py
-@@ -63,7 +63,8 @@ setup_args = dict(
- url='http://www.openvswitch.org/',
- author='Open vSwitch',
- author_email='dev@openvswitch.org',
-- packages=['ovs', 'ovs.db', 'ovs.unixctl'],
-+ packages=['ovs', 'ovs.compat', 'ovs.compat.sortedcontainers',
-+ 'ovs.db', 'ovs.unixctl'],
- keywords=['openvswitch', 'ovs', 'OVSDB'],
- license='Apache 2.0',
- classifiers=[
---
-2.17.1
-
diff --git a/0001-OVN-add-CT_LB-action-to-ovn-trace.patch b/0001-OVN-add-CT_LB-action-to-ovn-trace.patch
new file mode 100644
index 0000000..a712bb8
--- /dev/null
+++ b/0001-OVN-add-CT_LB-action-to-ovn-trace.patch
@@ -0,0 +1,218 @@
+From b37f8c15ca6ee079541b0c02ee77ce9d392b18fc Mon Sep 17 00:00:00 2001
+Message-Id:
+In-Reply-To:
+References:
+From: Lorenzo Bianconi
+Date: Thu, 20 Sep 2018 16:46:02 +0200
+Subject: [PATCH] OVN: add CT_LB action to ovn-trace
+
+Add CT_LB action to ovn-trace utility in order to fix the
+following ovn-trace error if a load balancer rule is added to
+OVN configuration
+
+ct_next(ct_state=est|trk /* default (use --ct to customize) */) {
+ *** ct_lb action not implemented;
+};
+
+Add '--lb-dst' option in order to specify the IP address to use
+in VIP pool. If --lb-dst is not provided the destination IP will be
+randomly chosen.
+
+Signed-off-by: Lorenzo Bianconi
+Signed-off-by: Ben Pfaff
+---
+ ovn/utilities/ovn-trace.8.xml | 18 ++++++-
+ ovn/utilities/ovn-trace.c | 98 +++++++++++++++++++++++++++++++++--
+ 2 files changed, 111 insertions(+), 5 deletions(-)
+
+--- a/ovn/utilities/ovn-trace.8.xml
++++ b/ovn/utilities/ovn-trace.8.xml
+@@ -253,9 +253,17 @@
+ ct_snat
) action.
+
+
+- ct_lb
++ ct_lb;
++ ct_lb(ip
[:port
]...);
+
+- Not yet implemented; currently implemented as a no-op.
++ Forks the pipeline. In one fork, sets ip4.dst
(or
++ ip6.dst
) to one of the load-balancer addresses and the
++ destination port to its associated port, if any, and sets
++ ct.dnat
to 1. With one or more arguments, gives preference
++ to the address specified on --lb-dst
, if any; without
++ arguments, uses the address and port specified on --lb-dst
.
++ In the other fork, the pipeline continues without change after the
++ ct_lb
action.
+
+
+ ct_commit
+@@ -424,6 +432,12 @@
+
+
+
++ --lb-dst=
ip[:port
]
++
++ Sets the IP from VIP pool to use as destination of the packet.
++ --lb-dst
is not available in daemon mode.
++
++
+ --friendly-names
+ --no-friendly-names
+
+--- a/ovn/utilities/ovn-trace.c
++++ b/ovn/utilities/ovn-trace.c
+@@ -46,6 +46,7 @@
+ #include "stream.h"
+ #include "unixctl.h"
+ #include "util.h"
++#include "random.h"
+
+ VLOG_DEFINE_THIS_MODULE(ovntrace);
+
+@@ -77,6 +78,9 @@ static uint32_t *ct_states;
+ static size_t n_ct_states;
+ static size_t ct_state_idx;
+
++/* --lb-dst: load balancer destination info. */
++static struct ovnact_ct_lb_dst lb_dst;
++
+ /* --friendly-names, --no-friendly-names: Whether to substitute human-friendly
+ * port and datapath names for the awkward UUIDs typically used in the actual
+ * logical flows. */
+@@ -187,6 +191,24 @@ parse_ct_option(const char *state_s_)
+ }
+
+ static void
++parse_lb_option(const char *s)
++{
++ struct sockaddr_storage ss;
++ if (!inet_parse_active(s, 0, &ss)) {
++ ovs_fatal(0, "%s: bad address", s);
++ }
++
++ lb_dst.family = ss.ss_family;
++ struct in6_addr a = ss_get_address(&ss);
++ if (ss.ss_family == AF_INET) {
++ lb_dst.ipv4 = in6_addr_get_mapped_ipv4(&a);
++ } else {
++ lb_dst.ipv6 = a;
++ }
++ lb_dst.port = ss_get_port(&ss);
++}
++
++static void
+ parse_options(int argc, char *argv[])
+ {
+ enum {
+@@ -202,7 +224,8 @@ parse_options(int argc, char *argv[])
+ OPT_NO_FRIENDLY_NAMES,
+ DAEMON_OPTION_ENUMS,
+ SSL_OPTION_ENUMS,
+- VLOG_OPTION_ENUMS
++ VLOG_OPTION_ENUMS,
++ OPT_LB_DST
+ };
+ static const struct option long_options[] = {
+ {"db", required_argument, NULL, OPT_DB},
+@@ -217,6 +240,7 @@ parse_options(int argc, char *argv[])
+ {"no-friendly-names", no_argument, NULL, OPT_NO_FRIENDLY_NAMES},
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'V'},
++ {"lb-dst", required_argument, NULL, OPT_LB_DST},
+ DAEMON_LONG_OPTIONS,
+ VLOG_LONG_OPTIONS,
+ STREAM_SSL_LONG_OPTIONS,
+@@ -274,6 +298,10 @@ parse_options(int argc, char *argv[])
+ use_friendly_names = false;
+ break;
+
++ case OPT_LB_DST:
++ parse_lb_option(optarg);
++ break;
++
+ case 'h':
+ usage();
+
+@@ -1823,6 +1851,71 @@ execute_ct_nat(const struct ovnact_ct_na
+ }
+
+ static void
++execute_ct_lb(const struct ovnact_ct_lb *ct_lb,
++ const struct ovntrace_datapath *dp, struct flow *uflow,
++ enum ovnact_pipeline pipeline, struct ovs_list *super)
++{
++ struct flow ct_lb_flow = *uflow;
++
++ int family = (ct_lb_flow.dl_type == htons(ETH_TYPE_IP) ? AF_INET
++ : ct_lb_flow.dl_type == htons(ETH_TYPE_IPV6) ? AF_INET6
++ : AF_UNSPEC);
++ if (family != AF_UNSPEC) {
++ const struct ovnact_ct_lb_dst *dst = NULL;
++ if (ct_lb->n_dsts) {
++ /* For ct_lb with addresses, choose one of the addresses. */
++ int n = 0;
++ for (int i = 0; i < ct_lb->n_dsts; i++) {
++ const struct ovnact_ct_lb_dst *d = &ct_lb->dsts[i];
++ if (d->family != family) {
++ continue;
++ }
++
++ /* Check for the destination specified by --lb-dst, if any. */
++ if (lb_dst.family == family
++ && (family == AF_INET
++ ? d->ipv4 == lb_dst.ipv4
++ : ipv6_addr_equals(&d->ipv6, &lb_dst.ipv6))) {
++ lb_dst.family = AF_UNSPEC;
++ dst = d;
++ break;
++ }
++
++ /* Select a random destination as a fallback. */
++ if (!random_range(++n)) {
++ dst = d;
++ }
++ }
++
++ if (!dst) {
++ ovntrace_node_append(super, OVNTRACE_NODE_ERROR,
++ "*** no load balancing destination "
++ "(use --lb-dst)");
++ }
++ } else if (lb_dst.family == family) {
++ /* For ct_lb without addresses, use user-specified address. */
++ dst = &lb_dst;
++ }
++
++ if (dst) {
++ if (family == AF_INET6) {
++ ct_lb_flow.ipv6_dst = dst->ipv6;
++ } else {
++ ct_lb_flow.nw_dst = dst->ipv4;
++ }
++ if (dst->port) {
++ ct_lb_flow.tp_dst = htons(dst->port);
++ }
++ ct_lb_flow.ct_state |= CS_DST_NAT;
++ }
++ }
++
++ struct ovntrace_node *node = ovntrace_node_append(
++ super, OVNTRACE_NODE_TRANSFORMATION, "ct_lb");
++ trace__(dp, &ct_lb_flow, ct_lb->ltable, pipeline, &node->subs);
++}
++
++static void
+ execute_log(const struct ovnact_log *log, struct flow *uflow,
+ struct ovs_list *super)
+ {
+@@ -1910,8 +2003,7 @@ trace_actions(const struct ovnact *ovnac
+ break;
+
+ case OVNACT_CT_LB:
+- ovntrace_node_append(super, OVNTRACE_NODE_ERROR,
+- "*** ct_lb action not implemented");
++ execute_ct_lb(ovnact_get_CT_LB(a), dp, uflow, pipeline, super);
+ break;
+
+ case OVNACT_CT_CLEAR:
diff --git a/0001-dhparams-Fix-.c-file-generation-with-OpenSSL-1.1.1-p.patch b/0001-dhparams-Fix-.c-file-generation-with-OpenSSL-1.1.1-p.patch
deleted file mode 100644
index ee28d29..0000000
--- a/0001-dhparams-Fix-.c-file-generation-with-OpenSSL-1.1.1-p.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From 44343cb1ca4232f23dba24cab98d3605686f5700 Mon Sep 17 00:00:00 2001
-From: Timothy Redaelli
-Date: Fri, 7 Sep 2018 15:14:53 +0200
-Subject: [PATCH] dhparams: Fix .c file generation with OpenSSL >= 1.1.1-pre9
-
-Since OpenSSL upstream commit 201b305a2409
-("apps/dsaparam.c generates code that is intended to be pasted or included into
-an existing source file: the function is static, and the code doesn't include
-dsa.h. Match the generated C source style of dsaparam.") "openssl dhparam -C"
-generates the get_dh functions as static, but the functions are used inside
-stream-ssl.c and so the static keyword cannot be used.
-
-This commit removes the static keyword from the get_dh functions during
-dhparams.c file generation by restoring the current behaviour.
-
-Signed-off-by: Timothy Redaelli
-Signed-off-by: Ben Pfaff
-(cherry picked from commit dc041eae5019a936618c398a2a1d106f65604ccc)
----
- lib/automake.mk | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/lib/automake.mk b/lib/automake.mk
-index 8ecad1241..70461ec8c 100644
---- a/lib/automake.mk
-+++ b/lib/automake.mk
-@@ -438,7 +438,7 @@ lib/dhparams.c: lib/dh1024.pem lib/dh2048.pem lib/dh4096.pem
- openssl dhparam -C -in $(srcdir)/lib/dh1024.pem -noout && \
- openssl dhparam -C -in $(srcdir)/lib/dh2048.pem -noout && \
- openssl dhparam -C -in $(srcdir)/lib/dh4096.pem -noout) \
-- | sed 's/\(get_dh[0-9]*\)()/\1(void)/' > lib/dhparams.c.tmp && \
-+ | sed 's/^static DH/DH/; s/\(get_dh[0-9]*\)()/\1(void)/' > lib/dhparams.c.tmp && \
- mv lib/dhparams.c.tmp lib/dhparams.c
- else
- lib_libopenvswitch_la_SOURCES += lib/stream-nossl.c
---
-2.17.1
-
diff --git a/0001-dpif-Remove-support-for-multiple-queues-per-port.patch b/0001-dpif-Remove-support-for-multiple-queues-per-port.patch
new file mode 100644
index 0000000..f433271
--- /dev/null
+++ b/0001-dpif-Remove-support-for-multiple-queues-per-port.patch
@@ -0,0 +1,228 @@
+From 769b50349f28c5f9e4bff102bc61dadcb9b99c37 Mon Sep 17 00:00:00 2001
+From: Ben Pfaff
+Date: Tue, 25 Sep 2018 15:14:13 -0700
+Subject: [PATCH] dpif: Remove support for multiple queues per port.
+
+Commit 69c51582ff78 ("dpif-netlink: don't allocate per thread netlink
+sockets") removed dpif-netlink support for multiple queues per port.
+No remaining dpif provider supports multiple queues per port, so
+remove infrastructure for the feature.
+
+CC: Matteo Croce
+Signed-off-by: Ben Pfaff
+Tested-by: Yifeng Sun
+Reviewed-by: Yifeng Sun
+---
+ lib/dpif-netlink.c | 9 ++++-----
+ lib/dpif-provider.h | 14 ++------------
+ lib/dpif.c | 15 +++------------
+ lib/dpif.h | 15 +--------------
+ ofproto/ofproto-dpif-upcall.c | 7 +++----
+ ofproto/ofproto-dpif-xlate.c | 6 ++----
+ 6 files changed, 15 insertions(+), 51 deletions(-)
+
+diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
+index 4736d21d4..21315033c 100644
+--- a/lib/dpif-netlink.c
++++ b/lib/dpif-netlink.c
+@@ -234,7 +234,7 @@ static bool ovs_tunnels_out_of_tree = true;
+ static int dpif_netlink_init(void);
+ static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
+ static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
+- odp_port_t port_no, uint32_t hash);
++ odp_port_t port_no);
+ static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
+ static int dpif_netlink_refresh_channels(struct dpif_netlink *,
+ uint32_t n_handlers);
+@@ -991,7 +991,7 @@ dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
+
+ static uint32_t
+ dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
+- odp_port_t port_no, uint32_t hash OVS_UNUSED)
++ odp_port_t port_no)
+ OVS_REQ_RDLOCK(dpif->upcall_lock)
+ {
+ uint32_t port_idx = odp_to_u32(port_no);
+@@ -1015,14 +1015,13 @@ dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
+ }
+
+ static uint32_t
+-dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
+- uint32_t hash)
++dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
+ {
+ const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
+ uint32_t ret;
+
+ fat_rwlock_rdlock(&dpif->upcall_lock);
+- ret = dpif_netlink_port_get_pid__(dpif, port_no, hash);
++ ret = dpif_netlink_port_get_pid__(dpif, port_no);
+ fat_rwlock_unlock(&dpif->upcall_lock);
+
+ return ret;
+diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
+index debdafc42..eb3ee50a6 100644
+--- a/lib/dpif-provider.h
++++ b/lib/dpif-provider.h
+@@ -191,16 +191,7 @@ struct dpif_class {
+
+ /* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE
+ * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in
+- * flows whose packets arrived on port 'port_no'. In the case where the
+- * provider allocates multiple Netlink PIDs to a single port, it may use
+- * 'hash' to spread load among them. The caller need not use a particular
+- * hash function; a 5-tuple hash is suitable.
+- *
+- * (The datapath implementation might use some different hash function for
+- * distributing packets received via flow misses among PIDs. This means
+- * that packets received via flow misses might be reordered relative to
+- * packets received via userspace actions. This is not ordinarily a
+- * problem.)
++ * flows whose packets arrived on port 'port_no'.
+ *
+ * A 'port_no' of UINT32_MAX should be treated as a special case. The
+ * implementation should return a reserved PID, not allocated to any port,
+@@ -212,8 +203,7 @@ struct dpif_class {
+ *
+ * A dpif provider that doesn't have meaningful Netlink PIDs can use NULL
+ * for this function. This is equivalent to always returning 0. */
+- uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no,
+- uint32_t hash);
++ uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no);
+
+ /* Attempts to begin dumping the ports in a dpif. On success, returns 0
+ * and initializes '*statep' with any data needed for iteration. On
+diff --git a/lib/dpif.c b/lib/dpif.c
+index 85cf9000e..4697a4dcd 100644
+--- a/lib/dpif.c
++++ b/lib/dpif.c
+@@ -737,16 +737,7 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
+
+ /* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE
+ * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in
+- * flows whose packets arrived on port 'port_no'. In the case where the
+- * provider allocates multiple Netlink PIDs to a single port, it may use
+- * 'hash' to spread load among them. The caller need not use a particular
+- * hash function; a 5-tuple hash is suitable.
+- *
+- * (The datapath implementation might use some different hash function for
+- * distributing packets received via flow misses among PIDs. This means
+- * that packets received via flow misses might be reordered relative to
+- * packets received via userspace actions. This is not ordinarily a
+- * problem.)
++ * flows whose packets arrived on port 'port_no'.
+ *
+ * A 'port_no' of ODPP_NONE is a special case: it returns a reserved PID, not
+ * allocated to any port, that the client may use for special purposes.
+@@ -757,10 +748,10 @@ dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
+ * update all of the flows that it installed that contain
+ * OVS_ACTION_ATTR_USERSPACE actions. */
+ uint32_t
+-dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no, uint32_t hash)
++dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no)
+ {
+ return (dpif->dpif_class->port_get_pid
+- ? (dpif->dpif_class->port_get_pid)(dpif, port_no, hash)
++ ? (dpif->dpif_class->port_get_pid)(dpif, port_no)
+ : 0);
+ }
+
+diff --git a/lib/dpif.h b/lib/dpif.h
+index 8fdfe5f00..1a35cc410 100644
+--- a/lib/dpif.h
++++ b/lib/dpif.h
+@@ -274,18 +274,6 @@
+ *
+ * - Upcalls that specify the "special" Netlink PID are queued separately.
+ *
+- * Multiple threads may want to read upcalls simultaneously from a single
+- * datapath. To support multiple threads well, one extends the above preferred
+- * behavior:
+- *
+- * - Each port has multiple PIDs. The datapath distributes "miss" upcalls
+- * across the PIDs, ensuring that a given flow is mapped in a stable way
+- * to a single PID.
+- *
+- * - For "action" upcalls, the thread can specify its own Netlink PID or
+- * other threads' Netlink PID of the same port for offloading purpose
+- * (e.g. in a "round robin" manner).
+- *
+ *
+ * Packet Format
+ * =============
+@@ -470,8 +458,7 @@ int dpif_port_query_by_name(const struct dpif *, const char *devname,
+ struct dpif_port *);
+ int dpif_port_get_name(struct dpif *, odp_port_t port_no,
+ char *name, size_t name_size);
+-uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no,
+- uint32_t hash);
++uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no);
+
+ struct dpif_port_dump {
+ const struct dpif *dpif;
+diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
+index 62222079f..0cc964a7f 100644
+--- a/ofproto/ofproto-dpif-upcall.c
++++ b/ofproto/ofproto-dpif-upcall.c
+@@ -1021,7 +1021,6 @@ classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata,
+ * initialized with at least 128 bytes of space. */
+ static void
+ compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
+- const struct flow *flow,
+ odp_port_t odp_in_port, ofp_port_t ofp_in_port,
+ struct ofpbuf *buf, uint32_t meter_id,
+ struct uuid *ofproto_uuid)
+@@ -1038,7 +1037,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
+ port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
+ ? ODPP_NONE
+ : odp_in_port;
+- pid = dpif_port_get_pid(udpif->dpif, port, flow_hash_5tuple(flow, 0));
++ pid = dpif_port_get_pid(udpif->dpif, port);
+
+ size_t offset;
+ size_t ac_offset;
+@@ -1196,7 +1195,7 @@ upcall_xlate(struct udpif *udpif, struct upcall *upcall,
+ odp_actions->data, odp_actions->size);
+ } else {
+ /* upcall->put_actions already initialized by upcall_receive(). */
+- compose_slow_path(udpif, &upcall->xout, upcall->flow,
++ compose_slow_path(udpif, &upcall->xout,
+ upcall->flow->in_port.odp_port, upcall->ofp_in_port,
+ &upcall->put_actions,
+ upcall->ofproto->up.slowpath_meter_id,
+@@ -2155,7 +2154,7 @@ revalidate_ukey__(struct udpif *udpif, const struct udpif_key *ukey,
+ goto exit;
+ }
+
+- compose_slow_path(udpif, xoutp, &ctx.flow, ctx.flow.in_port.odp_port,
++ compose_slow_path(udpif, xoutp, ctx.flow.in_port.odp_port,
+ ofp_in_port, odp_actions,
+ ofproto->up.slowpath_meter_id, &ofproto->uuid);
+ }
+diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
+index 6949595ba..f11f60468 100644
+--- a/ofproto/ofproto-dpif-xlate.c
++++ b/ofproto/ofproto-dpif-xlate.c
+@@ -3084,8 +3084,7 @@ compose_sample_action(struct xlate_ctx *ctx,
+
+ odp_port_t odp_port = ofp_port_to_odp_port(
+ ctx->xbridge, ctx->xin->flow.in_port.ofp_port);
+- uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port,
+- flow_hash_5tuple(&ctx->xin->flow, 0));
++ uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port);
+ size_t cookie_offset = odp_put_userspace_action(pid, cookie,
+ sizeof *cookie,
+ tunnel_out_port,
+@@ -4638,8 +4637,7 @@ put_controller_user_action(struct xlate_ctx *ctx,
+
+ odp_port_t odp_port = ofp_port_to_odp_port(ctx->xbridge,
+ ctx->xin->flow.in_port.ofp_port);
+- uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port,
+- flow_hash_5tuple(&ctx->xin->flow, 0));
++ uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port);
+ odp_put_userspace_action(pid, &cookie, sizeof cookie, ODPP_NONE,
+ false, ctx->odp_actions);
+ }
+--
+2.17.1
+
diff --git a/0001-dpif-netdev-Add-round-robin-based-rxq-to-pmd-assignm.patch b/0001-dpif-netdev-Add-round-robin-based-rxq-to-pmd-assignm.patch
new file mode 100644
index 0000000..e6a3f59
--- /dev/null
+++ b/0001-dpif-netdev-Add-round-robin-based-rxq-to-pmd-assignm.patch
@@ -0,0 +1,310 @@
+From 57ce73db12f6d3e980c0b285015c998183f26c8d Mon Sep 17 00:00:00 2001
+From: Kevin Traynor
+Date: Fri, 31 Aug 2018 09:47:55 +0100
+Subject: [PATCH] dpif-netdev: Add round-robin based rxq to pmd assignment.
+
+Prior to OVS 2.9 automatic assignment of Rxqs to PMDs
+(i.e. CPUs) was done by round-robin.
+
+That was changed in OVS 2.9 to ordering the Rxqs based on
+their measured processing cycles. This was to assign the
+busiest Rxqs to different PMDs, improving aggregate
+throughput.
+
+For the most part the new scheme should be better, but
+there could be situations where a user prefers a simple
+round-robin scheme because Rxqs from a single port are
+more likely to be spread across multiple PMDs, and/or
+traffic is very bursty/unpredictable.
+
+Add 'pmd-rxq-assign' config to allow a user to select
+round-robin based assignment.
+
+Signed-off-by: Kevin Traynor
+Acked-by: Eelco Chaudron
+Acked-by: Ilya Maximets
+Signed-off-by: Ian Stokes
+---
+ Documentation/topics/dpdk/pmd.rst | 33 +++++++++++++---
+ NEWS | 4 +-
+ lib/dpif-netdev.c | 83 +++++++++++++++++++++++++++++----------
+ tests/pmd.at | 12 +++++-
+ vswitchd/vswitch.xml | 24 +++++++++++
+ 5 files changed, 126 insertions(+), 30 deletions(-)
+
+diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst
+index 5f0671e..dd9172d 100644
+--- a/Documentation/topics/dpdk/pmd.rst
++++ b/Documentation/topics/dpdk/pmd.rst
+@@ -113,10 +113,15 @@ means that this thread will only poll the *pinned* Rx queues.
+
+ If ``pmd-rxq-affinity`` is not set for Rx queues, they will be assigned to PMDs
+-(cores) automatically. Where known, the processing cycles that have been stored
+-for each Rx queue will be used to assign Rx queue to PMDs based on a round
+-robin of the sorted Rx queues. For example, take the following example, where
+-there are five Rx queues and three cores - 3, 7, and 8 - available and the
+-measured usage of core cycles per Rx queue over the last interval is seen to
+-be:
++(cores) automatically.
++
++The algorithm used to automatically assign Rxqs to PMDs can be set by::
++
++ $ ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=<assignment>
++
++By default, ``cycles`` assignment is used where the Rxqs will be ordered by
++their measured processing cycles, and then be evenly assigned in descending
++order to PMDs based on an up/down walk of the PMDs. For example, where there
++are five Rx queues and three cores - 3, 7, and 8 - available and the measured
++usage of core cycles per Rx queue over the last interval is seen to be:
+
+ - Queue #0: 30%
+@@ -132,4 +137,20 @@ The Rx queues will be assigned to the cores in the following order::
+ Core 8: Q3 (60%) | Q0 (30%)
+
++Alternatively, ``roundrobin`` assignment can be used, where the Rxqs are
++assigned to PMDs in a round-robin fashion. This algorithm was used by
++default prior to OVS 2.9. For example, given the following ports and queues:
++
++- Port #0 Queue #0 (P0Q0)
++- Port #0 Queue #1 (P0Q1)
++- Port #1 Queue #0 (P1Q0)
++- Port #1 Queue #1 (P1Q1)
++- Port #1 Queue #2 (P1Q2)
++
++The Rx queues may be assigned to the cores in the following order::
++
++ Core 3: P0Q0 | P1Q1
++ Core 7: P0Q1 | P1Q2
++ Core 8: P1Q0 |
++
+ To see the current measured usage history of PMD core cycles for each Rx
+ queue::
+diff --git a/NEWS b/NEWS
+index 04de807..87da271 100644
+--- a/NEWS
++++ b/NEWS
+@@ -43,6 +43,8 @@
+ * Allow init to fail and record DPDK status/version in OVS database.
+ * Add experimental flow hardware offload support
+ * Support both shared and per port mempools for DPDK devices.
++ * Add option for simple round-robin based Rxq to PMD assignment.
++ It can be set with pmd-rxq-assign.
+ - Userspace datapath:
+ * Commands ovs-appctl dpif-netdev/pmd-*-show can now work on a single PMD
+ * Detailed PMD performance metrics available with new command
+
+diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
+index 52b5bc2..466d5ac 100644
+--- a/lib/dpif-netdev.c
++++ b/lib/dpif-netdev.c
+@@ -342,4 +342,6 @@ struct dp_netdev {
+ struct id_pool *tx_qid_pool;
+ struct ovs_mutex tx_qid_pool_mutex;
++ /* Use measured cycles for rxq to pmd assignment. */
++ bool pmd_rxq_assign_cyc;
+
+ /* Protects the access of the 'struct dp_netdev_pmd_thread'
+@@ -1493,4 +1495,5 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
+
+ cmap_init(&dp->poll_threads);
++ dp->pmd_rxq_assign_cyc = true;
+
+ ovs_mutex_init(&dp->tx_qid_pool_mutex);
+@@ -3717,4 +3720,6 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
+ struct dp_netdev *dp = get_dp_netdev(dpif);
+ const char *cmask = smap_get(other_config, "pmd-cpu-mask");
++ const char *pmd_rxq_assign = smap_get_def(other_config, "pmd-rxq-assign",
++ "cycles");
+ unsigned long long insert_prob =
+ smap_get_ullong(other_config, "emc-insert-inv-prob",
+@@ -3779,4 +3784,18 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
+ }
+ }
++
++ bool pmd_rxq_assign_cyc = !strcmp(pmd_rxq_assign, "cycles");
++ if (!pmd_rxq_assign_cyc && strcmp(pmd_rxq_assign, "roundrobin")) {
++ VLOG_WARN("Unsupported Rxq to PMD assignment mode in pmd-rxq-assign. "
++ "Defaulting to 'cycles'.");
++ pmd_rxq_assign_cyc = true;
++ pmd_rxq_assign = "cycles";
++ }
++ if (dp->pmd_rxq_assign_cyc != pmd_rxq_assign_cyc) {
++ dp->pmd_rxq_assign_cyc = pmd_rxq_assign_cyc;
++ VLOG_INFO("Rxq to PMD assignment mode changed to: \'%s\'.",
++ pmd_rxq_assign);
++ dp_netdev_request_reconfigure(dp);
++ }
+ return 0;
+ }
+@@ -4249,8 +4268,16 @@ rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
+ }
+
+-/* Returns the next pmd from the numa node in
+- * incrementing or decrementing order. */
++/*
++ * Returns the next pmd from the numa node.
++ *
++ * If 'updown' is 'true' it will alternate between selecting the next pmd in
++ * either an up or down walk, switching between up/down when the first or last
++ * core is reached. e.g. 1,2,3,3,2,1,1,2...
++ *
++ * If 'updown' is 'false' it will select the next pmd wrapping around when last
++ * core reached. e.g. 1,2,3,1,2,3,1,2...
++ */
+ static struct dp_netdev_pmd_thread *
+-rr_numa_get_pmd(struct rr_numa *numa)
++rr_numa_get_pmd(struct rr_numa *numa, bool updown)
+ {
+ int numa_idx = numa->cur_index;
+@@ -4260,5 +4287,9 @@ rr_numa_get_pmd(struct rr_numa *numa)
+ if (numa->cur_index == numa->n_pmds-1) {
+ /* Reached the last pmd. */
+- numa->idx_inc = false;
++ if (updown) {
++ numa->idx_inc = false;
++ } else {
++ numa->cur_index = 0;
++ }
+ } else {
+ numa->cur_index++;
+@@ -4323,7 +4354,4 @@ compare_rxq_cycles(const void *a, const void *b)
+ * pmds to unpinned queues.
+ *
+- * If 'pinned' is false queues will be sorted by processing cycles they are
+- * consuming and then assigned to pmds in round robin order.
+- *
+ * The function doesn't touch the pmd threads, it just stores the assignment
+ * in the 'pmd' member of each rxq. */
+@@ -4338,4 +4366,5 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
+ struct rr_numa *numa = NULL;
+ int numa_id;
++ bool assign_cyc = dp->pmd_rxq_assign_cyc;
+
+ HMAP_FOR_EACH (port, node, &dp->ports) {
+@@ -4368,10 +4397,13 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
+ rxqs = xrealloc(rxqs, sizeof *rxqs * (n_rxqs + 1));
+ }
+- /* Sum the queue intervals and store the cycle history. */
+- for (unsigned i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
+- cycle_hist += dp_netdev_rxq_get_intrvl_cycles(q, i);
+- }
+- dp_netdev_rxq_set_cycles(q, RXQ_CYCLES_PROC_HIST, cycle_hist);
+
++ if (assign_cyc) {
++ /* Sum the queue intervals and store the cycle history. */
++ for (unsigned i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
++ cycle_hist += dp_netdev_rxq_get_intrvl_cycles(q, i);
++ }
++ dp_netdev_rxq_set_cycles(q, RXQ_CYCLES_PROC_HIST,
++ cycle_hist);
++ }
+ /* Store the queue. */
+ rxqs[n_rxqs++] = q;
+@@ -4380,5 +4412,5 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
+ }
+
+- if (n_rxqs > 1) {
++ if (n_rxqs > 1 && assign_cyc) {
+ /* Sort the queues in order of the processing cycles
+ * they consumed during their last pmd interval. */
+@@ -4404,5 +4436,5 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
+ continue;
+ }
+- rxqs[i]->pmd = rr_numa_get_pmd(non_local_numa);
++ rxqs[i]->pmd = rr_numa_get_pmd(non_local_numa, assign_cyc);
+ VLOG_WARN("There's no available (non-isolated) pmd thread "
+ "on numa node %d. Queue %d on port \'%s\' will "
+@@ -4413,11 +4445,20 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
+ rxqs[i]->pmd->core_id, rxqs[i]->pmd->numa_id);
+ } else {
+- rxqs[i]->pmd = rr_numa_get_pmd(numa);
+- VLOG_INFO("Core %d on numa node %d assigned port \'%s\' "
+- "rx queue %d (measured processing cycles %"PRIu64").",
+- rxqs[i]->pmd->core_id, numa_id,
+- netdev_rxq_get_name(rxqs[i]->rx),
+- netdev_rxq_get_queue_id(rxqs[i]->rx),
+- dp_netdev_rxq_get_cycles(rxqs[i], RXQ_CYCLES_PROC_HIST));
++ rxqs[i]->pmd = rr_numa_get_pmd(numa, assign_cyc);
++ if (assign_cyc) {
++ VLOG_INFO("Core %d on numa node %d assigned port \'%s\' "
++ "rx queue %d "
++ "(measured processing cycles %"PRIu64").",
++ rxqs[i]->pmd->core_id, numa_id,
++ netdev_rxq_get_name(rxqs[i]->rx),
++ netdev_rxq_get_queue_id(rxqs[i]->rx),
++ dp_netdev_rxq_get_cycles(rxqs[i],
++ RXQ_CYCLES_PROC_HIST));
++ } else {
++ VLOG_INFO("Core %d on numa node %d assigned port \'%s\' "
++ "rx queue %d.", rxqs[i]->pmd->core_id, numa_id,
++ netdev_rxq_get_name(rxqs[i]->rx),
++ netdev_rxq_get_queue_id(rxqs[i]->rx));
++ }
+ }
+ }
+diff --git a/tests/pmd.at b/tests/pmd.at
+index 4cae6c8..1f952f3 100644
+--- a/tests/pmd.at
++++ b/tests/pmd.at
+@@ -62,5 +62,6 @@ m4_define([CHECK_PMD_THREADS_CREATED], [
+
+ m4_define([SED_NUMA_CORE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id \)[[0-9]]*:/\1\2:/"])
+-m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/1 2 5 6//;s/0 3 4 7//"])
++m4_define([SED_NUMA_CORE_QUEUE_CYC_PATTERN], ["s/1 2 5 6//;s/0 3 4 7//"])
++m4_define([SED_NUMA_CORE_QUEUE_PQ_PATTERN], ["s/1 3 5 7//;s/0 2 4 6//"])
+ m4_define([DUMMY_NUMA], [--dummy-numa="0,0,0,0"])
+
+@@ -146,9 +147,16 @@ pmd thread numa_id core_id :
+ ])
+
++AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=cycles])
+ TMP=$(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])
+ AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x3])
+ CHECK_PMD_THREADS_CREATED([2], [], [+$TMP])
+
+-AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed ':a;/AVAIL$/{N;s/\n//;ba;}' | parse_pmd_rxq_show_group | sed SED_NUMA_CORE_QUEUE_PATTERN], [0], [dnl
++AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed ':a;/AVAIL$/{N;s/\n//;ba;}' | parse_pmd_rxq_show_group | sed SED_NUMA_CORE_QUEUE_CYC_PATTERN], [0], [dnl
++port: p0 queue-id:
++port: p0 queue-id:
++])
++
++AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=roundrobin])
++AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed ':a;/AVAIL$/{N;s/\n//;ba;}' | parse_pmd_rxq_show_group | sed SED_NUMA_CORE_QUEUE_PQ_PATTERN], [0], [dnl
+ port: p0 queue-id:
+ port: p0 queue-id:
+diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
+index e318151..91d132d 100644
+--- a/vswitchd/vswitch.xml
++++ b/vswitchd/vswitch.xml
+@@ -433,4 +433,28 @@
+
+
++
++
++ Specifies how RX queues will be automatically assigned to CPU cores.
++ Options:
++
++ cycles
++ - Rxqs will be sorted by order of measured processing cycles
++ before being assigned to CPU cores.
++ roundrobin
++ - Rxqs will be round-robined across CPU cores.
++
++
++
++ The default value is cycles
.
++
++
++ Changing this value will trigger an automatic re-assignment of Rxqs to
++ CPUs. Note: Rxqs mapped to CPU cores with
++ pmd-rxq-affinity
are unaffected.
++
++
++
+
+--
+1.8.3.1
+
diff --git a/0001-dpif-netdev-Avoid-reordering-of-packets-in-a-batch-w.patch b/0001-dpif-netdev-Avoid-reordering-of-packets-in-a-batch-w.patch
new file mode 100644
index 0000000..34fe6a5
--- /dev/null
+++ b/0001-dpif-netdev-Avoid-reordering-of-packets-in-a-batch-w.patch
@@ -0,0 +1,357 @@
+From 9b4f08cdcaf253175edda088683bdd3db9e4c097 Mon Sep 17 00:00:00 2001
+From: Vishal Deep Ajmera
+Date: Fri, 27 Jul 2018 23:56:37 +0530
+Subject: [PATCH] dpif-netdev: Avoid reordering of packets in a batch with same
+ megaflow
+
+OVS reads packets in batches from a given port and packets in the
+batch are subjected to potentially 3 levels of lookups to identify
+the datapath megaflow entry (or flow) associated with the packet.
+Each megaflow entry has a dedicated buffer in which packets that match
+the flow classification criteria are collected. This buffer helps OVS
+perform batch processing for all packets associated with a given flow.
+
+Each packet in the received batch is first subjected to lookup in the
+Exact Match Cache (EMC). Each EMC entry will point to a flow. If the
+EMC lookup is successful, the packet is moved from the rx batch to the
+per-flow buffer.
+
+Packets that did not match any EMC entry are rearranged in the rx batch
+at the beginning and are now subjected to a lookup in the megaflow cache.
+Packets that match a megaflow cache entry are *appended* to the per-flow
+buffer.
+
+Packets that do not match any megaflow entry are subjected to slow-path
+processing through the upcall mechanism. This cannot change the order of
+packets as by definition upcall processing is only done for packets
+without matching megaflow entry.
+
+The EMC entry match fields encompass all potentially significant header
+fields, typically more than specified in the associated flow's match
+criteria. Hence, multiple EMC entries can point to the same flow. Given
+that per-flow batching happens at each lookup stage, packets belonging
+to the same megaflow can get re-ordered because some packets match EMC
+entries while others do not.
+
+The following example can illustrate the issue better. Consider the
+following batch of packets (labelled P1 to P8) associated with a single
+TCP connection and associated with a single flow. Let us assume that
+packets with just the ACK bit set in TCP flags have been received in a
+prior batch also and a corresponding EMC entry exists.
+
+1. P1 (TCP Flag: ACK)
+2. P2 (TCP Flag: ACK)
+3. P3 (TCP Flag: ACK)
+4. P4 (TCP Flag: ACK, PSH)
+5. P5 (TCP Flag: ACK)
+6. P6 (TCP Flag: ACK)
+7. P7 (TCP Flag: ACK)
+8. P8 (TCP Flag: ACK)
+
+The megaflow classification criteria does not include TCP flags while
+the EMC match criteria does. Thus, all packets other than P4 match
+the existing EMC entry and are moved to the per-flow packet batch.
+Subsequently, packet P4 is moved to the same per-flow packet batch as
+a result of the megaflow lookup. Though the packets have all been
+correctly classified as being associated with the same flow, the
+packet order has not been preserved because of the per-flow batching
+performed during the EMC lookup stage. This packet re-ordering has
+performance implications for TCP applications.
+
+This patch preserves the packet ordering by performing the per-flow
+batching after both the EMC and megaflow lookups are complete. As an
+optimization, packets are flow-batched in emc processing till any
+packet in the batch has an EMC miss.
+
+A new flow map is maintained to keep the original order of packet
+along with flow information. Post fastpath processing, packets from
+flow map are *appended* to per-flow buffer.
+
+Signed-off-by: Vishal Deep Ajmera
+Co-authored-by: Venkatesan Pradeep
+Signed-off-by: Venkatesan Pradeep
+Signed-off-by: Ian Stokes
+---
+ lib/dpif-netdev.c | 125 +++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 106 insertions(+), 19 deletions(-)
+
+diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
+index 7f836bb18..807a46250 100644
+--- a/lib/dpif-netdev.c
++++ b/lib/dpif-netdev.c
+@@ -244,6 +244,13 @@ struct dpcls_rule {
+ /* 'flow' must be the last field, additional space is allocated here. */
+ };
+
++/* Data structure to keep packet order till fastpath processing. */
++struct dp_packet_flow_map {
++ struct dp_packet *packet;
++ struct dp_netdev_flow *flow;
++ uint16_t tcp_flags;
++};
++
+ static void dpcls_init(struct dpcls *);
+ static void dpcls_destroy(struct dpcls *);
+ static void dpcls_sort_subtable_vector(struct dpcls *);
+@@ -5765,6 +5772,19 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
+ packet_batch_per_flow_update(batch, pkt, tcp_flags);
+ }
+
++static inline void
++packet_enqueue_to_flow_map(struct dp_packet *packet,
++ struct dp_netdev_flow *flow,
++ uint16_t tcp_flags,
++ struct dp_packet_flow_map *flow_map,
++ size_t index)
++{
++ struct dp_packet_flow_map *map = &flow_map[index];
++ map->flow = flow;
++ map->packet = packet;
++ map->tcp_flags = tcp_flags;
++}
++
+ /* SMC lookup function for a batch of packets.
+ * By doing batching SMC lookup, we can use prefetch
+ * to hide memory access latency.
+@@ -5774,8 +5794,9 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
+ struct netdev_flow_key *keys,
+ struct netdev_flow_key **missed_keys,
+ struct dp_packet_batch *packets_,
+- struct packet_batch_per_flow batches[],
+- size_t *n_batches, const int cnt)
++ const int cnt,
++ struct dp_packet_flow_map *flow_map,
++ uint8_t *index_map)
+ {
+ int i;
+ struct dp_packet *packet;
+@@ -5783,6 +5804,8 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
+ struct dfc_cache *cache = &pmd->flow_cache;
+ struct smc_cache *smc_cache = &cache->smc_cache;
+ const struct cmap_node *flow_node;
++ int recv_idx;
++ uint16_t tcp_flags;
+
+ /* Prefetch buckets for all packets */
+ for (i = 0; i < cnt; i++) {
+@@ -5793,6 +5816,8 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
+ struct dp_netdev_flow *flow = NULL;
+ flow_node = smc_entry_get(pmd, keys[i].hash);
+ bool hit = false;
++ /* Get the original order of this packet in received batch. */
++ recv_idx = index_map[i];
+
+ if (OVS_LIKELY(flow_node != NULL)) {
+ CMAP_NODE_FOR_EACH (flow, node, flow_node) {
+@@ -5800,12 +5825,17 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
+ * number, we need to verify that the input ports match. */
+ if (OVS_LIKELY(dpcls_rule_matches_key(&flow->cr, &keys[i]) &&
+ flow->flow.in_port.odp_port == packet->md.in_port.odp_port)) {
++ tcp_flags = miniflow_get_tcp_flags(&keys[i].mf);
++
+ /* SMC hit and emc miss, we insert into EMC */
+ keys[i].len =
+ netdev_flow_key_size(miniflow_n_values(&keys[i].mf));
+ emc_probabilistic_insert(pmd, &keys[i], flow);
+- dp_netdev_queue_batches(packet, flow,
+- miniflow_get_tcp_flags(&keys[i].mf), batches, n_batches);
++ /* Add these packets into the flow map in the same order
++ * as received.
++ */
++ packet_enqueue_to_flow_map(packet, flow, tcp_flags,
++ flow_map, recv_idx);
+ n_smc_hit++;
+ hit = true;
+ break;
+@@ -5819,6 +5849,10 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
+ /* SMC missed. Group missed packets together at
+ * the beginning of the 'packets' array. */
+ dp_packet_batch_refill(packets_, packet, i);
++
++ /* Preserve the order of packet for flow batching. */
++ index_map[n_missed] = recv_idx;
++
+ /* Put missed keys to the pointer arrays return to the caller */
+ missed_keys[n_missed++] = &keys[i];
+ }
+@@ -5847,6 +5881,8 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
+ struct netdev_flow_key *keys,
+ struct netdev_flow_key **missed_keys,
+ struct packet_batch_per_flow batches[], size_t *n_batches,
++ struct dp_packet_flow_map *flow_map,
++ size_t *n_flows, uint8_t *index_map,
+ bool md_is_valid, odp_port_t port_no)
+ {
+ struct netdev_flow_key *key = &keys[0];
+@@ -5858,6 +5894,8 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
+ int i;
+ uint16_t tcp_flags;
+ bool smc_enable_db;
++ size_t map_cnt = 0;
++ bool batch_enable = true;
+
+ atomic_read_relaxed(&pmd->dp->smc_enable_db, &smc_enable_db);
+ atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
+@@ -5888,10 +5926,19 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
+ if ((*recirc_depth_get() == 0) &&
+ dp_packet_has_flow_mark(packet, &mark)) {
+ flow = mark_to_flow_find(pmd, mark);
+- if (flow) {
++ if (OVS_LIKELY(flow)) {
+ tcp_flags = parse_tcp_flags(packet);
+- dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
+- n_batches);
++ if (OVS_LIKELY(batch_enable)) {
++ dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
++ n_batches);
++ } else {
++ /* Flow batching should be performed only after fast-path
++ * processing is also completed for packets with emc miss
++ * or else it will result in reordering of packets with
++ * same datapath flows. */
++ packet_enqueue_to_flow_map(packet, flow, tcp_flags,
++ flow_map, map_cnt++);
++ }
+ continue;
+ }
+ }
+@@ -5914,13 +5961,27 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
+ }
+ if (OVS_LIKELY(flow)) {
+ tcp_flags = miniflow_get_tcp_flags(&key->mf);
+- dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
+- n_batches);
+ n_emc_hit++;
++ if (OVS_LIKELY(batch_enable)) {
++ dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
++ n_batches);
++ } else {
++ /* Flow batching should be performed only after fast-path
++ * processing is also completed for packets with emc miss
++ * or else it will result in reordering of packets with
++ * same datapath flows. */
++ packet_enqueue_to_flow_map(packet, flow, tcp_flags,
++ flow_map, map_cnt++);
++ }
+ } else {
+ /* Exact match cache missed. Group missed packets together at
+ * the beginning of the 'packets' array. */
+ dp_packet_batch_refill(packets_, packet, i);
++
++ /* Preserve the order of packet for flow batching. */
++ index_map[n_missed] = map_cnt;
++ flow_map[map_cnt++].flow = NULL;
++
+ /* 'key[n_missed]' contains the key of the current packet and it
+ * will be passed to SMC lookup. The next key should be extracted
+ * to 'keys[n_missed + 1]'.
+@@ -5928,8 +5989,13 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
+ * which will be returned to the caller for future processing. */
+ missed_keys[n_missed] = key;
+ key = &keys[++n_missed];
++
++ /* Skip batching for subsequent packets to avoid reordering. */
++ batch_enable = false;
+ }
+ }
++ /* Count of packets which are not flow batched. */
++ *n_flows = map_cnt;
+
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, n_emc_hit);
+
+@@ -5938,8 +6004,8 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
+ }
+
+ /* Packets miss EMC will do a batch lookup in SMC if enabled */
+- smc_lookup_batch(pmd, keys, missed_keys, packets_, batches,
+- n_batches, n_missed);
++ smc_lookup_batch(pmd, keys, missed_keys, packets_,
++ n_missed, flow_map, index_map);
+
+ return dp_packet_batch_size(packets_);
+ }
+@@ -6026,8 +6092,8 @@ static inline void
+ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets_,
+ struct netdev_flow_key **keys,
+- struct packet_batch_per_flow batches[],
+- size_t *n_batches,
++ struct dp_packet_flow_map *flow_map,
++ uint8_t *index_map,
+ odp_port_t in_port)
+ {
+ const size_t cnt = dp_packet_batch_size(packets_);
+@@ -6107,6 +6173,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
+
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
+ struct dp_netdev_flow *flow;
++ /* Get the original order of this packet in received batch. */
++ int recv_idx = index_map[i];
++ uint16_t tcp_flags;
+
+ if (OVS_UNLIKELY(!rules[i])) {
+ continue;
+@@ -6117,9 +6186,12 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
+ smc_insert(pmd, keys[i], hash);
+
+ emc_probabilistic_insert(pmd, keys[i], flow);
+- dp_netdev_queue_batches(packet, flow,
+- miniflow_get_tcp_flags(&keys[i]->mf),
+- batches, n_batches);
++ /* Add these packets into the flow map in the same order
++ * as received.
++ */
++ tcp_flags = miniflow_get_tcp_flags(&keys[i]->mf);
++ packet_enqueue_to_flow_map(packet, flow, tcp_flags,
++ flow_map, recv_idx);
+ }
+
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
+@@ -6152,18 +6224,34 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
+ struct netdev_flow_key *missed_keys[PKT_ARRAY_SIZE];
+ struct packet_batch_per_flow batches[PKT_ARRAY_SIZE];
+ size_t n_batches;
++ struct dp_packet_flow_map flow_map[PKT_ARRAY_SIZE];
++ uint8_t index_map[PKT_ARRAY_SIZE];
++ size_t n_flows, i;
++
+ odp_port_t in_port;
+
+ n_batches = 0;
+ dfc_processing(pmd, packets, keys, missed_keys, batches, &n_batches,
+- md_is_valid, port_no);
++ flow_map, &n_flows, index_map, md_is_valid, port_no);
++
+ if (!dp_packet_batch_is_empty(packets)) {
+ /* Get ingress port from first packet's metadata. */
+ in_port = packets->packets[0]->md.in_port.odp_port;
+ fast_path_processing(pmd, packets, missed_keys,
+- batches, &n_batches, in_port);
++ flow_map, index_map, in_port);
+ }
+
++ /* Batch rest of packets which are in flow map. */
++ for (i = 0; i < n_flows; i++) {
++ struct dp_packet_flow_map *map = &flow_map[i];
++
++ if (OVS_UNLIKELY(!map->flow)) {
++ continue;
++ }
++ dp_netdev_queue_batches(map->packet, map->flow, map->tcp_flags,
++ batches, &n_batches);
++ }
++
+ /* All the flow batches need to be reset before any call to
+ * packet_batch_per_flow_execute() as it could potentially trigger
+ * recirculation. When a packet matching flow ‘j’ happens to be
+@@ -6173,7 +6261,6 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
+ * already its own batches[k] still waiting to be served. So if its
+ * ‘batch’ member is not reset, the recirculated packet would be wrongly
+ * appended to batches[k] of the 1st call to dp_netdev_input__(). */
+- size_t i;
+ for (i = 0; i < n_batches; i++) {
+ batches[i].flow->batch = NULL;
+ }
+--
+2.17.1
+
diff --git a/0001-dpif-netlink-don-t-allocate-per-thread-netlink-socke.patch b/0001-dpif-netlink-don-t-allocate-per-thread-netlink-socke.patch
new file mode 100644
index 0000000..7c77843
--- /dev/null
+++ b/0001-dpif-netlink-don-t-allocate-per-thread-netlink-socke.patch
@@ -0,0 +1,669 @@
+From 4c91bc3bf8c6005db5795fe51632c1feedc4719e Mon Sep 17 00:00:00 2001
+From: Matteo Croce
+Date: Tue, 18 Sep 2018 14:56:37 +0200
+Subject: [PATCH v2] dpif-netlink: don't allocate per thread netlink sockets
+
+When using the kernel datapath, OVS allocates a pool of sockets to handle
+netlink events. The number of sockets is: ports * n-handler-threads, where
+n-handler-threads is user configurable and defaults to 3/4*number of cores.
+
+This is because vswitchd starts n-handler-threads threads, each one with a
+netlink socket for every port of the switch. Every thread then, starts
+listening on events on its set of sockets with epoll().
+
+On setups with lots of CPUs and ports, the number of sockets easily hits
+the process file descriptor limit, and ovs-vswitchd will exit with -EMFILE.
+
+Change the number of allocated sockets to just one per port by moving
+the socket array from a per handler structure to a per datapath one,
+and let all the handlers share the same sockets by using EPOLLEXCLUSIVE
+epoll flag which avoids duplicate events, on systems that support it.
+
+The patch was tested on a 56 core machine running Linux 4.18 and latest
+Open vSwitch. A bridge was created with 2000+ ports, some of them being
+veth interfaces with the peer outside the bridge. The latency of the upcall
+is measured by setting a single 'action=controller,local' OpenFlow rule to
+force all the packets going to the slow path and then to the local port.
+A tool[1] injects some packets to the veth outside the bridge, and measures
+the delay until the packet is captured on the local port. The rx timestamp
+is taken from the SO_TIMESTAMPNS attribute of the socket ancillary data, to
+avoid including the scheduler delay in the measured time.
+
+The first test measures the average latency for an upcall generated from
+a single port. To measure it 100k packets, one every msec, are sent to a
+single port and the latencies are measured.
+
+The second test is meant to check latency fairness among ports, namely if
+latency is equal between ports or if some ports have lower priority.
+The previous test is repeated for every port, the average of the average
+latencies and the standard deviation between averages is measured.
+
+The third test serves to measure responsiveness under load. Heavy traffic
+is sent through all ports, latency and packet loss is measured
+on a single idle port.
+
+The fourth test is all about fairness. Heavy traffic is injected in all
+ports but one, latency and packet loss is measured on the single idle port.
+
+This is the test setup:
+
+ # nproc
+ 56
+ # ovs-vsctl show |grep -c Port
+ 2223
+ # ovs-ofctl dump-flows ovs_upc_br
+ cookie=0x0, duration=4.827s, table=0, n_packets=0, n_bytes=0, actions=CONTROLLER:65535,LOCAL
+ # uname -a
+ Linux fc28 4.18.7-200.fc28.x86_64 #1 SMP Mon Sep 10 15:44:45 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
+
+And these are the results of the tests:
+
+ Stock OVS Patched
+ netlink sockets
+ in use by vswitchd
+ lsof -p $(pidof ovs-vswitchd) \
+ |grep -c GENERIC 91187 2227
+
+ Test 1
+ one port latency
+ min/avg/max/mdev (us) 2.7/6.6/238.7/1.8 1.6/6.8/160.6/1.7
+
+ Test 2
+ all port
+ avg latency/mdev (us) 6.51/0.97 6.86/0.17
+
+ Test 3
+ single port latency
+ under load
+ avg/mdev (us) 7.5/5.9 3.8/4.8
+ packet loss 95 % 62 %
+
+ Test 4
+ idle port latency
+ under load
+ min/avg/max/mdev (us) 0.8/1.5/210.5/0.9 1.0/2.1/344.5/1.2
+ packet loss 94 % 4 %
+
+CPU and RAM usage seems not to be affected, the resource usage of vswitchd
+idle with 2000+ ports is unchanged:
+
+ # ps u $(pidof ovs-vswitchd)
+ USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
+ openvsw+ 5430 54.3 0.3 4263964 510968 pts/1 RLl+ 16:20 0:50 ovs-vswitchd
+
+Additionally, to check if vswitchd is thread safe with this patch, the
+following test was run for circa 48 hours: on a 56 core machine, a
+bridge with kernel datapath is filled with 2200 dummy interfaces and 22
+veth, then 22 traffic generators are run in parallel piping traffic into
+the veths peers outside the bridge.
+To generate as many upcalls as possible, all packets were forced to the
+slowpath with an openflow rule like 'action=controller,local' and packet
+size was set to 64 byte. Also, to avoid overflowing the FDB early and
+slowing down the upcall processing, generated mac addresses were restricted
+to a small interval. vswitchd ran without problems for 48+ hours,
+obviously with all the handler threads with almost 99% CPU usage.
+
+[1] https://github.com/teknoraver/network-tools/blob/master/weed.c
+
+Signed-off-by: Matteo Croce
+---
+v1 -> v2:
+ - define EPOLLEXCLUSIVE on systems with older kernel headers
+ - explain the thread safety test in the commit message
+
+ lib/dpif-netlink.c | 311 ++++++++++++---------------------------------
+ 1 file changed, 82 insertions(+), 229 deletions(-)
+
+diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
+index e6d5a6ec5..bb565ffee 100644
+--- a/lib/dpif-netlink.c
++++ b/lib/dpif-netlink.c
+@@ -78,6 +78,10 @@ enum { MAX_PORTS = USHRT_MAX };
+ #define FLOW_DUMP_MAX_BATCH 50
+ #define OPERATE_MAX_OPS 50
+
++#ifndef EPOLLEXCLUSIVE
++#define EPOLLEXCLUSIVE (1u << 28)
++#endif
++
+ struct dpif_netlink_dp {
+ /* Generic Netlink header. */
+ uint8_t cmd;
+@@ -170,7 +174,6 @@ struct dpif_windows_vport_sock {
+ #endif
+
+ struct dpif_handler {
+- struct dpif_channel *channels;/* Array of channels for each handler. */
+ struct epoll_event *epoll_events;
+ int epoll_fd; /* epoll fd that includes channel socks. */
+ int n_events; /* Num events returned by epoll_wait(). */
+@@ -193,6 +196,7 @@ struct dpif_netlink {
+ struct fat_rwlock upcall_lock;
+ struct dpif_handler *handlers;
+ uint32_t n_handlers; /* Num of upcall handlers. */
++ struct dpif_channel *channels; /* Array of channels for each port. */
+ int uc_array_size; /* Size of 'handler->channels' and */
+ /* 'handler->epoll_events'. */
+
+@@ -331,43 +335,6 @@ open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
+ return 0;
+ }
+
+-/* Destroys the netlink sockets pointed by the elements in 'socksp'
+- * and frees the 'socksp'. */
+-static void
+-vport_del_socksp__(struct nl_sock **socksp, uint32_t n_socks)
+-{
+- size_t i;
+-
+- for (i = 0; i < n_socks; i++) {
+- nl_sock_destroy(socksp[i]);
+- }
+-
+- free(socksp);
+-}
+-
+-/* Creates an array of netlink sockets. Returns an array of the
+- * corresponding pointers. Records the error in 'error'. */
+-static struct nl_sock **
+-vport_create_socksp__(uint32_t n_socks, int *error)
+-{
+- struct nl_sock **socksp = xzalloc(n_socks * sizeof *socksp);
+- size_t i;
+-
+- for (i = 0; i < n_socks; i++) {
+- *error = nl_sock_create(NETLINK_GENERIC, &socksp[i]);
+- if (*error) {
+- goto error;
+- }
+- }
+-
+- return socksp;
+-
+-error:
+- vport_del_socksp__(socksp, n_socks);
+-
+- return NULL;
+-}
+-
+ #ifdef _WIN32
+ static void
+ vport_delete_sock_pool(struct dpif_handler *handler)
+@@ -422,129 +389,34 @@ error:
+ vport_delete_sock_pool(handler);
+ return error;
+ }
+-
+-/* Returns an array pointers to netlink sockets. The sockets are picked from a
+- * pool. Records the error in 'error'. */
+-static struct nl_sock **
+-vport_create_socksp_windows(struct dpif_netlink *dpif, int *error)
+- OVS_REQ_WRLOCK(dpif->upcall_lock)
+-{
+- uint32_t n_socks = dpif->n_handlers;
+- struct nl_sock **socksp;
+- size_t i;
+-
+- ovs_assert(n_socks <= 1);
+- socksp = xzalloc(n_socks * sizeof *socksp);
+-
+- /* Pick netlink sockets to use in a round-robin fashion from each
+- * handler's pool of sockets. */
+- for (i = 0; i < n_socks; i++) {
+- struct dpif_handler *handler = &dpif->handlers[i];
+- struct dpif_windows_vport_sock *sock_pool = handler->vport_sock_pool;
+- size_t index = handler->last_used_pool_idx;
+-
+- /* A pool of sockets is allocated when the handler is initialized. */
+- if (sock_pool == NULL) {
+- free(socksp);
+- *error = EINVAL;
+- return NULL;
+- }
+-
+- ovs_assert(index < VPORT_SOCK_POOL_SIZE);
+- socksp[i] = sock_pool[index].nl_sock;
+- socksp[i] = sock_pool[index].nl_sock;
+- ovs_assert(socksp[i]);
+- index = (index == VPORT_SOCK_POOL_SIZE - 1) ? 0 : index + 1;
+- handler->last_used_pool_idx = index;
+- }
+-
+- return socksp;
+-}
+-
+-static void
+-vport_del_socksp_windows(struct dpif_netlink *dpif, struct nl_sock **socksp)
+-{
+- free(socksp);
+-}
+ #endif /* _WIN32 */
+
+-static struct nl_sock **
+-vport_create_socksp(struct dpif_netlink *dpif, int *error)
+-{
+-#ifdef _WIN32
+- return vport_create_socksp_windows(dpif, error);
+-#else
+- return vport_create_socksp__(dpif->n_handlers, error);
+-#endif
+-}
+-
+-static void
+-vport_del_socksp(struct dpif_netlink *dpif, struct nl_sock **socksp)
+-{
+-#ifdef _WIN32
+- vport_del_socksp_windows(dpif, socksp);
+-#else
+- vport_del_socksp__(socksp, dpif->n_handlers);
+-#endif
+-}
+-
+-/* Given the array of pointers to netlink sockets 'socksp', returns
+- * the array of corresponding pids. If the 'socksp' is NULL, returns
+- * a single-element array of value 0. */
+-static uint32_t *
+-vport_socksp_to_pids(struct nl_sock **socksp, uint32_t n_socks)
+-{
+- uint32_t *pids;
+-
+- if (!socksp) {
+- pids = xzalloc(sizeof *pids);
+- } else {
+- size_t i;
+-
+- pids = xzalloc(n_socks * sizeof *pids);
+- for (i = 0; i < n_socks; i++) {
+- pids[i] = nl_sock_pid(socksp[i]);
+- }
+- }
+-
+- return pids;
+-}
+-
+-/* Given the port number 'port_idx', extracts the pids of netlink sockets
+- * associated to the port and assigns it to 'upcall_pids'. */
++/* Given the port number 'port_idx', extracts the pid of netlink socket
++ * associated to the port and assigns it to 'upcall_pid'. */
+ static bool
+-vport_get_pids(struct dpif_netlink *dpif, uint32_t port_idx,
+- uint32_t **upcall_pids)
++vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
++ uint32_t *upcall_pid)
+ {
+- uint32_t *pids;
+- size_t i;
+-
+ /* Since the nl_sock can only be assigned in either all
+- * or none "dpif->handlers" channels, the following check
++ * or none "dpif" channels, the following check
+ * would suffice. */
+- if (!dpif->handlers[0].channels[port_idx].sock) {
++ if (!dpif->channels[port_idx].sock) {
+ return false;
+ }
+ ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
+
+- pids = xzalloc(dpif->n_handlers * sizeof *pids);
+-
+- for (i = 0; i < dpif->n_handlers; i++) {
+- pids[i] = nl_sock_pid(dpif->handlers[i].channels[port_idx].sock);
+- }
+-
+- *upcall_pids = pids;
++ *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
+
+ return true;
+ }
+
+ static int
+-vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
+- struct nl_sock **socksp)
++vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
++ struct nl_sock *socksp)
+ {
+ struct epoll_event event;
+ uint32_t port_idx = odp_to_u32(port_no);
+- size_t i, j;
++ size_t i;
+ int error;
+
+ if (dpif->handlers == NULL) {
+@@ -553,7 +425,7 @@ vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
+
+ /* We assume that the datapath densely chooses port numbers, which can
+ * therefore be used as an index into 'channels' and 'epoll_events' of
+- * 'dpif->handler'. */
++ * 'dpif'. */
+ if (port_idx >= dpif->uc_array_size) {
+ uint32_t new_size = port_idx + 1;
+
+@@ -563,15 +435,15 @@ vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
+ return EFBIG;
+ }
+
+- for (i = 0; i < dpif->n_handlers; i++) {
+- struct dpif_handler *handler = &dpif->handlers[i];
++ dpif->channels = xrealloc(dpif->channels,
++ new_size * sizeof *dpif->channels);
+
+- handler->channels = xrealloc(handler->channels,
+- new_size * sizeof *handler->channels);
++ for (i = dpif->uc_array_size; i < new_size; i++) {
++ dpif->channels[i].sock = NULL;
++ }
+
+- for (j = dpif->uc_array_size; j < new_size; j++) {
+- handler->channels[j].sock = NULL;
+- }
++ for (i = 0; i < dpif->n_handlers; i++) {
++ struct dpif_handler *handler = &dpif->handlers[i];
+
+ handler->epoll_events = xrealloc(handler->epoll_events,
+ new_size * sizeof *handler->epoll_events);
+@@ -581,33 +453,33 @@ vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
+ }
+
+ memset(&event, 0, sizeof event);
+- event.events = EPOLLIN;
++ event.events = EPOLLIN | EPOLLEXCLUSIVE;
+ event.data.u32 = port_idx;
+
+ for (i = 0; i < dpif->n_handlers; i++) {
+ struct dpif_handler *handler = &dpif->handlers[i];
+
+ #ifndef _WIN32
+- if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp[i]),
++ if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp),
+ &event) < 0) {
+ error = errno;
+ goto error;
+ }
+ #endif
+- dpif->handlers[i].channels[port_idx].sock = socksp[i];
+- dpif->handlers[i].channels[port_idx].last_poll = LLONG_MIN;
+ }
++ dpif->channels[port_idx].sock = socksp;
++ dpif->channels[port_idx].last_poll = LLONG_MIN;
+
+ return 0;
+
+ error:
+- for (j = 0; j < i; j++) {
+ #ifndef _WIN32
+- epoll_ctl(dpif->handlers[j].epoll_fd, EPOLL_CTL_DEL,
+- nl_sock_fd(socksp[j]), NULL);
+-#endif
+- dpif->handlers[j].channels[port_idx].sock = NULL;
++ while (i--) {
++ epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
++ nl_sock_fd(socksp), NULL);
+ }
++#endif
++ dpif->channels[port_idx].sock = NULL;
+
+ return error;
+ }
+@@ -618,14 +490,8 @@ vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
+ uint32_t port_idx = odp_to_u32(port_no);
+ size_t i;
+
+- if (!dpif->handlers || port_idx >= dpif->uc_array_size) {
+- return;
+- }
+-
+- /* Since the sock can only be assigned in either all or none
+- * of "dpif->handlers" channels, the following check would
+- * suffice. */
+- if (!dpif->handlers[0].channels[port_idx].sock) {
++ if (!dpif->handlers || port_idx >= dpif->uc_array_size
++ || !dpif->channels[port_idx].sock) {
+ return;
+ }
+
+@@ -633,12 +499,14 @@ vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
+ struct dpif_handler *handler = &dpif->handlers[i];
+ #ifndef _WIN32
+ epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
+- nl_sock_fd(handler->channels[port_idx].sock), NULL);
+- nl_sock_destroy(handler->channels[port_idx].sock);
++ nl_sock_fd(dpif->channels[port_idx].sock), NULL);
+ #endif
+- handler->channels[port_idx].sock = NULL;
+ handler->event_offset = handler->n_events = 0;
+ }
++#ifndef _WIN32
++ nl_sock_destroy(dpif->channels[port_idx].sock);
++#endif
++ dpif->channels[port_idx].sock = NULL;
+ }
+
+ static void
+@@ -655,10 +523,7 @@ destroy_all_channels(struct dpif_netlink *dpif)
+ struct dpif_netlink_vport vport_request;
+ uint32_t upcall_pids = 0;
+
+- /* Since the sock can only be assigned in either all or none
+- * of "dpif->handlers" channels, the following check would
+- * suffice. */
+- if (!dpif->handlers[0].channels[i].sock) {
++ if (!dpif->channels[i].sock) {
+ continue;
+ }
+
+@@ -679,11 +544,11 @@ destroy_all_channels(struct dpif_netlink *dpif)
+
+ dpif_netlink_handler_uninit(handler);
+ free(handler->epoll_events);
+- free(handler->channels);
+ }
+-
++ free(dpif->channels);
+ free(dpif->handlers);
+ dpif->handlers = NULL;
++ dpif->channels = NULL;
+ dpif->n_handlers = 0;
+ dpif->uc_array_size = 0;
+ }
+@@ -846,13 +711,12 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
+ {
+ struct dpif_netlink_vport request, reply;
+ struct ofpbuf *buf;
+- struct nl_sock **socksp = NULL;
+- uint32_t *upcall_pids;
++ struct nl_sock *socksp = NULL;
++ uint32_t upcall_pids;
+ int error = 0;
+
+ if (dpif->handlers) {
+- socksp = vport_create_socksp(dpif, &error);
+- if (!socksp) {
++ if (nl_sock_create(NETLINK_GENERIC, &socksp)) {
+ return error;
+ }
+ }
+@@ -864,9 +728,9 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
+ request.name = name;
+
+ request.port_no = *port_nop;
+- upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
+- request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
+- request.upcall_pids = upcall_pids;
++ upcall_pids = nl_sock_pid(socksp);
++ request.n_upcall_pids = 1;
++ request.upcall_pids = &upcall_pids;
+
+ if (options) {
+ request.options = options->data;
+@@ -882,31 +746,27 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
+ dpif_name(&dpif->dpif), *port_nop);
+ }
+
+- vport_del_socksp(dpif, socksp);
++ nl_sock_destroy(socksp);
+ goto exit;
+ }
+
+- if (socksp) {
+- error = vport_add_channels(dpif, *port_nop, socksp);
+- if (error) {
+- VLOG_INFO("%s: could not add channel for port %s",
+- dpif_name(&dpif->dpif), name);
+-
+- /* Delete the port. */
+- dpif_netlink_vport_init(&request);
+- request.cmd = OVS_VPORT_CMD_DEL;
+- request.dp_ifindex = dpif->dp_ifindex;
+- request.port_no = *port_nop;
+- dpif_netlink_vport_transact(&request, NULL, NULL);
+- vport_del_socksp(dpif, socksp);
+- goto exit;
+- }
++ error = vport_add_channel(dpif, *port_nop, socksp);
++ if (error) {
++ VLOG_INFO("%s: could not add channel for port %s",
++ dpif_name(&dpif->dpif), name);
++
++ /* Delete the port. */
++ dpif_netlink_vport_init(&request);
++ request.cmd = OVS_VPORT_CMD_DEL;
++ request.dp_ifindex = dpif->dp_ifindex;
++ request.port_no = *port_nop;
++ dpif_netlink_vport_transact(&request, NULL, NULL);
++ nl_sock_destroy(socksp);
++ goto exit;
+ }
+- free(socksp);
+
+ exit:
+ ofpbuf_delete(buf);
+- free(upcall_pids);
+
+ return error;
+ }
+@@ -1131,7 +991,7 @@ dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
+
+ static uint32_t
+ dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
+- odp_port_t port_no, uint32_t hash)
++ odp_port_t port_no, uint32_t hash OVS_UNUSED)
+ OVS_REQ_RDLOCK(dpif->upcall_lock)
+ {
+ uint32_t port_idx = odp_to_u32(port_no);
+@@ -1141,14 +1001,13 @@ dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
+ /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
+ * channel, since it is not heavily loaded. */
+ uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
+- struct dpif_handler *h = &dpif->handlers[hash % dpif->n_handlers];
+
+ /* Needs to check in case the socket pointer is changed in between
+ * the holding of upcall_lock. A known case happens when the main
+ * thread deletes the vport while the handler thread is handling
+ * the upcall from that port. */
+- if (h->channels[idx].sock) {
+- pid = nl_sock_pid(h->channels[idx].sock);
++ if (dpif->channels[idx].sock) {
++ pid = nl_sock_pid(dpif->channels[idx].sock);
+ }
+ }
+
+@@ -2382,42 +2241,40 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
+ dpif_netlink_port_dump_start__(dpif, &dump);
+ while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
+ uint32_t port_no = odp_to_u32(vport.port_no);
+- uint32_t *upcall_pids = NULL;
++ uint32_t upcall_pid;
+ int error;
+
+ if (port_no >= dpif->uc_array_size
+- || !vport_get_pids(dpif, port_no, &upcall_pids)) {
+- struct nl_sock **socksp = vport_create_socksp(dpif, &error);
++ || !vport_get_pid(dpif, port_no, &upcall_pid)) {
++ struct nl_sock *socksp;
+
+- if (!socksp) {
++ if (nl_sock_create(NETLINK_GENERIC, &socksp)) {
+ goto error;
+ }
+
+- error = vport_add_channels(dpif, vport.port_no, socksp);
++ error = vport_add_channel(dpif, vport.port_no, socksp);
+ if (error) {
+ VLOG_INFO("%s: could not add channels for port %s",
+ dpif_name(&dpif->dpif), vport.name);
+- vport_del_socksp(dpif, socksp);
++ nl_sock_destroy(socksp);
+ retval = error;
+ goto error;
+ }
+- upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
+- free(socksp);
++ upcall_pid = nl_sock_pid(socksp);
+ }
+
+ /* Configure the vport to deliver misses to 'sock'. */
+ if (vport.upcall_pids[0] == 0
+- || vport.n_upcall_pids != dpif->n_handlers
+- || memcmp(upcall_pids, vport.upcall_pids, n_handlers * sizeof
+- *upcall_pids)) {
++ || vport.n_upcall_pids != 1
++ || upcall_pid != vport.upcall_pids[0]) {
+ struct dpif_netlink_vport vport_request;
+
+ dpif_netlink_vport_init(&vport_request);
+ vport_request.cmd = OVS_VPORT_CMD_SET;
+ vport_request.dp_ifindex = dpif->dp_ifindex;
+ vport_request.port_no = vport.port_no;
+- vport_request.n_upcall_pids = dpif->n_handlers;
+- vport_request.upcall_pids = upcall_pids;
++ vport_request.n_upcall_pids = 1;
++ vport_request.upcall_pids = &upcall_pid;
+ error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
+ if (error) {
+ VLOG_WARN_RL(&error_rl,
+@@ -2438,11 +2295,9 @@ dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
+ if (port_no < keep_channels_nbits) {
+ bitmap_set1(keep_channels, port_no);
+ }
+- free(upcall_pids);
+ continue;
+
+ error:
+- free(upcall_pids);
+ vport_del_channels(dpif, vport.port_no);
+ }
+ nl_dump_done(&dump);
+@@ -2701,7 +2556,7 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
+
+ while (handler->event_offset < handler->n_events) {
+ int idx = handler->epoll_events[handler->event_offset].data.u32;
+- struct dpif_channel *ch = &dpif->handlers[handler_id].channels[idx];
++ struct dpif_channel *ch = &dpif->channels[idx];
+
+ handler->event_offset++;
+
+@@ -2803,16 +2658,14 @@ dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
+ OVS_REQ_WRLOCK(dpif->upcall_lock)
+ {
+ if (dpif->handlers) {
+- size_t i, j;
++ size_t i;
+
++ if (!dpif->channels[0].sock) {
++ return;
++ }
+ for (i = 0; i < dpif->uc_array_size; i++ ) {
+- if (!dpif->handlers[0].channels[i].sock) {
+- continue;
+- }
+
+- for (j = 0; j < dpif->n_handlers; j++) {
+- nl_sock_drain(dpif->handlers[j].channels[i].sock);
+- }
++ nl_sock_drain(dpif->channels[i].sock);
+ }
+ }
+ }
+--
+2.17.1
+
diff --git a/0001-lib-netdev-tc-offloads-Fix-frag-first-later-translat.patch b/0001-lib-netdev-tc-offloads-Fix-frag-first-later-translat.patch
deleted file mode 100644
index fa1a4a3..0000000
--- a/0001-lib-netdev-tc-offloads-Fix-frag-first-later-translat.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 45a60c21fc17ba31199fa800cdce92cc1f17f06b Mon Sep 17 00:00:00 2001
-From: Roi Dayan
-Date: Sun, 25 Mar 2018 12:11:48 +0300
-Subject: [PATCH 1/2] lib/netdev-tc-offloads: Fix frag first/later translation
-
-Fragment mask (any and later) always exists so we need to test
-for FLOW_NW_FRAG_LATER only if the state is FLOW_NW_FRAG_ANY.
-Before this fix we could pass frag no and first at the same time to TC
-which is also not tested there for bad frag state.
-This fix make sure we only pass frag first/later if is frag.
-
-Fixes: 83e866067ea6 ("netdev-tc-offloads: Add support for IP fragmentation")
-Signed-off-by: Roi Dayan
-Reviewed-by: Paul Blakey
-Signed-off-by: Simon Horman
----
- lib/netdev-tc-offloads.c | 19 +++++++++++++------
- 1 file changed, 13 insertions(+), 6 deletions(-)
-
-diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
-index f22415ee1..6db76801f 100644
---- a/lib/netdev-tc-offloads.c
-+++ b/lib/netdev-tc-offloads.c
-@@ -948,14 +948,21 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
- flower.key.ip_ttl = key->nw_ttl;
- flower.mask.ip_ttl = mask->nw_ttl;
-
-- if (mask->nw_frag) {
-- if (key->nw_frag & FLOW_NW_FRAG_ANY)
-+ if (mask->nw_frag & FLOW_NW_FRAG_ANY) {
-+ flower.mask.flags |= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT;
-+
-+ if (key->nw_frag & FLOW_NW_FRAG_ANY) {
- flower.key.flags |= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT;
-- if (!(key->nw_frag & FLOW_NW_FRAG_LATER))
-- flower.key.flags |= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST;
-
-- flower.mask.flags |= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT;
-- flower.mask.flags |= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST;
-+ if (mask->nw_frag & FLOW_NW_FRAG_LATER) {
-+ flower.mask.flags |= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST;
-+
-+ if (!(key->nw_frag & FLOW_NW_FRAG_LATER)) {
-+ flower.key.flags |= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST;
-+ }
-+ }
-+ }
-+
- mask->nw_frag = 0;
- }
-
---
-2.17.0
-
diff --git a/0001-ofproto-dpif-Delete-system-tunnel-interface-when-rem.patch b/0001-ofproto-dpif-Delete-system-tunnel-interface-when-rem.patch
deleted file mode 100644
index 0daca36..0000000
--- a/0001-ofproto-dpif-Delete-system-tunnel-interface-when-rem.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From f6193c08c47bfb4bc2b10114bcdea7ae6581b144 Mon Sep 17 00:00:00 2001
-From: "juyan@redhat.com"
-Date: Wed, 25 Oct 2017 11:41:27 +0800
-Subject: [PATCH] ofproto-dpif: Delete system tunnel interface when remove ovs
- bridge
-
-When a user adds the first tunnel of a given type (e.g. the first VXLAN
-tunnel) to an OVS bridge, OVS adds a vport of the same type to the
-kernel datapath that backs the bridge. There is the corresponding
-expectation that, when the last tunnel of that type is removed from the
-OVS bridges, OVS would remove the vport that represents it from the
-backing kernel datapath, but OVS was not doing that. This commit fixes
-the problem.
-
-There is not any major concern about the lingering tunnel interface, but
-it's cleaner to delete it.
-
-Fixes: 921c370a9df5 ("dpif-netlink: Probe for out-of-tree tunnels, decides used interface")
-Signed-off-by: JunhanYan
-Signed-off-by: Ben Pfaff
----
- ofproto/ofproto-dpif.c | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
-index 3365d4185..1a648c33f 100644
---- a/ofproto/ofproto-dpif.c
-+++ b/ofproto/ofproto-dpif.c
-@@ -661,6 +661,8 @@ dealloc(struct ofproto *ofproto_)
- static void
- close_dpif_backer(struct dpif_backer *backer, bool del)
- {
-+ struct simap_node *node;
-+
- ovs_assert(backer->refcount > 0);
-
- if (--backer->refcount) {
-@@ -669,6 +671,9 @@ close_dpif_backer(struct dpif_backer *backer, bool del)
-
- udpif_destroy(backer->udpif);
-
-+ SIMAP_FOR_EACH (node, &backer->tnl_backers) {
-+ dpif_port_del(backer->dpif, u32_to_odp(node->data), false);
-+ }
- simap_destroy(&backer->tnl_backers);
- ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
- hmap_destroy(&backer->odp_to_ofport_map);
---
-2.14.3
-
diff --git a/0001-ovs-save-Don-t-always-include-the-default-flow-durin.patch b/0001-ovs-save-Don-t-always-include-the-default-flow-durin.patch
new file mode 100644
index 0000000..24f31cf
--- /dev/null
+++ b/0001-ovs-save-Don-t-always-include-the-default-flow-durin.patch
@@ -0,0 +1,38 @@
+From 949758946767ff79b4c3eb5eca755c6cf21643e3 Mon Sep 17 00:00:00 2001
+From: Timothy Redaelli
+Date: Sun, 9 Sep 2018 14:20:02 +0200
+Subject: [PATCH] ovs-save: Don't always include the default flow during
+ restore
+
+Currently the default flow (actions=NORMAL) is present in the flow table after
+the flow table is restored also when the default flow is removed.
+
+This commit changes the behaviour of the "ovs-save save-flows" command to use
+"replace-flows" instead of "add-flows" to restore the flows. This is needed in
+order to always have the new flow table as it was before restoring it.
+
+Reported-by: Flavio Leitner
+Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1626096
+Signed-off-by: Timothy Redaelli
+Acked-by: Flavio Leitner
+Signed-off-by: Gurucharan Shetty
+---
+ utilities/ovs-save | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/utilities/ovs-save b/utilities/ovs-save
+index ea8fb6a45..2294583d6 100755
+--- a/utilities/ovs-save
++++ b/utilities/ovs-save
+@@ -121,7 +121,7 @@ save_flows () {
+ cnt++;printf "{class="$1",type="$2",len="$3"}->"$4}'
+ echo "'"
+
+- printf "%s" "ovs-ofctl -O $ofp_version add-flows ${bridge} " \
++ printf "%s" "ovs-ofctl -O $ofp_version replace-flows ${bridge} " \
+ "\"$workdir/$bridge.flows.dump\""
+
+ # If possible, use OpenFlow 1.4 atomic bundle transaction to add flows
+--
+2.17.1
+
diff --git a/0001-stream-ssl-Don-t-enable-new-TLS-versions-by-default.patch b/0001-stream-ssl-Don-t-enable-new-TLS-versions-by-default.patch
deleted file mode 100644
index 77c3cce..0000000
--- a/0001-stream-ssl-Don-t-enable-new-TLS-versions-by-default.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-From a6869520061696cb115afb7de0021556068d1134 Mon Sep 17 00:00:00 2001
-From: Timothy Redaelli
-Date: Fri, 27 Jul 2018 16:29:40 +0200
-Subject: [PATCH 1/2] stream-ssl: Don't enable new TLS versions by default
-
-Currently protocol_flags is populated by the list of SSL and TLS
-protocols by hand. This means that when a new TLS version is added to
-openssl (in this case TLS v1.3 is added to openssl 1.1.1 beta)
-ovsdb-server automatically enable support to it with the default ciphers.
-This can be a security problem (since other ciphers can be enabled) and it
-also makes a test (SSL db: implementation) to fail.
-
-This commit changes the 'protocol_flags' to use the list of all protocol
-flags as provided by openssl library (SSL_OP_NO_SSL_MASK) so there is no
-need to keep the list updated by hand.
-
-Signed-off-by: Timothy Redaelli
-Signed-off-by: Ben Pfaff
-(cherry picked from commit ab16d2c2871b82d1f71c652657791acd9ca51161)
----
- lib/stream-ssl.c | 3 +--
- 1 file changed, 1 insertion(+), 2 deletions(-)
-
-diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c
-index 278468083..95b0f106e 100644
---- a/lib/stream-ssl.c
-+++ b/lib/stream-ssl.c
-@@ -1186,8 +1186,7 @@ stream_ssl_set_protocols(const char *arg)
- }
-
- /* Start with all the flags off and turn them on as requested. */
-- long protocol_flags = SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1;
-- protocol_flags |= SSL_OP_NO_TLSv1_1 | SSL_OP_NO_TLSv1_2;
-+ long protocol_flags = SSL_OP_NO_SSL_MASK;
-
- char *s = xstrdup(arg);
- char *save_ptr = NULL;
---
-2.17.1
-
diff --git a/0002-lib-tc-Fix-sparse-warnings.patch b/0002-lib-tc-Fix-sparse-warnings.patch
deleted file mode 100644
index 4a5ccb8..0000000
--- a/0002-lib-tc-Fix-sparse-warnings.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 7e0f69b581705064e2fd767426c5227150a31e6f Mon Sep 17 00:00:00 2001
-From: Ian Stokes
-Date: Wed, 21 Mar 2018 20:11:22 +0000
-Subject: [PATCH 2/2] lib/tc: Fix sparse warnings.
-
-"sparse" complains with the warning 'incorrect type in argument 1
-(different base types)' in function nl_parse_flower_ip when parsing a key
-flag and in function nl_msg_put_flower_options when writing the key
-flag. Fix this by using network byte order when reading and writing key
-flags to netlink messages.
-
-Fixes: 83e86606 ("netdev-tc-offloads: Add support for IP fragmentation")
-Signed-off-by: Ian Stokes
-Signed-off-by: Ben Pfaff
-Acked-by: Roi Dayan
----
- lib/tc.c | 9 +++++----
- 1 file changed, 5 insertions(+), 4 deletions(-)
-
-diff --git a/lib/tc.c b/lib/tc.c
-index c446d8407..6daa44710 100644
---- a/lib/tc.c
-+++ b/lib/tc.c
-@@ -377,8 +377,9 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) {
- }
-
- if (attrs[TCA_FLOWER_KEY_FLAGS_MASK]) {
-- key->flags = ntohl(nl_attr_get_u32(attrs[TCA_FLOWER_KEY_FLAGS]));
-- mask->flags = ntohl(nl_attr_get_u32(attrs[TCA_FLOWER_KEY_FLAGS_MASK]));
-+ key->flags = ntohl(nl_attr_get_be32(attrs[TCA_FLOWER_KEY_FLAGS]));
-+ mask->flags =
-+ ntohl(nl_attr_get_be32(attrs[TCA_FLOWER_KEY_FLAGS_MASK]));
- }
-
- if (attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]) {
-@@ -1503,9 +1504,9 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
- }
-
- if (flower->mask.flags) {
-- nl_msg_put_u32(request, TCA_FLOWER_KEY_FLAGS,
-+ nl_msg_put_be32(request, TCA_FLOWER_KEY_FLAGS,
- htonl(flower->key.flags));
-- nl_msg_put_u32(request, TCA_FLOWER_KEY_FLAGS_MASK,
-+ nl_msg_put_be32(request, TCA_FLOWER_KEY_FLAGS_MASK,
- htonl(flower->mask.flags));
- }
-
---
-2.17.0
-
diff --git a/0002-netdev-tc-offloads-Add-support-for-IP-fragmentation.patch b/0002-netdev-tc-offloads-Add-support-for-IP-fragmentation.patch
deleted file mode 100644
index 19de05a..0000000
--- a/0002-netdev-tc-offloads-Add-support-for-IP-fragmentation.patch
+++ /dev/null
@@ -1,185 +0,0 @@
-From a99f73a22e6303555af3f93535d03c7537da5a9a Mon Sep 17 00:00:00 2001
-From: Roi Dayan
-Date: Mon, 12 Mar 2018 14:58:47 +0200
-Subject: [PATCH 2/2] netdev-tc-offloads: Add support for IP fragmentation
-
-Add support for frag no, first and later.
-
-Signed-off-by: Roi Dayan
-Reviewed-by: Shahar Klein
-Reviewed-by: Paul Blakey
-Signed-off-by: Simon Horman
----
- acinclude.m4 | 6 +++---
- include/linux/pkt_cls.h | 5 +++--
- lib/netdev-tc-offloads.c | 38 ++++++++++++++++++++++++++++++++------
- lib/tc.c | 14 ++++++++++++++
- lib/tc.h | 1 +
- 5 files changed, 53 insertions(+), 11 deletions(-)
-
-diff --git a/acinclude.m4 b/acinclude.m4
-index 176b93e8e..6a02f6527 100644
---- a/acinclude.m4
-+++ b/acinclude.m4
-@@ -178,10 +178,10 @@ dnl Configure Linux tc compat.
- AC_DEFUN([OVS_CHECK_LINUX_TC], [
- AC_COMPILE_IFELSE([
- AC_LANG_PROGRAM([#include ], [
-- int x = TCA_FLOWER_KEY_IP_TTL_MASK;
-+ int x = TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST;
- ])],
-- [AC_DEFINE([HAVE_TCA_FLOWER_KEY_IP_TTL_MASK], [1],
-- [Define to 1 if TCA_FLOWER_KEY_IP_TTL_MASK is avaiable.])])
-+ [AC_DEFINE([HAVE_TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST], [1],
-+ [Define to 1 if TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST is avaiable.])])
-
- AC_COMPILE_IFELSE([
- AC_LANG_PROGRAM([#include ], [
-diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
-index f7bc7ea70..60976f3f7 100644
---- a/include/linux/pkt_cls.h
-+++ b/include/linux/pkt_cls.h
-@@ -1,7 +1,7 @@
- #ifndef __LINUX_PKT_CLS_WRAPPER_H
- #define __LINUX_PKT_CLS_WRAPPER_H 1
-
--#if defined(__KERNEL__) || defined(HAVE_TCA_FLOWER_KEY_IP_TTL_MASK)
-+#if defined(__KERNEL__) || defined(HAVE_TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST)
- #include_next
- #else
-
-@@ -201,8 +201,9 @@ enum {
-
- enum {
- TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
-+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
- };
-
--#endif /* __KERNEL__ || !HAVE_TCA_FLOWER_KEY_IP_TTL_MASK */
-+#endif /* __KERNEL__ || !HAVE_TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST */
-
- #endif /* __LINUX_PKT_CLS_WRAPPER_H */
-diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
-index 9364d94f0..f22415ee1 100644
---- a/lib/netdev-tc-offloads.c
-+++ b/lib/netdev-tc-offloads.c
-@@ -428,6 +428,27 @@ parse_tc_flower_to_match(struct tc_flower *flower,
-
- match_set_nw_ttl_masked(match, key->ip_ttl, mask->ip_ttl);
-
-+ if (mask->flags) {
-+ uint8_t flags = 0;
-+ uint8_t flags_mask = 0;
-+
-+ if (mask->flags & TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT) {
-+ if (key->flags & TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT) {
-+ flags |= FLOW_NW_FRAG_ANY;
-+ }
-+ flags_mask |= FLOW_NW_FRAG_ANY;
-+ }
-+
-+ if (mask->flags & TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST) {
-+ if (!(key->flags & TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST)) {
-+ flags |= FLOW_NW_FRAG_LATER;
-+ }
-+ flags_mask |= FLOW_NW_FRAG_LATER;
-+ }
-+
-+ match_set_nw_frag_masked(match, flags, flags_mask);
-+ }
-+
- match_set_nw_src_masked(match, key->ipv4.ipv4_src, mask->ipv4.ipv4_src);
- match_set_nw_dst_masked(match, key->ipv4.ipv4_dst, mask->ipv4.ipv4_dst);
-
-@@ -780,11 +801,6 @@ test_key_and_mask(struct match *match)
- return EOPNOTSUPP;
- }
-
-- if (mask->nw_frag) {
-- VLOG_DBG_RL(&rl, "offloading attribute nw_frag isn't supported");
-- return EOPNOTSUPP;
-- }
--
- for (int i = 0; i < FLOW_MAX_MPLS_LABELS; i++) {
- if (mask->mpls_lse[i]) {
- VLOG_DBG_RL(&rl, "offloading attribute mpls_lse isn't supported");
-@@ -932,6 +948,17 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
- flower.key.ip_ttl = key->nw_ttl;
- flower.mask.ip_ttl = mask->nw_ttl;
-
-+ if (mask->nw_frag) {
-+ if (key->nw_frag & FLOW_NW_FRAG_ANY)
-+ flower.key.flags |= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT;
-+ if (!(key->nw_frag & FLOW_NW_FRAG_LATER))
-+ flower.key.flags |= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST;
-+
-+ flower.mask.flags |= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT;
-+ flower.mask.flags |= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST;
-+ mask->nw_frag = 0;
-+ }
-+
- if (key->nw_proto == IPPROTO_TCP) {
- flower.key.tcp_dst = key->tp_dst;
- flower.mask.tcp_dst = mask->tp_dst;
-@@ -958,7 +985,6 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
- mask->tp_dst = 0;
- }
-
-- mask->nw_frag = 0;
- mask->nw_tos = 0;
- mask->nw_proto = 0;
- mask->nw_ttl = 0;
-diff --git a/lib/tc.c b/lib/tc.c
-index b49bbe89b..c446d8407 100644
---- a/lib/tc.c
-+++ b/lib/tc.c
-@@ -281,6 +281,8 @@ static const struct nl_policy tca_flower_policy[] = {
- .optional = true, },
- [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NL_A_U16,
- .optional = true, },
-+ [TCA_FLOWER_KEY_FLAGS] = { .type = NL_A_BE32, .optional = true, },
-+ [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NL_A_BE32, .optional = true, },
- [TCA_FLOWER_KEY_IP_TTL] = { .type = NL_A_U8,
- .optional = true, },
- [TCA_FLOWER_KEY_IP_TTL_MASK] = { .type = NL_A_U8,
-@@ -374,6 +376,11 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) {
- mask->ip_proto = UINT8_MAX;
- }
-
-+ if (attrs[TCA_FLOWER_KEY_FLAGS_MASK]) {
-+ key->flags = ntohl(nl_attr_get_u32(attrs[TCA_FLOWER_KEY_FLAGS]));
-+ mask->flags = ntohl(nl_attr_get_u32(attrs[TCA_FLOWER_KEY_FLAGS_MASK]));
-+ }
-+
- if (attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]) {
- key->ipv4.ipv4_src =
- nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC]);
-@@ -1495,6 +1502,13 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
- flower->key.ip_proto);
- }
-
-+ if (flower->mask.flags) {
-+ nl_msg_put_u32(request, TCA_FLOWER_KEY_FLAGS,
-+ htonl(flower->key.flags));
-+ nl_msg_put_u32(request, TCA_FLOWER_KEY_FLAGS_MASK,
-+ htonl(flower->mask.flags));
-+ }
-+
- if (flower->key.ip_proto == IPPROTO_UDP) {
- FLOWER_PUT_MASKED_VALUE(udp_src, TCA_FLOWER_KEY_UDP_SRC);
- FLOWER_PUT_MASKED_VALUE(udp_dst, TCA_FLOWER_KEY_UDP_DST);
-diff --git a/lib/tc.h b/lib/tc.h
-index 6af51c69b..4400a829e 100644
---- a/lib/tc.h
-+++ b/lib/tc.h
-@@ -92,6 +92,7 @@ struct tc_flower_key {
-
- ovs_be16 encap_eth_type;
-
-+ uint8_t flags;
- uint8_t ip_ttl;
-
- struct {
---
-2.14.3
-
diff --git a/0002-stream-ssl-Define-SSL_OP_NO_SSL_MASK-for-OpenSSL-ver.patch b/0002-stream-ssl-Define-SSL_OP_NO_SSL_MASK-for-OpenSSL-ver.patch
deleted file mode 100644
index cf77159..0000000
--- a/0002-stream-ssl-Define-SSL_OP_NO_SSL_MASK-for-OpenSSL-ver.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-From 74f34a896ddaebce7eba66022be8868dd3b44d0a Mon Sep 17 00:00:00 2001
-From: Ben Pfaff
-Date: Mon, 6 Aug 2018 15:39:44 -0700
-Subject: [PATCH 2/2] stream-ssl: Define SSL_OP_NO_SSL_MASK for OpenSSL
- versions that lack it.
-
-10 of the travis builds are failing such as
-TESTSUITE=1 KERNEL=3.16.54 for gcc and clang.
-
-Fixes: ab16d2c2871b ("stream-ssl: Don't enable new TLS versions by default")
-CC: Timothy Redaelli
-Signed-off-by: Darrell Ball
-Signed-off-by: Ben Pfaff
-Acked-by: Han Zhou
-Acked-by: Darrell Ball
-(cherry picked from commit ce679280889f0eb4ebc95b62558a20a7a5f7c0fb)
----
- lib/stream-ssl.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c
-index 95b0f106e..c64bb8f19 100644
---- a/lib/stream-ssl.c
-+++ b/lib/stream-ssl.c
-@@ -1186,6 +1186,12 @@ stream_ssl_set_protocols(const char *arg)
- }
-
- /* Start with all the flags off and turn them on as requested. */
-+#ifndef SSL_OP_NO_SSL_MASK
-+ /* For old OpenSSL without this macro, this is the correct value. */
-+#define SSL_OP_NO_SSL_MASK (SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | \
-+ SSL_OP_NO_TLSv1 | SSL_OP_NO_TLSv1_1 | \
-+ SSL_OP_NO_TLSv1_2)
-+#endif
- long protocol_flags = SSL_OP_NO_SSL_MASK;
-
- char *s = xstrdup(arg);
---
-2.17.1
-
diff --git a/ofproto-dpif-xlate_Fix_translation_of_groups_with_no_bu.patch b/ofproto-dpif-xlate_Fix_translation_of_groups_with_no_bu.patch
new file mode 100644
index 0000000..83656ba
--- /dev/null
+++ b/ofproto-dpif-xlate_Fix_translation_of_groups_with_no_bu.patch
@@ -0,0 +1,41 @@
+Date: Sun, 2 Sep 2018 09:30:43 -0700
+From: Ben Pfaff
+To: dev@openvswitch.org
+Cc: Ben Pfaff
+Subject: [ovs-dev] [PATCH] ofproto-dpif-xlate: Fix translation of groups with
+ no buckets.
+Message-Id: <20180902163043.11210-1-blp@ovn.org>
+List-Id:
+X-Bogosity: Unsure, tests=bogofilter, spamicity=0.500000, version=1.2.4
+
+A group can have no buckets, in which case ovs_list_back() assert-fails.
+This fixes the problem.
+
+Found by OFTest.
+
+Fixes: a04e58881e25 ("ofproto-dpif-xlate: Simplify translation for groups.")
+Signed-off-by: Ben Pfaff
+---
+ ofproto/ofproto-dpif-xlate.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
+index e26f6c8f554a..507e14dd0d00 100644
+--- a/ofproto/ofproto-dpif-xlate.c
++++ b/ofproto/ofproto-dpif-xlate.c
+@@ -4488,7 +4488,7 @@ xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group,
+ bool is_last_action)
+ {
+ if (group->up.type == OFPGT11_ALL || group->up.type == OFPGT11_INDIRECT) {
+- struct ovs_list *last_bucket = ovs_list_back(&group->up.buckets);
++ struct ovs_list *last_bucket = group->up.buckets.prev;
+ struct ofputil_bucket *bucket;
+ LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
+ bool is_last_bucket = &bucket->list_node == last_bucket;
+--
+2.16.1
+
+_______________________________________________
+dev mailing list
+dev@openvswitch.org
+https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff --git a/openvswitch.spec b/openvswitch.spec
index 5008f99..ba047be 100644
--- a/openvswitch.spec
+++ b/openvswitch.spec
@@ -39,8 +39,8 @@
Name: openvswitch
Summary: Open vSwitch daemon/database/utilities
URL: http://www.openvswitch.org/
-Version: 2.9.2
-Release: 6%{?commit0:.%{date}git%{shortcommit0}}%{?dist}
+Version: 2.10.0
+Release: 1%{?commit0:.%{date}git%{shortcommit0}}%{?dist}
# Nearly all of openvswitch is ASL 2.0. The bugtool is LGPLv2+, and the
# lib/sflow*.[ch] files are SISSL
@@ -64,21 +64,23 @@ ExclusiveArch: x86_64 aarch64 ppc64le s390x
# ovs-patches
# OVS (including OVN) backports (0 - 300)
-Patch0: ovs-dev-ofproto-macros-Ignore-Dropped-log-messages-in-check_logs..patch
-Patch10: 0001-ofproto-dpif-Delete-system-tunnel-interface-when-rem.patch
-Patch41: 0002-netdev-tc-offloads-Add-support-for-IP-fragmentation.patch
-Patch42: 0001-lib-netdev-tc-offloads-Fix-frag-first-later-translat.patch
-Patch43: 0002-lib-tc-Fix-sparse-warnings.patch
+Patch010: ofproto-dpif-xlate_Fix_translation_of_groups_with_no_bu.patch
-Patch50: 0001-Add-ovs.compat-module-to-python-package.patch
+Patch020: 0001-ovs-save-Don-t-always-include-the-default-flow-durin.patch
+# Bug 1631797
+Patch030: 0001-dpif-netdev-Add-round-robin-based-rxq-to-pmd-assignm.patch
-# Don't enable new TLS versions by default (needed since OpenSSL 1.1.1)
-Patch310: 0001-stream-ssl-Don-t-enable-new-TLS-versions-by-default.patch
-Patch311: 0002-stream-ssl-Define-SSL_OP_NO_SSL_MASK-for-OpenSSL-ver.patch
+# Bug 1565205
+Patch040: 0001-dpif-netdev-Avoid-reordering-of-packets-in-a-batch-w.patch
-Patch315: 0001-dhparams-Fix-.c-file-generation-with-OpenSSL-1.1.1-p.patch
+# Bug 1634015
+Patch050: 0001-dpif-netlink-don-t-allocate-per-thread-netlink-socke.patch
+Patch051: 0001-dpif-Remove-support-for-multiple-queues-per-port.patch
+
+# Bug 1635344
+Patch070: 0001-OVN-add-CT_LB-action-to-ovn-trace.patch
BuildRequires: gcc-c++
BuildRequires: gcc
@@ -89,6 +91,7 @@ BuildRequires: python2-devel python2-six
BuildRequires: python3-devel python3-six
BuildRequires: desktop-file-utils
BuildRequires: groff-base graphviz
+BuildRequires: unbound-devel
# make check dependencies
BuildRequires: procps-ng
BuildRequires: python2-pyOpenSSL
@@ -114,6 +117,8 @@ Requires: openssl iproute module-init-tools
Requires(pre): shadow-utils
Requires(post): /bin/sed
+Requires(post): /usr/sbin/usermod
+Requires(post): /usr/sbin/groupadd
Requires(post): systemd-units
Requires(preun): systemd-units
Requires(postun): systemd-units
@@ -572,6 +577,7 @@ chown -R openvswitch:openvswitch /etc/openvswitch
%{_datadir}/openvswitch/scripts/ovs-save
%{_datadir}/openvswitch/scripts/ovs-vtep
%{_datadir}/openvswitch/scripts/ovs-ctl
+%{_datadir}/openvswitch/scripts/ovs-kmod-ctl
%{_datadir}/openvswitch/scripts/ovs-systemd-reload
%config %{_datadir}/openvswitch/vswitch.ovsschema
%config %{_datadir}/openvswitch/vtep.ovsschema
@@ -604,13 +610,14 @@ chown -R openvswitch:openvswitch /etc/openvswitch
%{_mandir}/man8/ovs-ctl.8*
%{_mandir}/man8/ovs-dpctl.8*
%{_mandir}/man8/ovs-dpctl-top.8*
+%{_mandir}/man8/ovs-kmod-ctl.8.*
%{_mandir}/man8/ovs-ofctl.8*
%{_mandir}/man8/ovs-pki.8*
%{_mandir}/man8/ovs-vsctl.8*
%{_mandir}/man8/ovs-vswitchd.8*
%{_mandir}/man8/ovs-parse-backtrace.8*
%{_udevrulesdir}/91-vfio.rules
-%doc COPYING NOTICE README.rst NEWS rhel/README.RHEL.rst
+%doc LICENSE NOTICE README.rst NEWS rhel/README.RHEL.rst
/var/lib/openvswitch
%attr(750,openvswitch,openvswitch) /var/log/openvswitch
%ghost %attr(755,root,root) %verify(not owner group) %{_rundir}/openvswitch
@@ -661,6 +668,9 @@ chown -R openvswitch:openvswitch /etc/openvswitch
%{_unitdir}/ovn-controller-vtep.service
%changelog
+* Fri Oct 05 2018 Timothy Redaelli - 2.10.0-1
+- Align with "Fast Datapath" 2.10.0-10 (#1633555)
+
* Fri Sep 14 2018 Timothy Redaelli - 2.9.2-6
- Backport "Add ovs.compat module to python package" (#1619712)
- Backport a variant of "dhparams: Fix .c file generation with OpenSSL >= 1.1.1-pre9"
diff --git a/ovs-dev-ofproto-macros-Ignore-Dropped-log-messages-in-check_logs..patch b/ovs-dev-ofproto-macros-Ignore-Dropped-log-messages-in-check_logs..patch
deleted file mode 100644
index d7d5edc..0000000
--- a/ovs-dev-ofproto-macros-Ignore-Dropped-log-messages-in-check_logs..patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From patchwork Tue Jul 3 18:32:18 2018
-Content-Type: text/plain; charset="utf-8"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-Subject: [ovs-dev] ofproto-macros: Ignore "Dropped # log messages" in
- check_logs.
-X-Patchwork-Submitter: Ben Pfaff
-X-Patchwork-Id: 938851
-Message-Id: <20180703183218.32329-1-blp@ovn.org>
-To: dev@openvswitch.org
-Cc: Ben Pfaff
-Date: Tue, 3 Jul 2018 11:32:18 -0700
-From: Ben Pfaff
-List-Id:
-
-check_logs ignores some log messages, but it wasn't smart enough to ignore
-the messages that said that the ignored messages had been rate-limited.
-This fixes the problem.
-
-It's OK to ignore all rate-limiting messages because they only appear if at
-least one message was not rate-limited, which check_logs will catch anyway.
-
-Reported-by: Timothy Redaelli
-Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/046978.html
-Signed-off-by: Ben Pfaff
-Tested-By: Timothy Redaelli
----
- tests/ofproto-macros.at | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at
-index 7388a20a2236..2a56ae6e2f3e 100644
---- a/tests/ofproto-macros.at
-+++ b/tests/ofproto-macros.at
-@@ -400,6 +400,11 @@ check_logs () {
- # all "connection reset" warning logs for similar reasons (either EPIPE or
- # ECONNRESET can be returned on a send depending on whether the peer had
- # unconsumed data when it closed the socket).
-+ #
-+ # We also ignore "Dropped # log messages..." messages. Otherwise, even if
-+ # we ignore the messages that were rate-limited, we can end up failing just
-+ # because of the announcement that rate-limiting happened (and in a racy,
-+ # timing-dependent way, too).
- sed -n "$1
- /reset by peer/d
- /Broken pipe/d
-@@ -408,6 +413,7 @@ check_logs () {
- /timeval.*disk: [[0-9]]* reads, [[0-9]]* writes/d
- /timeval.*context switches: [[0-9]]* voluntary, [[0-9]]* involuntary/d
- /ovs_rcu.*blocked [[0-9]]* ms waiting for .* to quiesce/d
-+/Dropped [[0-9]]* log messages/d
- /|WARN|/p
- /|ERR|/p
- /|EMER|/p" ${logs}
diff --git a/sources b/sources
index 9defd9e..7104884 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-SHA512 (openvswitch-2.9.2.tar.gz) = 6122651fcdeb64793ae7cdd379f55f87ff1f989d6cb5ab67ad83125c566508b474e1875f024d2f2fc2159b90baf383d5e792cbf515a96266126f0f05511ceb0d
+SHA512 (openvswitch-2.10.0.tar.gz) = f118c1c4ab4e126c3343023b03007ca9819c3c5a5ea42eaffaabdc7c50ecddede3e258574dbe0de95ed3be2e3d101612f5bdb423a7adb679987f4e501183a216