commit e27c84e7c7a6ee0e53207be0ee6b738d73e196bc
Author: MSVSphere Packaging Team
Date: Fri Mar 29 15:37:20 2024 +0300
import gcc-8.5.0-21.el8
diff --git a/.gcc.metadata b/.gcc.metadata
new file mode 100644
index 0000000..2a15f01
--- /dev/null
+++ b/.gcc.metadata
@@ -0,0 +1,3 @@
+b7245af5eab1d4055b6590b6e4f5fb3b7f6c24bf SOURCES/gcc-8.5.0-20210514.tar.xz
+3bdb3cc01fa7690a0e20ea5cfffcbe690f7665eb SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
+ce8eb83be0ac37fb5d5388df455a980fe37b4f13 SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b425f7a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+SOURCES/gcc-8.5.0-20210514.tar.xz
+SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
+SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz
diff --git a/SOURCES/gcc8-Wbidi-chars.patch b/SOURCES/gcc8-Wbidi-chars.patch
new file mode 100644
index 0000000..988defe
--- /dev/null
+++ b/SOURCES/gcc8-Wbidi-chars.patch
@@ -0,0 +1,1644 @@
+commit 51c500269bf53749b107807d84271385fad35628
+Author: Marek Polacek
+Date: Wed Oct 6 14:33:59 2021 -0400
+
+ libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026]
+
+ From a link below:
+ "An issue was discovered in the Bidirectional Algorithm in the Unicode
+ Specification through 14.0. It permits the visual reordering of
+ characters via control sequences, which can be used to craft source code
+ that renders different logic than the logical ordering of tokens
+ ingested by compilers and interpreters. Adversaries can leverage this to
+ encode source code for compilers accepting Unicode such that targeted
+ vulnerabilities are introduced invisibly to human reviewers."
+
+ More info:
+ https://nvd.nist.gov/vuln/detail/CVE-2021-42574
+ https://trojansource.codes/
+
+ This is not a compiler bug. However, to mitigate the problem, this patch
+ implements -Wbidi-chars=[none|unpaired|any] to warn about possibly
+ misleading Unicode bidirectional control characters the preprocessor may
+ encounter.
+
+ The default is =unpaired, which warns about improperly terminated
+ bidirectional control characters; e.g. a LRE without its corresponding PDF.
+ The level =any warns about any use of bidirectional control characters.
+
+ This patch handles both UCNs and UTF-8 characters. UCNs designating
+ bidi characters in identifiers are accepted since r204886. Then r217144
+ enabled -fextended-identifiers by default. Extended characters in C/C++
+ identifiers have been accepted since r275979. However, this patch still
+ warns about mixing UTF-8 and UCN bidi characters; there seems to be no
+ good reason to allow mixing them.
+
+ We warn in different contexts: comments (both C and C++-style), string
+ literals, character constants, and identifiers. As expected, UCNs are ignored
+ in comments and raw string literals. The bidirectional control characters
+ can nest so this patch handles that as well.
+
+ I have neither included nor tested this at all with Fortran (which also has
+ string literals and line comments).
+
+ Dave M. posted patches improving diagnostics involving Unicode characters.
+ This patch does not make use of this new infrastructure yet.
+
+ PR preprocessor/103026
+
+ gcc/c-family/ChangeLog:
+
+ * c.opt (Wbidi-chars, Wbidi-chars=): New option.
+
+ gcc/ChangeLog:
+
+ * doc/invoke.texi: Document -Wbidi-chars.
+
+ libcpp/ChangeLog:
+
+ * include/cpplib.h (enum cpp_bidirectional_level): New.
+ (struct cpp_options): Add cpp_warn_bidirectional.
+ (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL.
+ * internal.h (struct cpp_reader): Add warn_bidi_p member
+ function.
+ * init.c (cpp_create_reader): Set cpp_warn_bidirectional.
+ * lex.c (bidi): New namespace.
+ (get_bidi_utf8): New function.
+ (get_bidi_ucn): Likewise.
+ (maybe_warn_bidi_on_close): Likewise.
+ (maybe_warn_bidi_on_char): Likewise.
+ (_cpp_skip_block_comment): Implement warning about bidirectional
+ control characters.
+ (skip_line_comment): Likewise.
+ (forms_identifier_p): Likewise.
+ (lex_identifier): Likewise.
+ (lex_string): Likewise.
+ (lex_raw_string): Likewise.
+
+ gcc/testsuite/ChangeLog:
+
+ * c-c++-common/Wbidi-chars-1.c: New test.
+ * c-c++-common/Wbidi-chars-2.c: New test.
+ * c-c++-common/Wbidi-chars-3.c: New test.
+ * c-c++-common/Wbidi-chars-4.c: New test.
+ * c-c++-common/Wbidi-chars-5.c: New test.
+ * c-c++-common/Wbidi-chars-6.c: New test.
+ * c-c++-common/Wbidi-chars-7.c: New test.
+ * c-c++-common/Wbidi-chars-8.c: New test.
+ * c-c++-common/Wbidi-chars-9.c: New test.
+ * c-c++-common/Wbidi-chars-10.c: New test.
+ * c-c++-common/Wbidi-chars-11.c: New test.
+ * c-c++-common/Wbidi-chars-12.c: New test.
+ * c-c++-common/Wbidi-chars-13.c: New test.
+ * c-c++-common/Wbidi-chars-14.c: New test.
+ * c-c++-common/Wbidi-chars-15.c: New test.
+ * c-c++-common/Wbidi-chars-16.c: New test.
+ * c-c++-common/Wbidi-chars-17.c: New test.
+
+diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
+index f591b39be5a..cf922812198 100644
+--- a/gcc/c-family/c.opt
++++ b/gcc/c-family/c.opt
+@@ -334,6 +334,30 @@ Wbad-function-cast
+ C ObjC Var(warn_bad_function_cast) Warning
+ Warn about casting functions to incompatible types.
+
++Wbidi-chars
++C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none)
++;
++
++Wbidi-chars=
++C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level)
++-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters.
++
++; Required for these enum values.
++SourceInclude
++cpplib.h
++
++Enum
++Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized)
++
++EnumValue
++Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none)
++
++EnumValue
++Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired)
++
++EnumValue
++Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any)
++
+ Wbool-compare
+ C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
+ Warn about boolean expression compared with an integer value different from true/false.
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 78ca7738df2..cc85c53aede 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -264,7 +264,8 @@ Objective-C and Objective-C++ Dialects}.
+ -Walloc-zero -Walloc-size-larger-than=@var{n}
+ -Walloca -Walloca-larger-than=@var{n} @gol
+ -Wno-aggressive-loop-optimizations -Warray-bounds -Warray-bounds=@var{n} @gol
+--Wno-attributes -Wbool-compare -Wbool-operation @gol
++-Wno-attributes -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
++-Wbool-compare -Wbool-operation @gol
+ -Wno-builtin-declaration-mismatch @gol
+ -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol
+ -Wc++-compat -Wc++11-compat -Wc++14-compat @gol
+@@ -5606,6 +5607,23 @@ Warn about declarations using the @code{alias} and similar attributes whose
+ target is incompatible with the type of the alias. @xref{Function Attributes,
+ ,Declaring Attributes of Functions}.
+
++@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]}
++@opindex Wbidi-chars=
++@opindex Wbidi-chars
++@opindex Wno-bidi-chars
++Warn about possibly misleading UTF-8 bidirectional control characters in
++comments, string literals, character constants, and identifiers. Such
++characters can change left-to-right writing direction into right-to-left
++(and vice versa), which can cause confusion between the logical order and
++visual order. This may be dangerous; for instance, it may seem that a piece
++of code is not commented out, whereas it in fact is.
++
++There are three levels of warning supported by GCC@. The default is
++@option{-Wbidi-chars=unpaired}, which warns about improperly terminated
++bidi contexts. @option{-Wbidi-chars=none} turns the warning off.
++@option{-Wbidi-chars=any} warns about any use of bidirectional control
++characters.
++
+ @item -Wbool-compare
+ @opindex Wno-bool-compare
+ @opindex Wbool-compare
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
+new file mode 100644
+index 00000000000..34f5ac19271
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
+@@ -0,0 +1,12 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++
++int main() {
++ int isAdmin = 0;
++ /* } if (isAdmin) begin admins only */
++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
++ __builtin_printf("You are an admin.\n");
++ /* end admins only { */
++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
++ return 0;
++}
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
+new file mode 100644
+index 00000000000..3f851b69e65
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
+@@ -0,0 +1,27 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=unpaired" } */
++/* More nesting testing. */
++
++/* RLE LRI PDF PDI*/
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int LRE_\u202a_PDF_\u202c;
++int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c;
++int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c;
++int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c;
++int FSI_\u2068;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int FSI_\u2068_PDI_\u2069;
++int FSI_\u2068_FSI_\u2068_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
+new file mode 100644
+index 00000000000..44d044d82de
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
+@@ -0,0 +1,9 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=unpaired" } */
++/* Test that we warn when mixing UCN and UTF-8. */
++
++const char *s1 = "LRE__PDF_\u202c";
++/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
++const char *s2 = "LRE_\u202a_PDF_";
++/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
+new file mode 100644
+index 00000000000..b07eec1da91
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
+@@ -0,0 +1,19 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile { target { c || c++11 } } } */
++/* { dg-options "-Wbidi-chars=any" } */
++/* Test raw strings. */
++
++const char *s1 = R"(a b c LRE 1 2 3 PDF x y z)";
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++const char *s2 = R"(a b c RLE 1 2 3 PDF x y z)";
++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
++const char *s3 = R"(a b c LRO 1 2 3 PDF x y z)";
++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
++const char *s4 = R"(a b c RLO 1 2 3 PDF x y z)";
++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
++const char *s7 = R"(a b c FSI 1 2 3 PDI x y) z";
++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
++const char *s8 = R"(a b c PDI x y )z";
++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
++const char *s9 = R"(a b c PDF x y z)";
++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
+new file mode 100644
+index 00000000000..b2dd9fde752
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
+@@ -0,0 +1,17 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile { target { c || c++11 } } } */
++/* { dg-options "-Wbidi-chars=unpaired" } */
++/* Test raw strings. */
++
++const char *s1 = R"(a b c LRE 1 2 3)";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++const char *s2 = R"(a b c RLE 1 2 3)";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++const char *s3 = R"(a b c LRO 1 2 3)";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++const char *s4 = R"(a b c FSI 1 2 3)";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++const char *s5 = R"(a b c LRI 1 2 3)";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++const char *s6 = R"(a b c RLI 1 2 3)";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
+new file mode 100644
+index 00000000000..ba5f75d9553
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
+@@ -0,0 +1,38 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=unpaired" } */
++/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs,
++ or RLOs. */
++
++/* LRI__LRI__RLE__RLE__RLE__PDI_*/
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++// LRI__RLE__RLE__RLE__PDI_
++// LRI__RLO__RLE__RLE__PDI_
++// LRI__RLO__RLE__PDI_
++// FSI__RLO__PDI_
++// FSI__FSI__RLO__PDI_
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++
++int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
++int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int PDI_\u2069;
++int LRI_\u2066_PDI_\u2069;
++int RLI_\u2067_PDI_\u2069;
++int LRE_\u202a_LRI_\u2066_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069;
++int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
++int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int RLO_\u202e_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int RLI_\u2067_PDI_\u2069_RLI_\u2067;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int FSI_\u2068_PDF_\u202c_PDI_\u2069;
++int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
+new file mode 100644
+index 00000000000..a0ce8ff5e2c
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
+@@ -0,0 +1,59 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=unpaired" } */
++/* Test unpaired bidi control chars in multiline comments. */
++
++/*
++ * LRE end
++ */
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++/*
++ * RLE end
++ */
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++/*
++ * LRO end
++ */
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++/*
++ * RLO end
++ */
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++/*
++ * LRI end
++ */
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++/*
++ * RLI end
++ */
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++/*
++ * FSI end
++ */
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++/* LRE
++ PDF */
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++/* FSI
++ PDI */
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++
++/* LRE<>
++ *
++ */
++/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */
++
++/*
++ * LRE<>
++ */
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++
++/*
++ *
++ * LRE<> */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++
++/* RLI<> */ /* PDI<> */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* LRE<> */ /* PDF<> */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
+new file mode 100644
+index 00000000000..baa0159861c
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
+@@ -0,0 +1,26 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=any" } */
++/* Test LTR/RTL chars. */
++
++/* LTR<> */
++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
++// LTR<>
++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
++/* RTL<> */
++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
++// RTL<>
++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
++
++const char *s1 = "LTR<>";
++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
++const char *s2 = "LTR\u200e";
++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
++const char *s3 = "LTR\u200E";
++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
++const char *s4 = "RTL<>";
++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
++const char *s5 = "RTL\u200f";
++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
++const char *s6 = "RTL\u200F";
++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
+new file mode 100644
+index 00000000000..07cb4321f96
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
+@@ -0,0 +1,30 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=unpaired" } */
++/* Test LTR/RTL chars. */
++
++/* LTR<> */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++// LTR<>
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++/* RTL<> */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++// RTL<>
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int ltr_\u200e;
++/* { dg-error "universal character " "" { target *-*-* } .-1 } */
++int rtl_\u200f;
++/* { dg-error "universal character " "" { target *-*-* } .-1 } */
++
++const char *s1 = "LTR<>";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++const char *s2 = "LTR\u200e";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++const char *s3 = "LTR\u200E";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++const char *s4 = "RTL<>";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++const char *s5 = "RTL\u200f";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++const char *s6 = "RTL\u200F";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
+new file mode 100644
+index 00000000000..2340374f276
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
+@@ -0,0 +1,9 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++
++int main() {
++ /* Say hello; newline/*/ return 0 ;
++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
++ __builtin_printf("Hello world.\n");
++ return 0;
++}
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
+new file mode 100644
+index 00000000000..9dc7edb6e64
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
+@@ -0,0 +1,11 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++
++int main() {
++ const char* access_level = "user";
++ if (__builtin_strcmp(access_level, "user // Check if admin ")) {
++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
++ __builtin_printf("You are an admin.\n");
++ }
++ return 0;
++}
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
+new file mode 100644
+index 00000000000..49f856b9bfe
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
+@@ -0,0 +1,172 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */
++/* Test all bidi chars in various contexts (identifiers, comments,
++ string literals, character constants), both UCN and UTF-8. The bidi
++ chars here are properly terminated, except for the character constants. */
++
++/* a b c LRE 1 2 3 PDF x y z */
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++/* a b c RLE 1 2 3 PDF x y z */
++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
++/* a b c LRO 1 2 3 PDF x y z */
++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
++/* a b c RLO 1 2 3 PDF x y z */
++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
++/* a b c LRI 1 2 3 PDI x y z */
++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
++/* a b c RLI 1 2 3 PDI x y */
++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
++/* a b c FSI 1 2 3 PDI x y z */
++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
++
++/* Same but C++ comments instead. */
++// a b c LRE 1 2 3 PDF x y z
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++// a b c RLE 1 2 3 PDF x y z
++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
++// a b c LRO 1 2 3 PDF x y z
++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
++// a b c RLO 1 2 3 PDF x y z
++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
++// a b c LRI 1 2 3 PDI x y z
++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
++// a b c RLI 1 2 3 PDI x y
++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
++// a b c FSI 1 2 3 PDI x y z
++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
++
++/* Here we're closing an unopened context, warn when =any. */
++/* a b c PDI x y z */
++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
++/* a b c PDF x y z */
++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
++// a b c PDI x y z
++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
++// a b c PDF x y z
++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
++
++/* Multiline comments. */
++/* a b c PDI x y z
++ */
++/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
++/* a b c PDF x y z
++ */
++/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
++/* first
++ a b c PDI x y z
++ */
++/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
++/* first
++ a b c PDF x y z
++ */
++/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
++/* first
++ a b c PDI x y z */
++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
++/* first
++ a b c PDF x y z */
++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
++
++void
++g1 ()
++{
++ const char *s1 = "a b c LRE 1 2 3 PDF x y z";
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++ const char *s2 = "a b c RLE 1 2 3 PDF x y z";
++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
++ const char *s3 = "a b c LRO 1 2 3 PDF x y z";
++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
++ const char *s4 = "a b c RLO 1 2 3 PDF x y z";
++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
++ const char *s5 = "a b c LRI 1 2 3 PDI x y z";
++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
++ const char *s6 = "a b c RLI 1 2 3 PDI x y z";
++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
++ const char *s7 = "a b c FSI 1 2 3 PDI x y z";
++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
++ const char *s8 = "a b c PDI x y z";
++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
++ const char *s9 = "a b c PDF x y z";
++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
++
++ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
++ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
++ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
++ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
++ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
++ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
++ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
++ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
++ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
++}
++
++void
++g2 ()
++{
++ const char c1 = '\u202a';
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++ const char c2 = '\u202A';
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++ const char c3 = '\u202b';
++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
++ const char c4 = '\u202B';
++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
++ const char c5 = '\u202d';
++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
++ const char c6 = '\u202D';
++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
++ const char c7 = '\u202e';
++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
++ const char c8 = '\u202E';
++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
++ const char c9 = '\u2066';
++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
++ const char c10 = '\u2067';
++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
++ const char c11 = '\u2068';
++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
++}
++
++int A\u202cY;
++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
++int A\u202CY2;
++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
++
++int d\u202ae\u202cf;
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++int d\u202Ae\u202cf2;
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++int d\u202be\u202cf;
++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
++int d\u202Be\u202cf2;
++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
++int d\u202de\u202cf;
++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
++int d\u202De\u202cf2;
++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
++int d\u202ee\u202cf;
++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
++int d\u202Ee\u202cf2;
++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
++int d\u2066e\u2069f;
++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
++int d\u2067e\u2069f;
++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
++int d\u2068e\u2069f;
++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
++int X\u2069;
++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
+new file mode 100644
+index 00000000000..f5776806c79
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
+@@ -0,0 +1,172 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */
++/* Test all bidi chars in various contexts (identifiers, comments,
++ string literals, character constants), both UCN and UTF-8. The bidi
++ chars here are properly terminated, except for the character constants. */
++
++/* a b c LRE 1 2 3 PDF x y z */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++/* a b c RLE 1 2 3 PDF x y z */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++/* a b c LRO 1 2 3 PDF x y z */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++/* a b c RLO 1 2 3 PDF x y z */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++/* a b c LRI 1 2 3 PDI x y z */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++/* a b c RLI 1 2 3 PDI x y */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++/* a b c FSI 1 2 3 PDI x y z */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++
++/* Same but C++ comments instead. */
++// a b c LRE 1 2 3 PDF x y z
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++// a b c RLE 1 2 3 PDF x y z
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++// a b c LRO 1 2 3 PDF x y z
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++// a b c RLO 1 2 3 PDF x y z
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++// a b c LRI 1 2 3 PDI x y z
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++// a b c RLI 1 2 3 PDI x y
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++// a b c FSI 1 2 3 PDI x y z
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++
++/* Here we're closing an unopened context, warn when =any. */
++/* a b c PDI x y z */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++/* a b c PDF x y z */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++// a b c PDI x y z
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++// a b c PDF x y z
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++
++/* Multiline comments. */
++/* a b c PDI x y z
++ */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
++/* a b c PDF x y z
++ */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
++/* first
++ a b c PDI x y z
++ */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
++/* first
++ a b c PDF x y z
++ */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
++/* first
++ a b c PDI x y z */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++/* first
++ a b c PDF x y z */
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++
++void
++g1 ()
++{
++ const char *s1 = "a b c LRE 1 2 3 PDF x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s2 = "a b c RLE 1 2 3 PDF x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s3 = "a b c LRO 1 2 3 PDF x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s4 = "a b c RLO 1 2 3 PDF x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s5 = "a b c LRI 1 2 3 PDI x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s6 = "a b c RLI 1 2 3 PDI x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s7 = "a b c FSI 1 2 3 PDI x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s8 = "a b c PDI x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s9 = "a b c PDF x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++
++ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++}
++
++void
++g2 ()
++{
++ const char c1 = '\u202a';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char c2 = '\u202A';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char c3 = '\u202b';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char c4 = '\u202B';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char c5 = '\u202d';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char c6 = '\u202D';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char c7 = '\u202e';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char c8 = '\u202E';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char c9 = '\u2066';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char c10 = '\u2067';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char c11 = '\u2068';
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++}
++
++int A\u202cY;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int A\u202CY2;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++
++int d\u202ae\u202cf;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int d\u202Ae\u202cf2;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int d\u202be\u202cf;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int d\u202Be\u202cf2;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int d\u202de\u202cf;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int d\u202De\u202cf2;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int d\u202ee\u202cf;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int d\u202Ee\u202cf2;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int d\u2066e\u2069f;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int d\u2067e\u2069f;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int d\u2068e\u2069f;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
++int X\u2069;
++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
+new file mode 100644
+index 00000000000..a65d6faf60e
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
+@@ -0,0 +1,130 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=unpaired" } */
++/* Test nesting of bidi chars in various contexts. */
++
++/* Terminated by the wrong char: */
++/* a b c LRE 1 2 3 PDI x y z */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* a b c RLE 1 2 3 PDI x y z*/
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* a b c LRO 1 2 3 PDI x y z */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* a b c RLO 1 2 3 PDI x y z */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* a b c LRI 1 2 3 PDF x y z */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* a b c RLI 1 2 3 PDF x y z */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* a b c FSI 1 2 3 PDF x y z*/
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++
++/* LRE PDF */
++/* LRE LRE PDF PDF */
++/* PDF LRE PDF */
++/* LRE PDF LRE PDF */
++/* LRE LRE PDF */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* PDF LRE */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++
++// a b c LRE 1 2 3 PDI x y z
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++// a b c RLE 1 2 3 PDI x y z*/
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++// a b c LRO 1 2 3 PDI x y z
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++// a b c RLO 1 2 3 PDI x y z
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++// a b c LRI 1 2 3 PDF x y z
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++// a b c RLI 1 2 3 PDF x y z
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++// a b c FSI 1 2 3 PDF x y z
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++
++// LRE PDF
++// LRE LRE PDF PDF
++// PDF LRE PDF
++// LRE PDF LRE PDF
++// LRE LRE PDF
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++// PDF LRE
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++
++void
++g1 ()
++{
++ const char *s1 = "a b c LRE 1 2 3 PDI x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s3 = "a b c RLE 1 2 3 PDI x y ";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s5 = "a b c LRO 1 2 3 PDI x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s7 = "a b c RLO 1 2 3 PDI x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s9 = "a b c LRI 1 2 3 PDF x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s11 = "a b c RLI 1 2 3 PDF x y z\
++ ";
++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
++ const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s13 = "a b c FSI 1 2 3 PDF x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s15 = "PDF LRE";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s16 = "PDF\u202c LRE\u202a";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s17 = "LRE PDF";
++ const char *s18 = "LRE\u202a PDF\u202c";
++ const char *s19 = "LRE LRE PDF PDF";
++ const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c";
++ const char *s21 = "PDF LRE PDF";
++ const char *s22 = "PDF\u202c LRE\u202a PDF\u202c";
++ const char *s23 = "LRE LRE PDF";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s24 = "LRE\u202a LRE\u202a PDF\u202c";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s25 = "PDF LRE";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s26 = "PDF\u202c LRE\u202a";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s27 = "PDF LRE\u202a";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++ const char *s28 = "PDF\u202c LRE";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++}
++
++int A\u202aB\u2069C;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int a\u202bB\u2069c;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int a\u202db\u2069c2;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int a\u202eb\u2069;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int a\u2066b\u202c;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int a\u2067b\u202c;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int a\u2068b\u202c;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int A\u202aB\u202c;
++int A\u202aA\u202aB\u202cB\u202c;
++int a_\u202C_\u202a_\u202c;
++int a_\u202a_\u202c_\u202a_\u202c_;
++int a_\u202a_\u202c_\u202a_;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
+new file mode 100644
+index 00000000000..d012d420ec0
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
+@@ -0,0 +1,9 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=any" } */
++/* Test we ignore UCNs in comments. */
++
++// a b c \u202a 1 2 3
++// a b c \u202A 1 2 3
++/* a b c \u202a 1 2 3 */
++/* a b c \u202A 1 2 3 */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
+new file mode 100644
+index 00000000000..4f54c5092ec
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
+@@ -0,0 +1,13 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=any" } */
++/* Test \u vs \U. */
++
++int a_\u202A;
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++int a_\u202a_2;
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++int a_\U0000202A_3;
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
++int a_\U0000202a_4;
++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
+new file mode 100644
+index 00000000000..e2af1b1ca97
+--- /dev/null
++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
+@@ -0,0 +1,29 @@
++/* PR preprocessor/103026 */
++/* { dg-do compile } */
++/* { dg-options "-Wbidi-chars=unpaired" } */
++/* Test that we properly separate bidi contexts (comment/identifier/character
++ constant/string literal). */
++
++/* LRE -><- */ int pdf_\u202c_1;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* RLE -><- */ int pdf_\u202c_2;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* LRO -><- */ int pdf_\u202c_3;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* RLO -><- */ int pdf_\u202c_4;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* LRI -><-*/ int pdi_\u2069_1;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* RLI -><- */ int pdi_\u2069_12;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* FSI -><- */ int pdi_\u2069_3;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++
++const char *s1 = "LRE\u202a"; /* PDF -><- */
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++/* LRE -><- */ const char *s2 = "PDF\u202c";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++const char *s3 = "LRE\u202a"; int pdf_\u202c_5;
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
++int lre_\u202a; const char *s4 = "PDF\u202c";
++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
+index 3ad52d5e01e..e0dcb7f0529 100644
+--- a/libcpp/include/cpplib.h
++++ b/libcpp/include/cpplib.h
+@@ -305,6 +305,17 @@ enum cpp_normalize_level {
+ normalized_none
+ };
+
++/* The possible bidirectional control characters checking levels, from least
++ restrictive to most. */
++enum cpp_bidirectional_level {
++ /* No checking. */
++ bidirectional_none,
++ /* Only detect unpaired uses of bidirectional control characters. */
++ bidirectional_unpaired,
++ /* Detect any use of bidirectional control characters. */
++ bidirectional_any
++};
++
+ /* This structure is nested inside struct cpp_reader, and
+ carries all the options visible to the command line. */
+ struct cpp_options
+@@ -506,6 +517,10 @@ struct cpp_options
+ /* True if warn about differences between C++98 and C++11. */
+ bool cpp_warn_cxx11_compat;
+
++ /* Nonzero if bidirectional control characters checking is on. See enum
++ cpp_bidirectional_level. */
++ unsigned char cpp_warn_bidirectional;
++
+ /* Dependency generation. */
+ struct
+ {
+@@ -1063,7 +1078,8 @@ enum {
+ CPP_W_PEDANTIC,
+ CPP_W_C90_C99_COMPAT,
+ CPP_W_CXX11_COMPAT,
+- CPP_W_EXPANSION_TO_DEFINED
++ CPP_W_EXPANSION_TO_DEFINED,
++ CPP_W_BIDIRECTIONAL
+ };
+
+ /* Output a diagnostic of some kind. */
+diff --git a/libcpp/init.c b/libcpp/init.c
+index ca3fbaa5c05..5c15da82ff8 100644
+--- a/libcpp/init.c
++++ b/libcpp/init.c
+@@ -208,6 +208,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
+ = ENABLE_CANONICAL_SYSTEM_HEADERS;
+ CPP_OPTION (pfile, ext_numeric_literals) = 1;
+ CPP_OPTION (pfile, warn_date_time) = 0;
++ CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
+
+ /* Default CPP arithmetic to something sensible for the host for the
+ benefit of dumb users like fix-header. */
+diff --git a/libcpp/internal.h b/libcpp/internal.h
+index 4f74f995cec..53b4c0f4af7 100644
+--- a/libcpp/internal.h
++++ b/libcpp/internal.h
+@@ -576,6 +576,13 @@ struct cpp_reader
+ /* If non-null, the lexer will use this location for the next token
+ instead of getting a location from the linemap. */
+ source_location *forced_token_location_p;
++
++ /* Returns true iff we should warn about UTF-8 bidirectional control
++ characters. */
++ bool warn_bidi_p () const
++ {
++ return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none;
++ }
+ };
+
+ /* Character classes. Based on the more primitive macros in safe-ctype.h.
+diff --git a/libcpp/lex.c b/libcpp/lex.c
+index a408f912c5c..ea7f75e842e 100644
+--- a/libcpp/lex.c
++++ b/libcpp/lex.c
+@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
+ }
+ }
+
++namespace bidi {
++ enum kind {
++ NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
++ };
++
++ /* All the UTF-8 encodings of bidi characters start with E2. */
++ const uchar utf8_start = 0xe2;
++
++ /* A vector holding currently open bidi contexts. We use a char for
++ each context, its LSB is 1 if it represents a PDF context, 0 if it
++ represents a PDI context. The next bit is 1 if this context was opened
++ by a bidi character written as a UCN, and 0 when it was UTF-8. */
++ semi_embedded_vec <unsigned char, 16> vec;
++
++ /* Close the whole comment/identifier/string literal/character constant
++ context. */
++ void on_close ()
++ {
++ vec.truncate (0);
++ }
++
++ /* Pop the last element in the vector. */
++ void pop ()
++ {
++ unsigned int len = vec.count ();
++ gcc_checking_assert (len > 0);
++ vec.truncate (len - 1);
++ }
++
++ /* Return the context of the Ith element. */
++ kind ctx_at (unsigned int i)
++ {
++ return (vec[i] & 1) ? PDF : PDI;
++ }
++
++ /* Return which context is currently opened. */
++ kind current_ctx ()
++ {
++ unsigned int len = vec.count ();
++ if (len == 0)
++ return NONE;
++ return ctx_at (len - 1);
++ }
++
++ /* Return true if the current context comes from a UCN origin, that is,
++ the bidi char which started this bidi context was written as a UCN. */
++ bool current_ctx_ucn_p ()
++ {
++ unsigned int len = vec.count ();
++ gcc_checking_assert (len > 0);
++ return (vec[len - 1] >> 1) & 1;
++ }
++
++ /* We've read a bidi char, update the current vector as necessary. */
++ void on_char (kind k, bool ucn_p)
++ {
++ switch (k)
++ {
++ case LRE:
++ case RLE:
++ case LRO:
++ case RLO:
++ vec.push (ucn_p ? 3u : 1u);
++ break;
++ case LRI:
++ case RLI:
++ case FSI:
++ vec.push (ucn_p ? 2u : 0u);
++ break;
++ /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
++ whose scope has not yet been terminated. */
++ case PDF:
++ if (current_ctx () == PDF)
++ pop ();
++ break;
++ /* PDI terminates the scope of the last LRI, RLI, or FSI whose
++ scope has not yet been terminated, as well as the scopes of
++ any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
++ yet been terminated. */
++ case PDI:
++ for (int i = vec.count () - 1; i >= 0; --i)
++ if (ctx_at (i) == PDI)
++ {
++ vec.truncate (i);
++ break;
++ }
++ break;
++ case LTR:
++ case RTL:
++ /* These aren't popped by a PDF/PDI. */
++ break;
++ [[likely]] case NONE:
++ break;
++ default:
++ abort ();
++ }
++ }
++
++ /* Return a descriptive string for K. */
++ const char *to_str (kind k)
++ {
++ switch (k)
++ {
++ case LRE:
++ return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
++ case RLE:
++ return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
++ case LRO:
++ return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
++ case RLO:
++ return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
++ case LRI:
++ return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
++ case RLI:
++ return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
++ case FSI:
++ return "U+2068 (FIRST STRONG ISOLATE)";
++ case PDF:
++ return "U+202C (POP DIRECTIONAL FORMATTING)";
++ case PDI:
++ return "U+2069 (POP DIRECTIONAL ISOLATE)";
++ case LTR:
++ return "U+200E (LEFT-TO-RIGHT MARK)";
++ case RTL:
++ return "U+200F (RIGHT-TO-LEFT MARK)";
++ default:
++ abort ();
++ }
++ }
++}
++
++/* Parse a sequence of 3 bytes starting with P and return its bidi code. */
++
++static bidi::kind
++get_bidi_utf8 (const unsigned char *const p)
++{
++ gcc_checking_assert (p[0] == bidi::utf8_start);
++
++ if (p[1] == 0x80)
++ switch (p[2])
++ {
++ case 0xaa:
++ return bidi::LRE;
++ case 0xab:
++ return bidi::RLE;
++ case 0xac:
++ return bidi::PDF;
++ case 0xad:
++ return bidi::LRO;
++ case 0xae:
++ return bidi::RLO;
++ case 0x8e:
++ return bidi::LTR;
++ case 0x8f:
++ return bidi::RTL;
++ default:
++ break;
++ }
++ else if (p[1] == 0x81)
++ switch (p[2])
++ {
++ case 0xa6:
++ return bidi::LRI;
++ case 0xa7:
++ return bidi::RLI;
++ case 0xa8:
++ return bidi::FSI;
++ case 0xa9:
++ return bidi::PDI;
++ default:
++ break;
++ }
++
++ return bidi::NONE;
++}
++
++/* Parse a UCN where P points just past \u or \U and return its bidi code. */
++
++static bidi::kind
++get_bidi_ucn (const unsigned char *p, bool is_U)
++{
++ /* 6.4.3 Universal Character Names
++ \u hex-quad
++ \U hex-quad hex-quad
++ where \unnnn means \U0000nnnn. */
++
++ if (is_U)
++ {
++ if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
++ return bidi::NONE;
++ /* Skip 4B so we can treat \u and \U the same below. */
++ p += 4;
++ }
++
++ /* All code points we are looking for start with 20xx. */
++ if (p[0] != '2' || p[1] != '0')
++ return bidi::NONE;
++ else if (p[2] == '2')
++ switch (p[3])
++ {
++ case 'a':
++ case 'A':
++ return bidi::LRE;
++ case 'b':
++ case 'B':
++ return bidi::RLE;
++ case 'c':
++ case 'C':
++ return bidi::PDF;
++ case 'd':
++ case 'D':
++ return bidi::LRO;
++ case 'e':
++ case 'E':
++ return bidi::RLO;
++ default:
++ break;
++ }
++ else if (p[2] == '6')
++ switch (p[3])
++ {
++ case '6':
++ return bidi::LRI;
++ case '7':
++ return bidi::RLI;
++ case '8':
++ return bidi::FSI;
++ case '9':
++ return bidi::PDI;
++ default:
++ break;
++ }
++ else if (p[2] == '0')
++ switch (p[3])
++ {
++ case 'e':
++ case 'E':
++ return bidi::LTR;
++ case 'f':
++ case 'F':
++ return bidi::RTL;
++ default:
++ break;
++ }
++
++ return bidi::NONE;
++}
++
++/* We're closing a bidi context, that is, we've encountered a newline,
++ are closing a C-style comment, or are at the end of a string literal,
++ character constant, or identifier. Warn if this context was not
++ properly terminated by a PDI or PDF. P points to the last character
++ in this context. */
++
++static void
++maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
++{
++ if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired
++ && bidi::vec.count () > 0)
++ {
++ const source_location loc
++ = linemap_position_for_column (pfile->line_table,
++ CPP_BUF_COLUMN (pfile->buffer, p));
++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
++ "unpaired UTF-8 bidirectional control character "
++ "detected");
++ }
++ /* We're done with this context. */
++ bidi::on_close ();
++}
++
++/* We're at the beginning or in the middle of an identifier/comment/string
++ literal/character constant. Warn if we've encountered a bidi character.
++ KIND says which bidi character it was; P points to it in the character
++ stream. UCN_P is true iff this bidi character was written as a UCN. */
++
++static void
++maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
++ bool ucn_p)
++{
++ if (__builtin_expect (kind == bidi::NONE, 1))
++ return;
++
++ const unsigned char warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
++
++ if (warn_bidi != bidirectional_none)
++ {
++ const source_location loc
++ = linemap_position_for_column (pfile->line_table,
++ CPP_BUF_COLUMN (pfile->buffer, p));
++ /* It seems excessive to warn about a PDI/PDF that is closing
++ an opened context because we've already warned about the
++ opening character. Except warn when we have a UCN x UTF-8
++ mismatch. */
++ if (kind == bidi::current_ctx ())
++ {
++ if (warn_bidi == bidirectional_unpaired
++ && bidi::current_ctx_ucn_p () != ucn_p)
++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
++ "UTF-8 vs UCN mismatch when closing "
++ "a context by \"%s\"", bidi::to_str (kind));
++ }
++ else if (warn_bidi == bidirectional_any)
++ {
++ if (kind == bidi::PDF || kind == bidi::PDI)
++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
++ "\"%s\" is closing an unopened context",
++ bidi::to_str (kind));
++ else
++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
++ "found problematic Unicode character \"%s\"",
++ bidi::to_str (kind));
++ }
++ }
++ /* Update the stack of open bidi contexts for this character. */
++ bidi::on_char (kind, ucn_p);
++}
++
+ /* Skip a C-style block comment. We find the end of the comment by
+ seeing if an asterisk is before every '/' we encounter. Returns
+ nonzero if comment terminated by EOF, zero otherwise.
+@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfile)
+ cpp_buffer *buffer = pfile->buffer;
+ const uchar *cur = buffer->cur;
+ uchar c;
++ const bool warn_bidi_p = pfile->warn_bidi_p ();
+
+ cur++;
+ if (*cur == '/')
+@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfile)
+ if (c == '/')
+ {
+ if (cur[-2] == '*')
+- break;
++ {
++ if (warn_bidi_p)
++ maybe_warn_bidi_on_close (pfile, cur);
++ break;
++ }
+
+ /* Warn about potential nested comments, but not if the '/'
+ comes immediately before the true comment delimiter.
+@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfile)
+ {
+ unsigned int cols;
+ buffer->cur = cur - 1;
++ if (warn_bidi_p)
++ maybe_warn_bidi_on_close (pfile, cur);
+ _cpp_process_line_notes (pfile, true);
+ if (buffer->next_line >= buffer->rlimit)
+ return true;
+@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfile)
+
+ cur = buffer->cur;
+ }
++ /* If this is a beginning of a UTF-8 encoding, it might be
++ a bidirectional control character. */
++ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
++ {
++ bidi::kind kind = get_bidi_utf8 (cur - 1);
++ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
++ }
+ }
+
+ buffer->cur = cur;
+@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile)
+ {
+ cpp_buffer *buffer = pfile->buffer;
+ source_location orig_line = pfile->line_table->highest_line;
++ const bool warn_bidi_p = pfile->warn_bidi_p ();
+
+- while (*buffer->cur != '\n')
+- buffer->cur++;
++ if (!warn_bidi_p)
++ while (*buffer->cur != '\n')
++ buffer->cur++;
++ else
++ {
++ while (*buffer->cur != '\n'
++ && *buffer->cur != bidi::utf8_start)
++ buffer->cur++;
++ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
++ {
++ while (*buffer->cur != '\n')
++ {
++ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
++ {
++ bidi::kind kind = get_bidi_utf8 (buffer->cur);
++ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
++ /*ucn_p=*/false);
++ }
++ buffer->cur++;
++ }
++ maybe_warn_bidi_on_close (pfile, buffer->cur);
++ }
++ }
+
+ _cpp_process_line_notes (pfile, true);
+ return orig_line != pfile->line_table->highest_line;
+@@ -1315,11 +1669,13 @@ warn_about_normalization (cpp_reader *pfile,
+
+ /* Returns TRUE if the sequence starting at buffer->cur is invalid in
+ an identifier. FIRST is TRUE if this starts an identifier. */
++
+ static bool
+ forms_identifier_p (cpp_reader *pfile, int first,
+ struct normalize_state *state)
+ {
+ cpp_buffer *buffer = pfile->buffer;
++ const bool warn_bidi_p = pfile->warn_bidi_p ();
+
+ if (*buffer->cur == '$')
+ {
+@@ -1343,6 +1699,12 @@ forms_identifier_p (cpp_reader *pfile, int first,
+ {
+ cppchar_t s;
+ buffer->cur += 2;
++ if (warn_bidi_p)
++ {
++ bidi::kind kind = get_bidi_ucn (buffer->cur,
++ buffer->cur[-1] == 'U');
++ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, /*ucn_p=*/true);
++ }
+ if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
+ state, &s, NULL, NULL))
+ return true;
+@@ -1450,6 +1812,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
+ const uchar *cur;
+ unsigned int len;
+ unsigned int hash = HT_HASHSTEP (0, *base);
++ const bool warn_bidi_p = pfile->warn_bidi_p ();
+
+ cur = pfile->buffer->cur;
+ if (! starts_ucn)
+@@ -1472,6 +1835,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
+ pfile->buffer->cur++;
+ }
+ } while (forms_identifier_p (pfile, false, nst));
++ if (warn_bidi_p)
++ maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
+ result = _cpp_interpret_identifier (pfile, base,
+ pfile->buffer->cur - base);
+ *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
+@@ -1673,6 +2038,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
+ _cpp_buff *first_buff = NULL, *last_buff = NULL;
+ size_t raw_prefix_start;
+ _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
++ const bool warn_bidi_p = pfile->warn_bidi_p ();
+
+ type = (*base == 'L' ? CPP_WSTRING :
+ *base == 'U' ? CPP_STRING32 :
+@@ -1909,8 +2275,15 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
+ cur = base = pfile->buffer->cur;
+ note = &pfile->buffer->notes[pfile->buffer->cur_note];
+ }
++ else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
++ && warn_bidi_p)
++ maybe_warn_bidi_on_char (pfile, cur - 1, get_bidi_utf8 (cur - 1),
++ /*ucn_p=*/false);
+ }
+
++ if (warn_bidi_p)
++ maybe_warn_bidi_on_close (pfile, cur);
++
+ if (CPP_OPTION (pfile, user_literals))
+ {
+ /* If a string format macro, say from inttypes.h, is placed touching
+@@ -2005,15 +2378,27 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
+ else
+ terminator = '>', type = CPP_HEADER_NAME;
+
++ const bool warn_bidi_p = pfile->warn_bidi_p ();
+ for (;;)
+ {
+ cppchar_t c = *cur++;
+
+ /* In #include-style directives, terminators are not escapable. */
+ if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
+- cur++;
++ {
++ if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
++ {
++ bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
++ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
++ }
++ cur++;
++ }
+ else if (c == terminator)
+- break;
++ {
++ if (warn_bidi_p)
++ maybe_warn_bidi_on_close (pfile, cur - 1);
++ break;
++ }
+ else if (c == '\n')
+ {
+ cur--;
+@@ -2030,6 +2415,11 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
+ }
+ else if (c == '\0')
+ saw_NUL = true;
++ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
++ {
++ bidi::kind kind = get_bidi_utf8 (cur - 1);
++ maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
++ }
+ }
+
+ if (saw_NUL && !pfile->state.skipping)
diff --git a/SOURCES/gcc8-Wno-format-security.patch b/SOURCES/gcc8-Wno-format-security.patch
new file mode 100644
index 0000000..cb21e5d
--- /dev/null
+++ b/SOURCES/gcc8-Wno-format-security.patch
@@ -0,0 +1,27 @@
+2017-02-25 Jakub Jelinek
+
+ * configure.ac: When adding -Wno-format, also add -Wno-format-security.
+ * configure: Regenerated.
+
+--- gcc/configure.ac.jj 2017-02-13 12:20:53.000000000 +0100
++++ gcc/configure.ac 2017-02-25 12:42:32.859175403 +0100
+@@ -481,7 +481,7 @@ AC_ARG_ENABLE(build-format-warnings,
+ AS_HELP_STRING([--disable-build-format-warnings],[don't use -Wformat while building GCC]),
+ [],[enable_build_format_warnings=yes])
+ AS_IF([test $enable_build_format_warnings = no],
+- [wf_opt=-Wno-format],[wf_opt=])
++ [wf_opt="-Wno-format -Wno-format-security"],[wf_opt=])
+ ACX_PROG_CXX_WARNING_OPTS(
+ m4_quote(m4_do([-W -Wall -Wno-narrowing -Wwrite-strings ],
+ [-Wcast-qual $wf_opt])), [loose_warn])
+--- gcc/configure.jj 2017-02-13 12:20:52.000000000 +0100
++++ gcc/configure 2017-02-25 12:42:50.041946391 +0100
+@@ -6647,7 +6647,7 @@ else
+ fi
+
+ if test $enable_build_format_warnings = no; then :
+- wf_opt=-Wno-format
++ wf_opt="-Wno-format -Wno-format-security"
+ else
+ wf_opt=
+ fi
diff --git a/SOURCES/gcc8-aarch64-mtune-neoverse-512tvb.patch b/SOURCES/gcc8-aarch64-mtune-neoverse-512tvb.patch
new file mode 100644
index 0000000..af0a049
--- /dev/null
+++ b/SOURCES/gcc8-aarch64-mtune-neoverse-512tvb.patch
@@ -0,0 +1,105 @@
+From 9c108bb84d3a2447dac730c455df658be0a2c751 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford
+Date: Tue, 17 Aug 2021 15:15:27 +0100
+Subject: [PATCH] aarch64: Add -mtune=neoverse-512tvb
+To: gcc-patches@gcc.gnu.org
+
+This patch adds an option to tune for Neoverse cores that have
+a total vector bandwidth of 512 bits (4x128 for Advanced SIMD
+and a vector-length-dependent equivalent for SVE). This is intended
+to be a compromise between tuning aggressively for a single core like
+Neoverse V1 (which can be too narrow) and tuning for AArch64 cores
+in general (which can be too wide).
+
+-mcpu=neoverse-512tvb is equivalent to -mcpu=neoverse-v1
+-mtune=neoverse-512tvb.
+
+gcc/
+ * doc/invoke.texi: Document -mtune=neoverse-512tvb and
+ -mcpu=neoverse-512tvb.
+ * config/aarch64/aarch64-cores.def (neoverse-512tvb): New entry.
+ * config/aarch64/aarch64-tune.md: Regenerate.
+
+(cherry picked from commit 048039c49b96875144f67e7789fdea54abf7710b)
+---
+ gcc/config/aarch64/aarch64-cores.def | 1 +
+ gcc/config/aarch64/aarch64-tune.md | 2 +-
+ gcc/doc/invoke.texi | 25 ++++++++++++++++++++++---
+ 3 files changed, 24 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
+index dfb839c01cc..f348d31e22e 100644
+--- a/gcc/config/aarch64/aarch64-cores.def
++++ b/gcc/config/aarch64/aarch64-cores.def
+@@ -99,6 +99,7 @@ AARCH64_CORE("saphira", saphira, falkor, 8_3A, AARCH64_FL_FOR_ARCH8_3
+ /* ARM ('A') cores. */
+ AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_SVE | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+ AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_SVE | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_SVE | AARCH64_FL_RNG, neoversev1, INVALID_IMP, INVALID_CORE, -1)
+
+ /* Armv8.5-A Architecture Processors. */
+ AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_RNG, neoversen2, 0x41, 0xd49, -1)
+diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
+index 2d7c9aa4740..09b76480f0b 100644
+--- a/gcc/config/aarch64/aarch64-tune.md
++++ b/gcc/config/aarch64/aarch64-tune.md
+@@ -1,5 +1,5 @@
+ ;; -*- buffer-read-only: t -*-
+ ;; Generated automatically by gentune.sh from aarch64-cores.def
+ (define_attr "tune"
+- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,neoversen1,saphira,zeus,neoversev1,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
++ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,neoversen1,saphira,zeus,neoversev1,neoverse512tvb,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
+ (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 78ca7738df2..68fda03281a 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -14772,9 +14772,9 @@ performance of the code. Permissible values for this option are:
+ @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
+ @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
+ @samp{cortex-a76}, @samp{ares}, @samp{neoverse-n1}, @samp{neoverse-n2},
+-@samp{neoverse-v1}, @samp{zeus}, @samp{exynos-m1}, @samp{falkor},
+-@samp{qdf24xx}, @samp{saphira}, @samp{xgene1}, @samp{vulcan}, @samp{thunderx},
+-@samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
++@samp{neoverse-v1}, @samp{zeus}, @samp{neoverse-512tvb}, @samp{exynos-m1},
++@samp{falkor}, @samp{qdf24xx}, @samp{saphira}, @samp{xgene1}, @samp{vulcan},
++@samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
+ @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53},
+ @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},
+ @samp{cortex-a73.cortex-a53}, @samp{cortex-a75.cortex-a55},
+@@ -14785,6 +14785,15 @@ The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53},
+ @samp{cortex-a75.cortex-a55} specify that GCC should tune for a
+ big.LITTLE system.
+
++The value @samp{neoverse-512tvb} specifies that GCC should tune
++for Neoverse cores that (a) implement SVE and (b) have a total vector
++bandwidth of 512 bits per cycle. In other words, the option tells GCC to
++tune for Neoverse cores that can execute 4 128-bit Advanced SIMD arithmetic
++instructions a cycle and that can execute an equivalent number of SVE
++arithmetic instructions per cycle (2 for 256-bit SVE, 4 for 128-bit SVE).
++This is more general than tuning for a specific core like Neoverse V1
++but is more specific than the default tuning described below.
++
+ Additionally on native AArch64 GNU/Linux systems the value
+ @samp{native} tunes performance to the host system. This option has no effect
+ if the compiler is unable to recognize the processor of the host system.
+@@ -14814,6 +14823,16 @@ by @option{-mtune}). Where this option is used in conjunction
+ with @option{-march} or @option{-mtune}, those options take precedence
+ over the appropriate part of this option.
+
++@option{-mcpu=neoverse-512tvb} is special in that it does not refer
++to a specific core, but instead refers to all Neoverse cores that
++(a) implement SVE and (b) have a total vector bandwidth of 512 bits
++a cycle. Unless overridden by @option{-march},
++@option{-mcpu=neoverse-512tvb} generates code that can run on a
++Neoverse V1 core, since Neoverse V1 is the first Neoverse core with
++these properties. Unless overridden by @option{-mtune},
++@option{-mcpu=neoverse-512tvb} tunes code in the same way as for
++@option{-mtune=neoverse-512tvb}.
++
+ @item -moverride=@var{string}
+ @opindex moverride
+ Override tuning decisions made by the back-end in response to a
+--
+2.25.1
+
diff --git a/SOURCES/gcc8-foffload-default.patch b/SOURCES/gcc8-foffload-default.patch
new file mode 100644
index 0000000..771e1e0
--- /dev/null
+++ b/SOURCES/gcc8-foffload-default.patch
@@ -0,0 +1,117 @@
+2017-01-20 Jakub Jelinek
+
+ * gcc.c (offload_targets_default): New variable.
+ (process_command): Set it if -foffload is defaulted.
+ (driver::maybe_putenv_OFFLOAD_TARGETS): Add OFFLOAD_TARGET_DEFAULT=1
+ into environment if -foffload has been defaulted.
+ * lto-wrapper.c (OFFLOAD_TARGET_DEFAULT_ENV): Define.
+ (compile_images_for_offload_targets): If OFFLOAD_TARGET_DEFAULT
+ is in the environment, don't fail if corresponding mkoffload
+ can't be found. Free and clear offload_names if no valid offload
+ is found.
+libgomp/
+ * target.c (gomp_load_plugin_for_device): If a plugin can't be
+ dlopened, silently assume it has no devices.
+
+--- gcc/gcc.c.jj 2017-01-17 10:28:40.000000000 +0100
++++ gcc/gcc.c 2017-01-20 16:26:29.649962902 +0100
+@@ -290,6 +290,10 @@ static const char *spec_host_machine = D
+
+ static char *offload_targets = NULL;
+
++/* Set to true if -foffload has not been used and offload_targets
++ is set to the configured-in default. */
++static bool offload_targets_default;
++
+ /* Nonzero if cross-compiling.
+ When -b is used, the value comes from the `specs' file. */
+
+@@ -4457,7 +4461,10 @@ process_command (unsigned int decoded_op
+ /* If the user didn't specify any, default to all configured offload
+ targets. */
+ if (ENABLE_OFFLOADING && offload_targets == NULL)
+- handle_foffload_option (OFFLOAD_TARGETS);
++ {
++ handle_foffload_option (OFFLOAD_TARGETS);
++ offload_targets_default = true;
++ }
+
+ if (output_file
+ && strcmp (output_file, "-") != 0
+@@ -7693,6 +7700,8 @@ driver::maybe_putenv_OFFLOAD_TARGETS ()
+ obstack_grow (&collect_obstack, offload_targets,
+ strlen (offload_targets) + 1);
+ xputenv (XOBFINISH (&collect_obstack, char *));
++ if (offload_targets_default)
++ xputenv ("OFFLOAD_TARGET_DEFAULT=1");
+ }
+
+ free (offload_targets);
+--- gcc/lto-wrapper.c.jj 2017-01-01 12:45:34.000000000 +0100
++++ gcc/lto-wrapper.c 2017-01-20 16:34:18.294016997 +0100
+@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.
+ /* Environment variable, used for passing the names of offload targets from GCC
+ driver to lto-wrapper. */
+ #define OFFLOAD_TARGET_NAMES_ENV "OFFLOAD_TARGET_NAMES"
++#define OFFLOAD_TARGET_DEFAULT_ENV "OFFLOAD_TARGET_DEFAULT"
+
+ enum lto_mode_d {
+ LTO_MODE_NONE, /* Not doing LTO. */
+@@ -790,8 +791,10 @@ compile_images_for_offload_targets (unsi
+ if (!target_names)
+ return;
+ unsigned num_targets = parse_env_var (target_names, &names, NULL);
++ const char *target_names_default = getenv (OFFLOAD_TARGET_DEFAULT_ENV);
+
+ int next_name_entry = 0;
++ bool hsa_seen = false;
+ const char *compiler_path = getenv ("COMPILER_PATH");
+ if (!compiler_path)
+ goto out;
+@@ -804,18 +807,32 @@ compile_images_for_offload_targets (unsi
+ /* HSA does not use LTO-like streaming and a different compiler, skip
+ it. */
+ if (strcmp (names[i], "hsa") == 0)
+- continue;
++ {
++ hsa_seen = true;
++ continue;
++ }
+
+ offload_names[next_name_entry]
+ = compile_offload_image (names[i], compiler_path, in_argc, in_argv,
+ compiler_opts, compiler_opt_count,
+ linker_opts, linker_opt_count);
+ if (!offload_names[next_name_entry])
+- fatal_error (input_location,
+- "problem with building target image for %s\n", names[i]);
++ {
++ if (target_names_default != NULL)
++ continue;
++ fatal_error (input_location,
++ "problem with building target image for %s\n",
++ names[i]);
++ }
+ next_name_entry++;
+ }
+
++ if (next_name_entry == 0 && !hsa_seen)
++ {
++ free (offload_names);
++ offload_names = NULL;
++ }
++
+ out:
+ free_array_of_ptrs ((void **) names, num_targets);
+ }
+--- libgomp/target.c.jj 2017-01-01 12:45:52.000000000 +0100
++++ libgomp/target.c 2017-01-20 20:12:13.756710875 +0100
+@@ -2356,7 +2356,7 @@ gomp_load_plugin_for_device (struct gomp
+
+ void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
+ if (!plugin_handle)
+- goto dl_fail;
++ return 0;
+
+ /* Check if all required functions are available in the plugin and store
+ their handlers. None of the symbols can legitimately be NULL,
diff --git a/SOURCES/gcc8-hack.patch b/SOURCES/gcc8-hack.patch
new file mode 100644
index 0000000..515173f
--- /dev/null
+++ b/SOURCES/gcc8-hack.patch
@@ -0,0 +1,124 @@
+--- libada/Makefile.in.jj 2009-01-14 12:07:35.000000000 +0100
++++ libada/Makefile.in 2009-01-15 14:25:33.000000000 +0100
+@@ -66,18 +66,40 @@ libsubdir := $(libdir)/gcc/$(target_nonc
+ ADA_RTS_DIR=$(GCC_DIR)/ada/rts$(subst /,_,$(MULTISUBDIR))
+ ADA_RTS_SUBDIR=./rts$(subst /,_,$(MULTISUBDIR))
+
++DEFAULTMULTIFLAGS :=
++ifeq ($(MULTISUBDIR),)
++targ:=$(subst -, ,$(target))
++arch:=$(word 1,$(targ))
++ifeq ($(words $(targ)),2)
++osys:=$(word 2,$(targ))
++else
++osys:=$(word 3,$(targ))
++endif
++ifeq ($(strip $(filter-out i%86 x86_64 powerpc% ppc% s390% sparc% linux%, $(arch) $(osys))),)
++ifeq ($(shell $(CC) $(CFLAGS) -print-multi-os-directory),../lib64)
++DEFAULTMULTIFLAGS := -m64
++else
++ifeq ($(strip $(filter-out s390%, $(arch))),)
++DEFAULTMULTIFLAGS := -m31
++else
++DEFAULTMULTIFLAGS := -m32
++endif
++endif
++endif
++endif
++
+ # exeext should not be used because it's the *host* exeext. We're building
+ # a *target* library, aren't we?!? Likewise for CC. Still, provide bogus
+ # definitions just in case something slips through the safety net provided
+ # by recursive make invocations in gcc/ada/Makefile.in
+ LIBADA_FLAGS_TO_PASS = \
+ "MAKEOVERRIDES=" \
+- "LDFLAGS=$(LDFLAGS)" \
++ "LDFLAGS=$(LDFLAGS) $(DEFAULTMULTIFLAGS)" \
+ "LN_S=$(LN_S)" \
+ "SHELL=$(SHELL)" \
+- "GNATLIBFLAGS=$(GNATLIBFLAGS) $(MULTIFLAGS)" \
+- "GNATLIBCFLAGS=$(GNATLIBCFLAGS) $(MULTIFLAGS)" \
+- "GNATLIBCFLAGS_FOR_C=$(GNATLIBCFLAGS_FOR_C) $(MULTIFLAGS)" \
++ "GNATLIBFLAGS=$(GNATLIBFLAGS) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \
++ "GNATLIBCFLAGS=$(GNATLIBCFLAGS) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \
++ "GNATLIBCFLAGS_FOR_C=$(GNATLIBCFLAGS_FOR_C) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \
+ "PICFLAG_FOR_TARGET=$(PICFLAG)" \
+ "THREAD_KIND=$(THREAD_KIND)" \
+ "TRACE=$(TRACE)" \
+@@ -88,7 +110,7 @@ LIBADA_FLAGS_TO_PASS = \
+ "exeext=.exeext.should.not.be.used " \
+ 'CC=the.host.compiler.should.not.be.needed' \
+ "GCC_FOR_TARGET=$(CC)" \
+- "CFLAGS=$(CFLAGS)"
++ "CFLAGS=$(CFLAGS) $(DEFAULTMULTIFLAGS)"
+
+ # Rules to build gnatlib.
+ .PHONY: gnatlib gnatlib-plain gnatlib-sjlj gnatlib-zcx gnatlib-shared osconstool
+--- config-ml.in.jj 2010-06-30 09:50:44.000000000 +0200
++++ config-ml.in 2010-07-02 21:24:17.994211151 +0200
+@@ -511,6 +511,8 @@ multi-do:
+ ADAFLAGS="$(ADAFLAGS) $${flags}" \
+ prefix="$(prefix)" \
+ exec_prefix="$(exec_prefix)" \
++ mandir="$(mandir)" \
++ infodir="$(infodir)" \
+ GOCFLAGS="$(GOCFLAGS) $${flags}" \
+ CXXFLAGS="$(CXXFLAGS) $${flags}" \
+ LIBCFLAGS="$(LIBCFLAGS) $${flags}" \
+--- libcpp/macro.c.jj 2015-01-14 11:01:34.000000000 +0100
++++ libcpp/macro.c 2015-01-14 14:22:19.286949884 +0100
+@@ -2947,8 +2947,6 @@ create_iso_definition (cpp_reader *pfile
+ cpp_token *token;
+ const cpp_token *ctoken;
+ bool following_paste_op = false;
+- const char *paste_op_error_msg =
+- N_("'##' cannot appear at either end of a macro expansion");
+ unsigned int num_extra_tokens = 0;
+
+ /* Get the first token of the expansion (or the '(' of a
+@@ -3059,7 +3057,8 @@ create_iso_definition (cpp_reader *pfile
+ function-like macros, but not at the end. */
+ if (following_paste_op)
+ {
+- cpp_error (pfile, CPP_DL_ERROR, paste_op_error_msg);
++ cpp_error (pfile, CPP_DL_ERROR,
++ "'##' cannot appear at either end of a macro expansion");
+ return false;
+ }
+ break;
+@@ -3072,7 +3071,8 @@ create_iso_definition (cpp_reader *pfile
+ function-like macros, but not at the beginning. */
+ if (macro->count == 1)
+ {
+- cpp_error (pfile, CPP_DL_ERROR, paste_op_error_msg);
++ cpp_error (pfile, CPP_DL_ERROR,
++ "'##' cannot appear at either end of a macro expansion");
+ return false;
+ }
+
+--- libcpp/expr.c.jj 2015-01-14 11:01:34.000000000 +0100
++++ libcpp/expr.c 2015-01-14 14:35:52.851002344 +0100
+@@ -672,16 +672,17 @@ cpp_classify_number (cpp_reader *pfile,
+ if ((result & CPP_N_WIDTH) == CPP_N_LARGE
+ && CPP_OPTION (pfile, cpp_warn_long_long))
+ {
+- const char *message = CPP_OPTION (pfile, cplusplus)
+- ? N_("use of C++11 long long integer constant")
+- : N_("use of C99 long long integer constant");
+-
+ if (CPP_OPTION (pfile, c99))
+ cpp_warning_with_line (pfile, CPP_W_LONG_LONG, virtual_location,
+- 0, message);
++ 0, CPP_OPTION (pfile, cplusplus)
++ ? N_("use of C++11 long long integer constant")
++ : N_("use of C99 long long integer constant"));
+ else
+ cpp_pedwarning_with_line (pfile, CPP_W_LONG_LONG,
+- virtual_location, 0, message);
++ virtual_location, 0,
++ CPP_OPTION (pfile, cplusplus)
++ ? N_("use of C++11 long long integer constant")
++ : N_("use of C99 long long integer constant"));
+ }
+
+ result |= CPP_N_INTEGER;
diff --git a/SOURCES/gcc8-harden-1.patch b/SOURCES/gcc8-harden-1.patch
new file mode 100644
index 0000000..a325c09
--- /dev/null
+++ b/SOURCES/gcc8-harden-1.patch
@@ -0,0 +1,294 @@
+From 88bf1c3910e4cf97dcb85c6d32291c23e572a516 Mon Sep 17 00:00:00 2001
+From: "H.J. Lu"
+Date: Wed, 27 Oct 2021 07:48:54 -0700
+Subject: [PATCH 1/4] x86: Add -mharden-sls=[none|all|return|indirect-branch]
+
+Add -mharden-sls= to mitigate against straight line speculation (SLS)
+for function return and indirect branch by adding an INT3 instruction
+after function return and indirect branch.
+
+gcc/
+
+ PR target/102952
+ * config/i386/i386-opts.h (harden_sls): New enum.
+ * config/i386/i386.c (output_indirect_thunk): Mitigate against
+ SLS for function return.
+ (ix86_output_function_return): Likewise.
+ (ix86_output_jmp_thunk_or_indirect): Mitigate against indirect
+ branch.
+ (ix86_output_indirect_jmp): Likewise.
+ (ix86_output_call_insn): Likewise.
+ * config/i386/i386.opt: Add -mharden-sls=.
+ * doc/invoke.texi: Document -mharden-sls=.
+
+gcc/testsuite/
+
+ PR target/102952
+ * gcc.target/i386/harden-sls-1.c: New test.
+ * gcc.target/i386/harden-sls-2.c: Likewise.
+ * gcc.target/i386/harden-sls-3.c: Likewise.
+ * gcc.target/i386/harden-sls-4.c: Likewise.
+ * gcc.target/i386/harden-sls-5.c: Likewise.
+
+(cherry picked from commit 53a643f8568067d7700a9f2facc8ba39974973d3)
+---
+ gcc/config/i386/i386-opts.h | 7 +++++++
+ gcc/config/i386/i386.c | 22 +++++++++++++++-----
+ gcc/config/i386/i386.opt | 20 ++++++++++++++++++
+ gcc/doc/invoke.texi | 10 ++++++++-
+ gcc/testsuite/gcc.target/i386/harden-sls-1.c | 14 +++++++++++++
+ gcc/testsuite/gcc.target/i386/harden-sls-2.c | 14 +++++++++++++
+ gcc/testsuite/gcc.target/i386/harden-sls-3.c | 14 +++++++++++++
+ gcc/testsuite/gcc.target/i386/harden-sls-4.c | 16 ++++++++++++++
+ gcc/testsuite/gcc.target/i386/harden-sls-5.c | 17 +++++++++++++++
+ 9 files changed, 128 insertions(+), 6 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-2.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-3.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-4.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-5.c
+
+diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
+index 46366cbfa72..34718b6d52c 100644
+--- a/gcc/config/i386/i386-opts.h
++++ b/gcc/config/i386/i386-opts.h
+@@ -119,4 +119,11 @@ enum indirect_branch {
+ indirect_branch_thunk_extern
+ };
+
++enum harden_sls {
++ harden_sls_none = 0,
++ harden_sls_return = 1 << 0,
++ harden_sls_indirect_branch = 1 << 1,
++ harden_sls_all = harden_sls_return | harden_sls_indirect_branch
++};
++
+ #endif
+diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
+index 31502774ef3..eb9303f8742 100644
+--- a/gcc/config/i386/i386.c
++++ b/gcc/config/i386/i386.c
+@@ -10977,6 +10977,9 @@ output_indirect_thunk (enum indirect_thunk_prefix need_prefix,
+ fputs ("\tbnd ret\n", asm_out_file);
+ else
+ fputs ("\tret\n", asm_out_file);
++
++ if ((ix86_harden_sls & harden_sls_return))
++ fputs ("\tint3\n", asm_out_file);
+ }
+
+ /* Output a funtion with a call and return thunk for indirect branch.
+@@ -28728,6 +28731,8 @@ ix86_output_jmp_thunk_or_indirect (const char *thunk_name,
+ fprintf (asm_out_file, "\tjmp\t");
+ assemble_name (asm_out_file, thunk_name);
+ putc ('\n', asm_out_file);
++ if ((ix86_harden_sls & harden_sls_indirect_branch))
++ fputs ("\tint3\n", asm_out_file);
+ }
+ else
+ output_indirect_thunk (need_prefix, regno);
+@@ -28973,10 +28978,10 @@ ix86_output_indirect_jmp (rtx call_op)
+ gcc_unreachable ();
+
+ ix86_output_indirect_branch (call_op, "%0", true);
+- return "";
+ }
+ else
+- return "%!jmp\t%A0";
++ output_asm_insn ("%!jmp\t%A0", &call_op);
++ return (ix86_harden_sls & harden_sls_indirect_branch) ? "int3" : "";
+ }
+
+ /* Output function return. CALL_OP is the jump target. Add a REP
+@@ -29018,9 +29023,11 @@ ix86_output_function_return (bool long_p)
+ }
+
+ if (!long_p || ix86_bnd_prefixed_insn_p (current_output_insn))
+- return "%!ret";
++ output_asm_insn ("%!ret", NULL);
++ else
++ output_asm_insn ("rep%; ret", NULL);
+
+- return "rep%; ret";
++ return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
+ }
+
+ /* Output indirect function return. RET_OP is the function return
+@@ -29158,7 +29165,12 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
+ if (output_indirect_p && !direct_p)
+ ix86_output_indirect_branch (call_op, xasm, true);
+ else
+- output_asm_insn (xasm, &call_op);
++ {
++ output_asm_insn (xasm, &call_op);
++ if (!direct_p
++ && (ix86_harden_sls & harden_sls_indirect_branch))
++ return "int3";
++ }
+ return "";
+ }
+
+diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
+index d9bd909a885..3ae48609e25 100644
+--- a/gcc/config/i386/i386.opt
++++ b/gcc/config/i386/i386.opt
+@@ -1055,3 +1055,23 @@ Support MOVDIRI built-in functions and code generation.
+ mmovdir64b
+ Target Report Mask(ISA_MOVDIR64B) Var(ix86_isa_flags2) Save
+ Support MOVDIR64B built-in functions and code generation.
++
++mharden-sls=
++Target RejectNegative Joined Enum(harden_sls) Var(ix86_harden_sls) Init(harden_sls_none)
++Generate code to mitigate against straight line speculation.
++
++Enum
++Name(harden_sls) Type(enum harden_sls)
++Known choices for mitigation against straight line speculation with -mharden-sls=:
++
++EnumValue
++Enum(harden_sls) String(none) Value(harden_sls_none)
++
++EnumValue
++Enum(harden_sls) String(return) Value(harden_sls_return)
++
++EnumValue
++Enum(harden_sls) String(indirect-branch) Value(harden_sls_indirect_branch)
++
++EnumValue
++Enum(harden_sls) String(all) Value(harden_sls_all)
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 78ca7738df2..1e20efd6969 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -1284,7 +1284,7 @@ See RS/6000 and PowerPC Options.
+ -mstack-protector-guard-symbol=@var{symbol} -mmitigate-rop @gol
+ -mgeneral-regs-only -mcall-ms2sysv-xlogues @gol
+ -mindirect-branch=@var{choice} -mfunction-return=@var{choice} @gol
+--mindirect-branch-register}
++-mindirect-branch-register -mharden-sls=@var{choice}}
+
+ @emph{x86 Windows Options}
+ @gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol
+@@ -28036,6 +28036,14 @@ not be reachable in the large code model.
+ @opindex -mindirect-branch-register
+ Force indirect call and jump via register.
+
++@item -mharden-sls=@var{choice}
++@opindex mharden-sls
++Generate code to mitigate against straight line speculation (SLS) with
++@var{choice}. The default is @samp{none} which disables all SLS
++hardening. @samp{return} enables SLS hardening for function return.
++@samp{indirect-branch} enables SLS hardening for indirect branch.
++@samp{all} enables all SLS hardening.
++
+ @end table
+
+ These @samp{-m} switches are supported in addition to the above
+diff --git a/gcc/testsuite/gcc.target/i386/harden-sls-1.c b/gcc/testsuite/gcc.target/i386/harden-sls-1.c
+new file mode 100644
+index 00000000000..6f70dc94a23
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/harden-sls-1.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mindirect-branch=thunk-extern -mharden-sls=all" } */
++/* { dg-additional-options "-fno-pic" { target { ! *-*-darwin* } } } */
++
++extern void foo (void);
++
++void
++bar (void)
++{
++ foo ();
++}
++
++/* { dg-final { scan-assembler "jmp\[ \t\]+_?foo" } } */
++/* { dg-final { scan-assembler-not {int3} } } */
+diff --git a/gcc/testsuite/gcc.target/i386/harden-sls-2.c b/gcc/testsuite/gcc.target/i386/harden-sls-2.c
+new file mode 100644
+index 00000000000..a7c59078d03
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/harden-sls-2.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mindirect-branch=thunk-extern -mharden-sls=all" } */
++/* { dg-additional-options "-fno-pic" { target { ! *-*-darwin* } } } */
++
++extern void (*fptr) (void);
++
++void
++foo (void)
++{
++ fptr ();
++}
++
++/* { dg-final { scan-assembler "jmp\[ \t\]+_?__x86_indirect_thunk_(r|e)ax" } } */
++/* { dg-final { scan-assembler-times "int3" 1 } } */
+diff --git a/gcc/testsuite/gcc.target/i386/harden-sls-3.c b/gcc/testsuite/gcc.target/i386/harden-sls-3.c
+new file mode 100644
+index 00000000000..1a6056b6d7b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/harden-sls-3.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mindirect-branch=thunk -mharden-sls=all" } */
++/* { dg-additional-options "-fno-pic" { target { ! *-*-darwin* } } } */
++
++extern void (*fptr) (void);
++
++void
++foo (void)
++{
++ fptr ();
++}
++
++/* { dg-final { scan-assembler "jmp\[ \t\]+_?__x86_indirect_thunk_(r|e)ax" } } */
++/* { dg-final { scan-assembler-times "int3" 2 } } */
+diff --git a/gcc/testsuite/gcc.target/i386/harden-sls-4.c b/gcc/testsuite/gcc.target/i386/harden-sls-4.c
+new file mode 100644
+index 00000000000..f70dd1379d3
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/harden-sls-4.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mindirect-branch=keep -mharden-sls=all" } */
++/* { dg-additional-options "-fno-pic" { target { ! *-*-darwin* } } } */
++
++extern void (*fptr) (void);
++
++void
++foo (void)
++{
++ fptr ();
++}
++
++/* { dg-final { scan-assembler "jmp\[ \t\]+\\*_?fptr" { target { ! x32 } } } } */
++/* { dg-final { scan-assembler "movl\[ \t\]+fptr\\(%rip\\), %eax" { target x32 } } } */
++/* { dg-final { scan-assembler "jmp\[ \t\]+\\*%rax" { target x32 } } } */
++/* { dg-final { scan-assembler-times "int3" 1 } } */
+diff --git a/gcc/testsuite/gcc.target/i386/harden-sls-5.c b/gcc/testsuite/gcc.target/i386/harden-sls-5.c
+new file mode 100644
+index 00000000000..613c44c6f82
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/harden-sls-5.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -mharden-sls=return" } */
++/* { dg-additional-options "-fno-pic" { target { ! *-*-darwin* } } } */
++
++typedef void (*dispatch_t)(long offset);
++
++dispatch_t dispatch;
++
++int
++male_indirect_jump (long offset)
++{
++ dispatch(offset);
++ return 0;
++}
++
++/* { dg-final { scan-assembler-times "ret" 1 } } */
++/* { dg-final { scan-assembler-times "int3" 1 } } */
+--
+2.36.1
+
diff --git a/SOURCES/gcc8-harden-2.patch b/SOURCES/gcc8-harden-2.patch
new file mode 100644
index 0000000..669c11f
--- /dev/null
+++ b/SOURCES/gcc8-harden-2.patch
@@ -0,0 +1,155 @@
+From 0df8313a0a5d8533f2487e21d7b42e9adee28f18 Mon Sep 17 00:00:00 2001
+From: "H.J. Lu"
+Date: Wed, 27 Oct 2021 06:27:15 -0700
+Subject: [PATCH 2/4] x86: Add -mindirect-branch-cs-prefix
+
+Add -mindirect-branch-cs-prefix to add CS prefix to call and jmp to
+indirect thunk with branch target in r8-r15 registers so that the call
+and jmp instruction length is 6 bytes to allow them to be replaced with
+"lfence; call *%r8-r15" or "lfence; jmp *%r8-r15" at run-time.
+
+gcc/
+
+ PR target/102952
+ * config/i386/i386.c (ix86_output_jmp_thunk_or_indirect): Emit
+ CS prefix for -mindirect-branch-cs-prefix.
+ (ix86_output_indirect_branch_via_reg): Likewise.
+ * config/i386/i386.opt: Add -mindirect-branch-cs-prefix.
+ * doc/invoke.texi: Document -mindirect-branch-cs-prefix.
+
+gcc/testsuite/
+
+ PR target/102952
+ * gcc.target/i386/indirect-thunk-cs-prefix-1.c: New test.
+ * gcc.target/i386/indirect-thunk-cs-prefix-2.c: Likewise.
+
+(cherry picked from commit 2196a681d7810ad8b227bf983f38ba716620545e)
+---
+ gcc/config/i386/i386.c | 14 ++++++++++++--
+ gcc/config/i386/i386.opt | 4 ++++
+ gcc/doc/invoke.texi | 10 +++++++++-
+ .../gcc.target/i386/indirect-thunk-cs-prefix-1.c | 14 ++++++++++++++
+ .../gcc.target/i386/indirect-thunk-cs-prefix-2.c | 15 +++++++++++++++
+ 5 files changed, 54 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-2.c
+
+diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
+index eb9303f8742..8442dd0daea 100644
+--- a/gcc/config/i386/i386.c
++++ b/gcc/config/i386/i386.c
+@@ -28728,7 +28728,12 @@ ix86_output_jmp_thunk_or_indirect (const char *thunk_name,
+ if (need_prefix == indirect_thunk_prefix_bnd)
+ fprintf (asm_out_file, "\tbnd jmp\t");
+ else
+- fprintf (asm_out_file, "\tjmp\t");
++ {
++ if (REX_INT_REGNO_P (regno)
++ && ix86_indirect_branch_cs_prefix)
++ fprintf (asm_out_file, "\tcs\n");
++ fprintf (asm_out_file, "\tjmp\t");
++ }
+ assemble_name (asm_out_file, thunk_name);
+ putc ('\n', asm_out_file);
+ if ((ix86_harden_sls & harden_sls_indirect_branch))
+@@ -28787,7 +28792,12 @@ ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
+ if (need_prefix == indirect_thunk_prefix_bnd)
+ fprintf (asm_out_file, "\tbnd call\t");
+ else
+- fprintf (asm_out_file, "\tcall\t");
++ {
++ if (REX_INT_REGNO_P (regno)
++ && ix86_indirect_branch_cs_prefix)
++ fprintf (asm_out_file, "\tcs\n");
++ fprintf (asm_out_file, "\tcall\t");
++ }
+ assemble_name (asm_out_file, thunk_name);
+ putc ('\n', asm_out_file);
+ return;
+diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
+index 3ae48609e25..9f67ef558dc 100644
+--- a/gcc/config/i386/i386.opt
++++ b/gcc/config/i386/i386.opt
+@@ -1044,6 +1044,10 @@ Enum(indirect_branch) String(thunk-inline) Value(indirect_branch_thunk_inline)
+ EnumValue
+ Enum(indirect_branch) String(thunk-extern) Value(indirect_branch_thunk_extern)
+
++mindirect-branch-cs-prefix
++Target Var(ix86_indirect_branch_cs_prefix) Init(0)
++Add CS prefix to call and jmp to indirect thunk with branch target in r8-r15 registers.
++
+ mindirect-branch-register
+ Target Report Var(ix86_indirect_branch_register) Init(0)
+ Force indirect call and jump via register.
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 1e20efd6969..605cd4b93f1 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -1284,7 +1284,8 @@ See RS/6000 and PowerPC Options.
+ -mstack-protector-guard-symbol=@var{symbol} -mmitigate-rop @gol
+ -mgeneral-regs-only -mcall-ms2sysv-xlogues @gol
+ -mindirect-branch=@var{choice} -mfunction-return=@var{choice} @gol
+--mindirect-branch-register -mharden-sls=@var{choice}}
++-mindirect-branch-register -mharden-sls=@var{choice} @gol
++-mindirect-branch-cs-prefix}
+
+ @emph{x86 Windows Options}
+ @gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol
+@@ -28044,6 +28045,13 @@ hardening. @samp{return} enables SLS hardening for function return.
+ @samp{indirect-branch} enables SLS hardening for indirect branch.
+ @samp{all} enables all SLS hardening.
+
++@item -mindirect-branch-cs-prefix
++@opindex mindirect-branch-cs-prefix
++Add CS prefix to call and jmp to indirect thunk with branch target in
++r8-r15 registers so that the call and jmp instruction length is 6 bytes
++to allow them to be replaced with @samp{lfence; call *%r8-r15} or
++@samp{lfence; jmp *%r8-r15} at run-time.
++
+ @end table
+
+ These @samp{-m} switches are supported in addition to the above
+diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c
+new file mode 100644
+index 00000000000..db2f3416823
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile { target { ! ia32 } } } */
++/* { dg-options "-O2 -ffixed-rax -ffixed-rbx -ffixed-rcx -ffixed-rdx -ffixed-rdi -ffixed-rsi -mindirect-branch-cs-prefix -mindirect-branch=thunk-extern" } */
++/* { dg-additional-options "-fno-pic" { target { ! *-*-darwin* } } } */
++
++extern void (*fptr) (void);
++
++void
++foo (void)
++{
++ fptr ();
++}
++
++/* { dg-final { scan-assembler-times "jmp\[ \t\]+_?__x86_indirect_thunk_r\[0-9\]+" 1 } } */
++/* { dg-final { scan-assembler-times "\tcs" 1 } } */
+diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-2.c
+new file mode 100644
+index 00000000000..adfc39a49d4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-2.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile { target { ! ia32 } } } */
++/* { dg-options "-O2 -ffixed-rax -ffixed-rbx -ffixed-rcx -ffixed-rdx -ffixed-rdi -ffixed-rsi -mindirect-branch-cs-prefix -mindirect-branch=thunk-extern" } */
++/* { dg-additional-options "-fno-pic" { target { ! *-*-darwin* } } } */
++
++extern void (*bar) (void);
++
++int
++foo (void)
++{
++ bar ();
++ return 0;
++}
++
++/* { dg-final { scan-assembler-times "call\[ \t\]+_?__x86_indirect_thunk_r\[0-9\]+" 1 } } */
++/* { dg-final { scan-assembler-times "\tcs" 1 } } */
+--
+2.36.1
+
diff --git a/SOURCES/gcc8-harden-3.patch b/SOURCES/gcc8-harden-3.patch
new file mode 100644
index 0000000..ecb643a
--- /dev/null
+++ b/SOURCES/gcc8-harden-3.patch
@@ -0,0 +1,108 @@
+From 621de498ee19e1f2642eebde707430254c0459c0 Mon Sep 17 00:00:00 2001
+From: "H.J. Lu"
+Date: Wed, 5 Jan 2022 16:33:16 -0800
+Subject: [PATCH 3/4] x86: Rename -harden-sls=indirect-branch to
+ -harden-sls=indirect-jmp
+
+Indirect branch also includes indirect call instructions. Rename
+-mharden-sls=indirect-branch to -mharden-sls=indirect-jmp to match its
+intended behavior.
+
+ PR target/102952
+ * config/i386/i386-opts.h (harden_sls): Replace
+ harden_sls_indirect_branch with harden_sls_indirect_jmp.
+ * config/i386/i386.c (ix86_output_jmp_thunk_or_indirect):
+ Likewise.
+ (ix86_output_indirect_jmp): Likewise.
+ (ix86_output_call_insn): Likewise.
+ * config/i386/i386.opt: Replace indirect-branch with
+ indirect-jmp. Replace harden_sls_indirect_branch with
+ harden_sls_indirect_jmp.
+ * doc/invoke.texi (-mharden-sls=): Replace indirect-branch with
+ indirect-jmp.
+
+(cherry picked from commit ed8060950c64f2e449aaf90e438aa26d0d9d0b31)
+---
+ gcc/config/i386/i386-opts.h | 4 ++--
+ gcc/config/i386/i386.c | 6 +++---
+ gcc/config/i386/i386.opt | 2 +-
+ gcc/doc/invoke.texi | 4 ++--
+ 4 files changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
+index 34718b6d52c..47facc254cd 100644
+--- a/gcc/config/i386/i386-opts.h
++++ b/gcc/config/i386/i386-opts.h
+@@ -122,8 +122,8 @@ enum indirect_branch {
+ enum harden_sls {
+ harden_sls_none = 0,
+ harden_sls_return = 1 << 0,
+- harden_sls_indirect_branch = 1 << 1,
+- harden_sls_all = harden_sls_return | harden_sls_indirect_branch
++ harden_sls_indirect_jmp = 1 << 1,
++ harden_sls_all = harden_sls_return | harden_sls_indirect_jmp
+ };
+
+ #endif
+diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
+index 8442dd0daea..3bc14e20105 100644
+--- a/gcc/config/i386/i386.c
++++ b/gcc/config/i386/i386.c
+@@ -28736,7 +28736,7 @@ ix86_output_jmp_thunk_or_indirect (const char *thunk_name,
+ }
+ assemble_name (asm_out_file, thunk_name);
+ putc ('\n', asm_out_file);
+- if ((ix86_harden_sls & harden_sls_indirect_branch))
++ if ((ix86_harden_sls & harden_sls_indirect_jmp))
+ fputs ("\tint3\n", asm_out_file);
+ }
+ else
+@@ -28991,7 +28991,7 @@ ix86_output_indirect_jmp (rtx call_op)
+ }
+ else
+ output_asm_insn ("%!jmp\t%A0", &call_op);
+- return (ix86_harden_sls & harden_sls_indirect_branch) ? "int3" : "";
++ return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
+ }
+
+ /* Output function return. CALL_OP is the jump target. Add a REP
+@@ -29178,7 +29178,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
+ {
+ output_asm_insn (xasm, &call_op);
+ if (!direct_p
+- && (ix86_harden_sls & harden_sls_indirect_branch))
++ && (ix86_harden_sls & harden_sls_indirect_jmp))
+ return "int3";
+ }
+ return "";
+diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
+index 9f67ef558dc..7a5c7b9369a 100644
+--- a/gcc/config/i386/i386.opt
++++ b/gcc/config/i386/i386.opt
+@@ -1075,7 +1075,7 @@ EnumValue
+ Enum(harden_sls) String(return) Value(harden_sls_return)
+
+ EnumValue
+-Enum(harden_sls) String(indirect-branch) Value(harden_sls_indirect_branch)
++Enum(harden_sls) String(indirect-jmp) Value(harden_sls_indirect_jmp)
+
+ EnumValue
+ Enum(harden_sls) String(all) Value(harden_sls_all)
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 605cd4b93f1..20d8e3fd782 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -28041,8 +28041,8 @@ Force indirect call and jump via register.
+ @opindex mharden-sls
+ Generate code to mitigate against straight line speculation (SLS) with
+ @var{choice}. The default is @samp{none} which disables all SLS
+-hardening. @samp{return} enables SLS hardening for function return.
+-@samp{indirect-branch} enables SLS hardening for indirect branch.
++hardening. @samp{return} enables SLS hardening for function returns.
++@samp{indirect-jmp} enables SLS hardening for indirect jumps.
+ @samp{all} enables all SLS hardening.
+
+ @item -mindirect-branch-cs-prefix
+--
+2.36.1
+
diff --git a/SOURCES/gcc8-harden-4.patch b/SOURCES/gcc8-harden-4.patch
new file mode 100644
index 0000000..648d543
--- /dev/null
+++ b/SOURCES/gcc8-harden-4.patch
@@ -0,0 +1,75 @@
+From 5a5e7890cefa112e95e1de9800d8081c2a38a1da Mon Sep 17 00:00:00 2001
+From: "H.J. Lu"
+Date: Wed, 5 Jan 2022 18:04:21 -0800
+Subject: [PATCH 4/4] x86: Generate INT3 for __builtin_eh_return
+
+Generate INT3 after indirect jmp in exception return for -fcf-protection
+with -mharden-sls=indirect-jmp.
+
+gcc/
+
+ PR target/103925
+ * config/i386/i386.c (ix86_output_indirect_function_return):
+ Generate INT3 after indirect jmp for -mharden-sls=indirect-jmp.
+
+gcc/testsuite/
+
+ PR target/103925
+ * gcc.target/i386/harden-sls-6.c: New test.
+
+(cherry picked from commit c2e5c4feed32c808591b5278f680bbabe63eb225)
+---
+ gcc/config/i386/i386.c | 9 ++++++---
+ gcc/testsuite/gcc.target/i386/harden-sls-6.c | 18 ++++++++++++++++++
+ 2 files changed, 24 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-6.c
+
+diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
+index 3bc14e20105..dbc3d462fda 100644
+--- a/gcc/config/i386/i386.c
++++ b/gcc/config/i386/i386.c
+@@ -29083,11 +29083,14 @@ ix86_output_indirect_function_return (rtx ret_op)
+ }
+ else
+ output_indirect_thunk (need_prefix, regno);
+-
+- return "";
+ }
+ else
+- return "%!jmp\t%A0";
++ {
++ output_asm_insn ("%!jmp\t%A0", &ret_op);
++ if (ix86_harden_sls & harden_sls_indirect_jmp)
++ fputs ("\tint3\n", asm_out_file);
++ }
++ return "";
+ }
+
+ /* Split simple return with popping POPC bytes from stack to indirect
+diff --git a/gcc/testsuite/gcc.target/i386/harden-sls-6.c b/gcc/testsuite/gcc.target/i386/harden-sls-6.c
+new file mode 100644
+index 00000000000..9068eb64008
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/harden-sls-6.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile { target { ! ia32 } } } */
++/* { dg-options "-O2 -fcf-protection -mharden-sls=indirect-jmp" } */
++
++struct _Unwind_Context _Unwind_Resume_or_Rethrow_this_context;
++
++void offset (int);
++
++struct _Unwind_Context {
++ void *reg[7];
++} _Unwind_Resume_or_Rethrow() {
++ struct _Unwind_Context cur_contextcur_context =
++ _Unwind_Resume_or_Rethrow_this_context;
++ offset(0);
++ __builtin_eh_return ((long) offset, 0);
++}
++
++/* { dg-final { scan-assembler "jmp\[ \t\]+\\*%rcx" } } */
++/* { dg-final { scan-assembler-times "int3" 1 } } */
+--
+2.36.1
+
diff --git a/SOURCES/gcc8-i386-libgomp.patch b/SOURCES/gcc8-i386-libgomp.patch
new file mode 100644
index 0000000..520561e
--- /dev/null
+++ b/SOURCES/gcc8-i386-libgomp.patch
@@ -0,0 +1,11 @@
+--- libgomp/configure.tgt.jj 2008-01-10 20:53:48.000000000 +0100
++++ libgomp/configure.tgt 2008-03-27 12:44:51.000000000 +0100
+@@ -67,7 +67,7 @@ if test $enable_linux_futex = yes; then
+ ;;
+ *)
+ if test -z "$with_arch"; then
+- XCFLAGS="${XCFLAGS} -march=i486 -mtune=${target_cpu}"
++ XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
+ fi
+ esac
+ ;;
diff --git a/SOURCES/gcc8-isl-dl.patch b/SOURCES/gcc8-isl-dl.patch
new file mode 100644
index 0000000..46d3b0d
--- /dev/null
+++ b/SOURCES/gcc8-isl-dl.patch
@@ -0,0 +1,715 @@
+--- gcc/Makefile.in.jj 2015-06-06 10:00:25.000000000 +0200
++++ gcc/Makefile.in 2015-11-04 14:56:02.643536437 +0100
+@@ -1046,7 +1046,7 @@ BUILD_LIBDEPS= $(BUILD_LIBIBERTY)
+ # and the system's installed libraries.
+ LIBS = @LIBS@ libcommon.a $(CPPLIB) $(LIBINTL) $(LIBICONV) $(LIBBACKTRACE) \
+ $(LIBIBERTY) $(LIBDECNUMBER) $(HOST_LIBS)
+-BACKENDLIBS = $(ISLLIBS) $(GMPLIBS) $(PLUGINLIBS) $(HOST_LIBS) \
++BACKENDLIBS = $(if $(ISLLIBS),-ldl) $(GMPLIBS) $(PLUGINLIBS) $(HOST_LIBS) \
+ $(ZLIB)
+ # Any system libraries needed just for GNAT.
+ SYSLIBS = @GNAT_LIBEXC@
+@@ -2196,6 +2196,15 @@ $(out_object_file): $(out_file)
+ $(common_out_object_file): $(common_out_file)
+ $(COMPILE) $<
+ $(POSTCOMPILE)
++
++graphite%.o : \
++ ALL_CFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CFLAGS))
++graphite.o : \
++ ALL_CFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CFLAGS))
++graphite%.o : \
++ ALL_CXXFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CXXFLAGS))
++graphite.o : \
++ ALL_CXXFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CXXFLAGS))
+ #
+ # Generate header and source files from the machine description,
+ # and compile them.
+--- gcc/graphite.h.jj 2016-01-27 12:44:06.000000000 +0100
++++ gcc/graphite.h 2016-01-27 13:26:38.309876856 +0100
+@@ -39,6 +39,590 @@ along with GCC; see the file COPYING3.
+ #include <isl/schedule_node.h>
+ #include <isl/id.h>
+ #include <isl/space.h>
++#include <isl/version.h>
++#include <dlfcn.h>
++
++#define DYNSYMS \
++ DYNSYM (isl_aff_add_coefficient_si); \
++ DYNSYM (isl_aff_free); \
++ DYNSYM (isl_aff_get_space); \
++ DYNSYM (isl_aff_set_coefficient_si); \
++ DYNSYM (isl_aff_set_constant_si); \
++ DYNSYM (isl_aff_zero_on_domain); \
++ DYNSYM (isl_band_free); \
++ DYNSYM (isl_band_get_children); \
++ DYNSYM (isl_band_get_partial_schedule); \
++ DYNSYM (isl_band_has_children); \
++ DYNSYM (isl_band_list_free); \
++ DYNSYM (isl_band_list_get_band); \
++ DYNSYM (isl_band_list_get_ctx); \
++ DYNSYM (isl_band_list_n_band); \
++ DYNSYM (isl_band_n_member); \
++ DYNSYM (isl_basic_map_add_constraint); \
++ DYNSYM (isl_basic_map_project_out); \
++ DYNSYM (isl_basic_map_universe); \
++ DYNSYM (isl_constraint_set_coefficient_si); \
++ DYNSYM (isl_constraint_set_constant_si); \
++ DYNSYM (isl_ctx_alloc); \
++ DYNSYM (isl_ctx_free); \
++ DYNSYM (isl_equality_alloc); \
++ DYNSYM (isl_id_alloc); \
++ DYNSYM (isl_id_copy); \
++ DYNSYM (isl_id_free); \
++ DYNSYM (isl_inequality_alloc); \
++ DYNSYM (isl_local_space_copy); \
++ DYNSYM (isl_local_space_free); \
++ DYNSYM (isl_local_space_from_space); \
++ DYNSYM (isl_local_space_range); \
++ DYNSYM (isl_map_add_constraint); \
++ DYNSYM (isl_map_add_dims); \
++ DYNSYM (isl_map_align_params); \
++ DYNSYM (isl_map_apply_range); \
++ DYNSYM (isl_map_copy); \
++ DYNSYM (isl_map_dim); \
++ DYNSYM (isl_map_dump); \
++ DYNSYM (isl_map_equate); \
++ DYNSYM (isl_map_fix_si); \
++ DYNSYM (isl_map_flat_product); \
++ DYNSYM (isl_map_flat_range_product); \
++ DYNSYM (isl_map_free); \
++ DYNSYM (isl_map_from_basic_map); \
++ DYNSYM (isl_map_from_pw_aff); \
++ DYNSYM (isl_map_from_union_map); \
++ DYNSYM (isl_map_get_ctx); \
++ DYNSYM (isl_map_get_space); \
++ DYNSYM (isl_map_get_tuple_id); \
++ DYNSYM (isl_map_insert_dims); \
++ DYNSYM (isl_map_intersect); \
++ DYNSYM (isl_map_intersect_domain); \
++ DYNSYM (isl_map_intersect_range); \
++ DYNSYM (isl_map_is_empty); \
++ DYNSYM (isl_map_lex_ge); \
++ DYNSYM (isl_map_lex_le); \
++ DYNSYM (isl_map_n_out); \
++ DYNSYM (isl_map_range); \
++ DYNSYM (isl_map_set_tuple_id); \
++ DYNSYM (isl_map_universe); \
++ DYNSYM (isl_options_set_on_error); \
++ DYNSYM (isl_options_set_schedule_serialize_sccs); \
++ DYNSYM (isl_printer_set_yaml_style); \
++ DYNSYM (isl_options_set_schedule_max_constant_term); \
++ DYNSYM (isl_options_set_schedule_maximize_band_depth); \
++ DYNSYM (isl_printer_free); \
++ DYNSYM (isl_printer_print_aff); \
++ DYNSYM (isl_printer_print_constraint); \
++ DYNSYM (isl_printer_print_map); \
++ DYNSYM (isl_printer_print_set); \
++ DYNSYM (isl_printer_to_file); \
++ DYNSYM (isl_pw_aff_add); \
++ DYNSYM (isl_pw_aff_alloc); \
++ DYNSYM (isl_pw_aff_copy); \
++ DYNSYM (isl_pw_aff_eq_set); \
++ DYNSYM (isl_pw_aff_free); \
++ DYNSYM (isl_pw_aff_from_aff); \
++ DYNSYM (isl_pw_aff_ge_set); \
++ DYNSYM (isl_pw_aff_gt_set); \
++ DYNSYM (isl_pw_aff_is_cst); \
++ DYNSYM (isl_pw_aff_le_set); \
++ DYNSYM (isl_pw_aff_lt_set); \
++ DYNSYM (isl_pw_aff_mul); \
++ DYNSYM (isl_pw_aff_ne_set); \
++ DYNSYM (isl_pw_aff_nonneg_set); \
++ DYNSYM (isl_pw_aff_set_tuple_id); \
++ DYNSYM (isl_pw_aff_sub); \
++ DYNSYM (isl_pw_aff_zero_set); \
++ DYNSYM (isl_schedule_free); \
++ DYNSYM (isl_schedule_get_band_forest); \
++ DYNSYM (isl_set_add_constraint); \
++ DYNSYM (isl_set_add_dims); \
++ DYNSYM (isl_set_apply); \
++ DYNSYM (isl_set_coalesce); \
++ DYNSYM (isl_set_copy); \
++ DYNSYM (isl_set_dim); \
++ DYNSYM (isl_set_fix_si); \
++ DYNSYM (isl_set_free); \
++ DYNSYM (isl_set_get_space); \
++ DYNSYM (isl_set_get_tuple_id); \
++ DYNSYM (isl_set_intersect); \
++ DYNSYM (isl_set_is_empty); \
++ DYNSYM (isl_set_n_dim); \
++ DYNSYM (isl_set_nat_universe); \
++ DYNSYM (isl_set_project_out); \
++ DYNSYM (isl_set_set_tuple_id); \
++ DYNSYM (isl_set_universe); \
++ DYNSYM (isl_space_add_dims); \
++ DYNSYM (isl_space_alloc); \
++ DYNSYM (isl_space_copy); \
++ DYNSYM (isl_space_dim); \
++ DYNSYM (isl_space_domain); \
++ DYNSYM (isl_space_find_dim_by_id); \
++ DYNSYM (isl_space_free); \
++ DYNSYM (isl_space_from_domain); \
++ DYNSYM (isl_space_get_tuple_id); \
++ DYNSYM (isl_space_params_alloc); \
++ DYNSYM (isl_space_range); \
++ DYNSYM (isl_space_set_alloc); \
++ DYNSYM (isl_space_set_dim_id); \
++ DYNSYM (isl_space_set_tuple_id); \
++ DYNSYM (isl_union_map_add_map); \
++ DYNSYM (isl_union_map_align_params); \
++ DYNSYM (isl_union_map_apply_domain); \
++ DYNSYM (isl_union_map_apply_range); \
++ DYNSYM (isl_union_map_compute_flow); \
++ DYNSYM (isl_union_map_copy); \
++ DYNSYM (isl_union_map_empty); \
++ DYNSYM (isl_union_map_flat_range_product); \
++ DYNSYM (isl_union_map_foreach_map); \
++ DYNSYM (isl_union_map_free); \
++ DYNSYM (isl_union_map_from_map); \
++ DYNSYM (isl_union_map_get_ctx); \
++ DYNSYM (isl_union_map_get_space); \
++ DYNSYM (isl_union_map_gist_domain); \
++ DYNSYM (isl_union_map_gist_range); \
++ DYNSYM (isl_union_map_intersect_domain); \
++ DYNSYM (isl_union_map_is_empty); \
++ DYNSYM (isl_union_map_subtract); \
++ DYNSYM (isl_union_map_union); \
++ DYNSYM (isl_union_set_add_set); \
++ DYNSYM (isl_union_set_compute_schedule); \
++ DYNSYM (isl_union_set_copy); \
++ DYNSYM (isl_union_set_empty); \
++ DYNSYM (isl_union_set_from_set); \
++ DYNSYM (isl_aff_add_constant_val); \
++ DYNSYM (isl_aff_get_coefficient_val); \
++ DYNSYM (isl_aff_get_ctx); \
++ DYNSYM (isl_aff_mod_val); \
++ DYNSYM (isl_ast_build_ast_from_schedule); \
++ DYNSYM (isl_ast_build_free); \
++ DYNSYM (isl_ast_build_from_context); \
++ DYNSYM (isl_ast_build_get_ctx); \
++ DYNSYM (isl_ast_build_get_schedule); \
++ DYNSYM (isl_ast_build_get_schedule_space); \
++ DYNSYM (isl_ast_build_set_before_each_for); \
++ DYNSYM (isl_ast_build_set_options); \
++ DYNSYM (isl_ast_expr_free); \
++ DYNSYM (isl_ast_expr_from_val); \
++ DYNSYM (isl_ast_expr_get_ctx); \
++ DYNSYM (isl_ast_expr_get_id); \
++ DYNSYM (isl_ast_expr_get_op_arg); \
++ DYNSYM (isl_ast_expr_get_op_n_arg); \
++ DYNSYM (isl_ast_expr_get_op_type); \
++ DYNSYM (isl_ast_expr_get_type); \
++ DYNSYM (isl_ast_expr_get_val); \
++ DYNSYM (isl_ast_expr_sub); \
++ DYNSYM (isl_ast_node_block_get_children); \
++ DYNSYM (isl_ast_node_for_get_body); \
++ DYNSYM (isl_ast_node_for_get_cond); \
++ DYNSYM (isl_ast_node_for_get_inc); \
++ DYNSYM (isl_ast_node_for_get_init); \
++ DYNSYM (isl_ast_node_for_get_iterator); \
++ DYNSYM (isl_ast_node_free); \
++ DYNSYM (isl_ast_node_get_annotation); \
++ DYNSYM (isl_ast_node_get_type); \
++ DYNSYM (isl_ast_node_if_get_cond); \
++ DYNSYM (isl_ast_node_if_get_else); \
++ DYNSYM (isl_ast_node_if_get_then); \
++ DYNSYM (isl_ast_node_list_free); \
++ DYNSYM (isl_ast_node_list_get_ast_node); \
++ DYNSYM (isl_ast_node_list_n_ast_node); \
++ DYNSYM (isl_ast_node_user_get_expr); \
++ DYNSYM (isl_constraint_set_coefficient_val); \
++ DYNSYM (isl_constraint_set_constant_val); \
++ DYNSYM (isl_id_get_user); \
++ DYNSYM (isl_local_space_get_ctx); \
++ DYNSYM (isl_map_fix_val); \
++ DYNSYM (isl_options_set_ast_build_atomic_upper_bound); \
++ DYNSYM (isl_printer_print_ast_node); \
++ DYNSYM (isl_printer_print_str); \
++ DYNSYM (isl_printer_set_output_format); \
++ DYNSYM (isl_pw_aff_mod_val); \
++ DYNSYM (isl_schedule_constraints_compute_schedule); \
++ DYNSYM (isl_schedule_constraints_on_domain); \
++ DYNSYM (isl_schedule_constraints_set_coincidence); \
++ DYNSYM (isl_schedule_constraints_set_proximity); \
++ DYNSYM (isl_schedule_constraints_set_validity); \
++ DYNSYM (isl_set_get_dim_id); \
++ DYNSYM (isl_set_max_val); \
++ DYNSYM (isl_set_min_val); \
++ DYNSYM (isl_set_params); \
++ DYNSYM (isl_space_align_params); \
++ DYNSYM (isl_space_map_from_domain_and_range); \
++ DYNSYM (isl_space_set_tuple_name); \
++ DYNSYM (isl_space_wrap); \
++ DYNSYM (isl_union_map_from_domain_and_range); \
++ DYNSYM (isl_union_map_range); \
++ DYNSYM (isl_union_set_union); \
++ DYNSYM (isl_union_set_universe); \
++ DYNSYM (isl_val_2exp); \
++ DYNSYM (isl_val_add_ui); \
++ DYNSYM (isl_val_copy); \
++ DYNSYM (isl_val_free); \
++ DYNSYM (isl_val_int_from_si); \
++ DYNSYM (isl_val_int_from_ui); \
++ DYNSYM (isl_val_mul); \
++ DYNSYM (isl_val_neg); \
++ DYNSYM (isl_val_sub); \
++ DYNSYM (isl_printer_print_union_map); \
++ DYNSYM (isl_pw_aff_get_ctx); \
++ DYNSYM (isl_val_is_int); \
++ DYNSYM (isl_ctx_get_max_operations); \
++ DYNSYM (isl_ctx_set_max_operations); \
++ DYNSYM (isl_ctx_last_error); \
++ DYNSYM (isl_ctx_reset_operations); \
++ DYNSYM (isl_map_coalesce); \
++ DYNSYM (isl_printer_print_schedule); \
++ DYNSYM (isl_set_set_dim_id); \
++ DYNSYM (isl_union_map_coalesce); \
++ DYNSYM (isl_multi_val_set_val); \
++ DYNSYM (isl_multi_val_zero); \
++ DYNSYM (isl_options_set_schedule_max_coefficient); \
++ DYNSYM (isl_options_set_tile_scale_tile_loops); \
++ DYNSYM (isl_schedule_copy); \
++ DYNSYM (isl_schedule_get_map); \
++ DYNSYM (isl_schedule_map_schedule_node_bottom_up); \
++ DYNSYM (isl_schedule_node_band_get_permutable); \
++ DYNSYM (isl_schedule_node_band_get_space); \
++ DYNSYM (isl_schedule_node_band_tile); \
++ DYNSYM (isl_schedule_node_child); \
++ DYNSYM (isl_schedule_node_free); \
++ DYNSYM (isl_schedule_node_get_child); \
++ DYNSYM (isl_schedule_node_get_ctx); \
++ DYNSYM (isl_schedule_node_get_type); \
++ DYNSYM (isl_schedule_node_n_children); \
++ DYNSYM (isl_union_map_is_equal); \
++ DYNSYM (isl_union_access_info_compute_flow); \
++ DYNSYM (isl_union_access_info_from_sink); \
++ DYNSYM (isl_union_access_info_set_may_source); \
++ DYNSYM (isl_union_access_info_set_must_source); \
++ DYNSYM (isl_union_access_info_set_schedule); \
++ DYNSYM (isl_union_flow_free); \
++ DYNSYM (isl_union_flow_get_may_dependence); \
++ DYNSYM (isl_union_flow_get_must_dependence); \
++ DYNSYM (isl_aff_var_on_domain); \
++ DYNSYM (isl_multi_aff_from_aff); \
++ DYNSYM (isl_schedule_get_ctx); \
++ DYNSYM (isl_multi_aff_set_tuple_id); \
++ DYNSYM (isl_multi_aff_dim); \
++ DYNSYM (isl_schedule_get_domain); \
++ DYNSYM (isl_union_set_is_empty); \
++ DYNSYM (isl_union_set_get_space); \
++ DYNSYM (isl_union_pw_multi_aff_empty); \
++ DYNSYM (isl_union_set_foreach_set); \
++ DYNSYM (isl_union_set_free); \
++ DYNSYM (isl_multi_union_pw_aff_from_union_pw_multi_aff); \
++ DYNSYM (isl_multi_union_pw_aff_apply_multi_aff); \
++ DYNSYM (isl_schedule_insert_partial_schedule); \
++ DYNSYM (isl_union_pw_multi_aff_free); \
++ DYNSYM (isl_pw_multi_aff_project_out_map); \
++ DYNSYM (isl_union_pw_multi_aff_add_pw_multi_aff); \
++ DYNSYM (isl_schedule_from_domain); \
++ DYNSYM (isl_schedule_sequence); \
++ DYNSYM (isl_ast_build_node_from_schedule); \
++ DYNSYM (isl_ast_node_mark_get_node); \
++ DYNSYM (isl_schedule_node_band_member_get_ast_loop_type); \
++ DYNSYM (isl_schedule_node_band_member_set_ast_loop_type); \
++ DYNSYM (isl_val_n_abs_num_chunks); \
++ DYNSYM (isl_val_get_abs_num_chunks); \
++ DYNSYM (isl_val_int_from_chunks); \
++ DYNSYM (isl_val_is_neg); \
++ DYNSYM (isl_version); \
++ DYNSYM (isl_options_get_on_error); \
++ DYNSYM (isl_ctx_reset_error);
++
++extern struct isl_pointers_s__
++{
++ bool inited;
++ void *h;
++#define DYNSYM(x) __typeof (x) *p_##x
++ DYNSYMS
++#undef DYNSYM
++} isl_pointers__;
++
++#define isl_aff_add_coefficient_si (*isl_pointers__.p_isl_aff_add_coefficient_si)
++#define isl_aff_free (*isl_pointers__.p_isl_aff_free)
++#define isl_aff_get_space (*isl_pointers__.p_isl_aff_get_space)
++#define isl_aff_set_coefficient_si (*isl_pointers__.p_isl_aff_set_coefficient_si)
++#define isl_aff_set_constant_si (*isl_pointers__.p_isl_aff_set_constant_si)
++#define isl_aff_zero_on_domain (*isl_pointers__.p_isl_aff_zero_on_domain)
++#define isl_band_free (*isl_pointers__.p_isl_band_free)
++#define isl_band_get_children (*isl_pointers__.p_isl_band_get_children)
++#define isl_band_get_partial_schedule (*isl_pointers__.p_isl_band_get_partial_schedule)
++#define isl_band_has_children (*isl_pointers__.p_isl_band_has_children)
++#define isl_band_list_free (*isl_pointers__.p_isl_band_list_free)
++#define isl_band_list_get_band (*isl_pointers__.p_isl_band_list_get_band)
++#define isl_band_list_get_ctx (*isl_pointers__.p_isl_band_list_get_ctx)
++#define isl_band_list_n_band (*isl_pointers__.p_isl_band_list_n_band)
++#define isl_band_n_member (*isl_pointers__.p_isl_band_n_member)
++#define isl_basic_map_add_constraint (*isl_pointers__.p_isl_basic_map_add_constraint)
++#define isl_basic_map_project_out (*isl_pointers__.p_isl_basic_map_project_out)
++#define isl_basic_map_universe (*isl_pointers__.p_isl_basic_map_universe)
++#define isl_constraint_set_coefficient_si (*isl_pointers__.p_isl_constraint_set_coefficient_si)
++#define isl_constraint_set_constant_si (*isl_pointers__.p_isl_constraint_set_constant_si)
++#define isl_ctx_alloc (*isl_pointers__.p_isl_ctx_alloc)
++#define isl_ctx_free (*isl_pointers__.p_isl_ctx_free)
++#define isl_equality_alloc (*isl_pointers__.p_isl_equality_alloc)
++#define isl_id_alloc (*isl_pointers__.p_isl_id_alloc)
++#define isl_id_copy (*isl_pointers__.p_isl_id_copy)
++#define isl_id_free (*isl_pointers__.p_isl_id_free)
++#define isl_inequality_alloc (*isl_pointers__.p_isl_inequality_alloc)
++#define isl_local_space_copy (*isl_pointers__.p_isl_local_space_copy)
++#define isl_local_space_free (*isl_pointers__.p_isl_local_space_free)
++#define isl_local_space_from_space (*isl_pointers__.p_isl_local_space_from_space)
++#define isl_local_space_range (*isl_pointers__.p_isl_local_space_range)
++#define isl_map_add_constraint (*isl_pointers__.p_isl_map_add_constraint)
++#define isl_map_add_dims (*isl_pointers__.p_isl_map_add_dims)
++#define isl_map_align_params (*isl_pointers__.p_isl_map_align_params)
++#define isl_map_apply_range (*isl_pointers__.p_isl_map_apply_range)
++#define isl_map_copy (*isl_pointers__.p_isl_map_copy)
++#define isl_map_dim (*isl_pointers__.p_isl_map_dim)
++#define isl_map_dump (*isl_pointers__.p_isl_map_dump)
++#define isl_map_equate (*isl_pointers__.p_isl_map_equate)
++#define isl_map_fix_si (*isl_pointers__.p_isl_map_fix_si)
++#define isl_map_flat_product (*isl_pointers__.p_isl_map_flat_product)
++#define isl_map_flat_range_product (*isl_pointers__.p_isl_map_flat_range_product)
++#define isl_map_free (*isl_pointers__.p_isl_map_free)
++#define isl_map_from_basic_map (*isl_pointers__.p_isl_map_from_basic_map)
++#define isl_map_from_pw_aff (*isl_pointers__.p_isl_map_from_pw_aff)
++#define isl_map_from_union_map (*isl_pointers__.p_isl_map_from_union_map)
++#define isl_map_get_ctx (*isl_pointers__.p_isl_map_get_ctx)
++#define isl_map_get_space (*isl_pointers__.p_isl_map_get_space)
++#define isl_map_get_tuple_id (*isl_pointers__.p_isl_map_get_tuple_id)
++#define isl_map_insert_dims (*isl_pointers__.p_isl_map_insert_dims)
++#define isl_map_intersect (*isl_pointers__.p_isl_map_intersect)
++#define isl_map_intersect_domain (*isl_pointers__.p_isl_map_intersect_domain)
++#define isl_map_intersect_range (*isl_pointers__.p_isl_map_intersect_range)
++#define isl_map_is_empty (*isl_pointers__.p_isl_map_is_empty)
++#define isl_map_lex_ge (*isl_pointers__.p_isl_map_lex_ge)
++#define isl_map_lex_le (*isl_pointers__.p_isl_map_lex_le)
++#define isl_map_n_out (*isl_pointers__.p_isl_map_n_out)
++#define isl_map_range (*isl_pointers__.p_isl_map_range)
++#define isl_map_set_tuple_id (*isl_pointers__.p_isl_map_set_tuple_id)
++#define isl_map_universe (*isl_pointers__.p_isl_map_universe)
++#define isl_options_set_on_error (*isl_pointers__.p_isl_options_set_on_error)
++#define isl_options_set_schedule_serialize_sccs (*isl_pointers__.p_isl_options_set_schedule_serialize_sccs)
++#define isl_printer_set_yaml_style (*isl_pointers__.p_isl_printer_set_yaml_style)
++#define isl_options_set_schedule_max_constant_term (*isl_pointers__.p_isl_options_set_schedule_max_constant_term)
++#define isl_options_set_schedule_maximize_band_depth (*isl_pointers__.p_isl_options_set_schedule_maximize_band_depth)
++#define isl_printer_free (*isl_pointers__.p_isl_printer_free)
++#define isl_printer_print_aff (*isl_pointers__.p_isl_printer_print_aff)
++#define isl_printer_print_constraint (*isl_pointers__.p_isl_printer_print_constraint)
++#define isl_printer_print_map (*isl_pointers__.p_isl_printer_print_map)
++#define isl_printer_print_set (*isl_pointers__.p_isl_printer_print_set)
++#define isl_printer_to_file (*isl_pointers__.p_isl_printer_to_file)
++#define isl_pw_aff_add (*isl_pointers__.p_isl_pw_aff_add)
++#define isl_pw_aff_alloc (*isl_pointers__.p_isl_pw_aff_alloc)
++#define isl_pw_aff_copy (*isl_pointers__.p_isl_pw_aff_copy)
++#define isl_pw_aff_eq_set (*isl_pointers__.p_isl_pw_aff_eq_set)
++#define isl_pw_aff_free (*isl_pointers__.p_isl_pw_aff_free)
++#define isl_pw_aff_from_aff (*isl_pointers__.p_isl_pw_aff_from_aff)
++#define isl_pw_aff_ge_set (*isl_pointers__.p_isl_pw_aff_ge_set)
++#define isl_pw_aff_gt_set (*isl_pointers__.p_isl_pw_aff_gt_set)
++#define isl_pw_aff_is_cst (*isl_pointers__.p_isl_pw_aff_is_cst)
++#define isl_pw_aff_le_set (*isl_pointers__.p_isl_pw_aff_le_set)
++#define isl_pw_aff_lt_set (*isl_pointers__.p_isl_pw_aff_lt_set)
++#define isl_pw_aff_mul (*isl_pointers__.p_isl_pw_aff_mul)
++#define isl_pw_aff_ne_set (*isl_pointers__.p_isl_pw_aff_ne_set)
++#define isl_pw_aff_nonneg_set (*isl_pointers__.p_isl_pw_aff_nonneg_set)
++#define isl_pw_aff_set_tuple_id (*isl_pointers__.p_isl_pw_aff_set_tuple_id)
++#define isl_pw_aff_sub (*isl_pointers__.p_isl_pw_aff_sub)
++#define isl_pw_aff_zero_set (*isl_pointers__.p_isl_pw_aff_zero_set)
++#define isl_schedule_free (*isl_pointers__.p_isl_schedule_free)
++#define isl_schedule_get_band_forest (*isl_pointers__.p_isl_schedule_get_band_forest)
++#define isl_set_add_constraint (*isl_pointers__.p_isl_set_add_constraint)
++#define isl_set_add_dims (*isl_pointers__.p_isl_set_add_dims)
++#define isl_set_apply (*isl_pointers__.p_isl_set_apply)
++#define isl_set_coalesce (*isl_pointers__.p_isl_set_coalesce)
++#define isl_set_copy (*isl_pointers__.p_isl_set_copy)
++#define isl_set_dim (*isl_pointers__.p_isl_set_dim)
++#define isl_set_fix_si (*isl_pointers__.p_isl_set_fix_si)
++#define isl_set_free (*isl_pointers__.p_isl_set_free)
++#define isl_set_get_space (*isl_pointers__.p_isl_set_get_space)
++#define isl_set_get_tuple_id (*isl_pointers__.p_isl_set_get_tuple_id)
++#define isl_set_intersect (*isl_pointers__.p_isl_set_intersect)
++#define isl_set_is_empty (*isl_pointers__.p_isl_set_is_empty)
++#define isl_set_n_dim (*isl_pointers__.p_isl_set_n_dim)
++#define isl_set_nat_universe (*isl_pointers__.p_isl_set_nat_universe)
++#define isl_set_project_out (*isl_pointers__.p_isl_set_project_out)
++#define isl_set_set_tuple_id (*isl_pointers__.p_isl_set_set_tuple_id)
++#define isl_set_universe (*isl_pointers__.p_isl_set_universe)
++#define isl_space_add_dims (*isl_pointers__.p_isl_space_add_dims)
++#define isl_space_alloc (*isl_pointers__.p_isl_space_alloc)
++#define isl_space_copy (*isl_pointers__.p_isl_space_copy)
++#define isl_space_dim (*isl_pointers__.p_isl_space_dim)
++#define isl_space_domain (*isl_pointers__.p_isl_space_domain)
++#define isl_space_find_dim_by_id (*isl_pointers__.p_isl_space_find_dim_by_id)
++#define isl_space_free (*isl_pointers__.p_isl_space_free)
++#define isl_space_from_domain (*isl_pointers__.p_isl_space_from_domain)
++#define isl_space_get_tuple_id (*isl_pointers__.p_isl_space_get_tuple_id)
++#define isl_space_params_alloc (*isl_pointers__.p_isl_space_params_alloc)
++#define isl_space_range (*isl_pointers__.p_isl_space_range)
++#define isl_space_set_alloc (*isl_pointers__.p_isl_space_set_alloc)
++#define isl_space_set_dim_id (*isl_pointers__.p_isl_space_set_dim_id)
++#define isl_space_set_tuple_id (*isl_pointers__.p_isl_space_set_tuple_id)
++#define isl_union_map_add_map (*isl_pointers__.p_isl_union_map_add_map)
++#define isl_union_map_align_params (*isl_pointers__.p_isl_union_map_align_params)
++#define isl_union_map_apply_domain (*isl_pointers__.p_isl_union_map_apply_domain)
++#define isl_union_map_apply_range (*isl_pointers__.p_isl_union_map_apply_range)
++#define isl_union_map_compute_flow (*isl_pointers__.p_isl_union_map_compute_flow)
++#define isl_union_map_copy (*isl_pointers__.p_isl_union_map_copy)
++#define isl_union_map_empty (*isl_pointers__.p_isl_union_map_empty)
++#define isl_union_map_flat_range_product (*isl_pointers__.p_isl_union_map_flat_range_product)
++#define isl_union_map_foreach_map (*isl_pointers__.p_isl_union_map_foreach_map)
++#define isl_union_map_free (*isl_pointers__.p_isl_union_map_free)
++#define isl_union_map_from_map (*isl_pointers__.p_isl_union_map_from_map)
++#define isl_union_map_get_ctx (*isl_pointers__.p_isl_union_map_get_ctx)
++#define isl_union_map_get_space (*isl_pointers__.p_isl_union_map_get_space)
++#define isl_union_map_gist_domain (*isl_pointers__.p_isl_union_map_gist_domain)
++#define isl_union_map_gist_range (*isl_pointers__.p_isl_union_map_gist_range)
++#define isl_union_map_intersect_domain (*isl_pointers__.p_isl_union_map_intersect_domain)
++#define isl_union_map_is_empty (*isl_pointers__.p_isl_union_map_is_empty)
++#define isl_union_map_subtract (*isl_pointers__.p_isl_union_map_subtract)
++#define isl_union_map_union (*isl_pointers__.p_isl_union_map_union)
++#define isl_union_set_add_set (*isl_pointers__.p_isl_union_set_add_set)
++#define isl_union_set_compute_schedule (*isl_pointers__.p_isl_union_set_compute_schedule)
++#define isl_union_set_copy (*isl_pointers__.p_isl_union_set_copy)
++#define isl_union_set_empty (*isl_pointers__.p_isl_union_set_empty)
++#define isl_union_set_from_set (*isl_pointers__.p_isl_union_set_from_set)
++#define isl_aff_add_constant_val (*isl_pointers__.p_isl_aff_add_constant_val)
++#define isl_aff_get_coefficient_val (*isl_pointers__.p_isl_aff_get_coefficient_val)
++#define isl_aff_get_ctx (*isl_pointers__.p_isl_aff_get_ctx)
++#define isl_aff_mod_val (*isl_pointers__.p_isl_aff_mod_val)
++#define isl_ast_build_ast_from_schedule (*isl_pointers__.p_isl_ast_build_ast_from_schedule)
++#define isl_ast_build_free (*isl_pointers__.p_isl_ast_build_free)
++#define isl_ast_build_from_context (*isl_pointers__.p_isl_ast_build_from_context)
++#define isl_ast_build_get_ctx (*isl_pointers__.p_isl_ast_build_get_ctx)
++#define isl_ast_build_get_schedule (*isl_pointers__.p_isl_ast_build_get_schedule)
++#define isl_ast_build_get_schedule_space (*isl_pointers__.p_isl_ast_build_get_schedule_space)
++#define isl_ast_build_set_before_each_for (*isl_pointers__.p_isl_ast_build_set_before_each_for)
++#define isl_ast_build_set_options (*isl_pointers__.p_isl_ast_build_set_options)
++#define isl_ast_expr_free (*isl_pointers__.p_isl_ast_expr_free)
++#define isl_ast_expr_from_val (*isl_pointers__.p_isl_ast_expr_from_val)
++#define isl_ast_expr_get_ctx (*isl_pointers__.p_isl_ast_expr_get_ctx)
++#define isl_ast_expr_get_id (*isl_pointers__.p_isl_ast_expr_get_id)
++#define isl_ast_expr_get_op_arg (*isl_pointers__.p_isl_ast_expr_get_op_arg)
++#define isl_ast_expr_get_op_n_arg (*isl_pointers__.p_isl_ast_expr_get_op_n_arg)
++#define isl_ast_expr_get_op_type (*isl_pointers__.p_isl_ast_expr_get_op_type)
++#define isl_ast_expr_get_type (*isl_pointers__.p_isl_ast_expr_get_type)
++#define isl_ast_expr_get_val (*isl_pointers__.p_isl_ast_expr_get_val)
++#define isl_ast_expr_sub (*isl_pointers__.p_isl_ast_expr_sub)
++#define isl_ast_node_block_get_children (*isl_pointers__.p_isl_ast_node_block_get_children)
++#define isl_ast_node_for_get_body (*isl_pointers__.p_isl_ast_node_for_get_body)
++#define isl_ast_node_for_get_cond (*isl_pointers__.p_isl_ast_node_for_get_cond)
++#define isl_ast_node_for_get_inc (*isl_pointers__.p_isl_ast_node_for_get_inc)
++#define isl_ast_node_for_get_init (*isl_pointers__.p_isl_ast_node_for_get_init)
++#define isl_ast_node_for_get_iterator (*isl_pointers__.p_isl_ast_node_for_get_iterator)
++#define isl_ast_node_free (*isl_pointers__.p_isl_ast_node_free)
++#define isl_ast_node_get_annotation (*isl_pointers__.p_isl_ast_node_get_annotation)
++#define isl_ast_node_get_type (*isl_pointers__.p_isl_ast_node_get_type)
++#define isl_ast_node_if_get_cond (*isl_pointers__.p_isl_ast_node_if_get_cond)
++#define isl_ast_node_if_get_else (*isl_pointers__.p_isl_ast_node_if_get_else)
++#define isl_ast_node_if_get_then (*isl_pointers__.p_isl_ast_node_if_get_then)
++#define isl_ast_node_list_free (*isl_pointers__.p_isl_ast_node_list_free)
++#define isl_ast_node_list_get_ast_node (*isl_pointers__.p_isl_ast_node_list_get_ast_node)
++#define isl_ast_node_list_n_ast_node (*isl_pointers__.p_isl_ast_node_list_n_ast_node)
++#define isl_ast_node_user_get_expr (*isl_pointers__.p_isl_ast_node_user_get_expr)
++#define isl_constraint_set_coefficient_val (*isl_pointers__.p_isl_constraint_set_coefficient_val)
++#define isl_constraint_set_constant_val (*isl_pointers__.p_isl_constraint_set_constant_val)
++#define isl_id_get_user (*isl_pointers__.p_isl_id_get_user)
++#define isl_local_space_get_ctx (*isl_pointers__.p_isl_local_space_get_ctx)
++#define isl_map_fix_val (*isl_pointers__.p_isl_map_fix_val)
++#define isl_options_set_ast_build_atomic_upper_bound (*isl_pointers__.p_isl_options_set_ast_build_atomic_upper_bound)
++#define isl_printer_print_ast_node (*isl_pointers__.p_isl_printer_print_ast_node)
++#define isl_printer_print_str (*isl_pointers__.p_isl_printer_print_str)
++#define isl_printer_set_output_format (*isl_pointers__.p_isl_printer_set_output_format)
++#define isl_pw_aff_mod_val (*isl_pointers__.p_isl_pw_aff_mod_val)
++#define isl_schedule_constraints_compute_schedule (*isl_pointers__.p_isl_schedule_constraints_compute_schedule)
++#define isl_schedule_constraints_on_domain (*isl_pointers__.p_isl_schedule_constraints_on_domain)
++#define isl_schedule_constraints_set_coincidence (*isl_pointers__.p_isl_schedule_constraints_set_coincidence)
++#define isl_schedule_constraints_set_proximity (*isl_pointers__.p_isl_schedule_constraints_set_proximity)
++#define isl_schedule_constraints_set_validity (*isl_pointers__.p_isl_schedule_constraints_set_validity)
++#define isl_set_get_dim_id (*isl_pointers__.p_isl_set_get_dim_id)
++#define isl_set_max_val (*isl_pointers__.p_isl_set_max_val)
++#define isl_set_min_val (*isl_pointers__.p_isl_set_min_val)
++#define isl_set_params (*isl_pointers__.p_isl_set_params)
++#define isl_space_align_params (*isl_pointers__.p_isl_space_align_params)
++#define isl_space_map_from_domain_and_range (*isl_pointers__.p_isl_space_map_from_domain_and_range)
++#define isl_space_set_tuple_name (*isl_pointers__.p_isl_space_set_tuple_name)
++#define isl_space_wrap (*isl_pointers__.p_isl_space_wrap)
++#define isl_union_map_from_domain_and_range (*isl_pointers__.p_isl_union_map_from_domain_and_range)
++#define isl_union_map_range (*isl_pointers__.p_isl_union_map_range)
++#define isl_union_set_union (*isl_pointers__.p_isl_union_set_union)
++#define isl_union_set_universe (*isl_pointers__.p_isl_union_set_universe)
++#define isl_val_2exp (*isl_pointers__.p_isl_val_2exp)
++#define isl_val_add_ui (*isl_pointers__.p_isl_val_add_ui)
++#define isl_val_copy (*isl_pointers__.p_isl_val_copy)
++#define isl_val_free (*isl_pointers__.p_isl_val_free)
++#define isl_val_int_from_si (*isl_pointers__.p_isl_val_int_from_si)
++#define isl_val_int_from_ui (*isl_pointers__.p_isl_val_int_from_ui)
++#define isl_val_mul (*isl_pointers__.p_isl_val_mul)
++#define isl_val_neg (*isl_pointers__.p_isl_val_neg)
++#define isl_val_sub (*isl_pointers__.p_isl_val_sub)
++#define isl_printer_print_union_map (*isl_pointers__.p_isl_printer_print_union_map)
++#define isl_pw_aff_get_ctx (*isl_pointers__.p_isl_pw_aff_get_ctx)
++#define isl_val_is_int (*isl_pointers__.p_isl_val_is_int)
++#define isl_ctx_get_max_operations (*isl_pointers__.p_isl_ctx_get_max_operations)
++#define isl_ctx_set_max_operations (*isl_pointers__.p_isl_ctx_set_max_operations)
++#define isl_ctx_last_error (*isl_pointers__.p_isl_ctx_last_error)
++#define isl_ctx_reset_operations (*isl_pointers__.p_isl_ctx_reset_operations)
++#define isl_map_coalesce (*isl_pointers__.p_isl_map_coalesce)
++#define isl_printer_print_schedule (*isl_pointers__.p_isl_printer_print_schedule)
++#define isl_set_set_dim_id (*isl_pointers__.p_isl_set_set_dim_id)
++#define isl_union_map_coalesce (*isl_pointers__.p_isl_union_map_coalesce)
++#define isl_multi_val_set_val (*isl_pointers__.p_isl_multi_val_set_val)
++#define isl_multi_val_zero (*isl_pointers__.p_isl_multi_val_zero)
++#define isl_options_set_schedule_max_coefficient (*isl_pointers__.p_isl_options_set_schedule_max_coefficient)
++#define isl_options_set_tile_scale_tile_loops (*isl_pointers__.p_isl_options_set_tile_scale_tile_loops)
++#define isl_schedule_copy (*isl_pointers__.p_isl_schedule_copy)
++#define isl_schedule_get_map (*isl_pointers__.p_isl_schedule_get_map)
++#define isl_schedule_map_schedule_node_bottom_up (*isl_pointers__.p_isl_schedule_map_schedule_node_bottom_up)
++#define isl_schedule_node_band_get_permutable (*isl_pointers__.p_isl_schedule_node_band_get_permutable)
++#define isl_schedule_node_band_get_space (*isl_pointers__.p_isl_schedule_node_band_get_space)
++#define isl_schedule_node_band_tile (*isl_pointers__.p_isl_schedule_node_band_tile)
++#define isl_schedule_node_child (*isl_pointers__.p_isl_schedule_node_child)
++#define isl_schedule_node_free (*isl_pointers__.p_isl_schedule_node_free)
++#define isl_schedule_node_get_child (*isl_pointers__.p_isl_schedule_node_get_child)
++#define isl_schedule_node_get_ctx (*isl_pointers__.p_isl_schedule_node_get_ctx)
++#define isl_schedule_node_get_type (*isl_pointers__.p_isl_schedule_node_get_type)
++#define isl_schedule_node_n_children (*isl_pointers__.p_isl_schedule_node_n_children)
++#define isl_union_map_is_equal (*isl_pointers__.p_isl_union_map_is_equal)
++#define isl_union_access_info_compute_flow (*isl_pointers__.p_isl_union_access_info_compute_flow)
++#define isl_union_access_info_from_sink (*isl_pointers__.p_isl_union_access_info_from_sink)
++#define isl_union_access_info_set_may_source (*isl_pointers__.p_isl_union_access_info_set_may_source)
++#define isl_union_access_info_set_must_source (*isl_pointers__.p_isl_union_access_info_set_must_source)
++#define isl_union_access_info_set_schedule (*isl_pointers__.p_isl_union_access_info_set_schedule)
++#define isl_union_flow_free (*isl_pointers__.p_isl_union_flow_free)
++#define isl_union_flow_get_may_dependence (*isl_pointers__.p_isl_union_flow_get_may_dependence)
++#define isl_union_flow_get_must_dependence (*isl_pointers__.p_isl_union_flow_get_must_dependence)
++#define isl_aff_var_on_domain (*isl_pointers__.p_isl_aff_var_on_domain)
++#define isl_multi_aff_from_aff (*isl_pointers__.p_isl_multi_aff_from_aff)
++#define isl_schedule_get_ctx (*isl_pointers__.p_isl_schedule_get_ctx)
++#define isl_multi_aff_set_tuple_id (*isl_pointers__.p_isl_multi_aff_set_tuple_id)
++#define isl_multi_aff_dim (*isl_pointers__.p_isl_multi_aff_dim)
++#define isl_schedule_get_domain (*isl_pointers__.p_isl_schedule_get_domain)
++#define isl_union_set_is_empty (*isl_pointers__.p_isl_union_set_is_empty)
++#define isl_union_set_get_space (*isl_pointers__.p_isl_union_set_get_space)
++#define isl_union_pw_multi_aff_empty (*isl_pointers__.p_isl_union_pw_multi_aff_empty)
++#define isl_union_set_foreach_set (*isl_pointers__.p_isl_union_set_foreach_set)
++#define isl_union_set_free (*isl_pointers__.p_isl_union_set_free)
++#define isl_multi_union_pw_aff_from_union_pw_multi_aff (*isl_pointers__.p_isl_multi_union_pw_aff_from_union_pw_multi_aff)
++#define isl_multi_union_pw_aff_apply_multi_aff (*isl_pointers__.p_isl_multi_union_pw_aff_apply_multi_aff)
++#define isl_schedule_insert_partial_schedule (*isl_pointers__.p_isl_schedule_insert_partial_schedule)
++#define isl_union_pw_multi_aff_free (*isl_pointers__.p_isl_union_pw_multi_aff_free)
++#define isl_pw_multi_aff_project_out_map (*isl_pointers__.p_isl_pw_multi_aff_project_out_map)
++#define isl_union_pw_multi_aff_add_pw_multi_aff (*isl_pointers__.p_isl_union_pw_multi_aff_add_pw_multi_aff)
++#define isl_schedule_from_domain (*isl_pointers__.p_isl_schedule_from_domain)
++#define isl_schedule_sequence (*isl_pointers__.p_isl_schedule_sequence)
++#define isl_ast_build_node_from_schedule (*isl_pointers__.p_isl_ast_build_node_from_schedule)
++#define isl_ast_node_mark_get_node (*isl_pointers__.p_isl_ast_node_mark_get_node)
++#define isl_schedule_node_band_member_get_ast_loop_type (*isl_pointers__.p_isl_schedule_node_band_member_get_ast_loop_type)
++#define isl_schedule_node_band_member_set_ast_loop_type (*isl_pointers__.p_isl_schedule_node_band_member_set_ast_loop_type)
++#define isl_val_n_abs_num_chunks (*isl_pointers__.p_isl_val_n_abs_num_chunks)
++#define isl_val_get_abs_num_chunks (*isl_pointers__.p_isl_val_get_abs_num_chunks)
++#define isl_val_int_from_chunks (*isl_pointers__.p_isl_val_int_from_chunks)
++#define isl_val_is_neg (*isl_pointers__.p_isl_val_is_neg)
++#define isl_version (*isl_pointers__.p_isl_version)
++#define isl_options_get_on_error (*isl_pointers__.p_isl_options_get_on_error)
++#define isl_ctx_reset_error (*isl_pointers__.p_isl_ctx_reset_error)
+
+ typedef struct poly_dr *poly_dr_p;
+
+@@ -461,5 +1045,6 @@ extern void build_scops (vec<scop_p> *);
+ extern void dot_all_sese (FILE *, vec<sese_l> &);
+ extern void dot_sese (sese_l &);
+ extern void dot_cfg ();
++extern const char *get_isl_version (bool);
+
+ #endif
+--- gcc/graphite.c.jj 2015-11-04 14:15:32.000000000 +0100
++++ gcc/graphite.c 2015-11-04 14:56:02.645536409 +0100
+@@ -60,6 +60,35 @@ along with GCC; see the file COPYING3.
+ #include "tree-into-ssa.h"
+ #include "graphite.h"
+
++__typeof (isl_pointers__) isl_pointers__;
++
++static bool
++init_isl_pointers (void)
++{
++ void *h;
++
++ if (isl_pointers__.inited)
++ return isl_pointers__.h != NULL;
++ h = dlopen ("libisl.so.15", RTLD_LAZY);
++ isl_pointers__.h = h;
++ if (h == NULL)
++ return false;
++#define DYNSYM(x) \
++ do \
++ { \
++ union { __typeof (isl_pointers__.p_##x) p; void *q; } u; \
++ u.q = dlsym (h, #x); \
++ if (u.q == NULL) \
++ return false; \
++ isl_pointers__.p_##x = u.p; \
++ } \
++ while (0)
++ DYNSYMS
++#undef DYNSYM
++ isl_pointers__.inited = true;
++ return true;
++}
++
+ /* Print global statistics to FILE. */
+
+ static void
+@@ -365,6 +394,15 @@ graphite_transform_loops (void)
+ if (parallelized_function_p (cfun->decl))
+ return;
+
++ if (number_of_loops (cfun) <= 1)
++ return;
++
++ if (!init_isl_pointers ())
++ {
++ sorry ("Graphite loop optimizations cannot be used");
++ return;
++ }
++
+ calculate_dominance_info (CDI_DOMINATORS);
+
+ /* We rely on post-dominators during merging of SESE regions so those
+@@ -455,6 +493,14 @@ graphite_transform_loops (void)
+ }
+ }
+
++const char *
++get_isl_version (bool force)
++{
++ if (force)
++ init_isl_pointers ();
++ return (isl_pointers__.inited && isl_version) ? isl_version () : "none";
++}
++
+ #else /* If isl is not available: #ifndef HAVE_isl. */
+
+ static void
+--- gcc/toplev.c.jj 2017-02-19 13:02:31.000000000 +0100
++++ gcc/toplev.c 2017-02-19 16:50:25.536301350 +0100
+@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3.
+
+ #ifdef HAVE_isl
+ #include <isl/version.h>
++extern const char *get_isl_version (bool);
+ #endif
+
+ static void general_init (const char *, bool);
+@@ -683,7 +684,7 @@ print_version (FILE *file, const char *i
+ #ifndef HAVE_isl
+ "none"
+ #else
+- isl_version ()
++ get_isl_version (*indent == 0)
+ #endif
+ );
+ if (strcmp (GCC_GMP_STRINGIFY_VERSION, gmp_version))
diff --git a/SOURCES/gcc8-libgcc-hardened.patch b/SOURCES/gcc8-libgcc-hardened.patch
new file mode 100644
index 0000000..51f6c39
--- /dev/null
+++ b/SOURCES/gcc8-libgcc-hardened.patch
@@ -0,0 +1,14 @@
+--- libgcc/config/t-slibgcc.mp 2018-10-03 16:07:00.336990246 -0400
++++ libgcc/config/t-slibgcc 2018-10-03 16:06:26.719946740 -0400
+@@ -30,9 +30,10 @@ SHLIB_LC = -lc
+ SHLIB_MAKE_SOLINK = $(LN_S) $(SHLIB_SONAME) $(SHLIB_DIR)/$(SHLIB_SOLINK)
+ SHLIB_INSTALL_SOLINK = $(LN_S) $(SHLIB_SONAME) \
+ $(DESTDIR)$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
++SHLIB_EXTRA_LDFLAGS = -Wl,-z,relro -Wl,-z,now
+
+ SHLIB_LINK = $(CC) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+- $(SHLIB_LDFLAGS) \
++ $(SHLIB_LDFLAGS) $(SHLIB_EXTRA_LDFLAGS) \
+ -o $(SHLIB_DIR)/$(SHLIB_SONAME).tmp @multilib_flags@ \
+ $(SHLIB_OBJS) $(SHLIB_LC) && \
+ rm -f $(SHLIB_DIR)/$(SHLIB_SOLINK) && \
diff --git a/SOURCES/gcc8-libgfortran-default-values.patch b/SOURCES/gcc8-libgfortran-default-values.patch
new file mode 100644
index 0000000..b070abe
--- /dev/null
+++ b/SOURCES/gcc8-libgfortran-default-values.patch
@@ -0,0 +1,516 @@
+diff --git a/gcc/fortran/io.c b/gcc/fortran/io.c
+index d93dcfa..f47565c 100644
+--- a/gcc/fortran/io.c
++++ b/gcc/fortran/io.c
+@@ -909,6 +909,13 @@ data_desc:
+
+ if (u != FMT_POSINT)
+ {
++ if (flag_dec)
++ {
++ /* Assume a default width based on the variable size. */
++ saved_token = u;
++ break;
++ }
++
+ format_locus.nextc += format_string_pos;
+ gfc_error ("Positive width required in format "
+ "specifier %s at %L", token_to_string (t),
+@@ -1030,6 +1037,13 @@ data_desc:
+ goto fail;
+ if (t != FMT_ZERO && t != FMT_POSINT)
+ {
++ if (flag_dec)
++ {
++ /* Assume the default width is expected here and continue lexing. */
++ value = 0; /* It doesn't matter what we set the value to here. */
++ saved_token = t;
++ break;
++ }
+ error = nonneg_required;
+ goto syntax;
+ }
+@@ -1099,8 +1113,17 @@ data_desc:
+ goto fail;
+ if (t != FMT_ZERO && t != FMT_POSINT)
+ {
+- error = nonneg_required;
+- goto syntax;
++ if (flag_dec)
++ {
++ /* Assume the default width is expected here and continue lexing. */
++ value = 0; /* It doesn't matter what we set the value to here. */
++ saved_token = t;
++ }
++ else
++ {
++ error = nonneg_required;
++ goto syntax;
++ }
+ }
+ else if (is_input && t == FMT_ZERO)
+ {
+diff --git a/gcc/testsuite/gfortran.dg/fmt_f_default_field_width.f90 b/gcc/testsuite/gfortran.dg/fmt_f_default_field_width.f90
+new file mode 100644
+index 0000000..b087b8f
+--- /dev/null
++++ b/gcc/testsuite/gfortran.dg/fmt_f_default_field_width.f90
+@@ -0,0 +1,43 @@
++! { dg-do run }
++! { dg-options -fdec }
++!
++! Test case for the default field widths enabled by the -fdec flag.
++!
++! This feature is not part of any Fortran standard, but it is supported by the
++! Oracle Fortran compiler and others.
++!
++! libgfortran uses printf() internally to implement FORMAT. If you print float
++! values to a higher precision than the type can actually store, the results
++! are implementation dependent: some platforms print zeros, others print random
++! numbers. Don't depend on this behaviour in tests because they will not be
++! portable.
++
++ character(50) :: buffer
++
++ real*4 :: real_4
++ real*8 :: real_8
++ real*16 :: real_16
++ integer :: len
++
++ real_4 = 4.18
++ write(buffer, '(A, F, A)') ':',real_4,':'
++ print *,buffer
++ if (buffer.ne.": 4.1799998:") call abort
++
++ real_4 = 0.00000018
++ write(buffer, '(A, F, A)') ':',real_4,':'
++ print *,buffer
++ if (buffer.ne.": 0.0000002:") call abort
++
++ real_8 = 4.18
++ write(buffer, '(A, F, A)') ':',real_8,':'
++ print *,buffer
++ len = len_trim(buffer)
++ if (len /= 27) call abort
++
++ real_16 = 4.18
++ write(buffer, '(A, F, A)') ':',real_16,':'
++ print *,buffer
++ len = len_trim(buffer)
++ if (len /= 44) call abort
++end
+diff --git a/gcc/testsuite/gfortran.dg/fmt_g_default_field_width.f90 b/gcc/testsuite/gfortran.dg/fmt_g_default_field_width.f90
+new file mode 100644
+index 0000000..3d3a476
+--- /dev/null
++++ b/gcc/testsuite/gfortran.dg/fmt_g_default_field_width.f90
+@@ -0,0 +1,48 @@
++! { dg-do run }
++! { dg-options -fdec }
++!
++! Test case for the default field widths enabled by the -fdec flag.
++!
++! This feature is not part of any Fortran standard, but it is supported by the
++! Oracle Fortran compiler and others.
++!
++! libgfortran uses printf() internally to implement FORMAT. If you print float
++! values to a higher precision than the type can actually store, the results
++! are implementation dependent: some platforms print zeros, others print random
++! numbers. Don't depend on this behaviour in tests because they will not be
++! portable.
++
++ character(50) :: buffer
++
++ real*4 :: real_4
++ real*8 :: real_8
++ real*16 :: real_16
++ integer :: len
++
++ real_4 = 4.18
++ write(buffer, '(A, G, A)') ':',real_4,':'
++ print *,buffer
++ if (buffer.ne.": 4.180000 :") call abort
++
++ real_4 = 0.00000018
++ write(buffer, '(A, G, A)') ':',real_4,':'
++ print *,buffer
++ if (buffer.ne.": 0.1800000E-06:") call abort
++
++ real_4 = 18000000.4
++ write(buffer, '(A, G, A)') ':',real_4,':'
++ print *,buffer
++ if (buffer.ne.": 0.1800000E+08:") call abort
++
++ real_8 = 4.18
++ write(buffer, '(A, G, A)') ':',real_8,':'
++ print *,buffer
++ len = len_trim(buffer)
++ if (len /= 27) call abort
++
++ real_16 = 4.18
++ write(buffer, '(A, G, A)') ':',real_16,':'
++ print *,buffer
++ len = len_trim(buffer)
++ if (len /= 44) call abort
++end
+diff --git a/gcc/testsuite/gfortran.dg/fmt_i_default_field_width.f90 b/gcc/testsuite/gfortran.dg/fmt_i_default_field_width.f90
+new file mode 100644
+index 0000000..ac4e165
+--- /dev/null
++++ b/gcc/testsuite/gfortran.dg/fmt_i_default_field_width.f90
+@@ -0,0 +1,38 @@
++! { dg-do run }
++! { dg-options -fdec }
++!
++! Test case for the default field widths enabled by the -fdec flag.
++!
++! This feature is not part of any Fortran standard, but it is supported by the
++! Oracle Fortran compiler and others.
++
++ character(50) :: buffer
++ character(1) :: colon
++
++ integer*2 :: integer_2
++ integer*4 :: integer_4
++ integer*8 :: integer_8
++
++ write(buffer, '(A, I, A)') ':',12340,':'
++ print *,buffer
++ if (buffer.ne.": 12340:") call abort
++
++ read(buffer, '(A1, I, A1)') colon, integer_4, colon
++ if (integer_4.ne.12340) call abort
++
++ integer_2 = -99
++ write(buffer, '(A, I, A)') ':',integer_2,':'
++ print *,buffer
++ if (buffer.ne.": -99:") call abort
++
++ integer_8 = -11112222
++ write(buffer, '(A, I, A)') ':',integer_8,':'
++ print *,buffer
++ if (buffer.ne.": -11112222:") call abort
++
++! If the width is 7 and there are 7 leading zeroes, the result should be zero.
++ integer_2 = 789
++ buffer = '0000000789'
++ read(buffer, '(I)') integer_2
++ if (integer_2.ne.0) call abort
++end
+diff --git a/libgfortran/io/format.c b/libgfortran/io/format.c
+index c2abdd7..692b1ff 100644
+--- a/libgfortran/io/format.c
++++ b/libgfortran/io/format.c
+@@ -956,12 +956,33 @@ parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
+ *seen_dd = true;
+ if (u != FMT_POSINT && u != FMT_ZERO)
+ {
++ if (dtp->common.flags & IOPARM_DT_DEC_EXT)
++ {
++ tail->u.real.w = DEFAULT_WIDTH;
++ tail->u.real.d = 0;
++ tail->u.real.e = -1;
++ fmt->saved_token = u;
++ break;
++ }
+ fmt->error = nonneg_required;
+ goto finished;
+ }
+ }
++ else if (u == FMT_ZERO)
++ {
++ fmt->error = posint_required;
++ goto finished;
++ }
+ else if (u != FMT_POSINT)
+ {
++ if (dtp->common.flags & IOPARM_DT_DEC_EXT)
++ {
++ tail->u.real.w = DEFAULT_WIDTH;
++ tail->u.real.d = 0;
++ tail->u.real.e = -1;
++ fmt->saved_token = u;
++ break;
++ }
+ fmt->error = posint_required;
+ goto finished;
+ }
+@@ -1099,6 +1120,13 @@ parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
+ {
+ if (t != FMT_POSINT)
+ {
++ if (dtp->common.flags & IOPARM_DT_DEC_EXT)
++ {
++ tail->u.integer.w = DEFAULT_WIDTH;
++ tail->u.integer.m = -1;
++ fmt->saved_token = t;
++ break;
++ }
+ fmt->error = posint_required;
+ goto finished;
+ }
+@@ -1107,6 +1135,13 @@ parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
+ {
+ if (t != FMT_ZERO && t != FMT_POSINT)
+ {
++ if (dtp->common.flags & IOPARM_DT_DEC_EXT)
++ {
++ tail->u.integer.w = DEFAULT_WIDTH;
++ tail->u.integer.m = -1;
++ fmt->saved_token = t;
++ break;
++ }
+ fmt->error = nonneg_required;
+ goto finished;
+ }
+diff --git a/libgfortran/io/io.h b/libgfortran/io/io.h
+index 5583183..d1d08e8 100644
+--- a/libgfortran/io/io.h
++++ b/libgfortran/io/io.h
+@@ -981,5 +981,55 @@ memset4 (gfc_char4_t *p, gfc_char4_t c, int k)
+ *p++ = c;
+ }
+
++/* Used in width fields to indicate that the default should be used */
++#define DEFAULT_WIDTH -1
++
++/* Defaults for certain format field descriptors. These are decided based on
++ * the type of the value being formatted.
++ *
++ * The behaviour here is modelled on the Oracle Fortran compiler. At the time
++ * of writing, the details were available at this URL:
++ *
++ * https://docs.oracle.com/cd/E19957-01/805-4939/6j4m0vnc3/index.html#z4000743746d
++ */
++
++static inline int
++default_width_for_integer (int kind)
++{
++ switch (kind)
++ {
++ case 1:
++ case 2: return 7;
++ case 4: return 12;
++ case 8: return 23;
++ case 16: return 44;
++ default: return 0;
++ }
++}
++
++static inline int
++default_width_for_float (int kind)
++{
++ switch (kind)
++ {
++ case 4: return 15;
++ case 8: return 25;
++ case 16: return 42;
++ default: return 0;
++ }
++}
++
++static inline int
++default_precision_for_float (int kind)
++{
++ switch (kind)
++ {
++ case 4: return 7;
++ case 8: return 16;
++ case 16: return 33;
++ default: return 0;
++ }
++}
++
+ #endif
+
+diff --git a/libgfortran/io/read.c b/libgfortran/io/read.c
+index 2c9de48..e911e35 100644
+--- a/libgfortran/io/read.c
++++ b/libgfortran/io/read.c
+@@ -629,6 +629,12 @@ read_decimal (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
+
+ w = f->u.w;
+
++ /* This is a legacy extension, and the frontend will only allow such cases
++ * through when -fdec is passed.
++ */
++ if (w == DEFAULT_WIDTH)
++ w = default_width_for_integer (length);
++
+ p = read_block_form (dtp, &w);
+
+ if (p == NULL)
+diff --git a/libgfortran/io/write.c b/libgfortran/io/write.c
+index a7307a8..c8e52fb 100644
+--- a/libgfortran/io/write.c
++++ b/libgfortran/io/write.c
+@@ -684,9 +684,8 @@ write_l (st_parameter_dt *dtp, const fnode *f, char *source, int len)
+ p[wlen - 1] = (n) ? 'T' : 'F';
+ }
+
+-
+ static void
+-write_boz (st_parameter_dt *dtp, const fnode *f, const char *q, int n)
++write_boz (st_parameter_dt *dtp, const fnode *f, const char *q, int n, int len)
+ {
+ int w, m, digits, nzero, nblank;
+ char *p;
+@@ -719,6 +718,9 @@ write_boz (st_parameter_dt *dtp, const fnode *f, const char *q, int n)
+ /* Select a width if none was specified. The idea here is to always
+ print something. */
+
++ if (w == DEFAULT_WIDTH)
++ w = default_width_for_integer (len);
++
+ if (w == 0)
+ w = ((digits < m) ? m : digits);
+
+@@ -845,6 +847,8 @@ write_decimal (st_parameter_dt *dtp, const fnode *f, const char *source,
+
+ /* Select a width if none was specified. The idea here is to always
+ print something. */
++ if (w == DEFAULT_WIDTH)
++ w = default_width_for_integer (len);
+
+ if (w == 0)
+ w = ((digits < m) ? m : digits) + nsign;
+@@ -1187,13 +1191,13 @@ write_b (st_parameter_dt *dtp, const fnode *f, const char *source, int len)
+ if (len > (int) sizeof (GFC_UINTEGER_LARGEST))
+ {
+ p = btoa_big (source, itoa_buf, len, &n);
+- write_boz (dtp, f, p, n);
++ write_boz (dtp, f, p, n, len);
+ }
+ else
+ {
+ n = extract_uint (source, len);
+ p = btoa (n, itoa_buf, sizeof (itoa_buf));
+- write_boz (dtp, f, p, n);
++ write_boz (dtp, f, p, n, len);
+ }
+ }
+
+@@ -1208,13 +1212,13 @@ write_o (st_parameter_dt *dtp, const fnode *f, const char *source, int len)
+ if (len > (int) sizeof (GFC_UINTEGER_LARGEST))
+ {
+ p = otoa_big (source, itoa_buf, len, &n);
+- write_boz (dtp, f, p, n);
++ write_boz (dtp, f, p, n, len);
+ }
+ else
+ {
+ n = extract_uint (source, len);
+ p = otoa (n, itoa_buf, sizeof (itoa_buf));
+- write_boz (dtp, f, p, n);
++ write_boz (dtp, f, p, n, len);
+ }
+ }
+
+@@ -1228,13 +1232,13 @@ write_z (st_parameter_dt *dtp, const fnode *f, const char *source, int len)
+ if (len > (int) sizeof (GFC_UINTEGER_LARGEST))
+ {
+ p = ztoa_big (source, itoa_buf, len, &n);
+- write_boz (dtp, f, p, n);
++ write_boz (dtp, f, p, n, len);
+ }
+ else
+ {
+ n = extract_uint (source, len);
+ p = gfc_xtoa (n, itoa_buf, sizeof (itoa_buf));
+- write_boz (dtp, f, p, n);
++ write_boz (dtp, f, p, n, len);
+ }
+ }
+
+@@ -1504,7 +1508,7 @@ size_from_kind (st_parameter_dt *dtp, const fnode *f, int kind)
+ {
+ int size;
+
+- if (f->format == FMT_F && f->u.real.w == 0)
++ if ((f->format == FMT_F && f->u.real.w == 0) || f->u.real.w == DEFAULT_WIDTH)
+ {
+ switch (kind)
+ {
+diff --git a/libgfortran/io/write_float.def b/libgfortran/io/write_float.def
+index 7f0aa1d..73dc910 100644
+--- a/libgfortran/io/write_float.def
++++ b/libgfortran/io/write_float.def
+@@ -113,7 +113,8 @@ determine_precision (st_parameter_dt * d
+ static void
+ build_float_string (st_parameter_dt *dtp, const fnode *f, char *buffer,
+ size_t size, int nprinted, int precision, int sign_bit,
+- bool zero_flag, int npad, char *result, size_t *len)
++ bool zero_flag, int npad, int default_width, char *result,
++ size_t *len)
+ {
+ char *put;
+ char *digits;
+@@ -132,8 +133,17 @@ build_float_string (st_parameter_dt *dtp
+ sign_t sign;
+
+ ft = f->format;
+- w = f->u.real.w;
+- d = f->u.real.d;
++ if (f->u.real.w == DEFAULT_WIDTH)
++ /* This codepath can only be reached with -fdec (DEC extensions). */
++ {
++ w = default_width;
++ d = precision;
++ }
++ else
++ {
++ w = f->u.real.w;
++ d = f->u.real.d;
++ }
+ p = dtp->u.p.scale_factor;
+ *len = 0;
+
+@@ -959,6 +969,11 @@ determine_en_precision (st_parameter_dt
+ int save_scale_factor;\
+ volatile GFC_REAL_ ## x temp;\
+ save_scale_factor = dtp->u.p.scale_factor;\
++ if (w == DEFAULT_WIDTH)\
++ {\
++ w = default_width;\
++ d = precision;\
++ }\
+ switch (dtp->u.p.current_unit->round_status)\
+ {\
+ case ROUND_ZERO:\
+@@ -1034,7 +1049,8 @@ determine_en_precision (st_parameter_dt
+ nprinted = FDTOA(y,precision,m);\
+ }\
+ build_float_string (dtp, &newf, buffer, size, nprinted, precision,\
+- sign_bit, zero_flag, npad, result, res_len);\
++ sign_bit, zero_flag, npad, default_width,\
++ result, res_len);\
+ dtp->u.p.scale_factor = save_scale_factor;\
+ }\
+ else\
+@@ -1044,7 +1060,8 @@ determine_en_precision (st_parameter_dt
+ else\
+ nprinted = DTOA(y,precision,m);\
+ build_float_string (dtp, f, buffer, size, nprinted, precision,\
+- sign_bit, zero_flag, npad, result, res_len);\
++ sign_bit, zero_flag, npad, default_width,\
++ result, res_len);\
+ }\
+ }\
+
+@@ -1058,6 +1075,16 @@ get_float_string (st_parameter_dt *dtp,
+ {
+ int sign_bit, nprinted;
+ bool zero_flag;
++ int default_width = 0;
++
++ if (f->u.real.w == DEFAULT_WIDTH)
++ /* This codepath can only be reached with -fdec. The default
++ * values are based on those used in the Oracle Fortran compiler.
++ */
++ {
++ default_width = default_width_for_float (kind);
++ precision = default_precision_for_float (kind);
++ }
+
+ switch (kind)
+ {
diff --git a/SOURCES/gcc8-libgomp-20190503.patch b/SOURCES/gcc8-libgomp-20190503.patch
new file mode 100644
index 0000000..fcd226d
--- /dev/null
+++ b/SOURCES/gcc8-libgomp-20190503.patch
@@ -0,0 +1,10068 @@
+--- libgomp/loop.c.jj 2018-04-25 09:40:31.870655561 +0200
++++ libgomp/loop.c 2019-05-07 18:46:36.526109736 +0200
+@@ -27,9 +27,13 @@
+
+ #include <limits.h>
+ #include <stdlib.h>
++#include <string.h>
+ #include "libgomp.h"
+
+
++ialias (GOMP_loop_runtime_next)
++ialias_redirect (GOMP_taskgroup_reduction_register)
++
+ /* Initialize the given work share construct from the given arguments. */
+
+ static inline void
+@@ -79,12 +83,12 @@ gomp_loop_init (struct gomp_work_share *
+ }
+
+ /* The *_start routines are called when first encountering a loop construct
+- that is not bound directly to a parallel construct. The first thread
++ that is not bound directly to a parallel construct. The first thread
+ that arrives will create the work-share construct; subsequent threads
+ will see the construct exists and allocate work from it.
+
+ START, END, INCR are the bounds of the loop; due to the restrictions of
+- OpenMP, these values must be the same in every thread. This is not
++ OpenMP, these values must be the same in every thread. This is not
+ verified (nor is it entirely verifiable, since START is not necessarily
+ retained intact in the work-share data structure). CHUNK_SIZE is the
+ scheduling parameter; again this must be identical in all threads.
+@@ -101,7 +105,7 @@ gomp_loop_static_start (long start, long
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_init (thr->ts.work_share, start, end, incr,
+ GFS_STATIC, chunk_size);
+@@ -123,7 +127,7 @@ gomp_loop_dynamic_start (long start, lon
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_init (thr->ts.work_share, start, end, incr,
+ GFS_DYNAMIC, chunk_size);
+@@ -151,7 +155,7 @@ gomp_loop_guided_start (long start, long
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_init (thr->ts.work_share, start, end, incr,
+ GFS_GUIDED, chunk_size);
+@@ -174,7 +178,7 @@ GOMP_loop_runtime_start (long start, lon
+ long *istart, long *iend)
+ {
+ struct gomp_task_icv *icv = gomp_icv (false);
+- switch (icv->run_sched_var)
++ switch (icv->run_sched_var & ~GFS_MONOTONIC)
+ {
+ case GFS_STATIC:
+ return gomp_loop_static_start (start, end, incr,
+@@ -197,6 +201,100 @@ GOMP_loop_runtime_start (long start, lon
+ }
+ }
+
++static long
++gomp_adjust_sched (long sched, long *chunk_size)
++{ /* Returns a concrete schedule kind for SCHED; may overwrite *CHUNK_SIZE. */
++ sched &= ~GFS_MONOTONIC; /* Drop the GFS_MONOTONIC bit before dispatching. */
++ switch (sched)
++ {
++ case GFS_STATIC:
++ case GFS_DYNAMIC:
++ case GFS_GUIDED:
++ return sched; /* Explicit kind: *CHUNK_SIZE is left as passed in. */
++ /* GFS_RUNTIME is used for runtime schedule without monotonic
++ or nonmonotonic modifiers on the clause.
++ GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
++ modifier. */
++ case GFS_RUNTIME:
++ /* GFS_AUTO is used for runtime schedule with nonmonotonic
++ modifier. */
++ case GFS_AUTO:
++ {
++ struct gomp_task_icv *icv = gomp_icv (false);
++ sched = icv->run_sched_var & ~GFS_MONOTONIC; /* Kind now comes from the ICV. */
++ switch (sched)
++ {
++ case GFS_STATIC:
++ case GFS_DYNAMIC:
++ case GFS_GUIDED:
++ *chunk_size = icv->run_sched_chunk_size; /* The ICV chunk size replaces the argument. */
++ break;
++ case GFS_AUTO:
++ sched = GFS_STATIC; /* An auto ICV is handled as static with chunk size 0. */
++ *chunk_size = 0;
++ break;
++ default:
++ abort (); /* Any other ICV kind aborts the process. */
++ }
++ return sched;
++ }
++ default:
++ abort (); /* Any other SCHED value aborts the process. */
++ }
++}
++
++bool
++GOMP_loop_start (long start, long end, long incr, long sched,
++ long chunk_size, long *istart, long *iend,
++ uintptr_t *reductions, void **mem)
++{ /* Loop start entry point; REDUCTIONS, MEM and ISTART are each tested for NULL below. */
++ struct gomp_thread *thr = gomp_thread ();
++
++ thr->ts.static_trip = 0;
++ if (reductions)
++ gomp_workshare_taskgroup_start ();
++ if (gomp_work_share_start (0))
++ { /* Taken when gomp_work_share_start returns true: set up the work share. */
++ sched = gomp_adjust_sched (sched, &chunk_size); /* May also replace CHUNK_SIZE. */
++ gomp_loop_init (thr->ts.work_share, start, end, incr,
++ sched, chunk_size);
++ if (reductions)
++ {
++ GOMP_taskgroup_reduction_register (reductions);
++ thr->task->taskgroup->workshare = true;
++ thr->ts.work_share->task_reductions = reductions; /* Read back in the else branch below. */
++ }
++ if (mem)
++ {
++ uintptr_t size = (uintptr_t) *mem; /* On entry *MEM carries a byte count. */
++ if (size > (sizeof (struct gomp_work_share)
++ - offsetof (struct gomp_work_share,
++ inline_ordered_team_ids)))
++ thr->ts.work_share->ordered_team_ids
++ = gomp_malloc_cleared (size); /* Larger than the struct's tail from inline_ordered_team_ids on. */
++ else
++ memset (thr->ts.work_share->ordered_team_ids, '\0', size);
++ *mem = (void *) thr->ts.work_share->ordered_team_ids; /* On exit *MEM is the buffer address. */
++ }
++ gomp_work_share_init_done ();
++ }
++ else
++ { /* Work share was not set up by this call. */
++ if (reductions)
++ {
++ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++ gomp_workshare_task_reduction_register (reductions,
++ first_reductions);
++ }
++ if (mem)
++ *mem = (void *) thr->ts.work_share->ordered_team_ids; /* Same buffer address as in the branch above. */
++ }
++
++ if (!istart)
++ return true; /* No iteration block is fetched when ISTART is NULL. */
++ return ialias_call (GOMP_loop_runtime_next) (istart, iend);
++}
++
+ /* The *_ordered_*_start routines are similar. The only difference is that
+ this work-share construct is initialized to expect an ORDERED section. */
+
+@@ -207,7 +305,7 @@ gomp_loop_ordered_static_start (long sta
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+- if (gomp_work_share_start (true))
++ if (gomp_work_share_start (1))
+ {
+ gomp_loop_init (thr->ts.work_share, start, end, incr,
+ GFS_STATIC, chunk_size);
+@@ -225,7 +323,7 @@ gomp_loop_ordered_dynamic_start (long st
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (true))
++ if (gomp_work_share_start (1))
+ {
+ gomp_loop_init (thr->ts.work_share, start, end, incr,
+ GFS_DYNAMIC, chunk_size);
+@@ -250,7 +348,7 @@ gomp_loop_ordered_guided_start (long sta
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (true))
++ if (gomp_work_share_start (1))
+ {
+ gomp_loop_init (thr->ts.work_share, start, end, incr,
+ GFS_GUIDED, chunk_size);
+@@ -273,7 +371,7 @@ GOMP_loop_ordered_runtime_start (long st
+ long *istart, long *iend)
+ {
+ struct gomp_task_icv *icv = gomp_icv (false);
+- switch (icv->run_sched_var)
++ switch (icv->run_sched_var & ~GFS_MONOTONIC)
+ {
+ case GFS_STATIC:
+ return gomp_loop_ordered_static_start (start, end, incr,
+@@ -297,6 +395,81 @@ GOMP_loop_ordered_runtime_start (long st
+ }
+ }
+
++bool
++GOMP_loop_ordered_start (long start, long end, long incr, long sched,
++ long chunk_size, long *istart, long *iend,
++ uintptr_t *reductions, void **mem)
++{ /* Ordered variant of the loop start; REDUCTIONS and MEM are each tested for NULL below. */
++ struct gomp_thread *thr = gomp_thread ();
++ size_t ordered = 1;
++ bool ret;
++
++ thr->ts.static_trip = 0;
++ if (reductions)
++ gomp_workshare_taskgroup_start ();
++ if (mem)
++ ordered += (uintptr_t) *mem; /* On entry *MEM carries a byte count; pass 1 plus that count. */
++ if (gomp_work_share_start (ordered))
++ { /* Taken when gomp_work_share_start returns true: set up the work share. */
++ sched = gomp_adjust_sched (sched, &chunk_size);
++ gomp_loop_init (thr->ts.work_share, start, end, incr,
++ sched, chunk_size);
++ if (reductions)
++ {
++ GOMP_taskgroup_reduction_register (reductions);
++ thr->task->taskgroup->workshare = true;
++ thr->ts.work_share->task_reductions = reductions;
++ }
++ if (sched == GFS_STATIC)
++ gomp_ordered_static_init ();
++ else
++ gomp_mutex_lock (&thr->ts.work_share->lock); /* Held until the unlock at the end of the function. */
++ gomp_work_share_init_done ();
++ }
++ else
++ {
++ if (reductions)
++ {
++ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++ gomp_workshare_task_reduction_register (reductions,
++ first_reductions);
++ }
++ sched = thr->ts.work_share->sched; /* Use the kind recorded in the work share, not the argument. */
++ if (sched != GFS_STATIC)
++ gomp_mutex_lock (&thr->ts.work_share->lock);
++ }
++
++ if (mem)
++ { /* *MEM becomes an address past the per-thread ordered_team_ids entries. */
++ uintptr_t p
++ = (uintptr_t) (thr->ts.work_share->ordered_team_ids
++ + (thr->ts.team ? thr->ts.team->nthreads : 1));
++ p += __alignof__ (long long) - 1; /* Round P up to the alignment of long long. */
++ p &= ~(__alignof__ (long long) - 1);
++ *mem = (void *) p;
++ }
++
++ switch (sched)
++ {
++ case GFS_STATIC:
++ case GFS_AUTO:
++ return !gomp_iter_static_next (istart, iend); /* Result is negated; returns before the unlock below. */
++ case GFS_DYNAMIC:
++ ret = gomp_iter_dynamic_next_locked (istart, iend);
++ break;
++ case GFS_GUIDED:
++ ret = gomp_iter_guided_next_locked (istart, iend);
++ break;
++ default:
++ abort ();
++ }
++
++ if (ret)
++ gomp_ordered_first ();
++ gomp_mutex_unlock (&thr->ts.work_share->lock); /* Pairs with the lock taken in either branch above. */
++ return ret;
++}
++
+ /* The *_doacross_*_start routines are similar. The only difference is that
+ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
+ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
+@@ -310,11 +483,11 @@ gomp_loop_doacross_static_start (unsigne
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+ GFS_STATIC, chunk_size);
+- gomp_doacross_init (ncounts, counts, chunk_size);
++ gomp_doacross_init (ncounts, counts, chunk_size, 0);
+ gomp_work_share_init_done ();
+ }
+
+@@ -328,11 +501,11 @@ gomp_loop_doacross_dynamic_start (unsign
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+ GFS_DYNAMIC, chunk_size);
+- gomp_doacross_init (ncounts, counts, chunk_size);
++ gomp_doacross_init (ncounts, counts, chunk_size, 0);
+ gomp_work_share_init_done ();
+ }
+
+@@ -354,11 +527,11 @@ gomp_loop_doacross_guided_start (unsigne
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+ GFS_GUIDED, chunk_size);
+- gomp_doacross_init (ncounts, counts, chunk_size);
++ gomp_doacross_init (ncounts, counts, chunk_size, 0);
+ gomp_work_share_init_done ();
+ }
+
+@@ -378,7 +551,7 @@ GOMP_loop_doacross_runtime_start (unsign
+ long *istart, long *iend)
+ {
+ struct gomp_task_icv *icv = gomp_icv (false);
+- switch (icv->run_sched_var)
++ switch (icv->run_sched_var & ~GFS_MONOTONIC)
+ {
+ case GFS_STATIC:
+ return gomp_loop_doacross_static_start (ncounts, counts,
+@@ -402,8 +575,52 @@ GOMP_loop_doacross_runtime_start (unsign
+ }
+ }
+
+-/* The *_next routines are called when the thread completes processing of
+- the iteration block currently assigned to it. If the work-share
++bool
++GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
++ long chunk_size, long *istart, long *iend,
++ uintptr_t *reductions, void **mem)
++{ /* Doacross variant of the loop start; REDUCTIONS and MEM are each tested for NULL below. */
++ struct gomp_thread *thr = gomp_thread ();
++
++ thr->ts.static_trip = 0;
++ if (reductions)
++ gomp_workshare_taskgroup_start ();
++ if (gomp_work_share_start (0))
++ { /* Taken when gomp_work_share_start returns true: set up the work share. */
++ size_t extra = 0;
++ if (mem)
++ extra = (uintptr_t) *mem; /* On entry *MEM carries a size handed on to gomp_doacross_init. */
++ sched = gomp_adjust_sched (sched, &chunk_size);
++ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
++ sched, chunk_size); /* The loop runs from 0 to COUNTS[0] in steps of 1. */
++ gomp_doacross_init (ncounts, counts, chunk_size, extra);
++ if (reductions)
++ {
++ GOMP_taskgroup_reduction_register (reductions);
++ thr->task->taskgroup->workshare = true;
++ thr->ts.work_share->task_reductions = reductions;
++ }
++ gomp_work_share_init_done ();
++ }
++ else
++ {
++ if (reductions)
++ {
++ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++ gomp_workshare_task_reduction_register (reductions,
++ first_reductions);
++ }
++ sched = thr->ts.work_share->sched; /* NOTE(review): SCHED is not read again in this function. */
++ }
++
++ if (mem)
++ *mem = thr->ts.work_share->doacross->extra; /* On exit *MEM is the doacross extra pointer. */
++
++ return ialias_call (GOMP_loop_runtime_next) (istart, iend); /* ISTART is not tested for NULL here. */
++}
++
++/* The *_next routines are called when the thread completes processing of
++ the iteration block currently assigned to it. If the work-share
+ construct is bound directly to a parallel construct, then the iteration
+ bounds may have been set up before the parallel. In which case, this
+ may be the first iteration for the thread.
+@@ -456,7 +673,7 @@ bool
+ GOMP_loop_runtime_next (long *istart, long *iend)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+-
++
+ switch (thr->ts.work_share->sched)
+ {
+ case GFS_STATIC:
+@@ -534,7 +751,7 @@ bool
+ GOMP_loop_ordered_runtime_next (long *istart, long *iend)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+-
++
+ switch (thr->ts.work_share->sched)
+ {
+ case GFS_STATIC:
+@@ -563,7 +780,7 @@ gomp_parallel_loop_start (void (*fn) (vo
+ num_threads = gomp_resolve_num_threads (num_threads, 0);
+ team = gomp_new_team (num_threads);
+ gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
+- gomp_team_start (fn, data, num_threads, flags, team);
++ gomp_team_start (fn, data, num_threads, flags, team, NULL);
+ }
+
+ void
+@@ -600,7 +817,8 @@ GOMP_parallel_loop_runtime_start (void (
+ {
+ struct gomp_task_icv *icv = gomp_icv (false);
+ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+- icv->run_sched_var, icv->run_sched_chunk_size, 0);
++ icv->run_sched_var & ~GFS_MONOTONIC,
++ icv->run_sched_chunk_size, 0);
+ }
+
+ ialias_redirect (GOMP_parallel_end)
+@@ -638,11 +856,28 @@ GOMP_parallel_loop_guided (void (*fn) (v
+ GOMP_parallel_end ();
+ }
+
++void
++GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
++ unsigned num_threads, long start, long end,
++ long incr, unsigned flags)
++{ /* Parallel loop whose schedule kind and chunk size are read from the ICV. */
++ struct gomp_task_icv *icv = gomp_icv (false);
++ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
++ icv->run_sched_var & ~GFS_MONOTONIC, /* Kind with the monotonic bit removed. */
++ icv->run_sched_chunk_size, flags);
++ fn (data); /* The calling thread runs FN itself, then ends the region. */
++ GOMP_parallel_end ();
++}
++
+ #ifdef HAVE_ATTRIBUTE_ALIAS
+ extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
+ __attribute__((alias ("GOMP_parallel_loop_dynamic")));
+ extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
+ __attribute__((alias ("GOMP_parallel_loop_guided")));
++extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
++ __attribute__((alias ("GOMP_parallel_loop_runtime")));
++extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
++ __attribute__((alias ("GOMP_parallel_loop_runtime")));
+ #else
+ void
+ GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
+@@ -667,21 +902,35 @@ GOMP_parallel_loop_nonmonotonic_guided (
+ fn (data);
+ GOMP_parallel_end ();
+ }
+-#endif
+
+ void
+-GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
+- unsigned num_threads, long start, long end,
+- long incr, unsigned flags)
++GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
++ unsigned num_threads, long start,
++ long end, long incr, unsigned flags)
+ {
+ struct gomp_task_icv *icv = gomp_icv (false);
+ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+- icv->run_sched_var, icv->run_sched_chunk_size,
+- flags);
++ icv->run_sched_var & ~GFS_MONOTONIC,
++ icv->run_sched_chunk_size, flags);
+ fn (data);
+ GOMP_parallel_end ();
+ }
+
++void
++GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
++ unsigned num_threads, long start,
++ long end, long incr,
++ unsigned flags)
++{
++ struct gomp_task_icv *icv = gomp_icv (false);
++ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
++ icv->run_sched_var & ~GFS_MONOTONIC,
++ icv->run_sched_chunk_size, flags);
++ fn (data);
++ GOMP_parallel_end ();
++}
++#endif
++
+ /* The GOMP_loop_end* routines are called after the thread is told that
+ all loop iterations are complete. The first two versions synchronize
+ all threads; the nowait version does not. */
+@@ -721,6 +970,10 @@ extern __typeof(gomp_loop_dynamic_start)
+ __attribute__((alias ("gomp_loop_dynamic_start")));
+ extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
+ __attribute__((alias ("gomp_loop_guided_start")));
++extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
++ __attribute__((alias ("GOMP_loop_runtime_start")));
++extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
++ __attribute__((alias ("GOMP_loop_runtime_start")));
+
+ extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
+ __attribute__((alias ("gomp_loop_ordered_static_start")));
+@@ -746,6 +999,10 @@ extern __typeof(gomp_loop_dynamic_next)
+ __attribute__((alias ("gomp_loop_dynamic_next")));
+ extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
+ __attribute__((alias ("gomp_loop_guided_next")));
++extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
++ __attribute__((alias ("GOMP_loop_runtime_next")));
++extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
++ __attribute__((alias ("GOMP_loop_runtime_next")));
+
+ extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
+ __attribute__((alias ("gomp_loop_ordered_static_next")));
+@@ -791,6 +1048,20 @@ GOMP_loop_nonmonotonic_guided_start (lon
+ }
+
+ bool
++GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
++ long *istart, long *iend)
++{
++ return GOMP_loop_runtime_start (start, end, incr, istart, iend);
++}
++
++bool
++GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
++ long *istart, long *iend)
++{
++ return GOMP_loop_runtime_start (start, end, incr, istart, iend);
++}
++
++bool
+ GOMP_loop_ordered_static_start (long start, long end, long incr,
+ long chunk_size, long *istart, long *iend)
+ {
+@@ -869,6 +1140,18 @@ GOMP_loop_nonmonotonic_guided_next (long
+ }
+
+ bool
++GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
++{
++ return GOMP_loop_runtime_next (istart, iend);
++}
++
++bool
++GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
++{
++ return GOMP_loop_runtime_next (istart, iend);
++}
++
++bool
+ GOMP_loop_ordered_static_next (long *istart, long *iend)
+ {
+ return gomp_loop_ordered_static_next (istart, iend);
+--- libgomp/oacc-plugin.c.jj 2018-04-25 09:40:31.322655307 +0200
++++ libgomp/oacc-plugin.c 2019-05-07 18:46:36.531109656 +0200
+@@ -49,3 +49,14 @@ GOMP_PLUGIN_acc_thread (void)
+ struct goacc_thread *thr = goacc_thread ();
+ return thr ? thr->target_tls : NULL;
+ }
++
++int
++GOMP_PLUGIN_acc_default_dim (unsigned int i)
++{ /* Return goacc_default_dims[I]; an I of GOMP_DIM_MAX or more is reported via gomp_fatal. */
++ if (i >= GOMP_DIM_MAX)
++ {
++ gomp_fatal ("invalid dimension argument: %u", i); /* I is unsigned, so it is printed with %u. */
++ return -1;
++ }
++ return goacc_default_dims[i];
++}
+--- libgomp/libgomp_g.h.jj 2018-04-25 09:40:31.320655306 +0200
++++ libgomp/libgomp_g.h 2019-05-07 18:46:36.513109943 +0200
+@@ -1,4 +1,4 @@
+-/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
++/* Copyright (C) 2005-2019 Free Software Foundation, Inc.
+ Contributed by Richard Henderson .
+
+ This file is part of the GNU Offloading and Multi Processing Library
+@@ -31,6 +31,7 @@
+
+ #include <stdbool.h>
+ #include <stddef.h>
++#include "gstdint.h"
+
+ /* barrier.c */
+
+@@ -56,6 +57,12 @@ extern bool GOMP_loop_nonmonotonic_dynam
+ long *, long *);
+ extern bool GOMP_loop_nonmonotonic_guided_start (long, long, long, long,
+ long *, long *);
++extern bool GOMP_loop_nonmonotonic_runtime_start (long, long, long,
++ long *, long *);
++extern bool GOMP_loop_maybe_nonmonotonic_runtime_start (long, long, long,
++ long *, long *);
++extern bool GOMP_loop_start (long, long, long, long, long, long *, long *,
++ uintptr_t *, void **);
+
+ extern bool GOMP_loop_ordered_static_start (long, long, long, long,
+ long *, long *);
+@@ -64,6 +71,8 @@ extern bool GOMP_loop_ordered_dynamic_st
+ extern bool GOMP_loop_ordered_guided_start (long, long, long, long,
+ long *, long *);
+ extern bool GOMP_loop_ordered_runtime_start (long, long, long, long *, long *);
++extern bool GOMP_loop_ordered_start (long, long, long, long, long, long *,
++ long *, uintptr_t *, void **);
+
+ extern bool GOMP_loop_static_next (long *, long *);
+ extern bool GOMP_loop_dynamic_next (long *, long *);
+@@ -71,6 +80,8 @@ extern bool GOMP_loop_guided_next (long
+ extern bool GOMP_loop_runtime_next (long *, long *);
+ extern bool GOMP_loop_nonmonotonic_dynamic_next (long *, long *);
+ extern bool GOMP_loop_nonmonotonic_guided_next (long *, long *);
++extern bool GOMP_loop_nonmonotonic_runtime_next (long *, long *);
++extern bool GOMP_loop_maybe_nonmonotonic_runtime_next (long *, long *);
+
+ extern bool GOMP_loop_ordered_static_next (long *, long *);
+ extern bool GOMP_loop_ordered_dynamic_next (long *, long *);
+@@ -85,6 +96,8 @@ extern bool GOMP_loop_doacross_guided_st
+ long *);
+ extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *,
+ long *);
++extern bool GOMP_loop_doacross_start (unsigned, long *, long, long, long *,
++ long *, uintptr_t *, void **);
+
+ extern void GOMP_parallel_loop_static_start (void (*)(void *), void *,
+ unsigned, long, long, long, long);
+@@ -112,6 +125,13 @@ extern void GOMP_parallel_loop_nonmonoto
+ extern void GOMP_parallel_loop_nonmonotonic_guided (void (*)(void *), void *,
+ unsigned, long, long,
+ long, long, unsigned);
++extern void GOMP_parallel_loop_nonmonotonic_runtime (void (*)(void *), void *,
++ unsigned, long, long,
++ long, unsigned);
++extern void GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*)(void *),
++ void *, unsigned,
++ long, long,
++ long, unsigned);
+
+ extern void GOMP_loop_end (void);
+ extern void GOMP_loop_end_nowait (void);
+@@ -154,6 +174,21 @@ extern bool GOMP_loop_ull_nonmonotonic_g
+ unsigned long long,
+ unsigned long long *,
+ unsigned long long *);
++extern bool GOMP_loop_ull_nonmonotonic_runtime_start (bool, unsigned long long,
++ unsigned long long,
++ unsigned long long,
++ unsigned long long *,
++ unsigned long long *);
++extern bool GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool,
++ unsigned long long,
++ unsigned long long,
++ unsigned long long,
++ unsigned long long *,
++ unsigned long long *);
++extern bool GOMP_loop_ull_start (bool, unsigned long long, unsigned long long,
++ unsigned long long, long, unsigned long long,
++ unsigned long long *, unsigned long long *,
++ uintptr_t *, void **);
+
+ extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long,
+ unsigned long long,
+@@ -178,6 +213,13 @@ extern bool GOMP_loop_ull_ordered_runtim
+ unsigned long long,
+ unsigned long long *,
+ unsigned long long *);
++extern bool GOMP_loop_ull_ordered_start (bool, unsigned long long,
++ unsigned long long,
++ unsigned long long, long,
++ unsigned long long,
++ unsigned long long *,
++ unsigned long long *,
++ uintptr_t *, void **);
+
+ extern bool GOMP_loop_ull_static_next (unsigned long long *,
+ unsigned long long *);
+@@ -191,6 +233,10 @@ extern bool GOMP_loop_ull_nonmonotonic_d
+ unsigned long long *);
+ extern bool GOMP_loop_ull_nonmonotonic_guided_next (unsigned long long *,
+ unsigned long long *);
++extern bool GOMP_loop_ull_nonmonotonic_runtime_next (unsigned long long *,
++ unsigned long long *);
++extern bool GOMP_loop_ull_maybe_nonmonotonic_runtime_next (unsigned long long *,
++ unsigned long long *);
+
+ extern bool GOMP_loop_ull_ordered_static_next (unsigned long long *,
+ unsigned long long *);
+@@ -220,6 +266,11 @@ extern bool GOMP_loop_ull_doacross_runti
+ unsigned long long *,
+ unsigned long long *,
+ unsigned long long *);
++extern bool GOMP_loop_ull_doacross_start (unsigned, unsigned long long *,
++ long, unsigned long long,
++ unsigned long long *,
++ unsigned long long *,
++ uintptr_t *, void **);
+
+ /* ordered.c */
+
+@@ -235,6 +286,8 @@ extern void GOMP_doacross_ull_wait (unsi
+ extern void GOMP_parallel_start (void (*) (void *), void *, unsigned);
+ extern void GOMP_parallel_end (void);
+ extern void GOMP_parallel (void (*) (void *), void *, unsigned, unsigned);
++extern unsigned GOMP_parallel_reductions (void (*) (void *), void *, unsigned,
++ unsigned);
+ extern bool GOMP_cancel (int, bool);
+ extern bool GOMP_cancellation_point (int);
+
+@@ -251,13 +304,19 @@ extern void GOMP_taskloop_ull (void (*)
+ unsigned long long, unsigned long long,
+ unsigned long long);
+ extern void GOMP_taskwait (void);
++extern void GOMP_taskwait_depend (void **);
+ extern void GOMP_taskyield (void);
+ extern void GOMP_taskgroup_start (void);
+ extern void GOMP_taskgroup_end (void);
++extern void GOMP_taskgroup_reduction_register (uintptr_t *);
++extern void GOMP_taskgroup_reduction_unregister (uintptr_t *);
++extern void GOMP_task_reduction_remap (size_t, size_t, void **);
++extern void GOMP_workshare_task_reduction_unregister (bool);
+
+ /* sections.c */
+
+ extern unsigned GOMP_sections_start (unsigned);
++extern unsigned GOMP_sections2_start (unsigned, uintptr_t *, void **);
+ extern unsigned GOMP_sections_next (void);
+ extern void GOMP_parallel_sections_start (void (*) (void *), void *,
+ unsigned, unsigned);
+@@ -293,6 +352,11 @@ extern void GOMP_target_enter_exit_data
+ void **);
+ extern void GOMP_teams (unsigned int, unsigned int);
+
++/* teams.c */
++
++extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned,
++ unsigned);
++
+ /* oacc-parallel.c */
+
+ extern void GOACC_parallel_keyed (int, void (*) (void *), size_t,
+--- libgomp/affinity.c.jj 2018-04-25 09:40:31.913655581 +0200
++++ libgomp/affinity.c 2019-05-07 18:46:36.254114081 +0200
+@@ -26,6 +26,8 @@
+ /* This is a generic stub implementation of a CPU affinity setting. */
+
+ #include "libgomp.h"
++#include <string.h>
++#include <stdio.h>
+
+ void
+ gomp_init_affinity (void)
+@@ -138,5 +140,17 @@ gomp_get_place_proc_ids_8 (int place_num
+ (void) ids;
+ }
+
++void
++gomp_display_affinity_place (char *buffer, size_t size, size_t *ret,
++ int place)
++{ /* Generic stub: PLACE is not read; the text depends only on gomp_available_cpus. */
++ char buf[sizeof (long) * 3 + 4]; /* 3 decimal digits per byte bounds a long; 4 more for "0-" and the NUL. */
++ if (gomp_available_cpus > 1)
++ sprintf (buf, "0-%lu", gomp_available_cpus - 1); /* Range "0-<last>". */
++ else
++ strcpy (buf, "0");
++ gomp_display_string (buffer, size, ret, buf, strlen (buf)); /* Hand the text to the shared output helper. */
++}
++
+ ialias(omp_get_place_num_procs)
+ ialias(omp_get_place_proc_ids)
+--- libgomp/sections.c.jj 2018-04-25 09:40:31.924655586 +0200
++++ libgomp/sections.c 2019-05-07 18:46:36.535109592 +0200
+@@ -26,8 +26,11 @@
+ /* This file handles the SECTIONS construct. */
+
+ #include "libgomp.h"
++#include <string.h>
+
+
++ialias_redirect (GOMP_taskgroup_reduction_register)
++
+ /* Initialize the given work share construct from the given arguments. */
+
+ static inline void
+@@ -72,7 +75,7 @@ GOMP_sections_start (unsigned count)
+ struct gomp_thread *thr = gomp_thread ();
+ long s, e, ret;
+
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_sections_init (thr->ts.work_share, count);
+ gomp_work_share_init_done ();
+@@ -95,6 +98,66 @@ GOMP_sections_start (unsigned count)
+ return ret;
+ }
+
++unsigned
++GOMP_sections2_start (unsigned count, uintptr_t *reductions, void **mem)
++{ /* Sections start taking REDUCTIONS and MEM, each tested for NULL below; returns a section number or 0. */
++ struct gomp_thread *thr = gomp_thread ();
++ long s, e, ret;
++
++ if (reductions)
++ gomp_workshare_taskgroup_start ();
++ if (gomp_work_share_start (0))
++ { /* Taken when gomp_work_share_start returns true: set up the work share. */
++ gomp_sections_init (thr->ts.work_share, count);
++ if (reductions)
++ {
++ GOMP_taskgroup_reduction_register (reductions);
++ thr->task->taskgroup->workshare = true;
++ thr->ts.work_share->task_reductions = reductions; /* Read back in the else branch below. */
++ }
++ if (mem)
++ {
++ uintptr_t size = (uintptr_t) *mem; /* On entry *MEM carries a byte count. */
++ if (size > (sizeof (struct gomp_work_share)
++ - offsetof (struct gomp_work_share,
++ inline_ordered_team_ids)))
++ thr->ts.work_share->ordered_team_ids
++ = gomp_malloc_cleared (size); /* Larger than the struct's tail from inline_ordered_team_ids on. */
++ else
++ memset (thr->ts.work_share->ordered_team_ids, '\0', size);
++ *mem = (void *) thr->ts.work_share->ordered_team_ids; /* On exit *MEM is the buffer address. */
++ }
++ gomp_work_share_init_done ();
++ }
++ else
++ {
++ if (reductions)
++ {
++ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++ gomp_workshare_task_reduction_register (reductions,
++ first_reductions);
++ }
++ if (mem)
++ *mem = (void *) thr->ts.work_share->ordered_team_ids;
++ }
++
++#ifdef HAVE_SYNC_BUILTINS
++ if (gomp_iter_dynamic_next (&s, &e)) /* Variant called without taking the work-share lock. */
++ ret = s;
++ else
++ ret = 0; /* 0 is returned when no range was handed out. */
++#else
++ gomp_mutex_lock (&thr->ts.work_share->lock); /* Fallback: the _locked variant is called under the lock. */
++ if (gomp_iter_dynamic_next_locked (&s, &e))
++ ret = s;
++ else
++ ret = 0;
++ gomp_mutex_unlock (&thr->ts.work_share->lock);
++#endif
++
++ return ret;
++}
++
+ /* This routine is called when the thread completes processing of the
+ section currently assigned to it. If the work-share construct is
+ bound directly to a parallel construct, then the construct may have
+@@ -140,7 +203,7 @@ GOMP_parallel_sections_start (void (*fn)
+ num_threads = gomp_resolve_num_threads (num_threads, count);
+ team = gomp_new_team (num_threads);
+ gomp_sections_init (&team->work_shares[0], count);
+- gomp_team_start (fn, data, num_threads, 0, team);
++ gomp_team_start (fn, data, num_threads, 0, team, NULL);
+ }
+
+ ialias_redirect (GOMP_parallel_end)
+@@ -154,7 +217,7 @@ GOMP_parallel_sections (void (*fn) (void
+ num_threads = gomp_resolve_num_threads (num_threads, count);
+ team = gomp_new_team (num_threads);
+ gomp_sections_init (&team->work_shares[0], count);
+- gomp_team_start (fn, data, num_threads, flags, team);
++ gomp_team_start (fn, data, num_threads, flags, team, NULL);
+ fn (data);
+ GOMP_parallel_end ();
+ }
+--- libgomp/config/linux/affinity.c.jj 2018-04-25 09:40:31.875655563 +0200
++++ libgomp/config/linux/affinity.c 2019-05-07 18:46:36.344112642 +0200
+@@ -396,6 +396,56 @@ gomp_get_place_proc_ids_8 (int place_num
+ *ids++ = i;
+ }
+
++void
++gomp_display_affinity_place (char *buffer, size_t size, size_t *ret,
++ int place)
++{ /* Emit the CPUs of PLACE as comma-separated numbers and ranges via gomp_display_string. */
++ cpu_set_t *cpusetp;
++ char buf[sizeof (long) * 3 + 4]; /* Holds one piece of text at a time: a number plus a short prefix. */
++ if (place >= 0 && place < gomp_places_list_len)
++ cpusetp = (cpu_set_t *) gomp_places_list[place];
++ else if (gomp_cpusetp)
++ cpusetp = gomp_cpusetp; /* PLACE out of range: use the global CPU set when there is one. */
++ else
++ { /* No CPU set at all: print "0" or "0-<last>" from gomp_available_cpus. */
++ if (gomp_available_cpus > 1)
++ sprintf (buf, "0-%lu", gomp_available_cpus - 1);
++ else
++ strcpy (buf, "0");
++ gomp_display_string (buffer, size, ret, buf, strlen (buf));
++ return;
++ }
++
++ unsigned long i, max = 8 * gomp_cpuset_size, start; /* MAX is the set size in bits. */
++ bool prev_set = false;
++ start = max; /* START == MAX means no run has been printed yet. */
++ for (i = 0; i <= max; i++)
++ { /* Runs one step past the last bit so that an open run is closed. */
++ bool this_set;
++ if (i == max)
++ this_set = false;
++ else
++ this_set = CPU_ISSET_S (i, gomp_cpuset_size, cpusetp);
++ if (this_set != prev_set)
++ { /* Only a change of state produces output. */
++ prev_set = this_set;
++ if (this_set)
++ { /* A run begins: print its first CPU, after a comma unless it is the first run. */
++ char *p = buf;
++ if (start != max)
++ *p++ = ',';
++ sprintf (p, "%lu", i);
++ start = i;
++ }
++ else if (i == start + 1)
++ continue; /* The run was a single CPU; it has already been printed. */
++ else
++ sprintf (buf, "-%lu", i - 1); /* A longer run ends: append "-<last CPU>". */
++ gomp_display_string (buffer, size, ret, buf, strlen (buf));
++ }
++ }
++}
++
+ ialias(omp_get_place_num_procs)
+ ialias(omp_get_place_proc_ids)
+
+--- libgomp/config/linux/ia64/futex.h.jj 2018-04-25 09:40:31.877655564 +0200
++++ libgomp/config/linux/ia64/futex.h 2019-05-07 18:46:36.344112642 +0200
+@@ -45,8 +45,8 @@ sys_futex0(int *addr, int op, int val)
+ "=r"(r8), "=r"(r10)
+ : "r"(r15), "r"(out0), "r"(out1), "r"(out2), "r"(out3)
+ : "memory", "out4", "out5", "out6", "out7",
+- /* Non-stacked integer registers, minus r8, r10, r15. */
+- "r2", "r3", "r9", "r11", "r12", "r13", "r14", "r16", "r17", "r18",
++ /* Non-stacked integer registers, minus r8, r10, r12, r15. */
++ "r2", "r3", "r9", "r11", "r13", "r14", "r16", "r17", "r18",
+ "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27",
+ "r28", "r29", "r30", "r31",
+ /* Predicate registers. */
+--- libgomp/config/nvptx/teams.c.jj 2019-05-07 18:46:36.459110805 +0200
++++ libgomp/config/nvptx/teams.c 2019-05-07 18:46:36.459110805 +0200
+@@ -0,0 +1,57 @@
++/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
++ Contributed by Alexander Monakov
++
++ This file is part of the GNU Offloading and Multi Processing Library
++ (libgomp).
++
++ Libgomp is free software; you can redistribute it and/or modify it
++ under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 3, or (at your option)
++ any later version.
++
++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ more details.
++
++ Under Section 7 of GPL version 3, you are granted additional
++ permissions described in the GCC Runtime Library Exception, version
++ 3.1, as published by the Free Software Foundation.
++
++ You should have received a copy of the GNU General Public License and
++ a copy of the GCC Runtime Library Exception along with this program;
++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
++ <http://www.gnu.org/licenses/>. */
++
++/* This file defines OpenMP API entry points that accelerator targets are
++ expected to replace. */
++
++#include "libgomp.h"
++
++void
++GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams,
++ unsigned int thread_limit, unsigned int flags)
++{
++ (void) fn; /* Every parameter is cast to void and nothing else runs: */
++ (void) data; /* this definition is a no-op that never calls FN. */
++ (void) flags;
++ (void) num_teams;
++ (void) thread_limit;
++}
++
++int
++omp_get_num_teams (void)
++{
++ return gomp_num_teams_var + 1; /* The variable holds the team count minus one (GOMP_teams stores num_teams - 1). */
++}
++
++int
++omp_get_team_num (void)
++{
++ int ctaid;
++ asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid)); /* Copy the PTX special register %ctaid.x into CTAID. */
++ return ctaid; /* The team number is that register's value, unmodified. */
++}
++
++ialias (omp_get_num_teams)
++ialias (omp_get_team_num)
+--- libgomp/config/nvptx/team.c.jj 2018-04-25 09:40:31.890655570 +0200
++++ libgomp/config/nvptx/team.c 2019-05-07 18:46:36.459110805 +0200
+@@ -116,7 +116,8 @@ gomp_thread_start (struct gomp_thread_po
+
+ void
+ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
+- unsigned flags, struct gomp_team *team)
++ unsigned flags, struct gomp_team *team,
++ struct gomp_taskgroup *taskgroup)
+ {
+ struct gomp_thread *thr, *nthr;
+ struct gomp_task *task;
+@@ -147,6 +148,7 @@ gomp_team_start (void (*fn) (void *), vo
+ nthreads_var = icv->nthreads_var;
+ gomp_init_task (thr->task, task, icv);
+ team->implicit_task[0].icv.nthreads_var = nthreads_var;
++ team->implicit_task[0].taskgroup = taskgroup;
+
+ if (nthreads == 1)
+ return;
+@@ -166,6 +168,7 @@ gomp_team_start (void (*fn) (void *), vo
+ nthr->task = &team->implicit_task[i];
+ gomp_init_task (nthr->task, task, icv);
+ team->implicit_task[i].icv.nthreads_var = nthreads_var;
++ team->implicit_task[i].taskgroup = taskgroup;
+ nthr->fn = fn;
+ nthr->data = data;
+ team->ordered_release[i] = &nthr->release;
+@@ -174,5 +177,11 @@ gomp_team_start (void (*fn) (void *), vo
+ gomp_simple_barrier_wait (&pool->threads_dock);
+ }
+
++int
++gomp_pause_host (void)
++{
++ return -1; /* Unconditional -1; presumably "pausing is unsupported on nvptx" -- confirm with callers. */
++}
++
+ #include "../../team.c"
+ #endif
+--- libgomp/config/nvptx/oacc-parallel.c.jj 2018-04-25 09:40:31.887655569 +0200
++++ libgomp/config/nvptx/oacc-parallel.c 2019-05-07 18:46:36.453110901 +0200
+@@ -1,358 +0,0 @@
+-/* OpenACC constructs
+-
+- Copyright (C) 2014-2018 Free Software Foundation, Inc.
+-
+- Contributed by Mentor Embedded.
+-
+- This file is part of the GNU Offloading and Multi Processing Library
+- (libgomp).
+-
+- Libgomp is free software; you can redistribute it and/or modify it
+- under the terms of the GNU General Public License as published by
+- the Free Software Foundation; either version 3, or (at your option)
+- any later version.
+-
+- Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+- FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+- more details.
+-
+- Under Section 7 of GPL version 3, you are granted additional
+- permissions described in the GCC Runtime Library Exception, version
+- 3.1, as published by the Free Software Foundation.
+-
+- You should have received a copy of the GNU General Public License and
+- a copy of the GCC Runtime Library Exception along with this program;
+- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+- <http://www.gnu.org/licenses/>. */
+-
+-#include "libgomp_g.h"
+-
+-__asm__ (".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n"
+- "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_num_threads\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n"
+- "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_thread_num\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n"
+- "// BEGIN GLOBAL FUNCTION DECL: abort\n"
+- ".extern .func abort;\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n"
+- "{\n"
+- ".reg .u32 %ar1;\n"
+- ".reg .u32 %retval;\n"
+- ".reg .u64 %hr10;\n"
+- ".reg .u32 %r22;\n"
+- ".reg .u32 %r23;\n"
+- ".reg .u32 %r24;\n"
+- ".reg .u32 %r25;\n"
+- ".reg .u32 %r26;\n"
+- ".reg .u32 %r27;\n"
+- ".reg .u32 %r28;\n"
+- ".reg .u32 %r29;\n"
+- ".reg .pred %r30;\n"
+- ".reg .u32 %r31;\n"
+- ".reg .pred %r32;\n"
+- ".reg .u32 %r33;\n"
+- ".reg .pred %r34;\n"
+- ".local .align 8 .b8 %frame[4];\n"
+- "ld.param.u32 %ar1,[%in_ar1];\n"
+- "mov.u32 %r27,%ar1;\n"
+- "st.local.u32 [%frame],%r27;\n"
+- "ld.local.u32 %r28,[%frame];\n"
+- "mov.u32 %r29,1;\n"
+- "setp.eq.u32 %r30,%r28,%r29;\n"
+- "@%r30 bra $L4;\n"
+- "mov.u32 %r31,2;\n"
+- "setp.eq.u32 %r32,%r28,%r31;\n"
+- "@%r32 bra $L5;\n"
+- "mov.u32 %r33,0;\n"
+- "setp.eq.u32 %r34,%r28,%r33;\n"
+- "@!%r34 bra $L8;\n"
+- "mov.u32 %r23,%tid.x;\n"
+- "mov.u32 %r22,%r23;\n"
+- "bra $L7;\n"
+- "$L4:\n"
+- "mov.u32 %r24,%tid.y;\n"
+- "mov.u32 %r22,%r24;\n"
+- "bra $L7;\n"
+- "$L5:\n"
+- "mov.u32 %r25,%tid.z;\n"
+- "mov.u32 %r22,%r25;\n"
+- "bra $L7;\n"
+- "$L8:\n"
+- "{\n"
+- "{\n"
+- "call abort;\n"
+- "}\n"
+- "}\n"
+- "$L7:\n"
+- "mov.u32 %r26,%r22;\n"
+- "mov.u32 %retval,%r26;\n"
+- "st.param.u32 [%out_retval],%retval;\n"
+- "ret;\n"
+- "}\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n"
+- "{\n"
+- ".reg .u32 %ar1;\n"
+- ".reg .u32 %retval;\n"
+- ".reg .u64 %hr10;\n"
+- ".reg .u32 %r22;\n"
+- ".reg .u32 %r23;\n"
+- ".reg .u32 %r24;\n"
+- ".reg .u32 %r25;\n"
+- ".reg .u32 %r26;\n"
+- ".reg .u32 %r27;\n"
+- ".reg .u32 %r28;\n"
+- ".reg .u32 %r29;\n"
+- ".reg .pred %r30;\n"
+- ".reg .u32 %r31;\n"
+- ".reg .pred %r32;\n"
+- ".reg .u32 %r33;\n"
+- ".reg .pred %r34;\n"
+- ".local .align 8 .b8 %frame[4];\n"
+- "ld.param.u32 %ar1,[%in_ar1];\n"
+- "mov.u32 %r27,%ar1;\n"
+- "st.local.u32 [%frame],%r27;\n"
+- "ld.local.u32 %r28,[%frame];\n"
+- "mov.u32 %r29,1;\n"
+- "setp.eq.u32 %r30,%r28,%r29;\n"
+- "@%r30 bra $L11;\n"
+- "mov.u32 %r31,2;\n"
+- "setp.eq.u32 %r32,%r28,%r31;\n"
+- "@%r32 bra $L12;\n"
+- "mov.u32 %r33,0;\n"
+- "setp.eq.u32 %r34,%r28,%r33;\n"
+- "@!%r34 bra $L15;\n"
+- "mov.u32 %r23,%ntid.x;\n"
+- "mov.u32 %r22,%r23;\n"
+- "bra $L14;\n"
+- "$L11:\n"
+- "mov.u32 %r24,%ntid.y;\n"
+- "mov.u32 %r22,%r24;\n"
+- "bra $L14;\n"
+- "$L12:\n"
+- "mov.u32 %r25,%ntid.z;\n"
+- "mov.u32 %r22,%r25;\n"
+- "bra $L14;\n"
+- "$L15:\n"
+- "{\n"
+- "{\n"
+- "call abort;\n"
+- "}\n"
+- "}\n"
+- "$L14:\n"
+- "mov.u32 %r26,%r22;\n"
+- "mov.u32 %retval,%r26;\n"
+- "st.param.u32 [%out_retval],%retval;\n"
+- "ret;\n"
+- "}\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n"
+- "{\n"
+- ".reg .u32 %ar1;\n"
+- ".reg .u32 %retval;\n"
+- ".reg .u64 %hr10;\n"
+- ".reg .u32 %r22;\n"
+- ".reg .u32 %r23;\n"
+- ".reg .u32 %r24;\n"
+- ".reg .u32 %r25;\n"
+- ".reg .u32 %r26;\n"
+- ".reg .u32 %r27;\n"
+- ".reg .u32 %r28;\n"
+- ".reg .u32 %r29;\n"
+- ".reg .pred %r30;\n"
+- ".reg .u32 %r31;\n"
+- ".reg .pred %r32;\n"
+- ".reg .u32 %r33;\n"
+- ".reg .pred %r34;\n"
+- ".local .align 8 .b8 %frame[4];\n"
+- "ld.param.u32 %ar1,[%in_ar1];\n"
+- "mov.u32 %r27,%ar1;\n"
+- "st.local.u32 [%frame],%r27;\n"
+- "ld.local.u32 %r28,[%frame];\n"
+- "mov.u32 %r29,1;\n"
+- "setp.eq.u32 %r30,%r28,%r29;\n"
+- "@%r30 bra $L18;\n"
+- "mov.u32 %r31,2;\n"
+- "setp.eq.u32 %r32,%r28,%r31;\n"
+- "@%r32 bra $L19;\n"
+- "mov.u32 %r33,0;\n"
+- "setp.eq.u32 %r34,%r28,%r33;\n"
+- "@!%r34 bra $L22;\n"
+- "mov.u32 %r23,%ctaid.x;\n"
+- "mov.u32 %r22,%r23;\n"
+- "bra $L21;\n"
+- "$L18:\n"
+- "mov.u32 %r24,%ctaid.y;\n"
+- "mov.u32 %r22,%r24;\n"
+- "bra $L21;\n"
+- "$L19:\n"
+- "mov.u32 %r25,%ctaid.z;\n"
+- "mov.u32 %r22,%r25;\n"
+- "bra $L21;\n"
+- "$L22:\n"
+- "{\n"
+- "{\n"
+- "call abort;\n"
+- "}\n"
+- "}\n"
+- "$L21:\n"
+- "mov.u32 %r26,%r22;\n"
+- "mov.u32 %retval,%r26;\n"
+- "st.param.u32 [%out_retval],%retval;\n"
+- "ret;\n"
+- "}\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n"
+- "{\n"
+- ".reg .u32 %ar1;\n"
+- ".reg .u32 %retval;\n"
+- ".reg .u64 %hr10;\n"
+- ".reg .u32 %r22;\n"
+- ".reg .u32 %r23;\n"
+- ".reg .u32 %r24;\n"
+- ".reg .u32 %r25;\n"
+- ".reg .u32 %r26;\n"
+- ".reg .u32 %r27;\n"
+- ".reg .u32 %r28;\n"
+- ".reg .u32 %r29;\n"
+- ".reg .pred %r30;\n"
+- ".reg .u32 %r31;\n"
+- ".reg .pred %r32;\n"
+- ".reg .u32 %r33;\n"
+- ".reg .pred %r34;\n"
+- ".local .align 8 .b8 %frame[4];\n"
+- "ld.param.u32 %ar1,[%in_ar1];\n"
+- "mov.u32 %r27,%ar1;\n"
+- "st.local.u32 [%frame],%r27;\n"
+- "ld.local.u32 %r28,[%frame];\n"
+- "mov.u32 %r29,1;\n"
+- "setp.eq.u32 %r30,%r28,%r29;\n"
+- "@%r30 bra $L25;\n"
+- "mov.u32 %r31,2;\n"
+- "setp.eq.u32 %r32,%r28,%r31;\n"
+- "@%r32 bra $L26;\n"
+- "mov.u32 %r33,0;\n"
+- "setp.eq.u32 %r34,%r28,%r33;\n"
+- "@!%r34 bra $L29;\n"
+- "mov.u32 %r23,%nctaid.x;\n"
+- "mov.u32 %r22,%r23;\n"
+- "bra $L28;\n"
+- "$L25:\n"
+- "mov.u32 %r24,%nctaid.y;\n"
+- "mov.u32 %r22,%r24;\n"
+- "bra $L28;\n"
+- "$L26:\n"
+- "mov.u32 %r25,%nctaid.z;\n"
+- "mov.u32 %r22,%r25;\n"
+- "bra $L28;\n"
+- "$L29:\n"
+- "{\n"
+- "{\n"
+- "call abort;\n"
+- "}\n"
+- "}\n"
+- "$L28:\n"
+- "mov.u32 %r26,%r22;\n"
+- "mov.u32 %retval,%r26;\n"
+- "st.param.u32 [%out_retval],%retval;\n"
+- "ret;\n"
+- "}\n"
+- "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_num_threads\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n"
+- "{\n"
+- ".reg .u32 %retval;\n"
+- ".reg .u64 %hr10;\n"
+- ".reg .u32 %r22;\n"
+- ".reg .u32 %r23;\n"
+- ".reg .u32 %r24;\n"
+- ".reg .u32 %r25;\n"
+- ".reg .u32 %r26;\n"
+- ".reg .u32 %r27;\n"
+- ".reg .u32 %r28;\n"
+- ".reg .u32 %r29;\n"
+- "mov.u32 %r26,0;\n"
+- "{\n"
+- ".param .u32 %retval_in;\n"
+- "{\n"
+- ".param .u32 %out_arg0;\n"
+- "st.param.u32 [%out_arg0],%r26;\n"
+- "call (%retval_in),GOACC_ntid,(%out_arg0);\n"
+- "}\n"
+- "ld.param.u32 %r27,[%retval_in];\n"
+- "}\n"
+- "mov.u32 %r22,%r27;\n"
+- "mov.u32 %r28,0;\n"
+- "{\n"
+- ".param .u32 %retval_in;\n"
+- "{\n"
+- ".param .u32 %out_arg0;\n"
+- "st.param.u32 [%out_arg0],%r28;\n"
+- "call (%retval_in),GOACC_nctaid,(%out_arg0);\n"
+- "}\n"
+- "ld.param.u32 %r29,[%retval_in];\n"
+- "}\n"
+- "mov.u32 %r23,%r29;\n"
+- "mul.lo.u32 %r24,%r22,%r23;\n"
+- "mov.u32 %r25,%r24;\n"
+- "mov.u32 %retval,%r25;\n"
+- "st.param.u32 [%out_retval],%retval;\n"
+- "ret;\n"
+- "}\n"
+- "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_thread_num\n"
+- ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n"
+- "{\n"
+- ".reg .u32 %retval;\n"
+- ".reg .u64 %hr10;\n"
+- ".reg .u32 %r22;\n"
+- ".reg .u32 %r23;\n"
+- ".reg .u32 %r24;\n"
+- ".reg .u32 %r25;\n"
+- ".reg .u32 %r26;\n"
+- ".reg .u32 %r27;\n"
+- ".reg .u32 %r28;\n"
+- ".reg .u32 %r29;\n"
+- ".reg .u32 %r30;\n"
+- ".reg .u32 %r31;\n"
+- ".reg .u32 %r32;\n"
+- ".reg .u32 %r33;\n"
+- "mov.u32 %r28,0;\n"
+- "{\n"
+- ".param .u32 %retval_in;\n"
+- "{\n"
+- ".param .u32 %out_arg0;\n"
+- "st.param.u32 [%out_arg0],%r28;\n"
+- "call (%retval_in),GOACC_ntid,(%out_arg0);\n"
+- "}\n"
+- "ld.param.u32 %r29,[%retval_in];\n"
+- "}\n"
+- "mov.u32 %r22,%r29;\n"
+- "mov.u32 %r30,0;\n"
+- "{\n"
+- ".param .u32 %retval_in;\n"
+- "{\n"
+- ".param .u32 %out_arg0;\n"
+- "st.param.u32 [%out_arg0],%r30;\n"
+- "call (%retval_in),GOACC_ctaid,(%out_arg0);\n"
+- "}\n"
+- "ld.param.u32 %r31,[%retval_in];\n"
+- "}\n"
+- "mov.u32 %r23,%r31;\n"
+- "mul.lo.u32 %r24,%r22,%r23;\n"
+- "mov.u32 %r32,0;\n"
+- "{\n"
+- ".param .u32 %retval_in;\n"
+- "{\n"
+- ".param .u32 %out_arg0;\n"
+- "st.param.u32 [%out_arg0],%r32;\n"
+- "call (%retval_in),GOACC_tid,(%out_arg0);\n"
+- "}\n"
+- "ld.param.u32 %r33,[%retval_in];\n"
+- "}\n"
+- "mov.u32 %r25,%r33;\n"
+- "add.u32 %r26,%r24,%r25;\n"
+- "mov.u32 %r27,%r26;\n"
+- "mov.u32 %retval,%r27;\n"
+- "st.param.u32 [%out_retval],%retval;\n"
+- "ret;\n"
+- "}\n");
+--- libgomp/config/nvptx/target.c.jj 2018-04-25 09:40:31.890655570 +0200
++++ libgomp/config/nvptx/target.c 2019-05-07 18:46:36.453110901 +0200
+@@ -47,3 +47,21 @@ GOMP_teams (unsigned int num_teams, unsi
+ }
+ gomp_num_teams_var = num_teams - 1;
+ }
++
++int
++omp_pause_resource (omp_pause_resource_t kind, int device_num)
++{
++ (void) kind; /* Both arguments are ignored. */
++ (void) device_num;
++ return -1; /* Always non-zero; OpenMP 5.0 defines zero as the success value. */
++}
++
++int
++omp_pause_resource_all (omp_pause_resource_t kind)
++{
++ (void) kind; /* The argument is ignored. */
++ return -1; /* Always non-zero; OpenMP 5.0 defines zero as the success value. */
++}
++
++ialias (omp_pause_resource)
++ialias (omp_pause_resource_all)
+--- libgomp/config/nvptx/icv-device.c.jj 2018-04-25 09:40:31.889655570 +0200
++++ libgomp/config/nvptx/icv-device.c 2019-05-07 18:46:36.453110901 +0200
+@@ -46,20 +46,6 @@ omp_get_num_devices (void)
+ }
+
+ int
+-omp_get_num_teams (void)
+-{
+- return gomp_num_teams_var + 1;
+-}
+-
+-int
+-omp_get_team_num (void)
+-{
+- int ctaid;
+- asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid));
+- return ctaid;
+-}
+-
+-int
+ omp_is_initial_device (void)
+ {
+ /* NVPTX is an accelerator-only target. */
+@@ -69,6 +55,4 @@ omp_is_initial_device (void)
+ ialias (omp_set_default_device)
+ ialias (omp_get_default_device)
+ ialias (omp_get_num_devices)
+-ialias (omp_get_num_teams)
+-ialias (omp_get_team_num)
+ ialias (omp_is_initial_device)
+--- libgomp/config/nvptx/affinity-fmt.c.jj 2019-05-07 18:46:36.358112419 +0200
++++ libgomp/config/nvptx/affinity-fmt.c 2019-05-07 18:46:36.358112419 +0200
+@@ -0,0 +1,51 @@
++/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
++
++ This file is part of the GNU Offloading and Multi Processing Library
++ (libgomp).
++
++ Libgomp is free software; you can redistribute it and/or modify it
++ under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 3, or (at your option)
++ any later version.
++
++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ more details.
++
++ Under Section 7 of GPL version 3, you are granted additional
++ permissions described in the GCC Runtime Library Exception, version
++ 3.1, as published by the Free Software Foundation.
++
++ You should have received a copy of the GNU General Public License and
++ a copy of the GCC Runtime Library Exception along with this program;
++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
++ <http://www.gnu.org/licenses/>. */
++
++#include "libgomp.h"
++#include <string.h>
++#include <stdio.h>
++#include <stdlib.h>
++#ifdef HAVE_UNISTD_H
++#include <unistd.h> /* Declares write (), the target of the fwrite override below. */
++#endif
++#ifdef HAVE_INTTYPES_H
++# include <inttypes.h> /* For PRIx64. */
++#endif
++#ifdef HAVE_UNAME
++#include <sys/utsname.h> /* uname () and struct utsname. */
++#endif
++
++/* The HAVE_GETPID and HAVE_GETHOSTNAME configure tests are passing for nvptx,
++ while the nvptx newlib implementation does not support those functions.
++ Override the configure test results here. */
++#undef HAVE_GETPID
++#undef HAVE_GETHOSTNAME
++
++/* The nvptx newlib implementation does not support fwrite, but it does support
++ write. Map fwrite to write. */
++#undef fwrite
++#define fwrite(ptr, size, nmemb, stream) write (1, (ptr), (nmemb) * (size))
++
++#include "../../affinity-fmt.c"
++
+--- libgomp/config/mingw32/affinity-fmt.c.jj 2019-05-07 18:46:36.344112642 +0200
++++ libgomp/config/mingw32/affinity-fmt.c 2019-05-07 18:46:36.344112642 +0200
+@@ -0,0 +1,68 @@
++/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
++ Contributed by Jakub Jelinek <jakub@redhat.com>.
++
++ This file is part of the GNU Offloading and Multi Processing Library
++ (libgomp).
++
++ Libgomp is free software; you can redistribute it and/or modify it
++ under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 3, or (at your option)
++ any later version.
++
++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ more details.
++
++ Under Section 7 of GPL version 3, you are granted additional
++ permissions described in the GCC Runtime Library Exception, version
++ 3.1, as published by the Free Software Foundation.
++
++ You should have received a copy of the GNU General Public License and
++ a copy of the GCC Runtime Library Exception along with this program;
++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
++ <http://www.gnu.org/licenses/>. */
++
++#include "libgomp.h"
++#include <string.h> /* strlen () and memcpy () in gomp_gethostname. */
++#include <stdio.h>
++#include <stdlib.h>
++#ifdef HAVE_UNISTD_H
++#include <unistd.h>
++#endif
++#ifdef HAVE_INTTYPES_H
++# include <inttypes.h> /* For PRIx64. */
++#endif
++#define WIN32_LEAN_AND_MEAN
++#include <windows.h> /* GetComputerName, DWORD, MAX_COMPUTERNAME_LENGTH. */
++#include <errno.h> /* errno and EINVAL. */
++
++static int
++gomp_gethostname (char *name, size_t len)
++{
++ /* On Win9x GetComputerName fails if the input size is less
++ than MAX_COMPUTERNAME_LENGTH + 1. */
++ char buffer[MAX_COMPUTERNAME_LENGTH + 1];
++ DWORD size = sizeof (buffer); /* Capacity handed to GetComputerName. */
++ int ret = 0; /* Stays 0 unless the name has to be truncated below. */
++
++ if (!GetComputerName (buffer, &size))
++ return -1; /* Lookup failed; NAME is left untouched. */
++
++ if ((size = strlen (buffer) + 1) > len) /* SIZE becomes the name length plus its NUL. */
++ {
++ errno = EINVAL;
++ /* Truncate as per POSIX spec. We do not NUL-terminate. */
++ size = len;
++ ret = -1;
++ }
++ memcpy (name, buffer, (size_t) size); /* Whole name with NUL if it fits, else only the first LEN bytes. */
++
++ return ret;
++}
++
++#undef gethostname
++#define gethostname gomp_gethostname
++#define HAVE_GETHOSTNAME 1
++
++#include "../../affinity-fmt.c"
+--- libgomp/config/rtems/bar.c.jj 2018-04-25 09:40:31.902655576 +0200
++++ libgomp/config/rtems/bar.c 2019-05-07 18:46:36.460110789 +0200
+@@ -72,184 +72,5 @@ do_wait (int *addr, int val)
+ futex_wait (addr, val);
+ }
+
+-/* Everything below this point should be identical to the Linux
+- implementation. */
+-
+-void
+-gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
+-{
+- if (__builtin_expect (state & BAR_WAS_LAST, 0))
+- {
+- /* Next time we'll be awaiting TOTAL threads again. */
+- bar->awaited = bar->total;
+- __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
+- MEMMODEL_RELEASE);
+- futex_wake ((int *) &bar->generation, INT_MAX);
+- }
+- else
+- {
+- do
+- do_wait ((int *) &bar->generation, state);
+- while (__atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE) == state);
+- }
+-}
+-
+-void
+-gomp_barrier_wait (gomp_barrier_t *bar)
+-{
+- gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
+-}
+-
+-/* Like gomp_barrier_wait, except that if the encountering thread
+- is not the last one to hit the barrier, it returns immediately.
+- The intended usage is that a thread which intends to gomp_barrier_destroy
+- this barrier calls gomp_barrier_wait, while all other threads
+- call gomp_barrier_wait_last. When gomp_barrier_wait returns,
+- the barrier can be safely destroyed. */
+-
+-void
+-gomp_barrier_wait_last (gomp_barrier_t *bar)
+-{
+- gomp_barrier_state_t state = gomp_barrier_wait_start (bar);
+- if (state & BAR_WAS_LAST)
+- gomp_barrier_wait_end (bar, state);
+-}
+-
+-void
+-gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
+-{
+- futex_wake ((int *) &bar->generation, count == 0 ? INT_MAX : count);
+-}
+-
+-void
+-gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
+-{
+- unsigned int generation, gen;
+-
+- if (__builtin_expect (state & BAR_WAS_LAST, 0))
+- {
+- /* Next time we'll be awaiting TOTAL threads again. */
+- struct gomp_thread *thr = gomp_thread ();
+- struct gomp_team *team = thr->ts.team;
+-
+- bar->awaited = bar->total;
+- team->work_share_cancelled = 0;
+- if (__builtin_expect (team->task_count, 0))
+- {
+- gomp_barrier_handle_tasks (state);
+- state &= ~BAR_WAS_LAST;
+- }
+- else
+- {
+- state &= ~BAR_CANCELLED;
+- state += BAR_INCR - BAR_WAS_LAST;
+- __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
+- futex_wake ((int *) &bar->generation, INT_MAX);
+- return;
+- }
+- }
+-
+- generation = state;
+- state &= ~BAR_CANCELLED;
+- do
+- {
+- do_wait ((int *) &bar->generation, generation);
+- gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+- if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
+- {
+- gomp_barrier_handle_tasks (state);
+- gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+- }
+- generation |= gen & BAR_WAITING_FOR_TASK;
+- }
+- while (gen != state + BAR_INCR);
+-}
+-
+-void
+-gomp_team_barrier_wait (gomp_barrier_t *bar)
+-{
+- gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
+-}
+-
+-void
+-gomp_team_barrier_wait_final (gomp_barrier_t *bar)
+-{
+- gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
+- if (__builtin_expect (state & BAR_WAS_LAST, 0))
+- bar->awaited_final = bar->total;
+- gomp_team_barrier_wait_end (bar, state);
+-}
+-
+-bool
+-gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
+- gomp_barrier_state_t state)
+-{
+- unsigned int generation, gen;
+-
+- if (__builtin_expect (state & BAR_WAS_LAST, 0))
+- {
+- /* Next time we'll be awaiting TOTAL threads again. */
+- /* BAR_CANCELLED should never be set in state here, because
+- cancellation means that at least one of the threads has been
+- cancelled, thus on a cancellable barrier we should never see
+- all threads to arrive. */
+- struct gomp_thread *thr = gomp_thread ();
+- struct gomp_team *team = thr->ts.team;
+-
+- bar->awaited = bar->total;
+- team->work_share_cancelled = 0;
+- if (__builtin_expect (team->task_count, 0))
+- {
+- gomp_barrier_handle_tasks (state);
+- state &= ~BAR_WAS_LAST;
+- }
+- else
+- {
+- state += BAR_INCR - BAR_WAS_LAST;
+- __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
+- futex_wake ((int *) &bar->generation, INT_MAX);
+- return false;
+- }
+- }
+-
+- if (__builtin_expect (state & BAR_CANCELLED, 0))
+- return true;
+-
+- generation = state;
+- do
+- {
+- do_wait ((int *) &bar->generation, generation);
+- gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+- if (__builtin_expect (gen & BAR_CANCELLED, 0))
+- return true;
+- if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
+- {
+- gomp_barrier_handle_tasks (state);
+- gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+- }
+- generation |= gen & BAR_WAITING_FOR_TASK;
+- }
+- while (gen != state + BAR_INCR);
+-
+- return false;
+-}
+-
+-bool
+-gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
+-{
+- return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
+-}
+-
+-void
+-gomp_team_barrier_cancel (struct gomp_team *team)
+-{
+- gomp_mutex_lock (&team->task_lock);
+- if (team->barrier.generation & BAR_CANCELLED)
+- {
+- gomp_mutex_unlock (&team->task_lock);
+- return;
+- }
+- team->barrier.generation |= BAR_CANCELLED;
+- gomp_mutex_unlock (&team->task_lock);
+- futex_wake ((int *) &team->barrier.generation, INT_MAX);
+-}
++#define GOMP_WAIT_H 1
++#include "../linux/bar.c"
+--- libgomp/config/rtems/affinity-fmt.c.jj 2019-05-07 18:46:36.459110805 +0200
++++ libgomp/config/rtems/affinity-fmt.c 2019-05-07 18:46:36.459110805 +0200
+@@ -0,0 +1,49 @@
++/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
++
++ This file is part of the GNU Offloading and Multi Processing Library
++ (libgomp).
++
++ Libgomp is free software; you can redistribute it and/or modify it
++ under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 3, or (at your option)
++ any later version.
++
++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ more details.
++
++ Under Section 7 of GPL version 3, you are granted additional
++ permissions described in the GCC Runtime Library Exception, version
++ 3.1, as published by the Free Software Foundation.
++
++ You should have received a copy of the GNU General Public License and
++ a copy of the GCC Runtime Library Exception along with this program;
++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
++ <http://www.gnu.org/licenses/>. */
++
++#include "libgomp.h"
++#include <string.h>
++#include <stdio.h>
++#include <stdlib.h>
++#ifdef HAVE_UNISTD_H
++#include <unistd.h> /* Declares write (), the target of the fwrite override below. */
++#endif
++#ifdef HAVE_INTTYPES_H
++# include <inttypes.h> /* For PRIx64. */
++#endif
++#ifdef HAVE_UNAME
++#include <sys/utsname.h> /* uname () and struct utsname. */
++#endif
++
++/* The HAVE_GETPID and HAVE_GETHOSTNAME configure tests are passing for RTEMS,
++ but the extra information they give are of little value for the user.
++ Override the configure test results here. */
++#undef HAVE_GETPID
++#undef HAVE_GETHOSTNAME
++
++/* Avoid the complex fwrite() in favour of the simple write(). */
++#undef fwrite
++#define fwrite(ptr, size, nmemb, stream) write (1, (ptr), (nmemb) * (size))
++
++#include "../../affinity-fmt.c"
+--- libgomp/config.h.in.jj 2018-04-25 09:40:31.870655561 +0200
++++ libgomp/config.h.in 2019-05-07 18:46:36.465110710 +0200
+@@ -1,5 +1,8 @@
+ /* config.h.in. Generated from configure.ac by autoheader. */
+
++/* Define to 1 if you have the `aligned_alloc' function. */
++#undef HAVE_ALIGNED_ALLOC
++
+ /* Define to 1 if the target assembler supports .symver directive. */
+ #undef HAVE_AS_SYMVER_DIRECTIVE
+
+@@ -33,9 +36,15 @@
+ /* Define to 1 if you have the `getgid' function. */
+ #undef HAVE_GETGID
+
++/* Define if gethostname is supported. */
++#undef HAVE_GETHOSTNAME
++
+ /* Define to 1 if you have the `getloadavg' function. */
+ #undef HAVE_GETLOADAVG
+
++/* Define if getpid is supported. */
++#undef HAVE_GETPID
++
+ /* Define to 1 if you have the `getuid' function. */
+ #undef HAVE_GETUID
+
+@@ -45,9 +54,15 @@
+ /* Define to 1 if you have the `dl' library (-ldl). */
+ #undef HAVE_LIBDL
+
++/* Define to 1 if you have the `memalign' function. */
++#undef HAVE_MEMALIGN
++
+ /* Define to 1 if you have the <memory.h> header file. */
+ #undef HAVE_MEMORY_H
+
++/* Define to 1 if you have the `posix_memalign' function. */
++#undef HAVE_POSIX_MEMALIGN
++
+ /* Define if pthread_{,attr_}{g,s}etaffinity_np is supported. */
+ #undef HAVE_PTHREAD_AFFINITY_NP
+
+@@ -103,9 +118,15 @@
+ /* Define to 1 if the target supports thread-local storage. */
+ #undef HAVE_TLS
+
++/* Define if uname is supported and struct utsname has nodename field. */
++#undef HAVE_UNAME
++
+ /* Define to 1 if you have the <unistd.h> header file. */
+ #undef HAVE_UNISTD_H
+
++/* Define to 1 if you have the `_aligned_malloc' function. */
++#undef HAVE__ALIGNED_MALLOC
++
+ /* Define to 1 if you have the `__secure_getenv' function. */
+ #undef HAVE___SECURE_GETENV
+
+@@ -125,8 +146,8 @@
+ */
+ #undef LT_OBJDIR
+
+-/* Define to offload targets, separated by commas. */
+-#undef OFFLOAD_TARGETS
++/* Define to offload plugins, separated by commas. */
++#undef OFFLOAD_PLUGINS
+
+ /* Name of package */
+ #undef PACKAGE
+--- libgomp/teams.c.jj 2019-05-07 18:46:36.548109384 +0200
++++ libgomp/teams.c 2019-05-07 18:46:36.548109384 +0200
+@@ -0,0 +1,74 @@
++/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
++ Contributed by Jakub Jelinek <jakub@redhat.com>.
++
++ This file is part of the GNU Offloading and Multi Processing Library
++ (libgomp).
++
++ Libgomp is free software; you can redistribute it and/or modify it
++ under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 3, or (at your option)
++ any later version.
++
++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ more details.
++
++ Under Section 7 of GPL version 3, you are granted additional
++ permissions described in the GCC Runtime Library Exception, version
++ 3.1, as published by the Free Software Foundation.
++
++ You should have received a copy of the GNU General Public License and
++ a copy of the GCC Runtime Library Exception along with this program;
++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
++ <http://www.gnu.org/licenses/>. */
++
++/* This file handles the host TEAMS construct. */
++
++#include "libgomp.h"
++#include <limits.h> /* INT_MAX and UINT_MAX for the thread_limit clamp. */
++
++static unsigned gomp_num_teams = 1, gomp_team_num = 0;
++
++void
++GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams,
++ unsigned int thread_limit, unsigned int flags)
++{
++ (void) flags; /* FLAGS is not used. */
++ (void) num_teams;
++ unsigned old_thread_limit_var = 0; /* Caller's thread-limit ICV, put back on exit. */
++ if (thread_limit)
++ {
++ struct gomp_task_icv *icv = gomp_icv (true);
++ old_thread_limit_var = icv->thread_limit_var;
++ icv->thread_limit_var /* Limits above INT_MAX are stored as UINT_MAX. */
++ = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
++ }
++ if (num_teams == 0)
++ num_teams = 3; /* Zero is replaced by 3 teams; presumably 0 means "unspecified" -- confirm. */
++ gomp_num_teams = num_teams;
++ for (gomp_team_num = 0; gomp_team_num < num_teams; gomp_team_num++)
++ fn (data); /* Teams run one after another on the calling thread. */
++ gomp_num_teams = 1; /* Back to the values the statics are initialised with. */
++ gomp_team_num = 0;
++ if (thread_limit)
++ {
++ struct gomp_task_icv *icv = gomp_icv (true);
++ icv->thread_limit_var = old_thread_limit_var;
++ }
++}
++
++int
++omp_get_num_teams (void)
++{
++ return gomp_num_teams; /* 1 outside GOMP_teams_reg; the team count while its loop runs. */
++}
++
++int
++omp_get_team_num (void)
++{
++ return gomp_team_num; /* 0 outside GOMP_teams_reg; inside it, the loop index of the team running FN. */
++}
++
++ialias (omp_get_num_teams)
++ialias (omp_get_team_num)
+--- libgomp/libgomp.map.jj 2018-04-25 09:40:31.321655307 +0200
++++ libgomp/libgomp.map 2019-05-07 18:46:36.525109751 +0200
+@@ -164,6 +164,22 @@ OMP_4.5 {
+ omp_target_disassociate_ptr;
+ } OMP_4.0;
+
++OMP_5.0 {
++ global:
++ omp_capture_affinity;
++ omp_capture_affinity_;
++ omp_display_affinity;
++ omp_display_affinity_;
++ omp_get_affinity_format;
++ omp_get_affinity_format_;
++ omp_set_affinity_format;
++ omp_set_affinity_format_;
++ omp_pause_resource;
++ omp_pause_resource_;
++ omp_pause_resource_all;
++ omp_pause_resource_all_;
++} OMP_4.5;
++
+ GOMP_1.0 {
+ global:
+ GOMP_atomic_end;
+@@ -298,6 +314,34 @@ GOMP_4.5 {
+ GOMP_parallel_loop_nonmonotonic_guided;
+ } GOMP_4.0.1;
+
++GOMP_5.0 {
++ global:
++ GOMP_loop_doacross_start;
++ GOMP_loop_maybe_nonmonotonic_runtime_next;
++ GOMP_loop_maybe_nonmonotonic_runtime_start;
++ GOMP_loop_nonmonotonic_runtime_next;
++ GOMP_loop_nonmonotonic_runtime_start;
++ GOMP_loop_ordered_start;
++ GOMP_loop_start;
++ GOMP_loop_ull_doacross_start;
++ GOMP_loop_ull_maybe_nonmonotonic_runtime_next;
++ GOMP_loop_ull_maybe_nonmonotonic_runtime_start;
++ GOMP_loop_ull_nonmonotonic_runtime_next;
++ GOMP_loop_ull_nonmonotonic_runtime_start;
++ GOMP_loop_ull_ordered_start;
++ GOMP_loop_ull_start;
++ GOMP_parallel_loop_maybe_nonmonotonic_runtime;
++ GOMP_parallel_loop_nonmonotonic_runtime;
++ GOMP_parallel_reductions;
++ GOMP_sections2_start;
++ GOMP_taskgroup_reduction_register;
++ GOMP_taskgroup_reduction_unregister;
++ GOMP_task_reduction_remap;
++ GOMP_taskwait_depend;
++ GOMP_teams_reg;
++ GOMP_workshare_task_reduction_unregister;
++} GOMP_4.5;
++
+ OACC_2.0 {
+ global:
+ acc_get_num_devices;
+@@ -386,6 +430,52 @@ OACC_2.0.1 {
+ acc_pcreate;
+ } OACC_2.0;
+
++OACC_2.5 {
++ global:
++ acc_copyin_async;
++ acc_copyin_async_32_h_;
++ acc_copyin_async_64_h_;
++ acc_copyin_async_array_h_;
++ acc_copyout_async;
++ acc_copyout_async_32_h_;
++ acc_copyout_async_64_h_;
++ acc_copyout_async_array_h_;
++ acc_copyout_finalize;
++ acc_copyout_finalize_32_h_;
++ acc_copyout_finalize_64_h_;
++ acc_copyout_finalize_array_h_;
++ acc_copyout_finalize_async;
++ acc_copyout_finalize_async_32_h_;
++ acc_copyout_finalize_async_64_h_;
++ acc_copyout_finalize_async_array_h_;
++ acc_create_async;
++ acc_create_async_32_h_;
++ acc_create_async_64_h_;
++ acc_create_async_array_h_;
++ acc_delete_async;
++ acc_delete_async_32_h_;
++ acc_delete_async_64_h_;
++ acc_delete_async_array_h_;
++ acc_delete_finalize;
++ acc_delete_finalize_32_h_;
++ acc_delete_finalize_64_h_;
++ acc_delete_finalize_array_h_;
++ acc_delete_finalize_async;
++ acc_delete_finalize_async_32_h_;
++ acc_delete_finalize_async_64_h_;
++ acc_delete_finalize_async_array_h_;
++ acc_memcpy_from_device_async;
++ acc_memcpy_to_device_async;
++ acc_update_device_async;
++ acc_update_device_async_32_h_;
++ acc_update_device_async_64_h_;
++ acc_update_device_async_array_h_;
++ acc_update_self_async;
++ acc_update_self_async_32_h_;
++ acc_update_self_async_64_h_;
++ acc_update_self_async_array_h_;
++} OACC_2.0.1;
++
+ GOACC_2.0 {
+ global:
+ GOACC_data_end;
+@@ -420,3 +510,8 @@ GOMP_PLUGIN_1.1 {
+ global:
+ GOMP_PLUGIN_target_task_completion;
+ } GOMP_PLUGIN_1.0;
++
++GOMP_PLUGIN_1.2 {
++ global:
++ GOMP_PLUGIN_acc_default_dim;
++} GOMP_PLUGIN_1.1;
+--- libgomp/oacc-async.c.jj 2018-04-25 09:40:31.925655587 +0200
++++ libgomp/oacc-async.c 2019-05-07 18:46:36.528109704 +0200
+@@ -34,7 +34,7 @@
+ int
+ acc_async_test (int async)
+ {
+- if (async < acc_async_sync)
++ if (!async_valid_p (async))
+ gomp_fatal ("invalid async argument: %d", async);
+
+ struct goacc_thread *thr = goacc_thread ();
+@@ -59,7 +59,7 @@ acc_async_test_all (void)
+ void
+ acc_wait (int async)
+ {
+- if (async < acc_async_sync)
++ if (!async_valid_p (async))
+ gomp_fatal ("invalid async argument: %d", async);
+
+ struct goacc_thread *thr = goacc_thread ();
+@@ -117,7 +117,7 @@ acc_async_wait_all (void)
+ void
+ acc_wait_all_async (int async)
+ {
+- if (async < acc_async_sync)
++ if (!async_valid_p (async))
+ gomp_fatal ("invalid async argument: %d", async);
+
+ struct goacc_thread *thr = goacc_thread ();
+--- libgomp/loop_ull.c.jj 2018-04-25 09:40:31.912655580 +0200
++++ libgomp/loop_ull.c 2019-05-07 18:46:36.527109719 +0200
+@@ -27,8 +27,12 @@
+
+ #include <limits.h>
+ #include <stdlib.h>
++#include <string.h>
+ #include "libgomp.h"
+
++ialias (GOMP_loop_ull_runtime_next)
++ialias_redirect (GOMP_taskgroup_reduction_register)
++
+ typedef unsigned long long gomp_ull;
+
+ /* Initialize the given work share construct from the given arguments. */
+@@ -104,7 +108,7 @@ gomp_loop_ull_static_start (bool up, gom
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ GFS_STATIC, chunk_size);
+@@ -122,7 +126,7 @@ gomp_loop_ull_dynamic_start (bool up, go
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ GFS_DYNAMIC, chunk_size);
+@@ -148,7 +152,7 @@ gomp_loop_ull_guided_start (bool up, gom
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ GFS_GUIDED, chunk_size);
+@@ -171,7 +175,7 @@ GOMP_loop_ull_runtime_start (bool up, go
+ gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
+ {
+ struct gomp_task_icv *icv = gomp_icv (false);
+- switch (icv->run_sched_var)
++ switch (icv->run_sched_var & ~GFS_MONOTONIC)
+ {
+ case GFS_STATIC:
+ return gomp_loop_ull_static_start (up, start, end, incr,
+@@ -195,6 +199,99 @@ GOMP_loop_ull_runtime_start (bool up, go
+ }
+ }
+
++static long
++gomp_adjust_sched (long sched, gomp_ull *chunk_size)
++{
++ sched &= ~GFS_MONOTONIC;
++ switch (sched)
++ {
++ case GFS_STATIC:
++ case GFS_DYNAMIC:
++ case GFS_GUIDED:
++ return sched;
++ /* GFS_RUNTIME is used for runtime schedule without monotonic
++ or nonmonotonic modifiers on the clause.
++ GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
++ modifier. */
++ case GFS_RUNTIME:
++ /* GFS_AUTO is used for runtime schedule with nonmonotonic
++ modifier. */
++ case GFS_AUTO:
++ {
++ struct gomp_task_icv *icv = gomp_icv (false);
++ sched = icv->run_sched_var & ~GFS_MONOTONIC;
++ switch (sched)
++ {
++ case GFS_STATIC:
++ case GFS_DYNAMIC:
++ case GFS_GUIDED:
++ *chunk_size = icv->run_sched_chunk_size;
++ break;
++ case GFS_AUTO:
++ sched = GFS_STATIC;
++ *chunk_size = 0;
++ break;
++ default:
++ abort ();
++ }
++ return sched;
++ }
++ default:
++ abort ();
++ }
++}
++
++bool
++GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
++ gomp_ull incr, long sched, gomp_ull chunk_size,
++ gomp_ull *istart, gomp_ull *iend,
++ uintptr_t *reductions, void **mem)
++{
++ struct gomp_thread *thr = gomp_thread ();
++
++ thr->ts.static_trip = 0;
++ if (reductions)
++ gomp_workshare_taskgroup_start ();
++ if (gomp_work_share_start (0))
++ {
++ sched = gomp_adjust_sched (sched, &chunk_size);
++ gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
++ sched, chunk_size);
++ if (reductions)
++ {
++ GOMP_taskgroup_reduction_register (reductions);
++ thr->task->taskgroup->workshare = true;
++ thr->ts.work_share->task_reductions = reductions;
++ }
++ if (mem)
++ {
++ uintptr_t size = (uintptr_t) *mem;
++ if (size > (sizeof (struct gomp_work_share)
++ - offsetof (struct gomp_work_share,
++ inline_ordered_team_ids)))
++ thr->ts.work_share->ordered_team_ids
++ = gomp_malloc_cleared (size);
++ else
++ memset (thr->ts.work_share->ordered_team_ids, '\0', size);
++ *mem = (void *) thr->ts.work_share->ordered_team_ids;
++ }
++ gomp_work_share_init_done ();
++ }
++ else
++ {
++ if (reductions)
++ {
++ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++ gomp_workshare_task_reduction_register (reductions,
++ first_reductions);
++ }
++ if (mem)
++ *mem = (void *) thr->ts.work_share->ordered_team_ids;
++ }
++
++ return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
++}
++
+ /* The *_ordered_*_start routines are similar. The only difference is that
+ this work-share construct is initialized to expect an ORDERED section. */
+
+@@ -206,7 +303,7 @@ gomp_loop_ull_ordered_static_start (bool
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+- if (gomp_work_share_start (true))
++ if (gomp_work_share_start (1))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ GFS_STATIC, chunk_size);
+@@ -225,7 +322,7 @@ gomp_loop_ull_ordered_dynamic_start (boo
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (true))
++ if (gomp_work_share_start (1))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ GFS_DYNAMIC, chunk_size);
+@@ -251,7 +348,7 @@ gomp_loop_ull_ordered_guided_start (bool
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (true))
++ if (gomp_work_share_start (1))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ GFS_GUIDED, chunk_size);
+@@ -275,7 +372,7 @@ GOMP_loop_ull_ordered_runtime_start (boo
+ gomp_ull *iend)
+ {
+ struct gomp_task_icv *icv = gomp_icv (false);
+- switch (icv->run_sched_var)
++ switch (icv->run_sched_var & ~GFS_MONOTONIC)
+ {
+ case GFS_STATIC:
+ return gomp_loop_ull_ordered_static_start (up, start, end, incr,
+@@ -299,6 +396,82 @@ GOMP_loop_ull_ordered_runtime_start (boo
+ }
+ }
+
++bool
++GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
++ gomp_ull incr, long sched, gomp_ull chunk_size,
++ gomp_ull *istart, gomp_ull *iend,
++ uintptr_t *reductions, void **mem)
++{
++ struct gomp_thread *thr = gomp_thread ();
++ size_t ordered = 1;
++ bool ret;
++
++ thr->ts.static_trip = 0;
++ if (reductions)
++ gomp_workshare_taskgroup_start ();
++ if (mem)
++ ordered += (uintptr_t) *mem;
++ if (gomp_work_share_start (ordered))
++ {
++ sched = gomp_adjust_sched (sched, &chunk_size);
++ gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
++ sched, chunk_size);
++ if (reductions)
++ {
++ GOMP_taskgroup_reduction_register (reductions);
++ thr->task->taskgroup->workshare = true;
++ thr->ts.work_share->task_reductions = reductions;
++ }
++ if (sched == GFS_STATIC)
++ gomp_ordered_static_init ();
++ else
++ gomp_mutex_lock (&thr->ts.work_share->lock);
++ gomp_work_share_init_done ();
++ }
++ else
++ {
++ if (reductions)
++ {
++ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++ gomp_workshare_task_reduction_register (reductions,
++ first_reductions);
++ }
++ sched = thr->ts.work_share->sched;
++ if (sched != GFS_STATIC)
++ gomp_mutex_lock (&thr->ts.work_share->lock);
++ }
++
++ if (mem)
++ {
++ uintptr_t p
++ = (uintptr_t) (thr->ts.work_share->ordered_team_ids
++ + (thr->ts.team ? thr->ts.team->nthreads : 1));
++ p += __alignof__ (long long) - 1;
++ p &= ~(__alignof__ (long long) - 1);
++ *mem = (void *) p;
++ }
++
++ switch (sched)
++ {
++ case GFS_STATIC:
++ case GFS_AUTO:
++ return !gomp_iter_ull_static_next (istart, iend);
++ case GFS_DYNAMIC:
++ ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
++ break;
++ case GFS_GUIDED:
++ ret = gomp_iter_ull_guided_next_locked (istart, iend);
++ break;
++ default:
++ abort ();
++ }
++
++ if (ret)
++ gomp_ordered_first ();
++ gomp_mutex_unlock (&thr->ts.work_share->lock);
++ return ret;
++}
++
+ /* The *_doacross_*_start routines are similar. The only difference is that
+ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
+ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
+@@ -313,11 +486,11 @@ gomp_loop_ull_doacross_static_start (uns
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+ GFS_STATIC, chunk_size);
+- gomp_doacross_ull_init (ncounts, counts, chunk_size);
++ gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
+ gomp_work_share_init_done ();
+ }
+
+@@ -332,11 +505,11 @@ gomp_loop_ull_doacross_dynamic_start (un
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+ GFS_DYNAMIC, chunk_size);
+- gomp_doacross_ull_init (ncounts, counts, chunk_size);
++ gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
+ gomp_work_share_init_done ();
+ }
+
+@@ -359,11 +532,11 @@ gomp_loop_ull_doacross_guided_start (uns
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+- if (gomp_work_share_start (false))
++ if (gomp_work_share_start (0))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+ GFS_GUIDED, chunk_size);
+- gomp_doacross_ull_init (ncounts, counts, chunk_size);
++ gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
+ gomp_work_share_init_done ();
+ }
+
+@@ -383,7 +556,7 @@ GOMP_loop_ull_doacross_runtime_start (un
+ gomp_ull *istart, gomp_ull *iend)
+ {
+ struct gomp_task_icv *icv = gomp_icv (false);
+- switch (icv->run_sched_var)
++ switch (icv->run_sched_var & ~GFS_MONOTONIC)
+ {
+ case GFS_STATIC:
+ return gomp_loop_ull_doacross_static_start (ncounts, counts,
+@@ -407,6 +580,51 @@ GOMP_loop_ull_doacross_runtime_start (un
+ }
+ }
+
++bool
++GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
++ long sched, gomp_ull chunk_size,
++ gomp_ull *istart, gomp_ull *iend,
++ uintptr_t *reductions, void **mem)
++{
++ struct gomp_thread *thr = gomp_thread ();
++
++ thr->ts.static_trip = 0;
++ if (reductions)
++ gomp_workshare_taskgroup_start ();
++ if (gomp_work_share_start (0))
++ {
++ size_t extra = 0;
++ if (mem)
++ extra = (uintptr_t) *mem;
++ sched = gomp_adjust_sched (sched, &chunk_size);
++ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
++ sched, chunk_size);
++ gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
++ if (reductions)
++ {
++ GOMP_taskgroup_reduction_register (reductions);
++ thr->task->taskgroup->workshare = true;
++ thr->ts.work_share->task_reductions = reductions;
++ }
++ gomp_work_share_init_done ();
++ }
++ else
++ {
++ if (reductions)
++ {
++ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++ gomp_workshare_task_reduction_register (reductions,
++ first_reductions);
++ }
++ sched = thr->ts.work_share->sched;
++ }
++
++ if (mem)
++ *mem = thr->ts.work_share->doacross->extra;
++
++ return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
++}
++
+ /* The *_next routines are called when the thread completes processing of
+ the iteration block currently assigned to it. If the work-share
+ construct is bound directly to a parallel construct, then the iteration
+@@ -570,6 +788,10 @@ extern __typeof(gomp_loop_ull_dynamic_st
+ __attribute__((alias ("gomp_loop_ull_dynamic_start")));
+ extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
+ __attribute__((alias ("gomp_loop_ull_guided_start")));
++extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
++ __attribute__((alias ("GOMP_loop_ull_runtime_start")));
++extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
++ __attribute__((alias ("GOMP_loop_ull_runtime_start")));
+
+ extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
+ __attribute__((alias ("gomp_loop_ull_ordered_static_start")));
+@@ -595,6 +817,10 @@ extern __typeof(gomp_loop_ull_dynamic_ne
+ __attribute__((alias ("gomp_loop_ull_dynamic_next")));
+ extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
+ __attribute__((alias ("gomp_loop_ull_guided_next")));
++extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
++ __attribute__((alias ("GOMP_loop_ull_runtime_next")));
++extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
++ __attribute__((alias ("GOMP_loop_ull_runtime_next")));
+
+ extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
+ __attribute__((alias ("gomp_loop_ull_ordered_static_next")));
+@@ -650,6 +876,23 @@ GOMP_loop_ull_nonmonotonic_guided_start
+ }
+
+ bool
++GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
++ gomp_ull end, gomp_ull incr,
++ gomp_ull *istart, gomp_ull *iend)
++{
++ return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
++}
++
++bool
++GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
++ gomp_ull end, gomp_ull incr,
++ gomp_ull *istart,
++ gomp_ull *iend)
++{
++ return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
++}
++
++bool
+ GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
+ gomp_ull incr, gomp_ull chunk_size,
+ gomp_ull *istart, gomp_ull *iend)
+@@ -734,6 +977,19 @@ GOMP_loop_ull_nonmonotonic_guided_next (
+ }
+
+ bool
++GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
++{
++ return GOMP_loop_ull_runtime_next (istart, iend);
++}
++
++bool
++GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
++ gomp_ull *iend)
++{
++ return GOMP_loop_ull_runtime_next (istart, iend);
++}
++
++bool
+ GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
+ {
+ return gomp_loop_ull_ordered_static_next (istart, iend);
+--- libgomp/oacc-int.h.jj 2018-04-25 09:40:31.320655306 +0200
++++ libgomp/oacc-int.h 2019-05-07 18:46:36.529109688 +0200
+@@ -99,6 +99,28 @@ void goacc_restore_bind (void);
+ void goacc_lazy_initialize (void);
+ void goacc_host_init (void);
+
++static inline bool
++async_valid_stream_id_p (int async)
++{
++ return async >= 0;
++}
++
++static inline bool
++async_valid_p (int async)
++{
++ return (async == acc_async_noval || async == acc_async_sync
++ || async_valid_stream_id_p (async));
++}
++
++static inline bool
++async_synchronous_p (int async)
++{
++ if (!async_valid_p (async))
++ return true;
++
++ return async == acc_async_sync;
++}
++
+ #ifdef HAVE_ATTRIBUTE_VISIBILITY
+ # pragma GCC visibility pop
+ #endif
+--- libgomp/testsuite/Makefile.in.jj 2018-04-25 09:40:31.452655368 +0200
++++ libgomp/testsuite/Makefile.in 2019-05-07 18:51:35.754330084 +0200
+@@ -223,6 +223,7 @@ mkdir_p = @mkdir_p@
+ multi_basedir = @multi_basedir@
+ offload_additional_lib_paths = @offload_additional_lib_paths@
+ offload_additional_options = @offload_additional_options@
++offload_plugins = @offload_plugins@
+ offload_targets = @offload_targets@
+ oldincludedir = @oldincludedir@
+ pdfdir = @pdfdir@
+--- libgomp/task.c.jj 2018-04-25 09:40:31.925655587 +0200
++++ libgomp/task.c 2019-05-07 18:46:36.547109400 +0200
+@@ -166,21 +166,72 @@ gomp_task_handle_depend (struct gomp_tas
+ void **depend)
+ {
+ size_t ndepend = (uintptr_t) depend[0];
+- size_t nout = (uintptr_t) depend[1];
+ size_t i;
+ hash_entry_type ent;
+
++ if (ndepend)
++ {
++ /* depend[0] is total # */
++ size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
++ /* ndepend - nout is # of in: */
++ for (i = 0; i < ndepend; i++)
++ {
++ task->depend[i].addr = depend[2 + i];
++ task->depend[i].is_in = i >= nout;
++ }
++ }
++ else
++ {
++ ndepend = (uintptr_t) depend[1]; /* total # */
++ size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
++ size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
++ /* For now we treat mutexinoutset like out, which is compliant, but
++ inefficient. */
++ size_t nin = (uintptr_t) depend[4]; /* # of in: */
++ /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
++ size_t normal = nout + nmutexinoutset + nin;
++ size_t n = 0;
++ for (i = normal; i < ndepend; i++)
++ {
++ void **d = (void **) (uintptr_t) depend[5 + i];
++ switch ((uintptr_t) d[1])
++ {
++ case GOMP_DEPEND_OUT:
++ case GOMP_DEPEND_INOUT:
++ case GOMP_DEPEND_MUTEXINOUTSET:
++ break;
++ case GOMP_DEPEND_IN:
++ continue;
++ default:
++ gomp_fatal ("unknown omp_depend_t dependence type %d",
++ (int) (uintptr_t) d[1]);
++ }
++ task->depend[n].addr = d[0];
++ task->depend[n++].is_in = 0;
++ }
++ for (i = 0; i < normal; i++)
++ {
++ task->depend[n].addr = depend[5 + i];
++ task->depend[n++].is_in = i >= nout + nmutexinoutset;
++ }
++ for (i = normal; i < ndepend; i++)
++ {
++ void **d = (void **) (uintptr_t) depend[5 + i];
++ if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
++ continue;
++ task->depend[n].addr = d[0];
++ task->depend[n++].is_in = 1;
++ }
++ }
+ task->depend_count = ndepend;
+ task->num_dependees = 0;
+ if (parent->depend_hash == NULL)
+ parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
+ for (i = 0; i < ndepend; i++)
+ {
+- task->depend[i].addr = depend[2 + i];
+ task->depend[i].next = NULL;
+ task->depend[i].prev = NULL;
+ task->depend[i].task = task;
+- task->depend[i].is_in = i >= nout;
+ task->depend[i].redundant = false;
+ task->depend[i].redundant_out = false;
+
+@@ -205,7 +256,7 @@ gomp_task_handle_depend (struct gomp_tas
+ last = ent;
+
+ /* depend(in:...) doesn't depend on earlier depend(in:...). */
+- if (i >= nout && ent->is_in)
++ if (task->depend[i].is_in && ent->is_in)
+ continue;
+
+ if (!ent->is_in)
+@@ -280,9 +331,18 @@ gomp_task_handle_depend (struct gomp_tas
+ then the task may be executed by any member of the team.
+
+ DEPEND is an array containing:
++ if depend[0] is non-zero, then:
+ depend[0]: number of depend elements.
+- depend[1]: number of depend elements of type "out".
+- depend[2..N+1]: address of [1..N]th depend element. */
++ depend[1]: number of depend elements of type "out/inout".
++ depend[2..N+1]: address of [1..N]th depend element.
++ otherwise, when depend[0] is zero, then:
++ depend[1]: number of depend elements.
++ depend[2]: number of depend elements of type "out/inout".
++ depend[3]: number of depend elements of type "mutexinoutset".
++ depend[4]: number of depend elements of type "in".
++ depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
++ depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
++ omp_depend_t objects. */
+
+ void
+ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
+@@ -303,10 +363,20 @@ GOMP_task (void (*fn) (void *), void *da
+ #endif
+
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+- if (team
+- && (gomp_team_barrier_cancelled (&team->barrier)
+- || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+- return;
++ if (__builtin_expect (gomp_cancel_var, 0) && team)
++ {
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ return;
++ if (thr->task->taskgroup)
++ {
++ if (thr->task->taskgroup->cancelled)
++ return;
++ if (thr->task->taskgroup->workshare
++ && thr->task->taskgroup->prev
++ && thr->task->taskgroup->prev->cancelled)
++ return;
++ }
++ }
+
+ if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
+ priority = 0;
+@@ -377,7 +447,7 @@ GOMP_task (void (*fn) (void *), void *da
+ size_t depend_size = 0;
+
+ if (flags & GOMP_TASK_FLAG_DEPEND)
+- depend_size = ((uintptr_t) depend[0]
++ depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
+ * sizeof (struct gomp_task_depend_entry));
+ task = gomp_malloc (sizeof (*task) + depend_size
+ + arg_size + arg_align - 1);
+@@ -404,14 +474,26 @@ GOMP_task (void (*fn) (void *), void *da
+ gomp_mutex_lock (&team->task_lock);
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+- if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
+- || (taskgroup && taskgroup->cancelled))
+- && !task->copy_ctors_done, 0))
++ if (__builtin_expect (gomp_cancel_var, 0)
++ && !task->copy_ctors_done)
+ {
+- gomp_mutex_unlock (&team->task_lock);
+- gomp_finish_task (task);
+- free (task);
+- return;
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ {
++ do_cancel:
++ gomp_mutex_unlock (&team->task_lock);
++ gomp_finish_task (task);
++ free (task);
++ return;
++ }
++ if (taskgroup)
++ {
++ if (taskgroup->cancelled)
++ goto do_cancel;
++ if (taskgroup->workshare
++ && taskgroup->prev
++ && taskgroup->prev->cancelled)
++ goto do_cancel;
++ }
+ }
+ if (taskgroup)
+ taskgroup->num_children++;
+@@ -463,6 +545,7 @@ GOMP_task (void (*fn) (void *), void *da
+
+ ialias (GOMP_taskgroup_start)
+ ialias (GOMP_taskgroup_end)
++ialias (GOMP_taskgroup_reduction_register)
+
+ #define TYPE long
+ #define UTYPE unsigned long
+@@ -601,10 +684,20 @@ gomp_create_target_task (struct gomp_dev
+ struct gomp_team *team = thr->ts.team;
+
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+- if (team
+- && (gomp_team_barrier_cancelled (&team->barrier)
+- || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+- return true;
++ if (__builtin_expect (gomp_cancel_var, 0) && team)
++ {
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ return true;
++ if (thr->task->taskgroup)
++ {
++ if (thr->task->taskgroup->cancelled)
++ return true;
++ if (thr->task->taskgroup->workshare
++ && thr->task->taskgroup->prev
++ && thr->task->taskgroup->prev->cancelled)
++ return true;
++ }
++ }
+
+ struct gomp_target_task *ttask;
+ struct gomp_task *task;
+@@ -617,7 +710,7 @@ gomp_create_target_task (struct gomp_dev
+
+ if (depend != NULL)
+ {
+- depend_cnt = (uintptr_t) depend[0];
++ depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
+ depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
+ }
+ if (fn)
+@@ -687,13 +780,25 @@ gomp_create_target_task (struct gomp_dev
+ task->final_task = 0;
+ gomp_mutex_lock (&team->task_lock);
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+- if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
+- || (taskgroup && taskgroup->cancelled), 0))
++ if (__builtin_expect (gomp_cancel_var, 0))
+ {
+- gomp_mutex_unlock (&team->task_lock);
+- gomp_finish_task (task);
+- free (task);
+- return true;
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ {
++ do_cancel:
++ gomp_mutex_unlock (&team->task_lock);
++ gomp_finish_task (task);
++ free (task);
++ return true;
++ }
++ if (taskgroup)
++ {
++ if (taskgroup->cancelled)
++ goto do_cancel;
++ if (taskgroup->workshare
++ && taskgroup->prev
++ && taskgroup->prev->cancelled)
++ goto do_cancel;
++ }
+ }
+ if (depend_size)
+ {
+@@ -986,10 +1091,21 @@ gomp_task_run_pre (struct gomp_task *chi
+
+ if (--team->task_queued_count == 0)
+ gomp_team_barrier_clear_task_pending (&team->barrier);
+- if ((gomp_team_barrier_cancelled (&team->barrier)
+- || (taskgroup && taskgroup->cancelled))
++ if (__builtin_expect (gomp_cancel_var, 0)
+ && !child_task->copy_ctors_done)
+- return true;
++ {
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ return true;
++ if (taskgroup)
++ {
++ if (taskgroup->cancelled)
++ return true;
++ if (taskgroup->workshare
++ && taskgroup->prev
++ && taskgroup->prev->cancelled)
++ return true;
++ }
++ }
+ return false;
+ }
+
+@@ -1456,6 +1572,35 @@ GOMP_taskwait (void)
+ }
+ }
+
++/* Called when encountering a taskwait directive with depend clause(s).
++ Wait as if it were a mergeable included task construct with empty body. */
++
++void
++GOMP_taskwait_depend (void **depend)
++{
++ struct gomp_thread *thr = gomp_thread ();
++ struct gomp_team *team = thr->ts.team;
++
++ /* If parallel or taskgroup has been cancelled, return early. */
++ if (__builtin_expect (gomp_cancel_var, 0) && team)
++ {
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ return;
++ if (thr->task->taskgroup)
++ {
++ if (thr->task->taskgroup->cancelled)
++ return;
++ if (thr->task->taskgroup->workshare
++ && thr->task->taskgroup->prev
++ && thr->task->taskgroup->prev->cancelled)
++ return;
++ }
++ }
++
++ if (thr->task && thr->task->depend_hash)
++ gomp_task_maybe_wait_for_dependencies (depend);
++}
++
+ /* An undeferred task is about to run. Wait for all tasks that this
+ undeferred task depends on.
+
+@@ -1464,7 +1609,7 @@ GOMP_taskwait (void)
+ the scheduling queues. Then we iterate through these imminently
+ ready tasks (and possibly other high priority tasks), and run them.
+ If we run out of ready dependencies to execute, we either wait for
+- the reamining dependencies to finish, or wait for them to get
++ the remaining dependencies to finish, or wait for them to get
+ scheduled so we can run them.
+
+ DEPEND is as in GOMP_task. */
+@@ -1477,21 +1622,50 @@ gomp_task_maybe_wait_for_dependencies (v
+ struct gomp_team *team = thr->ts.team;
+ struct gomp_task_depend_entry elem, *ent = NULL;
+ struct gomp_taskwait taskwait;
+- size_t ndepend = (uintptr_t) depend[0];
++ size_t orig_ndepend = (uintptr_t) depend[0];
+ size_t nout = (uintptr_t) depend[1];
++ size_t ndepend = orig_ndepend;
++ size_t normal = ndepend;
++ size_t n = 2;
+ size_t i;
+ size_t num_awaited = 0;
+ struct gomp_task *child_task = NULL;
+ struct gomp_task *to_free = NULL;
+ int do_wake = 0;
+
++ if (ndepend == 0)
++ {
++ ndepend = nout;
++ nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
++ normal = nout + (uintptr_t) depend[4];
++ n = 5;
++ }
+ gomp_mutex_lock (&team->task_lock);
+ for (i = 0; i < ndepend; i++)
+ {
+- elem.addr = depend[i + 2];
++ elem.addr = depend[i + n];
++ elem.is_in = i >= nout;
++ if (__builtin_expect (i >= normal, 0))
++ {
++ void **d = (void **) elem.addr;
++ switch ((uintptr_t) d[1])
++ {
++ case GOMP_DEPEND_IN:
++ break;
++ case GOMP_DEPEND_OUT:
++ case GOMP_DEPEND_INOUT:
++ case GOMP_DEPEND_MUTEXINOUTSET:
++ elem.is_in = 0;
++ break;
++ default:
++ gomp_fatal ("unknown omp_depend_t dependence type %d",
++ (int) (uintptr_t) d[1]);
++ }
++ elem.addr = d[0];
++ }
+ ent = htab_find (task->depend_hash, &elem);
+ for (; ent; ent = ent->next)
+- if (i >= nout && ent->is_in)
++ if (elem.is_in && ent->is_in)
+ continue;
+ else
+ {
+@@ -1654,13 +1828,28 @@ GOMP_taskyield (void)
+ /* Nothing at the moment. */
+ }
+
++static inline struct gomp_taskgroup *
++gomp_taskgroup_init (struct gomp_taskgroup *prev)
++{
++ struct gomp_taskgroup *taskgroup
++ = gomp_malloc (sizeof (struct gomp_taskgroup));
++ taskgroup->prev = prev;
++ priority_queue_init (&taskgroup->taskgroup_queue);
++ taskgroup->reductions = prev ? prev->reductions : NULL;
++ taskgroup->in_taskgroup_wait = false;
++ taskgroup->cancelled = false;
++ taskgroup->workshare = false;
++ taskgroup->num_children = 0;
++ gomp_sem_init (&taskgroup->taskgroup_sem, 0);
++ return taskgroup;
++}
++
+ void
+ GOMP_taskgroup_start (void)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ struct gomp_task *task = thr->task;
+- struct gomp_taskgroup *taskgroup;
+
+ /* If team is NULL, all tasks are executed as
+ GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
+@@ -1668,14 +1857,7 @@ GOMP_taskgroup_start (void)
+ by the time GOMP_taskgroup_end is called. */
+ if (team == NULL)
+ return;
+- taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
+- taskgroup->prev = task->taskgroup;
+- priority_queue_init (&taskgroup->taskgroup_queue);
+- taskgroup->in_taskgroup_wait = false;
+- taskgroup->cancelled = false;
+- taskgroup->num_children = 0;
+- gomp_sem_init (&taskgroup->taskgroup_sem, 0);
+- task->taskgroup = taskgroup;
++ task->taskgroup = gomp_taskgroup_init (task->taskgroup);
+ }
+
+ void
+@@ -1840,6 +2022,302 @@ GOMP_taskgroup_end (void)
+ free (taskgroup);
+ }
+
++static inline __attribute__((always_inline)) void
++gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
++ unsigned nthreads)
++{
++ size_t total_cnt = 0;
++ uintptr_t *d = data;
++ struct htab *old_htab = NULL, *new_htab;
++ do
++ {
++ if (__builtin_expect (orig != NULL, 0))
++ {
++ /* For worksharing task reductions, memory has been allocated
++ already by some other thread that encountered the construct
++ earlier. */
++ d[2] = orig[2];
++ d[6] = orig[6];
++ orig = (uintptr_t *) orig[4];
++ }
++ else
++ {
++ size_t sz = d[1] * nthreads;
++ /* Should use omp_alloc if d[3] is not -1. */
++ void *ptr = gomp_aligned_alloc (d[2], sz);
++ memset (ptr, '\0', sz);
++ d[2] = (uintptr_t) ptr;
++ d[6] = d[2] + sz;
++ }
++ d[5] = 0;
++ total_cnt += d[0];
++ if (d[4] == 0)
++ {
++ d[4] = (uintptr_t) old;
++ break;
++ }
++ else
++ d = (uintptr_t *) d[4];
++ }
++ while (1);
++ if (old && old[5])
++ {
++ old_htab = (struct htab *) old[5];
++ total_cnt += htab_elements (old_htab);
++ }
++ new_htab = htab_create (total_cnt);
++ if (old_htab)
++ {
++ /* Copy old hash table, like in htab_expand. */
++ hash_entry_type *p, *olimit;
++ new_htab->n_elements = htab_elements (old_htab);
++ olimit = old_htab->entries + old_htab->size;
++ p = old_htab->entries;
++ do
++ {
++ hash_entry_type x = *p;
++ if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
++ *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
++ p++;
++ }
++ while (p < olimit);
++ }
++ d = data;
++ do
++ {
++ size_t j;
++ for (j = 0; j < d[0]; ++j)
++ {
++ uintptr_t *p = d + 7 + j * 3;
++ p[2] = (uintptr_t) d;
++ /* Ugly hack, hash_entry_type is defined for the task dependencies,
++ which hash on the first element which is a pointer. We need
++ to hash also on the first sizeof (uintptr_t) bytes which contain
++ a pointer. Hide the cast from the compiler. */
++ hash_entry_type n;
++ __asm ("" : "=g" (n) : "0" (p));
++ *htab_find_slot (&new_htab, n, INSERT) = n;
++ }
++ if (d[4] == (uintptr_t) old)
++ break;
++ else
++ d = (uintptr_t *) d[4];
++ }
++ while (1);
++ d[5] = (uintptr_t) new_htab;
++}
++
++static void
++gomp_create_artificial_team (void)
++{
++ struct gomp_thread *thr = gomp_thread ();
++ struct gomp_task_icv *icv;
++ struct gomp_team *team = gomp_new_team (1);
++ struct gomp_task *task = thr->task;
++ icv = task ? &task->icv : &gomp_global_icv;
++ team->prev_ts = thr->ts;
++ thr->ts.team = team;
++ thr->ts.team_id = 0;
++ thr->ts.work_share = &team->work_shares[0];
++ thr->ts.last_work_share = NULL;
++#ifdef HAVE_SYNC_BUILTINS
++ thr->ts.single_count = 0;
++#endif
++ thr->ts.static_trip = 0;
++ thr->task = &team->implicit_task[0];
++ gomp_init_task (thr->task, NULL, icv);
++ if (task)
++ {
++ thr->task = task;
++ gomp_end_task ();
++ free (task);
++ thr->task = &team->implicit_task[0];
++ }
++#ifdef LIBGOMP_USE_PTHREADS
++ else
++ pthread_setspecific (gomp_thread_destructor, thr);
++#endif
++}
++
++/* The format of data is:
++ data[0] cnt
++ data[1] size
++ data[2] alignment (on output array pointer)
++ data[3] allocator (-1 if malloc allocator)
++ data[4] next pointer
++ data[5] used internally (htab pointer)
++ data[6] used internally (end of array)
++ cnt times
++ ent[0] address
++ ent[1] offset
++ ent[2] used internally (pointer to data[0])
++ The entries are sorted by increasing offset, so that a binary
++ search can be performed. Normally data[8] (the offset of the first
++ entry) is 0; the exception is worksharing construct task reductions
++ in a cancellable parallel, where at offset 0 there should be space
++ for a pointer and an integer which are used internally. */
++
++void
++GOMP_taskgroup_reduction_register (uintptr_t *data)
++{
++ struct gomp_thread *thr = gomp_thread ();
++ struct gomp_team *team = thr->ts.team;
++ struct gomp_task *task;
++ unsigned nthreads;
++ if (__builtin_expect (team == NULL, 0))
++ {
++ /* The task reduction code needs a team and task, so for
++ orphaned taskgroups just create the implicit team. */
++ gomp_create_artificial_team ();
++ ialias_call (GOMP_taskgroup_start) ();
++ team = thr->ts.team;
++ }
++ nthreads = team->nthreads;
++ task = thr->task;
++ gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
++ task->taskgroup->reductions = data;
++}
++
++void
++GOMP_taskgroup_reduction_unregister (uintptr_t *data)
++{
++ uintptr_t *d = data;
++ htab_free ((struct htab *) data[5]);
++ do
++ {
++ gomp_aligned_free ((void *) d[2]);
++ d = (uintptr_t *) d[4];
++ }
++ while (d && !d[5]);
++}
++ialias (GOMP_taskgroup_reduction_unregister)
++
++/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
++   original list item or address of previously remapped original list
++   item to address of the private copy, store that to ptrs[i].
++   For i < cntorig, additionally set ptrs[cnt+i] to the address of
++   the original list item.  */
++
++void
++GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_task *task = thr->task;
++  unsigned id = thr->ts.team_id;
++  uintptr_t *data = task->taskgroup->reductions;
++  uintptr_t *d;
++  struct htab *reduction_htab = (struct htab *) data[5];
++  size_t i;
++  for (i = 0; i < cnt; ++i)
++    {
++      hash_entry_type ent, n;
++      __asm ("" : "=g" (ent) : "0" (ptrs + i));
++      n = htab_find (reduction_htab, ent);
++      if (n)
++        {
++          uintptr_t *p;
++          __asm ("" : "=g" (p) : "0" (n));
++          /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
++             p[1] is the offset within the allocated chunk for each
++             thread, p[2] is the array registered with
++             GOMP_taskgroup_reduction_register, d[2] is the base of the
++             allocated memory and d[1] is the size of the allocated chunk
++             for one thread.  */
++          d = (uintptr_t *) p[2];
++          ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
++          if (__builtin_expect (i < cntorig, 0))
++            ptrs[cnt + i] = (void *) p[0];
++          continue;
++        }
++      d = data;
++      while (d != NULL)
++        {
++          if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
++            break;
++          d = (uintptr_t *) d[4];
++        }
++      if (d == NULL)
++        gomp_fatal ("couldn't find matching task_reduction or reduction with "
++                    "task modifier for %p", ptrs[i]);
++      uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
++      ptrs[i] = (void *) (d[2] + id * d[1] + off);
++      if (__builtin_expect (i < cntorig, 0))
++        {
++          size_t lo = 0, hi = d[0];  /* Search [lo, hi); hi is exclusive.  */
++          while (lo < hi)
++            {
++              size_t m = (lo + hi) / 2;
++              if (d[7 + 3 * m + 1] < off)
++                lo = m + 1;
++              else if (d[7 + 3 * m + 1] == off)
++                {
++                  ptrs[cnt + i] = (void *) d[7 + 3 * m];
++                  break;
++                }
++              else
++                hi = m;  /* Exclusive, so no m - 1 wrap at m == 0.  */
++            }
++          if (lo >= hi)  /* Range emptied without a match.  */
++            gomp_fatal ("couldn't find matching task_reduction or reduction "
++                        "with task modifier for %p", ptrs[i]);
++        }
++    }
++}
++
++struct gomp_taskgroup *
++gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
++{
++ struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
++ gomp_reduction_register (data, NULL, NULL, nthreads);
++ taskgroup->reductions = data;
++ return taskgroup;
++}
++
++void
++gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
++{
++ struct gomp_thread *thr = gomp_thread ();
++ struct gomp_team *team = thr->ts.team;
++ struct gomp_task *task = thr->task;
++ unsigned nthreads = team->nthreads;
++ gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
++ task->taskgroup->reductions = data;
++}
++
++void
++gomp_workshare_taskgroup_start (void)
++{
++ struct gomp_thread *thr = gomp_thread ();
++ struct gomp_team *team = thr->ts.team;
++ struct gomp_task *task;
++
++ if (team == NULL)
++ {
++ gomp_create_artificial_team ();
++ team = thr->ts.team;
++ }
++ task = thr->task;
++ task->taskgroup = gomp_taskgroup_init (task->taskgroup);
++ task->taskgroup->workshare = true;
++}
++
++void
++GOMP_workshare_task_reduction_unregister (bool cancelled)
++{
++ struct gomp_thread *thr = gomp_thread ();
++ struct gomp_task *task = thr->task;
++ struct gomp_team *team = thr->ts.team;
++ uintptr_t *data = task->taskgroup->reductions;
++ ialias_call (GOMP_taskgroup_end) ();
++ if (thr->ts.team_id == 0)
++ ialias_call (GOMP_taskgroup_reduction_unregister) (data);
++ else
++ htab_free ((struct htab *) data[5]);
++
++ if (!cancelled)
++ gomp_team_barrier_wait (&team->barrier);
++}
++
+ int
+ omp_in_final (void)
+ {
+--- libgomp/team.c.jj 2018-04-25 09:40:31.322655307 +0200
++++ libgomp/team.c 2019-05-07 18:46:36.548109384 +0200
+@@ -32,7 +32,6 @@
+ #include
+
+ #ifdef LIBGOMP_USE_PTHREADS
+-/* This attribute contains PTHREAD_CREATE_DETACHED. */
+ pthread_attr_t gomp_thread_attr;
+
+ /* This key is for the thread destructor. */
+@@ -58,6 +57,7 @@ struct gomp_thread_start_data
+ struct gomp_thread_pool *thread_pool;
+ unsigned int place;
+ bool nested;
++ pthread_t handle;
+ };
+
+
+@@ -89,6 +89,9 @@ gomp_thread_start (void *xdata)
+ thr->ts = data->ts;
+ thr->task = data->task;
+ thr->place = data->place;
++#ifdef GOMP_NEEDS_THREAD_HANDLE
++ thr->handle = data->handle;
++#endif
+
+ thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
+
+@@ -131,6 +134,7 @@ gomp_thread_start (void *xdata)
+ }
+
+ gomp_sem_destroy (&thr->release);
++ pthread_detach (pthread_self ());
+ thr->thread_pool = NULL;
+ thr->task = NULL;
+ return NULL;
+@@ -183,7 +187,7 @@ gomp_new_team (unsigned nthreads)
+ team->single_count = 0;
+ #endif
+ team->work_shares_to_free = &team->work_shares[0];
+- gomp_init_work_share (&team->work_shares[0], false, nthreads);
++ gomp_init_work_share (&team->work_shares[0], 0, nthreads);
+ team->work_shares[0].next_alloc = NULL;
+ team->work_share_list_free = NULL;
+ team->work_share_list_alloc = &team->work_shares[1];
+@@ -231,6 +235,7 @@ gomp_free_pool_helper (void *thread_pool
+ thr->thread_pool = NULL;
+ thr->task = NULL;
+ #ifdef LIBGOMP_USE_PTHREADS
++ pthread_detach (pthread_self ());
+ pthread_exit (NULL);
+ #elif defined(__nvptx__)
+ asm ("exit;");
+@@ -297,7 +302,8 @@ gomp_free_thread (void *arg __attribute_
+ #ifdef LIBGOMP_USE_PTHREADS
+ void
+ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
+- unsigned flags, struct gomp_team *team)
++ unsigned flags, struct gomp_team *team,
++ struct gomp_taskgroup *taskgroup)
+ {
+ struct gomp_thread_start_data *start_data;
+ struct gomp_thread *thr, *nthr;
+@@ -312,6 +318,7 @@ gomp_team_start (void (*fn) (void *), vo
+ unsigned int s = 0, rest = 0, p = 0, k = 0;
+ unsigned int affinity_count = 0;
+ struct gomp_thread **affinity_thr = NULL;
++ bool force_display = false;
+
+ thr = gomp_thread ();
+ nested = thr->ts.level;
+@@ -319,7 +326,12 @@ gomp_team_start (void (*fn) (void *), vo
+ task = thr->task;
+ icv = task ? &task->icv : &gomp_global_icv;
+ if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
+- gomp_init_affinity ();
++ {
++ gomp_init_affinity ();
++ if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
++ gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
++ thr->place);
++ }
+
+ /* Always save the previous state, even if this isn't a nested team.
+ In particular, we should save any work share state from an outer
+@@ -338,6 +350,9 @@ gomp_team_start (void (*fn) (void *), vo
+ #endif
+ thr->ts.static_trip = 0;
+ thr->task = &team->implicit_task[0];
++#ifdef GOMP_NEEDS_THREAD_HANDLE
++ thr->handle = pthread_self ();
++#endif
+ nthreads_var = icv->nthreads_var;
+ if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
+ && thr->ts.level < gomp_nthreads_var_list_len)
+@@ -350,6 +365,7 @@ gomp_team_start (void (*fn) (void *), vo
+ && thr->ts.level < gomp_bind_var_list_len)
+ bind_var = gomp_bind_var_list[thr->ts.level];
+ gomp_init_task (thr->task, task, icv);
++ thr->task->taskgroup = taskgroup;
+ team->implicit_task[0].icv.nthreads_var = nthreads_var;
+ team->implicit_task[0].icv.bind_var = bind_var;
+
+@@ -465,7 +481,9 @@ gomp_team_start (void (*fn) (void *), vo
+ pool->threads
+ = gomp_realloc (pool->threads,
+ pool->threads_size
+- * sizeof (struct gomp_thread_data *));
++ * sizeof (struct gomp_thread *));
++ /* Add current (master) thread to threads[]. */
++ pool->threads[0] = thr;
+ }
+
+ /* Release existing idle threads. */
+@@ -540,6 +558,7 @@ gomp_team_start (void (*fn) (void *), vo
+ + place_partition_len))
+ {
+ unsigned int l;
++ force_display = true;
+ if (affinity_thr == NULL)
+ {
+ unsigned int j;
+@@ -623,6 +642,7 @@ gomp_team_start (void (*fn) (void *), vo
+ gomp_init_task (nthr->task, task, icv);
+ team->implicit_task[i].icv.nthreads_var = nthreads_var;
+ team->implicit_task[i].icv.bind_var = bind_var;
++ nthr->task->taskgroup = taskgroup;
+ nthr->fn = fn;
+ nthr->data = data;
+ team->ordered_release[i] = &nthr->release;
+@@ -712,19 +732,17 @@ gomp_team_start (void (*fn) (void *), vo
+ {
+ size_t stacksize;
+ pthread_attr_init (&thread_attr);
+- pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
+ if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
+ pthread_attr_setstacksize (&thread_attr, stacksize);
+ attr = &thread_attr;
+ }
+
+ start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
+- * (nthreads-i));
++ * (nthreads - i));
+
+ /* Launch new threads. */
+ for (; i < nthreads; ++i)
+ {
+- pthread_t pt;
+ int err;
+
+ start_data->ts.place_partition_off = thr->ts.place_partition_off;
+@@ -810,11 +828,14 @@ gomp_team_start (void (*fn) (void *), vo
+ gomp_init_task (start_data->task, task, icv);
+ team->implicit_task[i].icv.nthreads_var = nthreads_var;
+ team->implicit_task[i].icv.bind_var = bind_var;
++ start_data->task->taskgroup = taskgroup;
+ start_data->thread_pool = pool;
+ start_data->nested = nested;
+
+ attr = gomp_adjust_thread_attr (attr, &thread_attr);
+- err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
++ err = pthread_create (&start_data->handle, attr, gomp_thread_start,
++ start_data);
++ start_data++;
+ if (err != 0)
+ gomp_fatal ("Thread creation failed: %s", strerror (err));
+ }
+@@ -854,6 +875,42 @@ gomp_team_start (void (*fn) (void *), vo
+ gomp_mutex_unlock (&gomp_managed_threads_lock);
+ #endif
+ }
++ if (__builtin_expect (gomp_display_affinity_var, 0))
++ {
++ if (nested
++ || nthreads != old_threads_used
++ || force_display)
++ {
++ gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
++ thr->place);
++ if (nested)
++ {
++ start_data -= nthreads - 1;
++ for (i = 1; i < nthreads; ++i)
++ {
++ gomp_display_affinity_thread (
++#ifdef LIBGOMP_USE_PTHREADS
++ start_data->handle,
++#else
++ gomp_thread_self (),
++#endif
++ &start_data->ts,
++ start_data->place);
++ start_data++;
++ }
++ }
++ else
++ {
++ for (i = 1; i < nthreads; ++i)
++ {
++ gomp_thread_handle handle
++ = gomp_thread_to_pthread_t (pool->threads[i]);
++ gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
++ pool->threads[i]->place);
++ }
++ }
++ }
++ }
+ if (__builtin_expect (affinity_thr != NULL, 0)
+ && team->prev_ts.place_partition_len > 64)
+ free (affinity_thr);
+@@ -894,7 +951,7 @@ gomp_team_end (void)
+ gomp_end_task ();
+ thr->ts = team->prev_ts;
+
+- if (__builtin_expect (thr->ts.team != NULL, 0))
++ if (__builtin_expect (thr->ts.level != 0, 0))
+ {
+ #ifdef HAVE_SYNC_BUILTINS
+ __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
+@@ -959,6 +1016,76 @@ team_destructor (void)
+ crashes. */
+ pthread_key_delete (gomp_thread_destructor);
+ }
++
++/* Similar to gomp_free_pool_helper, but don't detach itself,
++ gomp_pause_host will pthread_join those threads. */
++
++static void
++gomp_pause_pool_helper (void *thread_pool)
++{
++ struct gomp_thread *thr = gomp_thread ();
++ struct gomp_thread_pool *pool
++ = (struct gomp_thread_pool *) thread_pool;
++ gomp_simple_barrier_wait_last (&pool->threads_dock);
++ gomp_sem_destroy (&thr->release);
++ thr->thread_pool = NULL;
++ thr->task = NULL;
++ pthread_exit (NULL);
++}
++
++/* Join the other threads of the caller's pool (if any) and free the pool.
++ Return 0, or -1 without doing anything if thr->ts.level is non-zero. */
++
++int
++gomp_pause_host (void)
++{
++ struct gomp_thread *thr = gomp_thread ();
++ struct gomp_thread_pool *pool = thr->thread_pool;
++ if (thr->ts.level)
++ return -1;
++ if (pool)
++ {
++ if (pool->threads_used > 0)
++ {
++ int i;
++ pthread_t *thrs
++ = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
++ for (i = 1; i < pool->threads_used; i++)
++ {
++ struct gomp_thread *nthr = pool->threads[i];
++ nthr->fn = gomp_pause_pool_helper;
++ nthr->data = pool;
++ thrs[i] = gomp_thread_to_pthread_t (nthr);
++ }
++ /* This barrier undocks threads docked on pool->threads_dock. */
++ gomp_simple_barrier_wait (&pool->threads_dock);
++ /* And this waits till all threads have called
++ gomp_simple_barrier_wait_last in gomp_pause_pool_helper. */
++ gomp_simple_barrier_wait (&pool->threads_dock);
++ /* Now it is safe to destroy the barrier and free the pool. */
++ gomp_simple_barrier_destroy (&pool->threads_dock);
++
++#ifdef HAVE_SYNC_BUILTINS
++ __sync_fetch_and_add (&gomp_managed_threads,
++ 1L - pool->threads_used);
++#else
++ gomp_mutex_lock (&gomp_managed_threads_lock);
++ gomp_managed_threads -= pool->threads_used - 1L;
++ gomp_mutex_unlock (&gomp_managed_threads_lock);
++#endif
++ for (i = 1; i < pool->threads_used; i++)
++ pthread_join (thrs[i], NULL);
++ }
++ if (pool->last_team)
++ free_team (pool->last_team);
++#ifndef __nvptx__
++ free (pool->threads);
++ free (pool);
++#endif
++ thr->thread_pool = NULL;
++ }
++ return 0;
++}
+ #endif
+
+ struct gomp_task_icv *
+--- libgomp/libgomp.h.jj 2018-04-25 09:40:31.925655587 +0200
++++ libgomp/libgomp.h 2019-05-07 19:01:51.285535999 +0200
+@@ -44,6 +44,7 @@
+ #include "config.h"
+ #include "gstdint.h"
+ #include "libgomp-plugin.h"
++#include "gomp-constants.h"
+
+ #ifdef HAVE_PTHREAD_H
+ #include
+@@ -85,9 +86,21 @@ enum memmodel
+
+ /* alloc.c */
+
++#if defined(HAVE_ALIGNED_ALLOC) \
++ || defined(HAVE__ALIGNED_MALLOC) \
++ || defined(HAVE_POSIX_MEMALIGN) \
++ || defined(HAVE_MEMALIGN)
++/* Defined if gomp_aligned_alloc doesn't use fallback version
++ and free can be used instead of gomp_aligned_free. */
++#define GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC 1
++#endif
++
+ extern void *gomp_malloc (size_t) __attribute__((malloc));
+ extern void *gomp_malloc_cleared (size_t) __attribute__((malloc));
+ extern void *gomp_realloc (void *, size_t);
++extern void *gomp_aligned_alloc (size_t, size_t)
++ __attribute__((malloc, alloc_size (2)));
++extern void gomp_aligned_free (void *);
+
+ /* Avoid conflicting prototypes of alloca() in system headers by using
+ GCC's builtin alloca(). */
+@@ -137,7 +150,8 @@ enum gomp_schedule_type
+ GFS_STATIC,
+ GFS_DYNAMIC,
+ GFS_GUIDED,
+- GFS_AUTO
++ GFS_AUTO,
++ GFS_MONOTONIC = 0x80000000U
+ };
+
+ struct gomp_doacross_work_share
+@@ -174,6 +188,8 @@ struct gomp_doacross_work_share
+ /* Likewise, but for the ull implementation. */
+ unsigned long long boundary_ull;
+ };
++ /* Pointer to extra memory if needed for lastprivate(conditional). */
++ void *extra;
+ /* Array of shift counts for each dimension if they can be flattened. */
+ unsigned int shift_counts[];
+ };
+@@ -275,6 +291,9 @@ struct gomp_work_share
+ struct gomp_work_share *next_free;
+ };
+
++ /* Task reductions for this work-sharing construct. */
++ uintptr_t *task_reductions;
++
+ /* If only few threads are in the team, ordered_team_ids can point
+ to this array which fills the padding at the end of this struct. */
+ unsigned inline_ordered_team_ids[0];
+@@ -365,8 +384,12 @@ extern void **gomp_places_list;
+ extern unsigned long gomp_places_list_len;
+ extern unsigned int gomp_num_teams_var;
+ extern int gomp_debug_var;
++extern bool gomp_display_affinity_var;
++extern char *gomp_affinity_format_var;
++extern size_t gomp_affinity_format_len;
+ extern int goacc_device_num;
+ extern char *goacc_device_type;
++extern int goacc_default_dims[GOMP_DIM_MAX];
+
+ enum gomp_task_kind
+ {
+@@ -469,8 +492,10 @@ struct gomp_taskgroup
+ struct gomp_taskgroup *prev;
+ /* Queue of tasks that belong in this taskgroup. */
+ struct priority_queue taskgroup_queue;
++ uintptr_t *reductions;
+ bool in_taskgroup_wait;
+ bool cancelled;
++ bool workshare;
+ gomp_sem_t taskgroup_sem;
+ size_t num_children;
+ };
+@@ -613,6 +638,19 @@ struct gomp_thread
+
+ /* User pthread thread pool */
+ struct gomp_thread_pool *thread_pool;
++
++#if defined(LIBGOMP_USE_PTHREADS) \
++ && (!defined(HAVE_TLS) \
++ || !defined(__GLIBC__) \
++ || !defined(USING_INITIAL_EXEC_TLS))
++ /* pthread_t of the thread containing this gomp_thread.
++ On Linux when using initial-exec TLS,
++ (typeof (pthread_t)) gomp_thread () - pthread_self ()
++ is constant in all threads, so we can optimize and not
++ store it. */
++#define GOMP_NEEDS_THREAD_HANDLE 1
++ pthread_t handle;
++#endif
+ };
+
+
+@@ -709,6 +747,25 @@ extern bool gomp_affinity_finalize_place
+ extern bool gomp_affinity_init_level (int, unsigned long, bool);
+ extern void gomp_affinity_print_place (void *);
+ extern void gomp_get_place_proc_ids_8 (int, int64_t *);
++extern void gomp_display_affinity_place (char *, size_t, size_t *, int);
++
++/* affinity-fmt.c */
++
++extern void gomp_print_string (const char *str, size_t len);
++extern void gomp_set_affinity_format (const char *, size_t);
++extern void gomp_display_string (char *, size_t, size_t *, const char *,
++ size_t);
++#ifdef LIBGOMP_USE_PTHREADS
++typedef pthread_t gomp_thread_handle;
++#else
++typedef struct {} gomp_thread_handle;
++#endif
++extern size_t gomp_display_affinity (char *, size_t, const char *,
++ gomp_thread_handle,
++ struct gomp_team_state *, unsigned int);
++extern void gomp_display_affinity_thread (gomp_thread_handle,
++ struct gomp_team_state *,
++ unsigned int) __attribute__((cold));
+
+ /* iter.c */
+
+@@ -745,9 +802,9 @@ extern void gomp_ordered_next (void);
+ extern void gomp_ordered_static_init (void);
+ extern void gomp_ordered_static_next (void);
+ extern void gomp_ordered_sync (void);
+-extern void gomp_doacross_init (unsigned, long *, long);
++extern void gomp_doacross_init (unsigned, long *, long, size_t);
+ extern void gomp_doacross_ull_init (unsigned, unsigned long long *,
+- unsigned long long);
++ unsigned long long, size_t);
+
+ /* parallel.c */
+
+@@ -770,6 +827,10 @@ extern bool gomp_create_target_task (str
+ size_t *, unsigned short *, unsigned int,
+ void **, void **,
+ enum gomp_target_task_state);
++extern struct gomp_taskgroup *gomp_parallel_reduction_register (uintptr_t *,
++ unsigned);
++extern void gomp_workshare_taskgroup_start (void);
++extern void gomp_workshare_task_reduction_register (uintptr_t *, uintptr_t *);
+
+ static void inline
+ gomp_finish_task (struct gomp_task *task)
+@@ -782,9 +843,11 @@ gomp_finish_task (struct gomp_task *task
+
+ extern struct gomp_team *gomp_new_team (unsigned);
+ extern void gomp_team_start (void (*) (void *), void *, unsigned,
+- unsigned, struct gomp_team *);
++ unsigned, struct gomp_team *,
++ struct gomp_taskgroup *);
+ extern void gomp_team_end (void);
+ extern void gomp_free_thread (void *);
++extern int gomp_pause_host (void);
+
+ /* target.c */
+
+@@ -851,6 +914,8 @@ struct splay_tree_key_s {
+ uintptr_t tgt_offset;
+ /* Reference count. */
+ uintptr_t refcount;
++ /* Dynamic reference count. */
++ uintptr_t dynamic_refcount;
+ /* Pointer to the original mapping of "omp declare target link" object. */
+ splay_tree_key link_key;
+ };
+@@ -989,7 +1054,9 @@ enum gomp_map_vars_kind
+ };
+
+ extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
+-extern void gomp_acc_remove_pointer (void *, bool, int, int);
++extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
++extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
++ unsigned short *);
+
+ extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
+ size_t, void **, void **,
+@@ -999,12 +1066,13 @@ extern void gomp_unmap_vars (struct targ
+ extern void gomp_init_device (struct gomp_device_descr *);
+ extern void gomp_free_memmap (struct splay_tree_s *);
+ extern void gomp_unload_device (struct gomp_device_descr *);
++extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);
+
+ /* work.c */
+
+-extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned);
++extern void gomp_init_work_share (struct gomp_work_share *, size_t, unsigned);
+ extern void gomp_fini_work_share (struct gomp_work_share *);
+-extern bool gomp_work_share_start (bool);
++extern bool gomp_work_share_start (size_t);
+ extern void gomp_work_share_end (void);
+ extern bool gomp_work_share_end_cancel (void);
+ extern void gomp_work_share_end_nowait (void);
+@@ -1028,6 +1096,14 @@ gomp_work_share_init_done (void)
+ #include "omp-lock.h"
+ #define _LIBGOMP_OMP_LOCK_DEFINED 1
+ #include "omp.h.in"
++#define omp_sched_monotonic 0x80000000U
++typedef enum omp_pause_resource_t
++{
++ omp_pause_soft = 1,
++ omp_pause_hard = 2
++} omp_pause_resource_t;
++extern int omp_pause_resource (omp_pause_resource_t, int) __GOMP_NOTHROW;
++extern int omp_pause_resource_all (omp_pause_resource_t) __GOMP_NOTHROW;
+
+ #if !defined (HAVE_ATTRIBUTE_VISIBILITY) \
+ || !defined (HAVE_ATTRIBUTE_ALIAS) \
+@@ -1082,16 +1158,26 @@ extern int gomp_test_nest_lock_25 (omp_n
+ # define attribute_hidden
+ #endif
+
++#if __GNUC__ >= 9
++# define HAVE_ATTRIBUTE_COPY
++#endif
++
++#ifdef HAVE_ATTRIBUTE_COPY
++# define attribute_copy(arg) __attribute__ ((copy (arg)))
++#else
++# define attribute_copy(arg)
++#endif
++
+ #ifdef HAVE_ATTRIBUTE_ALIAS
+ # define strong_alias(fn, al) \
+- extern __typeof (fn) al __attribute__ ((alias (#fn)));
++ extern __typeof (fn) al __attribute__ ((alias (#fn))) attribute_copy (fn);
+
+ # define ialias_ulp ialias_str1(__USER_LABEL_PREFIX__)
+ # define ialias_str1(x) ialias_str2(x)
+ # define ialias_str2(x) #x
+ # define ialias(fn) \
+ extern __typeof (fn) gomp_ialias_##fn \
+- __attribute__ ((alias (#fn))) attribute_hidden;
++ __attribute__ ((alias (#fn))) attribute_hidden attribute_copy (fn);
+ # define ialias_redirect(fn) \
+ extern __typeof (fn) fn __asm__ (ialias_ulp "gomp_ialias_" #fn) attribute_hidden;
+ # define ialias_call(fn) gomp_ialias_ ## fn
+@@ -1131,4 +1217,42 @@ task_to_priority_node (enum priority_que
+ return (struct priority_node *) ((char *) task
+ + priority_queue_offset (type));
+ }
++
++#ifdef LIBGOMP_USE_PTHREADS
++static inline gomp_thread_handle
++gomp_thread_self (void)
++{
++ return pthread_self ();
++}
++
++static inline gomp_thread_handle
++gomp_thread_to_pthread_t (struct gomp_thread *thr)
++{
++ struct gomp_thread *this_thr = gomp_thread ();
++ if (thr == this_thr)
++ return pthread_self ();
++#ifdef GOMP_NEEDS_THREAD_HANDLE
++ return thr->handle;
++#else
++ /* On Linux with initial-exec TLS, the pthread_t of the thread containing
++ thr can be computed from thr, this_thr and pthread_self (),
++ as the distance between this_thr and pthread_self () is constant. */
++ return pthread_self () + ((uintptr_t) thr - (uintptr_t) this_thr);
++#endif
++}
++#else
++static inline gomp_thread_handle
++gomp_thread_self (void)
++{
++ return (gomp_thread_handle) {};
++}
++
++static inline gomp_thread_handle
++gomp_thread_to_pthread_t (struct gomp_thread *thr)
++{
++ (void) thr;
++ return gomp_thread_self ();
++}
++#endif
++
+ #endif /* LIBGOMP_H */
+--- libgomp/oacc-parallel.c.jj 2018-04-25 09:40:31.319655306 +0200
++++ libgomp/oacc-parallel.c 2019-05-07 19:09:47.010991153 +0200
+@@ -27,6 +27,8 @@
+ /* This file handles OpenACC constructs. */
+
+ #include "openacc.h"
++void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
++void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
+ #include "libgomp.h"
+ #include "libgomp_g.h"
+ #include "gomp-constants.h"
+@@ -38,31 +40,95 @@
+ #include
+ #include
+
++
++/* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
++ continue to support the following two legacy values. */
++_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
++ "legacy GOMP_DEVICE_ICV broken");
++_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
++ == GOACC_FLAG_HOST_FALLBACK,
++ "legacy GOMP_DEVICE_HOST_FALLBACK broken");
++
++
++/* Return the number of mappings associated with the pointer or PSET that
++ follows POS: a PSET has three mappings, a pointer has two, else zero. */
++
+ static int
+-find_pset (int pos, size_t mapnum, unsigned short *kinds)
++find_pointer (int pos, size_t mapnum, unsigned short *kinds)
+ {
+ if (pos + 1 >= mapnum)
+ return 0;
+
+ unsigned char kind = kinds[pos+1] & 0xff;
+
+- return kind == GOMP_MAP_TO_PSET;
++ if (kind == GOMP_MAP_TO_PSET)
++ return 3;
++ else if (kind == GOMP_MAP_POINTER)
++ return 2;
++
++ return 0;
++}
++
++/* Handle the mapping pairs that are presented when a
++ deviceptr clause is used with Fortran. */
++
++static void
++handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
++ unsigned short *kinds)
++{
++ int i;
++
++ for (i = 0; i < mapnum; i++)
++ {
++ unsigned short kind1 = kinds[i] & 0xff;
++
++ /* Handle Fortran deviceptr clause. */
++ if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
++ {
++ unsigned short kind2;
++
++ if (i < (signed)mapnum - 1)
++ kind2 = kinds[i + 1] & 0xff;
++ else
++ kind2 = 0xffff;
++
++ if (sizes[i] == sizeof (void *))
++ continue;
++
++ /* At this point, we're dealing with a Fortran deviceptr.
++ If the next element is not what we're expecting, then
++ this is an instance of where the deviceptr variable was
++ not used within the region and the pointer was removed
++ by the gimplifier. */
++ if (kind2 == GOMP_MAP_POINTER
++ && sizes[i + 1] == 0
++ && hostaddrs[i] == *(void **)hostaddrs[i + 1])
++ {
++ kinds[i+1] = kinds[i];
++ sizes[i+1] = sizeof (void *);
++ }
++
++ /* Invalidate the entry. */
++ hostaddrs[i] = NULL;
++ }
++ }
+ }
+
+ static void goacc_wait (int async, int num_waits, va_list *ap);
+
+
+-/* Launch a possibly offloaded function on DEVICE. FN is the host fn
++/* Launch a possibly offloaded function with FLAGS. FN is the host fn
+ address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
+ blocks to be copied to/from the device. Varadic arguments are
+ keyed optional parameters terminated with a zero. */
+
+ void
+-GOACC_parallel_keyed (int device, void (*fn) (void *),
++GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
+ size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned short *kinds, ...)
+ {
+- bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
++ int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
++
+ va_list ap;
+ struct goacc_thread *thr;
+ struct gomp_device_descr *acc_dev;
+@@ -88,9 +154,11 @@ GOACC_parallel_keyed (int device, void (
+ thr = goacc_thread ();
+ acc_dev = thr->dev;
+
++ handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
++
+ /* Host fallback if "if" clause is false or if the current device is set to
+ the host. */
+- if (host_fallback)
++ if (flags & GOACC_FLAG_HOST_FALLBACK)
+ {
+ goacc_save_and_set_bind (acc_device_host);
+ fn (hostaddrs);
+@@ -140,9 +208,7 @@ GOACC_parallel_keyed (int device, void (
+ case GOMP_LAUNCH_WAIT:
+ {
+ unsigned num_waits = GOMP_LAUNCH_OP (tag);
+-
+- if (num_waits)
+- goacc_wait (async, num_waits, &ap);
++ goacc_wait (async, num_waits, &ap);
+ break;
+ }
+
+@@ -177,16 +243,36 @@ GOACC_parallel_keyed (int device, void (
+ devaddrs = gomp_alloca (sizeof (void *) * mapnum);
+ for (i = 0; i < mapnum; i++)
+ devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
+- + tgt->list[i].key->tgt_offset);
++ + tgt->list[i].key->tgt_offset
++ + tgt->list[i].offset);
+
+ acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+ async, dims, tgt);
+
+ /* If running synchronously, unmap immediately. */
+- if (async < acc_async_noval)
++ bool copyfrom = true;
++ if (async_synchronous_p (async))
+ gomp_unmap_vars (tgt, true);
+ else
+- tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
++ {
++ bool async_unmap = false;
++ for (size_t i = 0; i < tgt->list_count; i++)
++ {
++ splay_tree_key k = tgt->list[i].key;
++ if (k && k->refcount == 1)
++ {
++ async_unmap = true;
++ break;
++ }
++ }
++ if (async_unmap)
++ tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
++ else
++ {
++ copyfrom = false;
++ gomp_unmap_vars (tgt, copyfrom);
++ }
++ }
+
+ acc_dev->openacc.async_set_async_func (acc_async_sync);
+ }
+@@ -194,7 +280,7 @@ GOACC_parallel_keyed (int device, void (
+ /* Legacy entry point, only provide host execution. */
+
+ void
+-GOACC_parallel (int device, void (*fn) (void *),
++GOACC_parallel (int flags_m, void (*fn) (void *),
+ size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned short *kinds,
+ int num_gangs, int num_workers, int vector_length,
+@@ -206,10 +292,11 @@ GOACC_parallel (int device, void (*fn) (
+ }
+
+ void
+-GOACC_data_start (int device, size_t mapnum,
++GOACC_data_start (int flags_m, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds)
+ {
+- bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
++ int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
++
+ struct target_mem_desc *tgt;
+
+ #ifdef HAVE_INTTYPES_H
+@@ -227,7 +314,7 @@ GOACC_data_start (int device, size_t map
+
+ /* Host fallback or 'do nothing'. */
+ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+- || host_fallback)
++ || (flags & GOACC_FLAG_HOST_FALLBACK))
+ {
+ tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
+ GOMP_MAP_VARS_OPENACC);
+@@ -258,13 +345,14 @@ GOACC_data_end (void)
+ }
+
+ void
+-GOACC_enter_exit_data (int device, size_t mapnum,
++GOACC_enter_exit_data (int flags_m, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds,
+ int async, int num_waits, ...)
+ {
++ int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
++
+ struct goacc_thread *thr;
+ struct gomp_device_descr *acc_dev;
+- bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+ bool data_enter = false;
+ size_t i;
+
+@@ -274,7 +362,7 @@ GOACC_enter_exit_data (int device, size_
+ acc_dev = thr->dev;
+
+ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+- || host_fallback)
++ || (flags & GOACC_FLAG_HOST_FALLBACK))
+ return;
+
+ if (num_waits)
+@@ -286,6 +374,17 @@ GOACC_enter_exit_data (int device, size_
+ va_end (ap);
+ }
+
++ /* Determine whether "finalize" semantics apply to all mappings of this
++ OpenACC directive. */
++ bool finalize = false;
++ if (mapnum > 0)
++ {
++ unsigned char kind = kinds[0] & 0xff;
++ if (kind == GOMP_MAP_DELETE
++ || kind == GOMP_MAP_FORCE_FROM)
++ finalize = true;
++ }
++
+ acc_dev->openacc.async_set_async_func (async);
+
+ /* Determine if this is an "acc enter data". */
+@@ -298,13 +397,17 @@ GOACC_enter_exit_data (int device, size_
+
+ if (kind == GOMP_MAP_FORCE_ALLOC
+ || kind == GOMP_MAP_FORCE_PRESENT
+- || kind == GOMP_MAP_FORCE_TO)
++ || kind == GOMP_MAP_FORCE_TO
++ || kind == GOMP_MAP_TO
++ || kind == GOMP_MAP_ALLOC)
+ {
+ data_enter = true;
+ break;
+ }
+
+- if (kind == GOMP_MAP_DELETE
++ if (kind == GOMP_MAP_RELEASE
++ || kind == GOMP_MAP_DELETE
++ || kind == GOMP_MAP_FROM
+ || kind == GOMP_MAP_FORCE_FROM)
+ break;
+
+@@ -312,31 +415,35 @@ GOACC_enter_exit_data (int device, size_
+ kind);
+ }
+
++ /* In C, non-pointers and arrays are represented by a single data clause.
++ Dynamically allocated arrays and subarrays are represented by a data
++ clause followed by an internal GOMP_MAP_POINTER.
++
++ In Fortran, scalars and non-allocated arrays are represented by a
++ single data clause. Allocated arrays and subarrays have three mappings:
++ 1) the original data clause, 2) a PSET, 3) a pointer to the array data.
++ */
++
+ if (data_enter)
+ {
+ for (i = 0; i < mapnum; i++)
+ {
+ unsigned char kind = kinds[i] & 0xff;
+
+- /* Scan for PSETs. */
+- int psets = find_pset (i, mapnum, kinds);
++ /* Scan for pointers and PSETs. */
++ int pointer = find_pointer (i, mapnum, kinds);
+
+- if (!psets)
++ if (!pointer)
+ {
+ switch (kind)
+ {
+- case GOMP_MAP_POINTER:
+- gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
+- &kinds[i]);
+- break;
++ case GOMP_MAP_ALLOC:
+ case GOMP_MAP_FORCE_ALLOC:
+ acc_create (hostaddrs[i], sizes[i]);
+ break;
+- case GOMP_MAP_FORCE_PRESENT:
+- acc_present_or_copyin (hostaddrs[i], sizes[i]);
+- break;
++ case GOMP_MAP_TO:
+ case GOMP_MAP_FORCE_TO:
+- acc_present_or_copyin (hostaddrs[i], sizes[i]);
++ acc_copyin (hostaddrs[i], sizes[i]);
+ break;
+ default:
+ gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
+@@ -346,12 +453,13 @@ GOACC_enter_exit_data (int device, size_
+ }
+ else
+ {
+- gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
++ gomp_acc_insert_pointer (pointer, &hostaddrs[i],
++ &sizes[i], &kinds[i]);
+ /* Increment 'i' by two because OpenACC requires fortran
+ arrays to be contiguous, so each PSET is associated with
+ one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
+ one MAP_POINTER. */
+- i += 2;
++ i += pointer - 1;
+ }
+ }
+ }
+@@ -360,22 +468,28 @@ GOACC_enter_exit_data (int device, size_
+ {
+ unsigned char kind = kinds[i] & 0xff;
+
+- int psets = find_pset (i, mapnum, kinds);
++ int pointer = find_pointer (i, mapnum, kinds);
+
+- if (!psets)
++ if (!pointer)
+ {
+ switch (kind)
+ {
+- case GOMP_MAP_POINTER:
+- gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
+- == GOMP_MAP_FORCE_FROM,
+- async, 1);
+- break;
++ case GOMP_MAP_RELEASE:
+ case GOMP_MAP_DELETE:
+- acc_delete (hostaddrs[i], sizes[i]);
++ if (acc_is_present (hostaddrs[i], sizes[i]))
++ {
++ if (finalize)
++ acc_delete_finalize (hostaddrs[i], sizes[i]);
++ else
++ acc_delete (hostaddrs[i], sizes[i]);
++ }
+ break;
++ case GOMP_MAP_FROM:
+ case GOMP_MAP_FORCE_FROM:
+- acc_copyout (hostaddrs[i], sizes[i]);
++ if (finalize)
++ acc_copyout_finalize (hostaddrs[i], sizes[i]);
++ else
++ acc_copyout (hostaddrs[i], sizes[i]);
+ break;
+ default:
+ gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
+@@ -385,10 +499,12 @@ GOACC_enter_exit_data (int device, size_
+ }
+ else
+ {
+- gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
+- == GOMP_MAP_FORCE_FROM, async, 3);
++ bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
++ || kind == GOMP_MAP_FROM);
++ gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
++ finalize, pointer);
+ /* See the above comment. */
+- i += 2;
++ i += pointer - 1;
+ }
+ }
+
+@@ -398,13 +514,20 @@ GOACC_enter_exit_data (int device, size_
+ static void
+ goacc_wait (int async, int num_waits, va_list *ap)
+ {
+- struct goacc_thread *thr = goacc_thread ();
+- struct gomp_device_descr *acc_dev = thr->dev;
+-
+ while (num_waits--)
+ {
+ int qid = va_arg (*ap, int);
+-
++
++ /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
++ if (qid == acc_async_noval)
++ {
++ if (async == acc_async_sync)
++ acc_wait_all ();
++ else
++ acc_wait_all_async (async);
++ break;
++ }
++
+ if (acc_async_test (qid))
+ continue;
+
+@@ -415,16 +538,17 @@ goacc_wait (int async, int num_waits, va
+ launching on, the queue itself will order work as
+ required, so there's no need to wait explicitly. */
+ else
+- acc_dev->openacc.async_wait_async_func (qid, async);
++ acc_wait_async (qid, async);
+ }
+ }
+
+ void
+-GOACC_update (int device, size_t mapnum,
++GOACC_update (int flags_m, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds,
+ int async, int num_waits, ...)
+ {
+- bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
++ int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
++
+ size_t i;
+
+ goacc_lazy_initialize ();
+@@ -433,7 +557,7 @@ GOACC_update (int device, size_t mapnum,
+ struct gomp_device_descr *acc_dev = thr->dev;
+
+ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+- || host_fallback)
++ || (flags & GOACC_FLAG_HOST_FALLBACK))
+ return;
+
+ if (num_waits)
+@@ -447,6 +571,7 @@ GOACC_update (int device, size_t mapnum,
+
+ acc_dev->openacc.async_set_async_func (async);
+
++ bool update_device = false;
+ for (i = 0; i < mapnum; ++i)
+ {
+ unsigned char kind = kinds[i] & 0xff;
+@@ -457,11 +582,46 @@ GOACC_update (int device, size_t mapnum,
+ case GOMP_MAP_TO_PSET:
+ break;
+
++ case GOMP_MAP_ALWAYS_POINTER:
++ if (update_device)
++ {
++ /* Save the contents of the host pointer. */
++ void *dptr = acc_deviceptr (hostaddrs[i-1]);
++ uintptr_t t = *(uintptr_t *) hostaddrs[i];
++
++ /* Update the contents of the host pointer to reflect
++ the value of the allocated device memory in the
++ previous pointer. */
++ *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
++ acc_update_device (hostaddrs[i], sizeof (uintptr_t));
++
++ /* Restore the host pointer. */
++ *(uintptr_t *) hostaddrs[i] = t;
++ update_device = false;
++ }
++ break;
++
++ case GOMP_MAP_TO:
++ if (!acc_is_present (hostaddrs[i], sizes[i]))
++ {
++ update_device = false;
++ break;
++ }
++ /* Fallthru */
+ case GOMP_MAP_FORCE_TO:
++ update_device = true;
+ acc_update_device (hostaddrs[i], sizes[i]);
+ break;
+
++ case GOMP_MAP_FROM:
++ if (!acc_is_present (hostaddrs[i], sizes[i]))
++ {
++ update_device = false;
++ break;
++ }
++ /* Fallthru */
+ case GOMP_MAP_FORCE_FROM:
++ update_device = false;
+ acc_update_self (hostaddrs[i], sizes[i]);
+ break;
+
+@@ -487,8 +647,8 @@ GOACC_wait (int async, int num_waits, ..
+ }
+ else if (async == acc_async_sync)
+ acc_wait_all ();
+- else if (async == acc_async_noval)
+- goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
++ else
++ acc_wait_all_async (async);
+ }
+
+ int
+@@ -504,7 +664,7 @@ GOACC_get_thread_num (void)
+ }
+
+ void
+-GOACC_declare (int device, size_t mapnum,
++GOACC_declare (int flags_m, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds)
+ {
+ int i;
+@@ -522,9 +682,10 @@ GOACC_declare (int device, size_t mapnum
+ case GOMP_MAP_FORCE_FROM:
+ case GOMP_MAP_FORCE_TO:
+ case GOMP_MAP_POINTER:
++ case GOMP_MAP_RELEASE:
+ case GOMP_MAP_DELETE:
+- GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+- &kinds[i], 0, 0);
++ GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
++ &kinds[i], GOMP_ASYNC_SYNC, 0);
+ break;
+
+ case GOMP_MAP_FORCE_DEVICEPTR:
+@@ -532,20 +693,19 @@ GOACC_declare (int device, size_t mapnum
+
+ case GOMP_MAP_ALLOC:
+ if (!acc_is_present (hostaddrs[i], sizes[i]))
+- GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+- &kinds[i], 0, 0);
++ GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
++ &kinds[i], GOMP_ASYNC_SYNC, 0);
+ break;
+
+ case GOMP_MAP_TO:
+- GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+- &kinds[i], 0, 0);
++ GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
++ &kinds[i], GOMP_ASYNC_SYNC, 0);
+
+ break;
+
+ case GOMP_MAP_FROM:
+- kinds[i] = GOMP_MAP_FORCE_FROM;
+- GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+- &kinds[i], 0, 0);
++ GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
++ &kinds[i], GOMP_ASYNC_SYNC, 0);
+ break;
+
+ case GOMP_MAP_FORCE_PRESENT:
+--- libgomp/openacc2.f90.jj 2019-05-07 19:54:18.828514375 +0200
++++ libgomp/openacc2.f90 2019-05-07 19:56:38.454296347 +0200
+@@ -0,0 +1,1502 @@
++! OpenACC Runtime Library Definitions.
++
++! Copyright (C) 2014-2019 Free Software Foundation, Inc.
++
++! Contributed by Tobias Burnus
++! and Mentor Embedded.
++
++! This file is part of the GNU Offloading and Multi Processing Library
++! (libgomp).
++
++! Libgomp is free software; you can redistribute it and/or modify it
++! under the terms of the GNU General Public License as published by
++! the Free Software Foundation; either version 3, or (at your option)
++! any later version.
++
++! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++! FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++! more details.
++
++! Under Section 7 of GPL version 3, you are granted additional
++! permissions described in the GCC Runtime Library Exception, version
++! 3.1, as published by the Free Software Foundation.
++
++! You should have received a copy of the GNU General Public License and
++! a copy of the GCC Runtime Library Exception along with this program;
++! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
++! <http://www.gnu.org/licenses/>.
++
++module openacc_kinds2
++ use iso_fortran_env, only: int32
++ implicit none
++
++ private :: int32
++ public :: acc_device_kind
++
++ integer, parameter :: acc_device_kind = int32
++
++ public :: acc_device_none, acc_device_default, acc_device_host
++ public :: acc_device_not_host, acc_device_nvidia
++
++ ! Keep in sync with include/gomp-constants.h.
++ integer (acc_device_kind), parameter :: acc_device_none = 0
++ integer (acc_device_kind), parameter :: acc_device_default = 1
++ integer (acc_device_kind), parameter :: acc_device_host = 2
++ ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
++ integer (acc_device_kind), parameter :: acc_device_not_host = 4
++ integer (acc_device_kind), parameter :: acc_device_nvidia = 5
++
++ public :: acc_handle_kind
++
++ integer, parameter :: acc_handle_kind = int32
++
++ public :: acc_async_noval, acc_async_sync
++
++ ! Keep in sync with include/gomp-constants.h.
++ integer (acc_handle_kind), parameter :: acc_async_noval = -1
++ integer (acc_handle_kind), parameter :: acc_async_sync = -2
++
++end module
++
++module openacc_internal2
++ use openacc_kinds2
++ implicit none
++
++ interface
++ function acc_get_num_devices_h (d)
++ import
++ integer acc_get_num_devices_h
++ integer (acc_device_kind) d
++ end function
++
++ subroutine acc_set_device_type_h (d)
++ import
++ integer (acc_device_kind) d
++ end subroutine
++
++ function acc_get_device_type_h ()
++ import
++ integer (acc_device_kind) acc_get_device_type_h
++ end function
++
++ subroutine acc_set_device_num_h (n, d)
++ import
++ integer n
++ integer (acc_device_kind) d
++ end subroutine
++
++ function acc_get_device_num_h (d)
++ import
++ integer acc_get_device_num_h
++ integer (acc_device_kind) d
++ end function
++
++ function acc_async_test_h (a)
++ logical acc_async_test_h
++ integer a
++ end function
++
++ function acc_async_test_all_h ()
++ logical acc_async_test_all_h
++ end function
++
++ subroutine acc_wait_h (a)
++ integer a
++ end subroutine
++
++ subroutine acc_wait_async_h (a1, a2)
++ integer a1, a2
++ end subroutine
++
++ subroutine acc_wait_all_h ()
++ end subroutine
++
++ subroutine acc_wait_all_async_h (a)
++ integer a
++ end subroutine
++
++ subroutine acc_init_h (d)
++ import
++ integer (acc_device_kind) d
++ end subroutine
++
++ subroutine acc_shutdown_h (d)
++ import
++ integer (acc_device_kind) d
++ end subroutine
++
++ function acc_on_device_h (d)
++ import
++ integer (acc_device_kind) d
++ logical acc_on_device_h
++ end function
++
++ subroutine acc_copyin_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end subroutine
++
++ subroutine acc_copyin_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end subroutine
++
++ subroutine acc_copyin_array_h (a)
++ type (*), dimension (..), contiguous :: a
++ end subroutine
++
++ subroutine acc_present_or_copyin_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end subroutine
++
++ subroutine acc_present_or_copyin_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end subroutine
++
++ subroutine acc_present_or_copyin_array_h (a)
++ type (*), dimension (..), contiguous :: a
++ end subroutine
++
++ subroutine acc_create_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end subroutine
++
++ subroutine acc_create_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end subroutine
++
++ subroutine acc_create_array_h (a)
++ type (*), dimension (..), contiguous :: a
++ end subroutine
++
++ subroutine acc_present_or_create_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end subroutine
++
++ subroutine acc_present_or_create_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end subroutine
++
++ subroutine acc_present_or_create_array_h (a)
++ type (*), dimension (..), contiguous :: a
++ end subroutine
++
++ subroutine acc_copyout_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end subroutine
++
++ subroutine acc_copyout_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end subroutine
++
++ subroutine acc_copyout_array_h (a)
++ type (*), dimension (..), contiguous :: a
++ end subroutine
++
++ subroutine acc_copyout_finalize_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end subroutine
++
++ subroutine acc_copyout_finalize_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end subroutine
++
++ subroutine acc_copyout_finalize_array_h (a)
++ type (*), dimension (..), contiguous :: a
++ end subroutine
++
++ subroutine acc_delete_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end subroutine
++
++ subroutine acc_delete_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end subroutine
++
++ subroutine acc_delete_array_h (a)
++ type (*), dimension (..), contiguous :: a
++ end subroutine
++
++ subroutine acc_delete_finalize_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end subroutine
++
++ subroutine acc_delete_finalize_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end subroutine
++
++ subroutine acc_delete_finalize_array_h (a)
++ type (*), dimension (..), contiguous :: a
++ end subroutine
++
++ subroutine acc_update_device_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end subroutine
++
++ subroutine acc_update_device_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end subroutine
++
++ subroutine acc_update_device_array_h (a)
++ type (*), dimension (..), contiguous :: a
++ end subroutine
++
++ subroutine acc_update_self_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end subroutine
++
++ subroutine acc_update_self_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end subroutine
++
++ subroutine acc_update_self_array_h (a)
++ type (*), dimension (..), contiguous :: a
++ end subroutine
++
++ function acc_is_present_32_h (a, len)
++ use iso_c_binding, only: c_int32_t
++ logical acc_is_present_32_h
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ end function
++
++ function acc_is_present_64_h (a, len)
++ use iso_c_binding, only: c_int64_t
++ logical acc_is_present_64_h
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ end function
++
++ function acc_is_present_array_h (a)
++ logical acc_is_present_array_h
++ type (*), dimension (..), contiguous :: a
++ end function
++
++ subroutine acc_copyin_async_32_h (a, len, async)
++ use iso_c_binding, only: c_int32_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_copyin_async_64_h (a, len, async)
++ use iso_c_binding, only: c_int64_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_copyin_async_array_h (a, async)
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_create_async_32_h (a, len, async)
++ use iso_c_binding, only: c_int32_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_create_async_64_h (a, len, async)
++ use iso_c_binding, only: c_int64_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_create_async_array_h (a, async)
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_copyout_async_32_h (a, len, async)
++ use iso_c_binding, only: c_int32_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_copyout_async_64_h (a, len, async)
++ use iso_c_binding, only: c_int64_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_copyout_async_array_h (a, async)
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_delete_async_32_h (a, len, async)
++ use iso_c_binding, only: c_int32_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_delete_async_64_h (a, len, async)
++ use iso_c_binding, only: c_int64_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_delete_async_array_h (a, async)
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_update_device_async_32_h (a, len, async)
++ use iso_c_binding, only: c_int32_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_update_device_async_64_h (a, len, async)
++ use iso_c_binding, only: c_int64_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_update_device_async_array_h (a, async)
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_update_self_async_32_h (a, len, async)
++ use iso_c_binding, only: c_int32_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_update_self_async_64_h (a, len, async)
++ use iso_c_binding, only: c_int64_t
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ end subroutine
++
++ subroutine acc_update_self_async_array_h (a, async)
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ end subroutine
++ end interface
++
++ interface
++ function acc_get_num_devices_l (d) &
++ bind (C, name = "acc_get_num_devices")
++ use iso_c_binding, only: c_int
++ integer (c_int) :: acc_get_num_devices_l
++ integer (c_int), value :: d
++ end function
++
++ subroutine acc_set_device_type_l (d) &
++ bind (C, name = "acc_set_device_type")
++ use iso_c_binding, only: c_int
++ integer (c_int), value :: d
++ end subroutine
++
++ function acc_get_device_type_l () &
++ bind (C, name = "acc_get_device_type")
++ use iso_c_binding, only: c_int
++ integer (c_int) :: acc_get_device_type_l
++ end function
++
++ subroutine acc_set_device_num_l (n, d) &
++ bind (C, name = "acc_set_device_num")
++ use iso_c_binding, only: c_int
++ integer (c_int), value :: n, d
++ end subroutine
++
++ function acc_get_device_num_l (d) &
++ bind (C, name = "acc_get_device_num")
++ use iso_c_binding, only: c_int
++ integer (c_int) :: acc_get_device_num_l
++ integer (c_int), value :: d
++ end function
++
++ function acc_async_test_l (a) &
++ bind (C, name = "acc_async_test")
++ use iso_c_binding, only: c_int
++ integer (c_int) :: acc_async_test_l
++ integer (c_int), value :: a
++ end function
++
++ function acc_async_test_all_l () &
++ bind (C, name = "acc_async_test_all")
++ use iso_c_binding, only: c_int
++ integer (c_int) :: acc_async_test_all_l
++ end function
++
++ subroutine acc_wait_l (a) &
++ bind (C, name = "acc_wait")
++ use iso_c_binding, only: c_int
++ integer (c_int), value :: a
++ end subroutine
++
++ subroutine acc_wait_async_l (a1, a2) &
++ bind (C, name = "acc_wait_async")
++ use iso_c_binding, only: c_int
++ integer (c_int), value :: a1, a2
++ end subroutine
++
++ subroutine acc_wait_all_l () &
++ bind (C, name = "acc_wait_all")
++ use iso_c_binding, only: c_int
++ end subroutine
++
++ subroutine acc_wait_all_async_l (a) &
++ bind (C, name = "acc_wait_all_async")
++ use iso_c_binding, only: c_int
++ integer (c_int), value :: a
++ end subroutine
++
++ subroutine acc_init_l (d) &
++ bind (C, name = "acc_init")
++ use iso_c_binding, only: c_int
++ integer (c_int), value :: d
++ end subroutine
++
++ subroutine acc_shutdown_l (d) &
++ bind (C, name = "acc_shutdown")
++ use iso_c_binding, only: c_int
++ integer (c_int), value :: d
++ end subroutine
++
++ function acc_on_device_l (d) &
++ bind (C, name = "acc_on_device")
++ use iso_c_binding, only: c_int
++ integer (c_int) :: acc_on_device_l
++ integer (c_int), value :: d
++ end function
++
++ subroutine acc_copyin_l (a, len) &
++ bind (C, name = "acc_copyin")
++ use iso_c_binding, only: c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end subroutine
++
++ subroutine acc_present_or_copyin_l (a, len) &
++ bind (C, name = "acc_present_or_copyin")
++ use iso_c_binding, only: c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end subroutine
++
++ subroutine acc_create_l (a, len) &
++ bind (C, name = "acc_create")
++ use iso_c_binding, only: c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end subroutine
++
++ subroutine acc_present_or_create_l (a, len) &
++ bind (C, name = "acc_present_or_create")
++ use iso_c_binding, only: c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end subroutine
++
++ subroutine acc_copyout_l (a, len) &
++ bind (C, name = "acc_copyout")
++ use iso_c_binding, only: c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end subroutine
++
++ subroutine acc_copyout_finalize_l (a, len) &
++ bind (C, name = "acc_copyout_finalize")
++ use iso_c_binding, only: c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end subroutine
++
++ subroutine acc_delete_l (a, len) &
++ bind (C, name = "acc_delete")
++ use iso_c_binding, only: c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end subroutine
++
++ subroutine acc_delete_finalize_l (a, len) &
++ bind (C, name = "acc_delete_finalize")
++ use iso_c_binding, only: c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end subroutine
++
++ subroutine acc_update_device_l (a, len) &
++ bind (C, name = "acc_update_device")
++ use iso_c_binding, only: c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end subroutine
++
++ subroutine acc_update_self_l (a, len) &
++ bind (C, name = "acc_update_self")
++ use iso_c_binding, only: c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end subroutine
++
++ function acc_is_present_l (a, len) &
++ bind (C, name = "acc_is_present")
++ use iso_c_binding, only: c_int32_t, c_size_t
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ integer (c_int32_t) :: acc_is_present_l
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ end function
++
++ subroutine acc_copyin_async_l (a, len, async) &
++ bind (C, name = "acc_copyin_async")
++ use iso_c_binding, only: c_size_t, c_int
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ integer (c_int), value :: async
++ end subroutine
++
++ subroutine acc_create_async_l (a, len, async) &
++ bind (C, name = "acc_create_async")
++ use iso_c_binding, only: c_size_t, c_int
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ integer (c_int), value :: async
++ end subroutine
++
++ subroutine acc_copyout_async_l (a, len, async) &
++ bind (C, name = "acc_copyout_async")
++ use iso_c_binding, only: c_size_t, c_int
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ integer (c_int), value :: async
++ end subroutine
++
++ subroutine acc_delete_async_l (a, len, async) &
++ bind (C, name = "acc_delete_async")
++ use iso_c_binding, only: c_size_t, c_int
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ integer (c_int), value :: async
++ end subroutine
++
++ subroutine acc_update_device_async_l (a, len, async) &
++ bind (C, name = "acc_update_device_async")
++ use iso_c_binding, only: c_size_t, c_int
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ integer (c_int), value :: async
++ end subroutine
++
++ subroutine acc_update_self_async_l (a, len, async) &
++ bind (C, name = "acc_update_self_async")
++ use iso_c_binding, only: c_size_t, c_int
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_size_t), value :: len
++ integer (c_int), value :: async
++ end subroutine
++ end interface
++end module
++
++module openacc2
++ use openacc_kinds2
++ use openacc_internal2
++ implicit none
++
++ public :: openacc_version
++
++ public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type
++ public :: acc_set_device_num, acc_get_device_num, acc_async_test
++ public :: acc_async_test_all
++ public :: acc_wait, acc_async_wait, acc_wait_async
++ public :: acc_wait_all, acc_async_wait_all, acc_wait_all_async
++ public :: acc_init, acc_shutdown, acc_on_device
++ public :: acc_copyin, acc_present_or_copyin, acc_pcopyin, acc_create
++ public :: acc_present_or_create, acc_pcreate, acc_copyout, acc_delete
++ public :: acc_update_device, acc_update_self, acc_is_present
++ public :: acc_copyin_async, acc_create_async, acc_copyout_async
++ public :: acc_delete_async, acc_update_device_async, acc_update_self_async
++
++ integer, parameter :: openacc_version = 201306
++
++ interface acc_get_num_devices
++ procedure :: acc_get_num_devices_h
++ end interface
++
++ interface acc_set_device_type
++ procedure :: acc_set_device_type_h
++ end interface
++
++ interface acc_get_device_type
++ procedure :: acc_get_device_type_h
++ end interface
++
++ interface acc_set_device_num
++ procedure :: acc_set_device_num_h
++ end interface
++
++ interface acc_get_device_num
++ procedure :: acc_get_device_num_h
++ end interface
++
++ interface acc_async_test
++ procedure :: acc_async_test_h
++ end interface
++
++ interface acc_async_test_all
++ procedure :: acc_async_test_all_h
++ end interface
++
++ interface acc_wait
++ procedure :: acc_wait_h
++ end interface
++
++ ! acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait.
++ interface acc_async_wait
++ procedure :: acc_wait_h
++ end interface
++
++ interface acc_wait_async
++ procedure :: acc_wait_async_h
++ end interface
++
++ interface acc_wait_all
++ procedure :: acc_wait_all_h
++ end interface
++
++ ! acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all.
++ interface acc_async_wait_all
++ procedure :: acc_wait_all_h
++ end interface
++
++ interface acc_wait_all_async
++ procedure :: acc_wait_all_async_h
++ end interface
++
++ interface acc_init
++ procedure :: acc_init_h
++ end interface
++
++ interface acc_shutdown
++ procedure :: acc_shutdown_h
++ end interface
++
++ interface acc_on_device
++ procedure :: acc_on_device_h
++ end interface
++
++ ! acc_malloc: Only available in C/C++
++ ! acc_free: Only available in C/C++
++
++ ! As vendor extension, the following code supports both 32bit and 64bit
++ ! arguments for "size"; the OpenACC standard only permits default-kind
++ ! integers, which are of kind 4 (i.e. 32 bits).
++ ! Additionally, the two-argument version also takes arrays as argument,
++ ! and the one-argument version also scalars. Note that the code assumes
++ ! that the arrays are contiguous.
++
++ interface acc_copyin
++ procedure :: acc_copyin_32_h
++ procedure :: acc_copyin_64_h
++ procedure :: acc_copyin_array_h
++ end interface
++
++ interface acc_present_or_copyin
++ procedure :: acc_present_or_copyin_32_h
++ procedure :: acc_present_or_copyin_64_h
++ procedure :: acc_present_or_copyin_array_h
++ end interface
++
++ interface acc_pcopyin
++ procedure :: acc_present_or_copyin_32_h
++ procedure :: acc_present_or_copyin_64_h
++ procedure :: acc_present_or_copyin_array_h
++ end interface
++
++ interface acc_create
++ procedure :: acc_create_32_h
++ procedure :: acc_create_64_h
++ procedure :: acc_create_array_h
++ end interface
++
++ interface acc_present_or_create
++ procedure :: acc_present_or_create_32_h
++ procedure :: acc_present_or_create_64_h
++ procedure :: acc_present_or_create_array_h
++ end interface
++
++ interface acc_pcreate
++ procedure :: acc_present_or_create_32_h
++ procedure :: acc_present_or_create_64_h
++ procedure :: acc_present_or_create_array_h
++ end interface
++
++ interface acc_copyout
++ procedure :: acc_copyout_32_h
++ procedure :: acc_copyout_64_h
++ procedure :: acc_copyout_array_h
++ end interface
++
++ interface acc_copyout_finalize
++ procedure :: acc_copyout_finalize_32_h
++ procedure :: acc_copyout_finalize_64_h
++ procedure :: acc_copyout_finalize_array_h
++ end interface
++
++ interface acc_delete
++ procedure :: acc_delete_32_h
++ procedure :: acc_delete_64_h
++ procedure :: acc_delete_array_h
++ end interface
++
++ interface acc_delete_finalize
++ procedure :: acc_delete_finalize_32_h
++ procedure :: acc_delete_finalize_64_h
++ procedure :: acc_delete_finalize_array_h
++ end interface
++
++ interface acc_update_device
++ procedure :: acc_update_device_32_h
++ procedure :: acc_update_device_64_h
++ procedure :: acc_update_device_array_h
++ end interface
++
++ interface acc_update_self
++ procedure :: acc_update_self_32_h
++ procedure :: acc_update_self_64_h
++ procedure :: acc_update_self_array_h
++ end interface
++
++ ! acc_map_data: Only available in C/C++
++ ! acc_unmap_data: Only available in C/C++
++ ! acc_deviceptr: Only available in C/C++
++ ! acc_hostptr: Only available in C/C++
++
++ interface acc_is_present
++ procedure :: acc_is_present_32_h
++ procedure :: acc_is_present_64_h
++ procedure :: acc_is_present_array_h
++ end interface
++
++ ! acc_memcpy_to_device: Only available in C/C++
++ ! acc_memcpy_from_device: Only available in C/C++
++
++ interface acc_copyin_async
++ procedure :: acc_copyin_async_32_h
++ procedure :: acc_copyin_async_64_h
++ procedure :: acc_copyin_async_array_h
++ end interface
++
++ interface acc_create_async
++ procedure :: acc_create_async_32_h
++ procedure :: acc_create_async_64_h
++ procedure :: acc_create_async_array_h
++ end interface
++
++ interface acc_copyout_async
++ procedure :: acc_copyout_async_32_h
++ procedure :: acc_copyout_async_64_h
++ procedure :: acc_copyout_async_array_h
++ end interface
++
++ interface acc_delete_async
++ procedure :: acc_delete_async_32_h
++ procedure :: acc_delete_async_64_h
++ procedure :: acc_delete_async_array_h
++ end interface
++
++ interface acc_update_device_async
++ procedure :: acc_update_device_async_32_h
++ procedure :: acc_update_device_async_64_h
++ procedure :: acc_update_device_async_array_h
++ end interface
++
++ interface acc_update_self_async
++ procedure :: acc_update_self_async_32_h
++ procedure :: acc_update_self_async_64_h
++ procedure :: acc_update_self_async_array_h
++ end interface
++
++end module
++
++function acc_get_num_devices_h (d)  ! Thin wrapper: returns acc_get_num_devices_l (d).
++ use openacc_internal2, only: acc_get_num_devices_l
++ use openacc_kinds2
++ integer acc_get_num_devices_h
++ integer (acc_device_kind) d
++ acc_get_num_devices_h = acc_get_num_devices_l (d)
++end function
++
++subroutine acc_set_device_type_h (d)  ! Thin wrapper: forwards D to acc_set_device_type_l.
++ use openacc_internal2, only: acc_set_device_type_l
++ use openacc_kinds2
++ integer (acc_device_kind) d
++ call acc_set_device_type_l (d)
++end subroutine
++
++function acc_get_device_type_h ()  ! Thin wrapper: returns acc_get_device_type_l ().
++ use openacc_internal2, only: acc_get_device_type_l
++ use openacc_kinds2
++ integer (acc_device_kind) acc_get_device_type_h
++ acc_get_device_type_h = acc_get_device_type_l ()
++end function
++
++subroutine acc_set_device_num_h (n, d)  ! Thin wrapper: forwards N and D to acc_set_device_num_l.
++ use openacc_internal2, only: acc_set_device_num_l
++ use openacc_kinds2
++ integer n
++ integer (acc_device_kind) d
++ call acc_set_device_num_l (n, d)
++end subroutine
++
++function acc_get_device_num_h (d)  ! Thin wrapper: returns acc_get_device_num_l (d).
++ use openacc_internal2, only: acc_get_device_num_l
++ use openacc_kinds2
++ integer acc_get_device_num_h
++ integer (acc_device_kind) d
++ acc_get_device_num_h = acc_get_device_num_l (d)
++end function
++
++function acc_async_test_h (a)  ! Converts the integer result of acc_async_test_l (a) to LOGICAL.
++ use openacc_internal2, only: acc_async_test_l
++ logical acc_async_test_h
++ integer a
++ if (acc_async_test_l (a) .eq. 1) then  ! Only the value 1 maps to .TRUE.
++ acc_async_test_h = .TRUE.
++ else
++ acc_async_test_h = .FALSE.
++ end if
++end function
++
++function acc_async_test_all_h ()  ! Converts the integer result of acc_async_test_all_l () to LOGICAL.
++ use openacc_internal2, only: acc_async_test_all_l
++ logical acc_async_test_all_h
++ if (acc_async_test_all_l () .eq. 1) then  ! Only the value 1 maps to .TRUE.
++ acc_async_test_all_h = .TRUE.
++ else
++ acc_async_test_all_h = .FALSE.
++ end if
++end function
++
++subroutine acc_wait_h (a)  ! Thin wrapper: forwards A to acc_wait_l.
++ use openacc_internal2, only: acc_wait_l
++ integer a
++ call acc_wait_l (a)
++end subroutine
++
++subroutine acc_wait_async_h (a1, a2)  ! Thin wrapper: forwards A1 and A2 to acc_wait_async_l.
++ use openacc_internal2, only: acc_wait_async_l
++ integer a1, a2
++ call acc_wait_async_l (a1, a2)
++end subroutine
++
++subroutine acc_wait_all_h ()  ! Thin wrapper: calls acc_wait_all_l.
++ use openacc_internal2, only: acc_wait_all_l
++ call acc_wait_all_l ()
++end subroutine
++
++subroutine acc_wait_all_async_h (a)  ! Thin wrapper: forwards A to acc_wait_all_async_l.
++ use openacc_internal2, only: acc_wait_all_async_l
++ integer a
++ call acc_wait_all_async_l (a)
++end subroutine
++
++subroutine acc_init_h (d)  ! Thin wrapper: forwards D to acc_init_l.
++ use openacc_internal2, only: acc_init_l
++ use openacc_kinds2
++ integer (acc_device_kind) d
++ call acc_init_l (d)
++end subroutine
++
++subroutine acc_shutdown_h (d)  ! Thin wrapper: forwards D to acc_shutdown_l.
++ use openacc_internal2, only: acc_shutdown_l
++ use openacc_kinds2
++ integer (acc_device_kind) d
++ call acc_shutdown_l (d)
++end subroutine
++
++function acc_on_device_h (d)  ! Converts the integer result of acc_on_device_l (d) to LOGICAL.
++ use openacc_internal2, only: acc_on_device_l
++ use openacc_kinds2
++ integer (acc_device_kind) d
++ logical acc_on_device_h
++ if (acc_on_device_l (d) .eq. 1) then  ! Only the value 1 maps to .TRUE.
++ acc_on_device_h = .TRUE.
++ else
++ acc_on_device_h = .FALSE.
++ end if
++end function
++
++subroutine acc_copyin_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_copyin_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ call acc_copyin_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_copyin_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_copyin_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ call acc_copyin_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_copyin_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_copyin_l
++ type (*), dimension (..), contiguous :: a
++ call acc_copyin_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_present_or_copyin_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_present_or_copyin_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ call acc_present_or_copyin_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_present_or_copyin_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_present_or_copyin_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ call acc_present_or_copyin_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_present_or_copyin_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_present_or_copyin_l
++ type (*), dimension (..), contiguous :: a
++ call acc_present_or_copyin_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_create_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_create_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ call acc_create_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_create_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_create_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ call acc_create_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_create_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_create_l
++ type (*), dimension (..), contiguous :: a
++ call acc_create_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_present_or_create_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_present_or_create_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ call acc_present_or_create_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_present_or_create_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_present_or_create_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ call acc_present_or_create_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_present_or_create_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_present_or_create_l
++ type (*), dimension (..), contiguous :: a
++ call acc_present_or_create_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_copyout_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_copyout_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ call acc_copyout_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_copyout_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_copyout_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ call acc_copyout_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_copyout_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_copyout_l
++ type (*), dimension (..), contiguous :: a
++ call acc_copyout_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_copyout_finalize_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_copyout_finalize_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ call acc_copyout_finalize_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_copyout_finalize_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_copyout_finalize_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ call acc_copyout_finalize_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_copyout_finalize_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_copyout_finalize_l
++ type (*), dimension (..), contiguous :: a
++ call acc_copyout_finalize_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_delete_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_delete_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ call acc_delete_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_delete_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_delete_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ call acc_delete_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_delete_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_delete_l
++ type (*), dimension (..), contiguous :: a
++ call acc_delete_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_delete_finalize_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_delete_finalize_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ call acc_delete_finalize_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_delete_finalize_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_delete_finalize_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ call acc_delete_finalize_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_delete_finalize_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_delete_finalize_l
++ type (*), dimension (..), contiguous :: a
++ call acc_delete_finalize_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_update_device_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_update_device_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ call acc_update_device_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_update_device_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_update_device_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ call acc_update_device_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_update_device_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_update_device_l
++ type (*), dimension (..), contiguous :: a
++ call acc_update_device_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_update_self_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_update_self_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ call acc_update_self_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_update_self_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_update_self_l
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ call acc_update_self_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_update_self_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_update_self_l
++ type (*), dimension (..), contiguous :: a
++ call acc_update_self_l (a, sizeof (a))
++end subroutine
++
++function acc_is_present_32_h (a, len)  ! 32-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int32_t, c_size_t
++ use openacc_internal2, only: acc_is_present_l
++ logical acc_is_present_32_h
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then  ! Only the value 1 maps to .TRUE.
++ acc_is_present_32_h = .TRUE.
++ else
++ acc_is_present_32_h = .FALSE.
++ end if
++end function
++
++function acc_is_present_64_h (a, len)  ! 64-bit LEN variant; LEN is converted to c_size_t.
++ use iso_c_binding, only: c_int64_t, c_size_t
++ use openacc_internal2, only: acc_is_present_l
++ logical acc_is_present_64_h
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then  ! Only the value 1 maps to .TRUE.
++ acc_is_present_64_h = .TRUE.
++ else
++ acc_is_present_64_h = .FALSE.
++ end if
++end function
++
++function acc_is_present_array_h (a)  ! One-argument variant: the length is sizeof (a).
++ use openacc_internal2, only: acc_is_present_l
++ logical acc_is_present_array_h
++ type (*), dimension (..), contiguous :: a
++ acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1  ! Only the value 1 maps to .TRUE.
++end function
++
++subroutine acc_copyin_async_32_h (a, len, async)  ! 32-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int32_t, c_size_t, c_int
++ use openacc_internal2, only: acc_copyin_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ call acc_copyin_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_copyin_async_64_h (a, len, async)  ! 64-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int64_t, c_size_t, c_int
++ use openacc_internal2, only: acc_copyin_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ call acc_copyin_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_copyin_async_array_h (a, async)  ! Length is sizeof (a); ASYNC is converted to c_int.
++ use iso_c_binding, only: c_int
++ use openacc_internal2, only: acc_copyin_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ call acc_copyin_async_l (a, sizeof (a), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_create_async_32_h (a, len, async)  ! 32-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int32_t, c_size_t, c_int
++ use openacc_internal2, only: acc_create_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ call acc_create_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_create_async_64_h (a, len, async)  ! 64-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int64_t, c_size_t, c_int
++ use openacc_internal2, only: acc_create_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ call acc_create_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_create_async_array_h (a, async)  ! Length is sizeof (a); ASYNC is converted to c_int.
++ use iso_c_binding, only: c_int
++ use openacc_internal2, only: acc_create_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ call acc_create_async_l (a, sizeof (a), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_copyout_async_32_h (a, len, async)  ! 32-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int32_t, c_size_t, c_int
++ use openacc_internal2, only: acc_copyout_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ call acc_copyout_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_copyout_async_64_h (a, len, async)  ! 64-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int64_t, c_size_t, c_int
++ use openacc_internal2, only: acc_copyout_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ call acc_copyout_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_copyout_async_array_h (a, async)  ! Length is sizeof (a); ASYNC is converted to c_int.
++ use iso_c_binding, only: c_int
++ use openacc_internal2, only: acc_copyout_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ call acc_copyout_async_l (a, sizeof (a), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_delete_async_32_h (a, len, async)  ! 32-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int32_t, c_size_t, c_int
++ use openacc_internal2, only: acc_delete_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ call acc_delete_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_delete_async_64_h (a, len, async)  ! 64-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int64_t, c_size_t, c_int
++ use openacc_internal2, only: acc_delete_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ call acc_delete_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_delete_async_array_h (a, async)  ! Length is sizeof (a); ASYNC is converted to c_int.
++ use iso_c_binding, only: c_int
++ use openacc_internal2, only: acc_delete_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ call acc_delete_async_l (a, sizeof (a), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_update_device_async_32_h (a, len, async)  ! 32-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int32_t, c_size_t, c_int
++ use openacc_internal2, only: acc_update_device_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ call acc_update_device_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_update_device_async_64_h (a, len, async)  ! 64-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int64_t, c_size_t, c_int
++ use openacc_internal2, only: acc_update_device_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ call acc_update_device_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_update_device_async_array_h (a, async)  ! Length is sizeof (a); ASYNC is converted to c_int.
++ use iso_c_binding, only: c_int
++ use openacc_internal2, only: acc_update_device_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ call acc_update_device_async_l (a, sizeof (a), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_update_self_async_32_h (a, len, async)  ! 32-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int32_t, c_size_t, c_int
++ use openacc_internal2, only: acc_update_self_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int32_t) len
++ integer (acc_handle_kind) async
++ call acc_update_self_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_update_self_async_64_h (a, len, async)  ! 64-bit LEN variant; LEN -> c_size_t, ASYNC -> c_int.
++ use iso_c_binding, only: c_int64_t, c_size_t, c_int
++ use openacc_internal2, only: acc_update_self_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++ type (*), dimension (*) :: a
++ integer (c_int64_t) len
++ integer (acc_handle_kind) async
++ call acc_update_self_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
++end subroutine
++
++subroutine acc_update_self_async_array_h (a, async)  ! Length is sizeof (a); ASYNC is converted to c_int.
++ use iso_c_binding, only: c_int
++ use openacc_internal2, only: acc_update_self_async_l
++ use openacc_kinds2, only: acc_handle_kind
++ type (*), dimension (..), contiguous :: a
++ integer (acc_handle_kind) async
++ call acc_update_self_async_l (a, sizeof (a), int (async, kind = c_int))
++end subroutine
+--- libgomp/taskloop.c.jj 2018-04-25 09:40:31.913655581 +0200
++++ libgomp/taskloop.c 2019-05-07 18:46:36.547109400 +0200
+@@ -149,11 +149,28 @@ GOMP_taskloop (void (*fn) (void *), void
+
+ if (flags & GOMP_TASK_FLAG_NOGROUP)
+ {
+- if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
+- return;
++ if (__builtin_expect (gomp_cancel_var, 0)
++ && thr->task
++ && thr->task->taskgroup)
++ {
++ if (thr->task->taskgroup->cancelled)
++ return;
++ if (thr->task->taskgroup->workshare
++ && thr->task->taskgroup->prev
++ && thr->task->taskgroup->prev->cancelled)
++ return;
++ }
+ }
+ else
+- ialias_call (GOMP_taskgroup_start) ();
++ {
++ ialias_call (GOMP_taskgroup_start) ();
++ if (flags & GOMP_TASK_FLAG_REDUCTION)
++ {
++ struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
++ uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
++ ialias_call (GOMP_taskgroup_reduction_register) (ptr);
++ }
++ }
+
+ if (priority > gomp_max_task_priority_var)
+ priority = gomp_max_task_priority_var;
+@@ -284,19 +301,31 @@ GOMP_taskloop (void (*fn) (void *), void
+ gomp_mutex_lock (&team->task_lock);
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+- if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
+- || (taskgroup && taskgroup->cancelled))
+- && cpyfn == NULL, 0))
++ if (__builtin_expect (gomp_cancel_var, 0)
++ && cpyfn == NULL)
+ {
+- gomp_mutex_unlock (&team->task_lock);
+- for (i = 0; i < num_tasks; i++)
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ {
++ do_cancel:
++ gomp_mutex_unlock (&team->task_lock);
++ for (i = 0; i < num_tasks; i++)
++ {
++ gomp_finish_task (tasks[i]);
++ free (tasks[i]);
++ }
++ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
++ ialias_call (GOMP_taskgroup_end) ();
++ return;
++ }
++ if (taskgroup)
+ {
+- gomp_finish_task (tasks[i]);
+- free (tasks[i]);
++ if (taskgroup->cancelled)
++ goto do_cancel;
++ if (taskgroup->workshare
++ && taskgroup->prev
++ && taskgroup->prev->cancelled)
++ goto do_cancel;
+ }
+- if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+- ialias_call (GOMP_taskgroup_end) ();
+- return;
+ }
+ if (taskgroup)
+ taskgroup->num_children += num_tasks;
+--- libgomp/parallel.c.jj 2018-04-25 09:40:31.926655587 +0200
++++ libgomp/parallel.c 2019-05-07 18:46:36.532109640 +0200
+@@ -123,7 +123,8 @@ void
+ GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
+ {
+ num_threads = gomp_resolve_num_threads (num_threads, 0);
+- gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads));
++ gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads),
++ NULL);
+ }
+
+ void
+@@ -161,14 +162,33 @@ GOMP_parallel_end (void)
+ ialias (GOMP_parallel_end)
+
+ void
+-GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, unsigned int flags)
++GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads,
++ unsigned int flags)
+ {
+ num_threads = gomp_resolve_num_threads (num_threads, 0);
+- gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads));
++ gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
++ NULL);
+ fn (data);
+ ialias_call (GOMP_parallel_end) ();
+ }
+
++unsigned /* Returns the resolved number of threads. */
++GOMP_parallel_reductions (void (*fn) (void *), void *data,
++ unsigned num_threads, unsigned int flags)
++{
++ struct gomp_taskgroup *taskgroup;
++ num_threads = gomp_resolve_num_threads (num_threads, 0);
++ uintptr_t *rdata = *(uintptr_t **)data; /* First word of DATA is read as a uintptr_t pointer. */
++ taskgroup = gomp_parallel_reduction_register (rdata, num_threads);
++ gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
++ taskgroup);
++ fn (data); /* The calling thread runs FN as well. */
++ ialias_call (GOMP_parallel_end) ();
++ gomp_sem_destroy (&taskgroup->taskgroup_sem);
++ free (taskgroup); /* Releases the taskgroup returned by the register call above. */
++ return num_threads;
++}
++
+ bool
+ GOMP_cancellation_point (int which)
+ {
+@@ -185,8 +205,15 @@ GOMP_cancellation_point (int which)
+ }
+ else if (which & GOMP_CANCEL_TASKGROUP)
+ {
+- if (thr->task->taskgroup && thr->task->taskgroup->cancelled)
+- return true;
++ if (thr->task->taskgroup)
++ {
++ if (thr->task->taskgroup->cancelled)
++ return true;
++ if (thr->task->taskgroup->workshare
++ && thr->task->taskgroup->prev
++ && thr->task->taskgroup->prev->cancelled)
++ return true;
++ }
+ /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
+ as #pragma omp cancel parallel also cancels all explicit
+ tasks. */
+@@ -218,11 +245,17 @@ GOMP_cancel (int which, bool do_cancel)
+ }
+ else if (which & GOMP_CANCEL_TASKGROUP)
+ {
+- if (thr->task->taskgroup && !thr->task->taskgroup->cancelled)
++ if (thr->task->taskgroup)
+ {
+- gomp_mutex_lock (&team->task_lock);
+- thr->task->taskgroup->cancelled = true;
+- gomp_mutex_unlock (&team->task_lock);
++ struct gomp_taskgroup *taskgroup = thr->task->taskgroup;
++ if (taskgroup->workshare && taskgroup->prev)
++ taskgroup = taskgroup->prev;
++ if (!taskgroup->cancelled)
++ {
++ gomp_mutex_lock (&team->task_lock);
++ taskgroup->cancelled = true;
++ gomp_mutex_unlock (&team->task_lock);
++ }
+ }
+ return true;
+ }
+--- libgomp/oacc-plugin.h.jj 2018-04-25 09:40:31.322655307 +0200
++++ libgomp/oacc-plugin.h 2019-05-07 18:46:36.531109656 +0200
+@@ -29,5 +29,6 @@
+
+ extern void GOMP_PLUGIN_async_unmap_vars (void *, int);
+ extern void *GOMP_PLUGIN_acc_thread (void);
++extern int GOMP_PLUGIN_acc_default_dim (unsigned int);
+
+ #endif
+--- libgomp/target.c.jj 2018-04-25 09:40:31.912655580 +0200
++++ libgomp/target.c 2019-05-07 19:07:21.032306327 +0200
+@@ -180,16 +180,22 @@ gomp_device_copy (struct gomp_device_des
+ /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
+ host to device memory transfers. */
+
++struct gomp_coalesce_chunk
++{
++ /* The starting and ending point of a coalesced chunk of memory. */
++ size_t start, end; /* END is set to start + len, i.e. it is exclusive. */
++};
++
+ struct gomp_coalesce_buf
+ {
+ /* Buffer into which gomp_copy_host2dev will memcpy data and from which
+ it will be copied to the device. */
+ void *buf;
+ struct target_mem_desc *tgt;
+- /* Array with offsets, chunks[2 * i] is the starting offset and
+- chunks[2 * i + 1] ending offset relative to tgt->tgt_start device address
++ /* Array with offsets, chunks[i].start is the starting offset and
++ chunks[i].end ending offset relative to tgt->tgt_start device address
+ of chunks which are to be copied to buf and later copied to device. */
+- size_t *chunks;
++ struct gomp_coalesce_chunk *chunks;
+ /* Number of chunks in chunks array, or -1 if coalesce buffering should not
+ be performed. */
+ long chunk_cnt;
+@@ -222,14 +228,14 @@ gomp_coalesce_buf_add (struct gomp_coale
+ {
+ if (cbuf->chunk_cnt < 0)
+ return;
+- if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
++ if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
+ {
+ cbuf->chunk_cnt = -1;
+ return;
+ }
+- if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP)
++ if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
+ {
+- cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len;
++ cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
+ cbuf->use_cnt++;
+ return;
+ }
+@@ -239,8 +245,8 @@ gomp_coalesce_buf_add (struct gomp_coale
+ if (cbuf->use_cnt == 1)
+ cbuf->chunk_cnt--;
+ }
+- cbuf->chunks[2 * cbuf->chunk_cnt] = start;
+- cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len;
++ cbuf->chunks[cbuf->chunk_cnt].start = start;
++ cbuf->chunks[cbuf->chunk_cnt].end = start + len;
+ cbuf->chunk_cnt++;
+ cbuf->use_cnt = 1;
+ }
+@@ -271,20 +277,20 @@ gomp_copy_host2dev (struct gomp_device_d
+ if (cbuf)
+ {
+ uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
+- if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
++ if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
+ {
+ long first = 0;
+ long last = cbuf->chunk_cnt - 1;
+ while (first <= last)
+ {
+ long middle = (first + last) >> 1;
+- if (cbuf->chunks[2 * middle + 1] <= doff)
++ if (cbuf->chunks[middle].end <= doff)
+ first = middle + 1;
+- else if (cbuf->chunks[2 * middle] <= doff)
++ else if (cbuf->chunks[middle].start <= doff)
+ {
+- if (doff + sz > cbuf->chunks[2 * middle + 1])
++ if (doff + sz > cbuf->chunks[middle].end)
+ gomp_fatal ("internal libgomp cbuf error");
+- memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]),
++ memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
+ h, sz);
+ return;
+ }
+@@ -510,8 +516,8 @@ gomp_map_vars (struct gomp_device_descr
+ cbuf.buf = NULL;
+ if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
+ {
+- cbuf.chunks
+- = (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t));
++ size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
++ cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
+ cbuf.chunk_cnt = 0;
+ }
+ if (pragma_kind == GOMP_MAP_VARS_TARGET)
+@@ -521,8 +527,8 @@ gomp_map_vars (struct gomp_device_descr
+ tgt_size = mapnum * sizeof (void *);
+ cbuf.chunk_cnt = 1;
+ cbuf.use_cnt = 1 + (mapnum > 1);
+- cbuf.chunks[0] = 0;
+- cbuf.chunks[1] = tgt_size;
++ cbuf.chunks[0].start = 0;
++ cbuf.chunks[0].end = tgt_size;
+ }
+
+ gomp_mutex_lock (&devicep->lock);
+@@ -707,7 +713,7 @@ gomp_map_vars (struct gomp_device_descr
+ if (cbuf.chunk_cnt > 0)
+ {
+ cbuf.buf
+- = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]);
++ = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
+ if (cbuf.buf)
+ {
+ cbuf.tgt = tgt;
+@@ -859,6 +865,7 @@ gomp_map_vars (struct gomp_device_descr
+ tgt->list[i].offset = 0;
+ tgt->list[i].length = k->host_end - k->host_start;
+ k->refcount = 1;
++ k->dynamic_refcount = 0;
+ tgt->refcount++;
+ array->left = NULL;
+ array->right = NULL;
+@@ -956,9 +963,10 @@ gomp_map_vars (struct gomp_device_descr
+ /* Set link pointer on target to the device address of the
+ mapped object. */
+ void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
+- devicep->host2dev_func (devicep->target_id,
+- (void *) n->tgt_offset,
+- &tgt_addr, sizeof (void *));
++ /* We intentionally do not use coalescing here, as it's not
++ data allocated by the current call to this function. */
++ gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
++ &tgt_addr, sizeof (void *), NULL);
+ }
+ array++;
+ }
+@@ -981,10 +989,14 @@ gomp_map_vars (struct gomp_device_descr
+ {
+ long c = 0;
+ for (c = 0; c < cbuf.chunk_cnt; ++c)
+- gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
+- (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
+- cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
++ gomp_copy_host2dev (devicep,
++ (void *) (tgt->tgt_start + cbuf.chunks[c].start),
++ (char *) cbuf.buf + (cbuf.chunks[c].start
++ - cbuf.chunks[0].start),
++ cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
+ free (cbuf.buf);
++ cbuf.buf = NULL;
++ cbufp = NULL;
+ }
+
+ /* If the variable from "omp target enter data" map-list was already mapped,
+@@ -1011,6 +1023,23 @@ gomp_unmap_tgt (struct target_mem_desc *
+ free (tgt);
+ }
+
++attribute_hidden bool
++gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
++{
++  /* Take K out of DEVICEP's map; re-insert its link_key node if set.  */
++  splay_tree_remove (&devicep->mem_map, k);
++  if (k->link_key != NULL)
++    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
++  /* Drop one descriptor reference; unmapping the last one yields true.  */
++  if (k->tgt->refcount <= 1)
++    {
++      gomp_unmap_tgt (k->tgt);
++      return true;
++    }
++  k->tgt->refcount--;
++  return false;
++}
++
+ /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
+ variables back from device to host: if it is false, it is assumed that this
+ has been done already. */
+@@ -1059,16 +1088,7 @@ gomp_unmap_vars (struct target_mem_desc
+ + tgt->list[i].offset),
+ tgt->list[i].length);
+ if (do_unmap)
+- {
+- splay_tree_remove (&devicep->mem_map, k);
+- if (k->link_key)
+- splay_tree_insert (&devicep->mem_map,
+- (splay_tree_node) k->link_key);
+- if (k->tgt->refcount > 1)
+- k->tgt->refcount--;
+- else
+- gomp_unmap_tgt (k->tgt);
+- }
++ gomp_remove_var (devicep, k);
+ }
+
+ if (tgt->refcount > 1)
+@@ -1298,17 +1318,7 @@ gomp_unload_image_from_device (struct go
+ else
+ {
+ splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
+- splay_tree_remove (&devicep->mem_map, n);
+- if (n->link_key)
+- {
+- if (n->tgt->refcount > 1)
+- n->tgt->refcount--;
+- else
+- {
+- is_tgt_unmapped = true;
+- gomp_unmap_tgt (n->tgt);
+- }
+- }
++ is_tgt_unmapped = gomp_remove_var (devicep, n);
+ }
+ }
+
+@@ -1855,11 +1865,20 @@ GOMP_target_update_ext (int device, size
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+- if (team
+- && (gomp_team_barrier_cancelled (&team->barrier)
+- || (thr->task->taskgroup
+- && thr->task->taskgroup->cancelled)))
+- return;
++ if (__builtin_expect (gomp_cancel_var, 0) && team)
++ {
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ return;
++ if (thr->task->taskgroup)
++ {
++ if (thr->task->taskgroup->cancelled)
++ return;
++ if (thr->task->taskgroup->workshare
++ && thr->task->taskgroup->prev
++ && thr->task->taskgroup->prev->cancelled)
++ return;
++ }
++ }
+
+ gomp_task_maybe_wait_for_dependencies (depend);
+ }
+@@ -1874,10 +1893,20 @@ GOMP_target_update_ext (int device, size
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+- if (team
+- && (gomp_team_barrier_cancelled (&team->barrier)
+- || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+- return;
++ if (__builtin_expect (gomp_cancel_var, 0) && team)
++ {
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ return;
++ if (thr->task->taskgroup)
++ {
++ if (thr->task->taskgroup->cancelled)
++ return;
++ if (thr->task->taskgroup->workshare
++ && thr->task->taskgroup->prev
++ && thr->task->taskgroup->prev->cancelled)
++ return;
++ }
++ }
+
+ gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
+ }
+@@ -1986,11 +2015,20 @@ GOMP_target_enter_exit_data (int device,
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+- if (team
+- && (gomp_team_barrier_cancelled (&team->barrier)
+- || (thr->task->taskgroup
+- && thr->task->taskgroup->cancelled)))
+- return;
++ if (__builtin_expect (gomp_cancel_var, 0) && team)
++ {
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ return;
++ if (thr->task->taskgroup)
++ {
++ if (thr->task->taskgroup->cancelled)
++ return;
++ if (thr->task->taskgroup->workshare
++ && thr->task->taskgroup->prev
++ && thr->task->taskgroup->prev->cancelled)
++ return;
++ }
++ }
+
+ gomp_task_maybe_wait_for_dependencies (depend);
+ }
+@@ -2005,10 +2043,20 @@ GOMP_target_enter_exit_data (int device,
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+- if (team
+- && (gomp_team_barrier_cancelled (&team->barrier)
+- || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+- return;
++ if (__builtin_expect (gomp_cancel_var, 0) && team)
++ {
++ if (gomp_team_barrier_cancelled (&team->barrier))
++ return;
++ if (thr->task->taskgroup)
++ {
++ if (thr->task->taskgroup->cancelled)
++ return;
++ if (thr->task->taskgroup->workshare
++ && thr->task->taskgroup->prev
++ && thr->task->taskgroup->prev->cancelled)
++ return;
++ }
++ }
+
+ size_t i;
+ if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
+@@ -2197,8 +2245,9 @@ omp_target_is_present (void *ptr, int de
+ }
+
+ int
+-omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
+- size_t src_offset, int dst_device_num, int src_device_num)
++omp_target_memcpy (void *dst, void *src, size_t length,
++ size_t dst_offset, size_t src_offset, int dst_device_num,
++ int src_device_num)
+ {
+ struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+ bool ret;
+@@ -2287,21 +2336,25 @@ omp_target_memcpy_rect_worker (void *dst
+ return EINVAL;
+ if (dst_devicep == NULL && src_devicep == NULL)
+ {
+- memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
++ memcpy ((char *) dst + dst_off, (char *) src + src_off,
++ length);
+ ret = 1;
+ }
+ else if (src_devicep == NULL)
+ ret = dst_devicep->host2dev_func (dst_devicep->target_id,
+ (char *) dst + dst_off,
+- (char *) src + src_off, length);
++ (char *) src + src_off,
++ length);
+ else if (dst_devicep == NULL)
+ ret = src_devicep->dev2host_func (src_devicep->target_id,
+ (char *) dst + dst_off,
+- (char *) src + src_off, length);
++ (char *) src + src_off,
++ length);
+ else if (src_devicep == dst_devicep)
+ ret = src_devicep->dev2dev_func (src_devicep->target_id,
+ (char *) dst + dst_off,
+- (char *) src + src_off, length);
++ (char *) src + src_off,
++ length);
+ else
+ ret = 0;
+ return ret ? 0 : EINVAL;
+@@ -2396,8 +2449,8 @@ omp_target_memcpy_rect (void *dst, void
+ }
+
+ int
+-omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
+- size_t device_offset, int device_num)
++omp_target_associate_ptr (void *host_ptr, void *device_ptr,
++ size_t size, size_t device_offset, int device_num)
+ {
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+ return EINVAL;
+@@ -2499,6 +2552,31 @@ omp_target_disassociate_ptr (void *ptr,
+ return ret;
+ }
+
++int
++omp_pause_resource (omp_pause_resource_t kind, int device_num)
++{
++  (void) kind; /* KIND is accepted but not consulted.  */
++  /* Host request: hand back whatever gomp_pause_host reports.  */
++  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
++    return gomp_pause_host ();
++  /* A valid offload device is a no-op for now; any other number is -1.  */
++  bool known = device_num >= 0 && device_num < gomp_get_num_devices ();
++  return known ? 0 : -1;
++}
++
++int
++omp_pause_resource_all (omp_pause_resource_t kind)
++{
++  (void) kind; /* KIND is accepted but not consulted.  */
++  /* Only gomp_pause_host is called; target devices are left alone for
++     now.  Any nonzero status from it is reported as -1.  */
++  int host_status = gomp_pause_host ();
++  return host_status != 0 ? -1 : 0;
++}
++
++ialias (omp_pause_resource)
++ialias (omp_pause_resource_all)
++
+ #ifdef PLUGIN_SUPPORT
+
+ /* This function tries to load a plugin for DEVICE. Name of plugin is passed
+@@ -2632,9 +2710,9 @@ gomp_target_fini (void)
+ }
+ }
+
+-/* This function initializes the runtime needed for offloading.
+- It parses the list of offload targets and tries to load the plugins for
+- these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
++/* This function initializes the runtime for offloading.
++ It parses the list of offload plugins, and tries to load these.
++ On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
+ will be set, and the array DEVICES initialized, containing descriptors for
+ corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
+ by the others. */
+@@ -2651,7 +2729,7 @@ gomp_target_init (void)
+ num_devices = 0;
+ devices = NULL;
+
+- cur = OFFLOAD_TARGETS;
++ cur = OFFLOAD_PLUGINS;
+ if (*cur)
+ do
+ {
+--- libgomp/ordered.c.jj 2018-04-25 09:40:31.926655587 +0200
++++ libgomp/ordered.c 2019-05-07 18:46:36.532109640 +0200
+@@ -259,7 +259,8 @@ GOMP_ordered_end (void)
+ #define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__)
+
+ void
+-gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
++gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size,
++ size_t extra)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+@@ -269,13 +270,24 @@ gomp_doacross_init (unsigned ncounts, lo
+ struct gomp_doacross_work_share *doacross;
+
+ if (team == NULL || team->nthreads == 1)
+- return;
++ {
++ empty:
++ if (!extra)
++ ws->doacross = NULL;
++ else
++ {
++ doacross = gomp_malloc_cleared (sizeof (*doacross) + extra);
++ doacross->extra = (void *) (doacross + 1);
++ ws->doacross = doacross;
++ }
++ return;
++ }
+
+ for (i = 0; i < ncounts; i++)
+ {
+ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */
+ if (counts[i] == 0)
+- return;
++ goto empty;
+
+ if (num_bits <= MAX_COLLAPSED_BITS)
+ {
+@@ -314,7 +326,7 @@ gomp_doacross_init (unsigned ncounts, lo
+ elt_sz = (elt_sz + 63) & ~63UL;
+
+ doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz
+- + shift_sz);
++ + shift_sz + extra);
+ doacross->chunk_size = chunk_size;
+ doacross->elt_sz = elt_sz;
+ doacross->ncounts = ncounts;
+@@ -322,6 +334,13 @@ gomp_doacross_init (unsigned ncounts, lo
+ doacross->array = (unsigned char *)
+ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
+ & ~(uintptr_t) 63);
++ if (extra)
++ {
++ doacross->extra = doacross->array + num_ents * elt_sz;
++ memset (doacross->extra, '\0', extra);
++ }
++ else
++ doacross->extra = NULL;
+ if (num_bits <= MAX_COLLAPSED_BITS)
+ {
+ unsigned int shift_count = 0;
+@@ -360,7 +379,8 @@ GOMP_doacross_post (long *counts)
+ unsigned long ent;
+ unsigned int i;
+
+- if (__builtin_expect (doacross == NULL, 0))
++ if (__builtin_expect (doacross == NULL, 0)
++ || __builtin_expect (doacross->array == NULL, 0))
+ {
+ __sync_synchronize ();
+ return;
+@@ -411,7 +431,8 @@ GOMP_doacross_wait (long first, ...)
+ unsigned long ent;
+ unsigned int i;
+
+- if (__builtin_expect (doacross == NULL, 0))
++ if (__builtin_expect (doacross == NULL, 0)
++ || __builtin_expect (doacross->array == NULL, 0))
+ {
+ __sync_synchronize ();
+ return;
+@@ -488,7 +509,8 @@ GOMP_doacross_wait (long first, ...)
+ typedef unsigned long long gomp_ull;
+
+ void
+-gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size)
++gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts,
++ gomp_ull chunk_size, size_t extra)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+@@ -498,13 +520,24 @@ gomp_doacross_ull_init (unsigned ncounts
+ struct gomp_doacross_work_share *doacross;
+
+ if (team == NULL || team->nthreads == 1)
+- return;
++ {
++ empty:
++ if (!extra)
++ ws->doacross = NULL;
++ else
++ {
++ doacross = gomp_malloc_cleared (sizeof (*doacross) + extra);
++ doacross->extra = (void *) (doacross + 1);
++ ws->doacross = doacross;
++ }
++ return;
++ }
+
+ for (i = 0; i < ncounts; i++)
+ {
+ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */
+ if (counts[i] == 0)
+- return;
++ goto empty;
+
+ if (num_bits <= MAX_COLLAPSED_BITS)
+ {
+@@ -557,6 +590,13 @@ gomp_doacross_ull_init (unsigned ncounts
+ doacross->array = (unsigned char *)
+ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
+ & ~(uintptr_t) 63);
++ if (extra)
++ {
++ doacross->extra = doacross->array + num_ents * elt_sz;
++ memset (doacross->extra, '\0', extra);
++ }
++ else
++ doacross->extra = NULL;
+ if (num_bits <= MAX_COLLAPSED_BITS)
+ {
+ unsigned int shift_count = 0;
+@@ -595,7 +635,8 @@ GOMP_doacross_ull_post (gomp_ull *counts
+ unsigned long ent;
+ unsigned int i;
+
+- if (__builtin_expect (doacross == NULL, 0))
++ if (__builtin_expect (doacross == NULL, 0)
++ || __builtin_expect (doacross->array == NULL, 0))
+ {
+ __sync_synchronize ();
+ return;
+@@ -667,7 +708,8 @@ GOMP_doacross_ull_wait (gomp_ull first,
+ unsigned long ent;
+ unsigned int i;
+
+- if (__builtin_expect (doacross == NULL, 0))
++ if (__builtin_expect (doacross == NULL, 0)
++ || __builtin_expect (doacross->array == NULL, 0))
+ {
+ __sync_synchronize ();
+ return;
+--- libgomp/alloc.c.jj 2018-04-25 09:40:31.926655587 +0200
++++ libgomp/alloc.c 2019-05-07 18:46:36.336112770 +0200
+@@ -57,3 +57,50 @@ gomp_realloc (void *old, size_t size)
+ gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
+ return ret;
+ }
++
++/* Allocate SIZE bytes aligned to at least AL; failure is fatal.  */
++void *
++gomp_aligned_alloc (size_t al, size_t size)
++{
++  void *ret;
++  al = al < sizeof (void *) ? sizeof (void *) : al; /* Pointer-size floor.  */
++#ifdef HAVE_ALIGNED_ALLOC
++  ret = aligned_alloc (al, size);
++#elif defined(HAVE__ALIGNED_MALLOC)
++  ret = _aligned_malloc (size, al); /* Released via _aligned_free.  */
++#elif defined(HAVE_POSIX_MEMALIGN)
++  if (posix_memalign (&ret, al, size) != 0)
++    ret = NULL;
++#elif defined(HAVE_MEMALIGN)
++  {
++    extern void *memalign (size_t, size_t);
++    ret = memalign (al, size);
++  }
++#else
++  ret = NULL; /* Fallback: over-allocate with malloc and align by hand.  */
++  if ((al & (al - 1)) == 0 && size && size + al > size) /* No wrap.  */
++    {
++      void *p = malloc (size + al);
++      if (p)
++        {
++          ret = (void *) (((uintptr_t) p + al) & -al);
++          ((void **) ret)[-1] = p; /* gomp_aligned_free frees this.  */
++        }
++    }
++#endif
++  if (ret == NULL)
++    gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
++  return ret;
++}
++
++void
++gomp_aligned_free (void *ptr)
++{
++#if !defined(HAVE_ALIGNED_ALLOC) && defined(HAVE__ALIGNED_MALLOC)
++  _aligned_free (ptr); /* free is not valid for _aligned_malloc blocks.  */
++#elif !defined(GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC)
++  free (ptr ? ((void **) ptr)[-1] : NULL); /* malloc's own pointer.  */
++#else
++  free (ptr);
++#endif
++}
+--- libgomp/configure.ac.jj 2018-04-25 09:40:31.321655307 +0200
++++ libgomp/configure.ac 2019-05-07 18:46:36.471110614 +0200
+@@ -219,6 +219,7 @@ m4_include([plugin/configfrag.ac])
+
+ # Check for functions needed.
+ AC_CHECK_FUNCS(getloadavg clock_gettime strtoull)
++AC_CHECK_FUNCS(aligned_alloc posix_memalign memalign _aligned_malloc)
+
+ # Check for broken semaphore implementation on darwin.
+ # sem_init returns: sem_init error: Function not implemented.
+@@ -266,6 +267,41 @@ if test $ac_cv_func_clock_gettime = no;
+ [Define to 1 if you have the `clock_gettime' function.])])
+ fi
+
++# Check for uname (it takes a pointer, so the probe must pass &buf).
++AC_COMPILE_IFELSE(
++ [AC_LANG_PROGRAM(
++  [#include <string.h>
++   #include <stdlib.h>
++   #include <sys/utsname.h>],
++  [struct utsname buf;
++   volatile size_t len = 0;
++   if (!uname (&buf))
++     len = strlen (buf.nodename);])],
++  AC_DEFINE(HAVE_UNAME, 1,
++[ Define if uname is supported and struct utsname has nodename field.]))
++
++# Check for gethostname.
++AC_COMPILE_IFELSE(
++ [AC_LANG_PROGRAM(
++ [#include <unistd.h>],
++ [
++changequote(,)dnl
++ char buf[256];
++ if (gethostname (buf, sizeof (buf) - 1) == 0)
++ buf[255] = '\0';
++changequote([,])dnl
++ ])],
++ AC_DEFINE(HAVE_GETHOSTNAME, 1,
++[ Define if gethostname is supported.]))
++
++# Check for getpid.
++AC_COMPILE_IFELSE(
++ [AC_LANG_PROGRAM(
++ [#include <unistd.h>],
++ [int pid = getpid ();])],
++ AC_DEFINE(HAVE_GETPID, 1,
++[ Define if getpid is supported.]))
++
+ # See if we support thread-local storage.
+ GCC_CHECK_TLS
+
+--- libgomp/icv.c.jj 2018-04-25 09:40:31.870655561 +0200
++++ libgomp/icv.c 2019-05-07 18:46:36.501110134 +0200
+@@ -69,7 +69,7 @@ void
+ omp_set_schedule (omp_sched_t kind, int chunk_size)
+ {
+ struct gomp_task_icv *icv = gomp_icv (true);
+- switch (kind)
++ switch (kind & ~omp_sched_monotonic)
+ {
+ case omp_sched_static:
+ if (chunk_size < 1)
+--- libgomp/configure.jj 2018-04-25 09:40:31.913655581 +0200
++++ libgomp/configure 2019-05-07 18:47:37.961128420 +0200
+@@ -636,6 +636,8 @@ PLUGIN_NVPTX_FALSE
+ PLUGIN_NVPTX_TRUE
+ offload_additional_lib_paths
+ offload_additional_options
++offload_targets
++offload_plugins
+ PLUGIN_HSA_LIBS
+ PLUGIN_HSA_LDFLAGS
+ PLUGIN_HSA_CPPFLAGS
+@@ -648,7 +650,6 @@ PLUGIN_NVPTX_CPPFLAGS
+ PLUGIN_NVPTX
+ CUDA_DRIVER_LIB
+ CUDA_DRIVER_INCLUDE
+-offload_targets
+ libtool_VERSION
+ ac_ct_FC
+ FCFLAGS
+@@ -11157,7 +11158,7 @@ else
+ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+ lt_status=$lt_dlunknown
+ cat > conftest.$ac_ext <<_LT_EOF
+-#line 11160 "configure"
++#line 11161 "configure"
+ #include "confdefs.h"
+
+ #if HAVE_DLFCN_H
+@@ -11263,7 +11264,7 @@ else
+ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+ lt_status=$lt_dlunknown
+ cat > conftest.$ac_ext <<_LT_EOF
+-#line 11266 "configure"
++#line 11267 "configure"
+ #include "confdefs.h"
+
+ #if HAVE_DLFCN_H
+@@ -15167,8 +15168,6 @@ fi
+ # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ # <http://www.gnu.org/licenses/>.
+
+-offload_targets=
+-
+ plugin_support=yes
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl" >&5
+ $as_echo_n "checking for dlsym in -ldl... " >&6; }
+@@ -15302,7 +15301,11 @@ if test "${with_cuda_driver_lib+set}" =
+ fi
+
+ case "x$with_cuda_driver" in
+- x | xno) ;;
++ x) ;;
++ xno)
++ CUDA_DRIVER_INCLUDE=no
++ CUDA_DRIVER_LIB=no
++ ;;
+ *) CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
+ CUDA_DRIVER_LIB=$with_cuda_driver/lib
+ ;;
+@@ -15313,10 +15316,12 @@ fi
+ if test "x$with_cuda_driver_lib" != x; then
+ CUDA_DRIVER_LIB=$with_cuda_driver_lib
+ fi
+-if test "x$CUDA_DRIVER_INCLUDE" != x; then
++if test "x$CUDA_DRIVER_INCLUDE" != x \
++ && test "x$CUDA_DRIVER_INCLUDE" != xno; then
+ CUDA_DRIVER_CPPFLAGS=-I$CUDA_DRIVER_INCLUDE
+ fi
+-if test "x$CUDA_DRIVER_LIB" != x; then
++if test "x$CUDA_DRIVER_LIB" != x \
++ && test "x$CUDA_DRIVER_LIB" != xno; then
+ CUDA_DRIVER_LDFLAGS=-L$CUDA_DRIVER_LIB
+ fi
+
+@@ -15383,7 +15388,13 @@ PLUGIN_HSA_LIBS=
+
+
+
+-# Get offload targets and path to install tree of offloading compiler.
++# Parse '--enable-offload-targets', figure out the corresponding libgomp
++# plugins, and configure to find the corresponding offload compilers.
++# 'offload_plugins' and 'offload_targets' will be populated in the same order.
++offload_plugins=
++offload_targets=
++
++
+ offload_additional_options=
+ offload_additional_lib_paths=
+
+@@ -15403,10 +15403,10 @@ if test x"$enable_offload_targets" != x;
+ for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do
+ tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'`
+ tgt=`echo $tgt | sed 's/=.*//'`
+- tgt_name=
++ tgt_plugin=
+ case $tgt in
+ *-intelmic-* | *-intelmicemul-*)
+- tgt_name=intelmic
++ tgt_plugin=intelmic
+ ;;
+ nvptx*)
+ case "${target}" in
+@@ -15418,19 +15418,21 @@ if test x"$enable_offload_targets" != x;
+ PLUGIN_NVPTX=0
+ ;;
+ *)
+- tgt_name=nvptx
++ tgt_plugin=nvptx
+ PLUGIN_NVPTX=$tgt
+- PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
+- PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
+- PLUGIN_NVPTX_LIBS='-lcuda'
++ if test "x$CUDA_DRIVER_INCLUDE" != xno \
++ && test "x$CUDA_DRIVER_LIB" != xno; then # neither half set to "no"
++ PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
++ PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
++ PLUGIN_NVPTX_LIBS='-lcuda'
+
+- PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
+- CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
+- PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
+- LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
+- PLUGIN_NVPTX_save_LIBS=$LIBS
+- LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
+- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
++ PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
++ CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
++ PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
++ LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
++ PLUGIN_NVPTX_save_LIBS=$LIBS
++ LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
++ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ /* end confdefs.h. */
+ #include "cuda.h"
+ int
+@@ -15446,13 +15448,16 @@ if ac_fn_c_try_link "$LINENO"; then :
+ fi
+ rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+- CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
+- LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
+- LIBS=$PLUGIN_NVPTX_save_LIBS
++ CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
++ LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
++ LIBS=$PLUGIN_NVPTX_save_LIBS
++ fi
+ case $PLUGIN_NVPTX in
+ nvptx*)
+- if test "x$CUDA_DRIVER_INCLUDE" = x \
+- && test "x$CUDA_DRIVER_LIB" = x; then
++ if (test "x$CUDA_DRIVER_INCLUDE" = x \
++ || test "x$CUDA_DRIVER_INCLUDE" = xno) \
++ && (test "x$CUDA_DRIVER_LIB" = x \
++ || test "x$CUDA_DRIVER_LIB" = xno); then
+ PLUGIN_NVPTX=1
+ PLUGIN_NVPTX_CPPFLAGS='-I$(srcdir)/plugin/cuda'
+ PLUGIN_NVPTX_LIBS='-ldl'
+@@ -15452,7 +15468,7 @@ rm -f core conftest.err conftest.$ac_obj
+ PLUGIN_HSA=0
+ ;;
+ *)
+- tgt_name=hsa
++ tgt_plugin=hsa
+ PLUGIN_HSA=$tgt
+ PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
+ PLUGIN_HSA_LDFLAGS="$HSA_RUNTIME_LDFLAGS"
+@@ -15470,7 +15486,7 @@ rm -f core conftest.err conftest.$ac_obj
+ LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
+ LIBS=$PLUGIN_HSA_save_LIBS
+ case $PLUGIN_HSA in
+- hsa*)
++ hsa*)
+ HSA_PLUGIN=0
+ as_fn_error "HSA run-time package required for HSA support" "$LINENO" 5
+ ;;
+@@ -15487,16 +15503,19 @@ rm -f core conftest.err conftest.$ac_obj
+ as_fn_error "unknown offload target specified" "$LINENO" 5
+ ;;
+ esac
+- if test x"$tgt_name" = x; then
+- # Don't configure libgomp for this offloading target if we don't build
+- # the corresponding plugin.
++ if test x"$tgt_plugin" = x; then
++ # Not configuring libgomp for this offload target if we're not building
++ # the corresponding offload plugin.
+ continue
+- elif test x"$offload_targets" = x; then
+- offload_targets=$tgt_name
++ elif test x"$offload_plugins" = x; then
++ offload_plugins=$tgt_plugin
++ offload_targets=$tgt
+ else
+- offload_targets=$offload_targets,$tgt_name
++ offload_plugins=$offload_plugins,$tgt_plugin
++ offload_targets=$offload_targets,$tgt
+ fi
+- if test "$tgt_name" = hsa; then
++ # Configure additional search paths.
++ if test "$tgt_plugin" = hsa; then
+ # Offloading compilation is all handled by the target compiler.
+ :
+ elif test x"$tgt_dir" != x; then
+@@ -15510,7 +15529,7 @@ rm -f core conftest.err conftest.$ac_obj
+ fi
+
+ cat >>confdefs.h <<_ACEOF
+-#define OFFLOAD_TARGETS "$offload_targets"
++#define OFFLOAD_PLUGINS "$offload_plugins"
+ _ACEOF
+
+ if test $PLUGIN_NVPTX = 1; then
+@@ -15570,6 +15589,19 @@ _ACEOF
+ fi
+ done
+
++for ac_func in aligned_alloc posix_memalign memalign _aligned_malloc
++do :
++ as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
++ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
++eval as_val=\$$as_ac_var
++ if test "x$as_val" = x""yes; then :
++ cat >>confdefs.h <<_ACEOF
++#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
++_ACEOF
++
++fi
++done
++
+
+ # Check for broken semaphore implementation on darwin.
+ # sem_init returns: sem_init error: Function not implemented.
+@@ -15784,6 +15816,72 @@ fi
+
+ fi
+
++# Check for uname (it takes a pointer, so the probe must pass &buf).
++cat confdefs.h - <<_ACEOF >conftest.$ac_ext
++/* end confdefs.h. */
++#include <string.h>
++ #include <stdlib.h>
++ #include <sys/utsname.h>
++int
++main ()
++{
++struct utsname buf;
++ volatile size_t len = 0;
++ if (!uname (&buf))
++ len = strlen (buf.nodename);
++ ;
++ return 0;
++}
++_ACEOF
++if ac_fn_c_try_compile "$LINENO"; then :
++
++$as_echo "#define HAVE_UNAME 1" >>confdefs.h
++
++fi
++rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
++
++# Check for gethostname.
++cat confdefs.h - <<_ACEOF >conftest.$ac_ext
++/* end confdefs.h. */
++#include <unistd.h>
++int
++main ()
++{
++
++ char buf[256];
++ if (gethostname (buf, sizeof (buf) - 1) == 0)
++ buf[255] = '\0';
++
++ ;
++ return 0;
++}
++_ACEOF
++if ac_fn_c_try_compile "$LINENO"; then :
++
++$as_echo "#define HAVE_GETHOSTNAME 1" >>confdefs.h
++
++fi
++rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
++
++# Check for getpid.
++cat confdefs.h - <<_ACEOF >conftest.$ac_ext
++/* end confdefs.h. */
++#include <unistd.h>
++int
++main ()
++{
++int pid = getpid ();
++ ;
++ return 0;
++}
++_ACEOF
++if ac_fn_c_try_compile "$LINENO"; then :
++
++$as_echo "#define HAVE_GETPID 1" >>confdefs.h
++
++fi
++rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
++
+ # See if we support thread-local storage.
+
+
+--- libgomp/Makefile.am.jj 2018-04-25 09:40:31.926655587 +0200
++++ libgomp/Makefile.am 2019-05-07 19:59:03.683989317 +0200
+@@ -63,12 +63,13 @@ libgomp_la_SOURCES = alloc.c atomic.c ba
+ parallel.c sections.c single.c task.c team.c work.c lock.c mutex.c \
+ proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
+ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
+- oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c
++ oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
++ affinity-fmt.c teams.c
+
+ include $(top_srcdir)/plugin/Makefrag.am
+
+ if USE_FORTRAN
+-libgomp_la_SOURCES += openacc.f90
++libgomp_la_SOURCES += openacc2.f90
+ endif
+
+ nodist_noinst_HEADERS = libgomp_f.h
+@@ -87,8 +88,6 @@ omp_lib_kinds.mod: omp_lib.mod
+ :
+ openacc_kinds.mod: openacc.mod
+ :
+-openacc.mod: openacc.lo
+- :
+ %.mod: %.f90
+ $(FC) $(FCFLAGS) -fsyntax-only $<
+ fortran.lo: libgomp_f.h
+--- libgomp/oacc-mem.c.jj 2018-04-25 09:40:31.924655586 +0200
++++ libgomp/oacc-mem.c 2019-05-07 18:46:36.530109672 +0200
+@@ -153,8 +153,9 @@ acc_free (void *d)
+ gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
+ }
+
+-void
+-acc_memcpy_to_device (void *d, void *h, size_t s)
++static void
++memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
++ const char *libfnname)
+ {
+ /* No need to call lazy open here, as the device pointer must have
+ been obtained from a routine that did that. */
+@@ -164,31 +165,49 @@ acc_memcpy_to_device (void *d, void *h,
+
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ {
+- memmove (d, h, s);
++ if (from)
++ memmove (h, d, s);
++ else
++ memmove (d, h, s);
+ return;
+ }
+
+- if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s))
+- gomp_fatal ("error in %s", __FUNCTION__);
++ if (async > acc_async_sync)
++ thr->dev->openacc.async_set_async_func (async);
++
++ bool ret = (from
++ ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s)
++ : thr->dev->host2dev_func (thr->dev->target_id, d, h, s));
++
++ if (async > acc_async_sync)
++ thr->dev->openacc.async_set_async_func (acc_async_sync);
++
++ if (!ret)
++ gomp_fatal ("error in %s", libfnname);
+ }
+
+ void
+-acc_memcpy_from_device (void *h, void *d, size_t s)
++acc_memcpy_to_device (void *d, void *h, size_t s)
+ {
+- /* No need to call lazy open here, as the device pointer must have
+- been obtained from a routine that did that. */
+- struct goacc_thread *thr = goacc_thread ();
++ memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
++}
+
+- assert (thr && thr->dev);
++void
++acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
++{ /* Host-to-device copy (from == false) using the caller's ASYNC value.  */
++ memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
++}
+
+- if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+- {
+- memmove (h, d, s);
+- return;
+- }
++void
++acc_memcpy_from_device (void *h, void *d, size_t s)
++{
++ memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
++}
+
+- if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s))
+- gomp_fatal ("error in %s", __FUNCTION__);
++void
++acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
++{
++ memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
+ }
+
+ /* Return the device pointer that corresponds to host data H. Or NULL
+@@ -347,6 +366,7 @@ acc_map_data (void *h, void *d, size_t s
+
+ tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
+ &kinds, true, GOMP_MAP_VARS_OPENACC);
++ tgt->list[0].key->refcount = REFCOUNT_INFINITY;
+ }
+
+ gomp_mutex_lock (&acc_dev->lock);
+@@ -389,6 +409,9 @@ acc_unmap_data (void *h)
+ (void *) n->host_start, (int) host_size, (void *) h);
+ }
+
++ /* Mark for removal. */
++ n->refcount = 1;
++
+ t = n->tgt;
+
+ if (t->refcount == 2)
+@@ -424,7 +447,7 @@ acc_unmap_data (void *h)
+ #define FLAG_COPY (1 << 2)
+
+ static void *
+-present_create_copy (unsigned f, void *h, size_t s)
++present_create_copy (unsigned f, void *h, size_t s, int async)
+ {
+ void *d;
+ splay_tree_key n;
+@@ -460,6 +483,11 @@ present_create_copy (unsigned f, void *h
+ gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+ }
+
++ if (n->refcount != REFCOUNT_INFINITY)
++ {
++ n->refcount++;
++ n->dynamic_refcount++;
++ }
+ gomp_mutex_unlock (&acc_dev->lock);
+ }
+ else if (!(f & FLAG_CREATE))
+@@ -481,8 +509,16 @@ present_create_copy (unsigned f, void *h
+
+ gomp_mutex_unlock (&acc_dev->lock);
+
++ if (async > acc_async_sync)
++ acc_dev->openacc.async_set_async_func (async);
++
+ tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
+ GOMP_MAP_VARS_OPENACC);
++ /* Initialize dynamic refcount. */
++ tgt->list[0].key->dynamic_refcount = 1;
++
++ if (async > acc_async_sync)
++ acc_dev->openacc.async_set_async_func (acc_async_sync);
+
+ gomp_mutex_lock (&acc_dev->lock);
+
+@@ -499,53 +535,71 @@ present_create_copy (unsigned f, void *h
+ void *
+ acc_create (void *h, size_t s)
+ {
+- return present_create_copy (FLAG_CREATE, h, s);
++ return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
+ }
+
+-void *
+-acc_copyin (void *h, size_t s)
++void
++acc_create_async (void *h, size_t s, int async)
+ {
+- return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
++ present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
+ }
+
++/* acc_present_or_create used to be what acc_create is now. */
++/* acc_pcreate is acc_present_or_create by a different name. */
++#ifdef HAVE_ATTRIBUTE_ALIAS
++strong_alias (acc_create, acc_present_or_create)
++strong_alias (acc_create, acc_pcreate)
++#else
+ void *
+ acc_present_or_create (void *h, size_t s)
+ {
+- return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
++ return acc_create (h, s);
+ }
+
+-/* acc_pcreate is acc_present_or_create by a different name. */
+-#ifdef HAVE_ATTRIBUTE_ALIAS
+-strong_alias (acc_present_or_create, acc_pcreate)
+-#else
+ void *
+ acc_pcreate (void *h, size_t s)
+ {
+- return acc_present_or_create (h, s);
++ return acc_create (h, s);
+ }
+ #endif
+
+ void *
+-acc_present_or_copyin (void *h, size_t s)
++acc_copyin (void *h, size_t s)
++{
++ return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
++ acc_async_sync);
++}
++
++void
++acc_copyin_async (void *h, size_t s, int async)
+ {
+- return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
++ present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
+ }
+
++/* acc_present_or_copyin used to be what acc_copyin is now. */
+ /* acc_pcopyin is acc_present_or_copyin by a different name. */
+ #ifdef HAVE_ATTRIBUTE_ALIAS
+-strong_alias (acc_present_or_copyin, acc_pcopyin)
++strong_alias (acc_copyin, acc_present_or_copyin)
++strong_alias (acc_copyin, acc_pcopyin)
+ #else
+ void *
++acc_present_or_copyin (void *h, size_t s)
++{
++ return acc_copyin (h, s);
++}
++
++void *
+ acc_pcopyin (void *h, size_t s)
+ {
+- return acc_present_or_copyin (h, s);
++ return acc_copyin (h, s);
+ }
+ #endif
+
+-#define FLAG_COPYOUT (1 << 0)
++#define FLAG_COPYOUT (1 << 0)
++#define FLAG_FINALIZE (1 << 1)
+
+ static void
+-delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
++delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
+ {
+ size_t host_size;
+ splay_tree_key n;
+@@ -581,31 +635,111 @@ delete_copyout (unsigned f, void *h, siz
+ (void *) n->host_start, (int) host_size, (void *) h, (int) s);
+ }
+
+- gomp_mutex_unlock (&acc_dev->lock);
++ if (n->refcount == REFCOUNT_INFINITY)
++ {
++ n->refcount = 0;
++ n->dynamic_refcount = 0;
++ }
++ if (n->refcount < n->dynamic_refcount)
++ {
++ gomp_mutex_unlock (&acc_dev->lock);
++ gomp_fatal ("Dynamic reference counting assert fail\n");
++ }
+
+- if (f & FLAG_COPYOUT)
+- acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
++ if (f & FLAG_FINALIZE)
++ {
++ n->refcount -= n->dynamic_refcount;
++ n->dynamic_refcount = 0;
++ }
++ else if (n->dynamic_refcount)
++ {
++ n->dynamic_refcount--;
++ n->refcount--;
++ }
++
++ if (n->refcount == 0)
++ {
++ if (n->tgt->refcount == 2)
++ {
++ struct target_mem_desc *tp, *t;
++ for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
++ tp = t, t = t->prev)
++ if (n->tgt == t)
++ {
++ if (tp)
++ tp->prev = t->prev;
++ else
++ acc_dev->openacc.data_environ = t->prev;
++ break;
++ }
++ }
++
++ if (f & FLAG_COPYOUT)
++ {
++ if (async > acc_async_sync)
++ acc_dev->openacc.async_set_async_func (async);
++ acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
++ if (async > acc_async_sync)
++ acc_dev->openacc.async_set_async_func (acc_async_sync);
++ }
+
+- acc_unmap_data (h);
++ gomp_remove_var (acc_dev, n);
++ }
+
+- if (!acc_dev->free_func (acc_dev->target_id, d))
+- gomp_fatal ("error in freeing device memory in %s", libfnname);
++ gomp_mutex_unlock (&acc_dev->lock);
+ }
+
+ void
+ acc_delete (void *h , size_t s)
+ {
+- delete_copyout (0, h, s, __FUNCTION__);
++ delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
++}
++
++void
++acc_delete_async (void *h , size_t s, int async)
++{
++ delete_copyout (0, h, s, async, __FUNCTION__);
++}
++
++void
++acc_delete_finalize (void *h , size_t s)
++{
++ delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
++}
++
++void
++acc_delete_finalize_async (void *h , size_t s, int async)
++{
++ delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
+ }
+
+ void
+ acc_copyout (void *h, size_t s)
+ {
+- delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
++ delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
++}
++
++void
++acc_copyout_async (void *h, size_t s, int async)
++{
++ delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
++}
++
++void
++acc_copyout_finalize (void *h, size_t s)
++{
++ delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
++ __FUNCTION__);
++}
++
++void
++acc_copyout_finalize_async (void *h, size_t s, int async)
++{
++ delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
+ }
+
+ static void
+-update_dev_host (int is_dev, void *h, size_t s)
++update_dev_host (int is_dev, void *h, size_t s, int async)
+ {
+ splay_tree_key n;
+ void *d;
+@@ -631,24 +765,42 @@ update_dev_host (int is_dev, void *h, si
+ d = (void *) (n->tgt->tgt_start + n->tgt_offset
+ + (uintptr_t) h - n->host_start);
+
++ if (async > acc_async_sync)
++ acc_dev->openacc.async_set_async_func (async);
++
+ if (is_dev)
+ acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
+ else
+ acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+
++ if (async > acc_async_sync)
++ acc_dev->openacc.async_set_async_func (acc_async_sync);
++
+ gomp_mutex_unlock (&acc_dev->lock);
+ }
+
+ void
+ acc_update_device (void *h, size_t s)
+ {
+- update_dev_host (1, h, s);
++ update_dev_host (1, h, s, acc_async_sync);
++}
++
++void
++acc_update_device_async (void *h, size_t s, int async)
++{
++ update_dev_host (1, h, s, async);
+ }
+
+ void
+ acc_update_self (void *h, size_t s)
+ {
+- update_dev_host (0, h, s);
++ update_dev_host (0, h, s, acc_async_sync);
++}
++
++void
++acc_update_self_async (void *h, size_t s, int async)
++{
++ update_dev_host (0, h, s, async);
+ }
+
+ void
+@@ -659,11 +811,37 @@ gomp_acc_insert_pointer (size_t mapnum,
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+
++ if (acc_is_present (*hostaddrs, *sizes))
++ {
++ splay_tree_key n;
++ gomp_mutex_lock (&acc_dev->lock);
++ n = lookup_host (acc_dev, *hostaddrs, *sizes);
++ gomp_mutex_unlock (&acc_dev->lock);
++
++ tgt = n->tgt;
++ for (size_t i = 0; i < tgt->list_count; i++)
++ if (tgt->list[i].key == n)
++ {
++ for (size_t j = 0; j < mapnum; j++)
++ if (i + j < tgt->list_count && tgt->list[i + j].key)
++ {
++ tgt->list[i + j].key->refcount++;
++ tgt->list[i + j].key->dynamic_refcount++;
++ }
++ return;
++ }
++ /* Should not reach here. */
++ gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
++ }
++
+ gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
+ tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
+ NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
+ gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
+
++ /* Initialize dynamic refcount. */
++ tgt->list[0].key->dynamic_refcount = 1;
++
+ gomp_mutex_lock (&acc_dev->lock);
+ tgt->prev = acc_dev->openacc.data_environ;
+ acc_dev->openacc.data_environ = tgt;
+@@ -671,7 +849,8 @@ gomp_acc_insert_pointer (size_t mapnum,
+ }
+
+ void
+-gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
++gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
++ int finalize, int mapnum)
+ {
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+@@ -679,6 +858,9 @@ gomp_acc_remove_pointer (void *h, bool f
+ struct target_mem_desc *t;
+ int minrefs = (mapnum == 1) ? 2 : 3;
+
++ if (!acc_is_present (h, s))
++ return;
++
+ gomp_mutex_lock (&acc_dev->lock);
+
+ n = lookup_host (acc_dev, h, 1);
+@@ -693,40 +875,65 @@ gomp_acc_remove_pointer (void *h, bool f
+
+ t = n->tgt;
+
+- struct target_mem_desc *tp;
++ if (n->refcount < n->dynamic_refcount)
++ {
++ gomp_mutex_unlock (&acc_dev->lock);
++ gomp_fatal ("Dynamic reference counting assert fail\n");
++ }
+
+- if (t->refcount == minrefs)
++ if (finalize)
+ {
+- /* This is the last reference, so pull the descriptor off the
+- chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
+- freeing the device memory. */
+- t->tgt_end = 0;
+- t->to_free = 0;
++ n->refcount -= n->dynamic_refcount;
++ n->dynamic_refcount = 0;
++ }
++ else if (n->dynamic_refcount)
++ {
++ n->dynamic_refcount--;
++ n->refcount--;
++ }
+
+- for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
+- tp = t, t = t->prev)
++ gomp_mutex_unlock (&acc_dev->lock);
++
++ if (n->refcount == 0)
++ {
++ if (t->refcount == minrefs)
+ {
+- if (n->tgt == t)
++ /* This is the last reference, so pull the descriptor off the
++ chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
++ freeing the device memory. */
++ struct target_mem_desc *tp;
++ for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
++ tp = t, t = t->prev)
+ {
+- if (tp)
+- tp->prev = t->prev;
+- else
+- acc_dev->openacc.data_environ = t->prev;
+- break;
++ if (n->tgt == t)
++ {
++ if (tp)
++ tp->prev = t->prev;
++ else
++ acc_dev->openacc.data_environ = t->prev;
++ break;
++ }
+ }
+ }
+- }
+
+- if (force_copyfrom)
+- t->list[0].copy_from = 1;
++ /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */
++ n->refcount = 1;
++ t->refcount = minrefs;
++ for (size_t i = 0; i < t->list_count; i++)
++ if (t->list[i].key == n)
++ {
++ t->list[i].copy_from = force_copyfrom ? 1 : 0;
++ break;
++ }
+
+- gomp_mutex_unlock (&acc_dev->lock);
++ /* If running synchronously, unmap immediately. */
++ if (async < acc_async_noval)
++ gomp_unmap_vars (t, true);
++ else
++ t->device_descr->openacc.register_async_cleanup_func (t, async);
++ }
+
+- /* If running synchronously, unmap immediately. */
+- if (async < acc_async_noval)
+- gomp_unmap_vars (t, true);
+- else
+- t->device_descr->openacc.register_async_cleanup_func (t, async);
++ gomp_mutex_unlock (&acc_dev->lock);
+
+ gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
+ }
+--- libgomp/env.c.jj 2018-04-25 09:40:31.924655586 +0200
++++ libgomp/env.c 2019-05-07 18:46:36.482110438 +0200
+@@ -88,8 +88,12 @@ void **gomp_places_list;
+ unsigned long gomp_places_list_len;
+ int gomp_debug_var;
+ unsigned int gomp_num_teams_var;
++bool gomp_display_affinity_var;
++char *gomp_affinity_format_var = "level %L thread %i affinity %A";
++size_t gomp_affinity_format_len;
+ char *goacc_device_type;
+ int goacc_device_num;
++int goacc_default_dims[GOMP_DIM_MAX];
+
+ #ifndef LIBGOMP_OFFLOADED_ONLY
+
+@@ -100,6 +104,7 @@ parse_schedule (void)
+ {
+ char *env, *end;
+ unsigned long value;
++ int monotonic = 0;
+
+ env = getenv ("OMP_SCHEDULE");
+ if (env == NULL)
+@@ -107,6 +112,26 @@ parse_schedule (void)
+
+ while (isspace ((unsigned char) *env))
+ ++env;
++ if (strncasecmp (env, "monotonic", 9) == 0)
++ {
++ monotonic = 1;
++ env += 9;
++ }
++ else if (strncasecmp (env, "nonmonotonic", 12) == 0)
++ {
++ monotonic = -1;
++ env += 12;
++ }
++ if (monotonic)
++ {
++ while (isspace ((unsigned char) *env))
++ ++env;
++ if (*env != ':')
++ goto unknown;
++ ++env;
++ while (isspace ((unsigned char) *env))
++ ++env;
++ }
+ if (strncasecmp (env, "static", 6) == 0)
+ {
+ gomp_global_icv.run_sched_var = GFS_STATIC;
+@@ -130,12 +155,16 @@ parse_schedule (void)
+ else
+ goto unknown;
+
++ if (monotonic == 1
++ || (monotonic == 0 && gomp_global_icv.run_sched_var == GFS_STATIC))
++ gomp_global_icv.run_sched_var |= GFS_MONOTONIC;
++
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == '\0')
+ {
+ gomp_global_icv.run_sched_chunk_size
+- = gomp_global_icv.run_sched_var != GFS_STATIC;
++ = (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC) != GFS_STATIC;
+ return;
+ }
+ if (*env++ != ',')
+@@ -158,7 +187,8 @@ parse_schedule (void)
+ if ((int)value != value)
+ goto invalid;
+
+- if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC)
++ if (value == 0
++ && (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC) != GFS_STATIC)
+ value = 1;
+ gomp_global_icv.run_sched_chunk_size = value;
+ return;
+@@ -1066,6 +1096,36 @@ parse_acc_device_type (void)
+ }
+
+ static void
++parse_gomp_openacc_dim (void)
++{
++ /* The syntax is the same as for the -fopenacc-dim compilation option. */
++ const char *var_name = "GOMP_OPENACC_DIM";
++ const char *env_var = getenv (var_name);
++ if (!env_var)
++ return;
++
++ const char *pos = env_var;
++ int i;
++ for (i = 0; *pos && i != GOMP_DIM_MAX; i++)
++ {
++ if (i && *pos++ != ':')
++ break;
++
++ if (*pos == ':')
++ continue;
++
++ const char *eptr;
++ errno = 0;
++ long val = strtol (pos, (char **)&eptr, 10);
++ if (errno || val < 0 || (unsigned)val != val)
++ break;
++
++ goacc_default_dims[i] = (int)val;
++ pos = eptr;
++ }
++}
++
++static void
+ handle_omp_display_env (unsigned long stacksize, int wait_policy)
+ {
+ const char *env;
+@@ -1119,19 +1179,34 @@ handle_omp_display_env (unsigned long st
+ fputs ("'\n", stderr);
+
+ fprintf (stderr, " OMP_SCHEDULE = '");
+- switch (gomp_global_icv.run_sched_var)
++ if ((gomp_global_icv.run_sched_var & GFS_MONOTONIC))
++ {
++ if (gomp_global_icv.run_sched_var != (GFS_MONOTONIC | GFS_STATIC))
++ fputs ("MONOTONIC:", stderr);
++ }
++ else if (gomp_global_icv.run_sched_var == GFS_STATIC)
++ fputs ("NONMONOTONIC:", stderr);
++ switch (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC)
+ {
+ case GFS_RUNTIME:
+ fputs ("RUNTIME", stderr);
++ if (gomp_global_icv.run_sched_chunk_size != 1)
++ fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
+ break;
+ case GFS_STATIC:
+ fputs ("STATIC", stderr);
++ if (gomp_global_icv.run_sched_chunk_size != 0)
++ fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
+ break;
+ case GFS_DYNAMIC:
+ fputs ("DYNAMIC", stderr);
++ if (gomp_global_icv.run_sched_chunk_size != 1)
++ fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
+ break;
+ case GFS_GUIDED:
+ fputs ("GUIDED", stderr);
++ if (gomp_global_icv.run_sched_chunk_size != 1)
++ fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
+ break;
+ case GFS_AUTO:
+ fputs ("AUTO", stderr);
+@@ -1197,6 +1272,10 @@ handle_omp_display_env (unsigned long st
+ gomp_global_icv.default_device_var);
+ fprintf (stderr, " OMP_MAX_TASK_PRIORITY = '%d'\n",
+ gomp_max_task_priority_var);
++ fprintf (stderr, " OMP_DISPLAY_AFFINITY = '%s'\n",
++ gomp_display_affinity_var ? "TRUE" : "FALSE");
++ fprintf (stderr, " OMP_AFFINITY_FORMAT = '%s'\n",
++ gomp_affinity_format_var);
+
+ if (verbose)
+ {
+@@ -1228,6 +1307,7 @@ initialize_env (void)
+ parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
+ parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
+ parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
++ parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var);
+ parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
+ parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true);
+ parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
+@@ -1277,6 +1357,13 @@ initialize_env (void)
+ }
+ if (gomp_global_icv.bind_var != omp_proc_bind_false)
+ gomp_init_affinity ();
++
++ {
++ const char *env = getenv ("OMP_AFFINITY_FORMAT");
++ if (env != NULL)
++ gomp_set_affinity_format (env, strlen (env));
++ }
++
+ wait_policy = parse_wait_policy ();
+ if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var))
+ {
+@@ -1302,7 +1389,6 @@ initialize_env (void)
+
+ /* Not strictly environment related, but ordering constructors is tricky. */
+ pthread_attr_init (&gomp_thread_attr);
+- pthread_attr_setdetachstate (&gomp_thread_attr, PTHREAD_CREATE_DETACHED);
+
+ if (parse_stacksize ("OMP_STACKSIZE", &stacksize)
+ || parse_stacksize ("GOMP_STACKSIZE", &stacksize)
+@@ -1336,6 +1422,7 @@ initialize_env (void)
+ goacc_device_num = 0;
+
+ parse_acc_device_type ();
++ parse_gomp_openacc_dim ();
+
+ goacc_runtime_initialize ();
+ }
+--- libgomp/fortran.c.jj 2018-04-25 09:40:31.913655581 +0200
++++ libgomp/fortran.c 2019-05-07 18:46:36.491110295 +0200
+@@ -28,6 +28,8 @@
+ #include "libgomp.h"
+ #include "libgomp_f.h"
+ #include
++#include
++#include
+ #include
+
+ #ifdef HAVE_ATTRIBUTE_ALIAS
+@@ -82,6 +84,8 @@ ialias_redirect (omp_get_team_num)
+ ialias_redirect (omp_is_initial_device)
+ ialias_redirect (omp_get_initial_device)
+ ialias_redirect (omp_get_max_task_priority)
++ialias_redirect (omp_pause_resource)
++ialias_redirect (omp_pause_resource_all)
+ #endif
+
+ #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING
+@@ -368,7 +372,9 @@ omp_get_schedule_ (int32_t *kind, int32_
+ omp_sched_t k;
+ int cs;
+ omp_get_schedule (&k, &cs);
+- *kind = k;
++ /* For now mask off GFS_MONOTONIC, because OpenMP 4.5 code will not
++ expect to see it. */
++ *kind = k & ~GFS_MONOTONIC;
+ *chunk_size = cs;
+ }
+
+@@ -378,7 +384,8 @@ omp_get_schedule_8_ (int32_t *kind, int6
+ omp_sched_t k;
+ int cs;
+ omp_get_schedule (&k, &cs);
+- *kind = k;
++ /* See above. */
++ *kind = k & ~GFS_MONOTONIC;
+ *chunk_size = cs;
+ }
+
+@@ -576,3 +583,96 @@ omp_get_max_task_priority_ (void)
+ {
+ return omp_get_max_task_priority ();
+ }
++
++void
++omp_set_affinity_format_ (const char *format, size_t format_len)
++{
++ gomp_set_affinity_format (format, format_len);
++}
++
++int32_t
++omp_get_affinity_format_ (char *buffer, size_t buffer_len)
++{
++ size_t len = strlen (gomp_affinity_format_var);
++ if (buffer_len)
++ {
++ if (len < buffer_len)
++ {
++ memcpy (buffer, gomp_affinity_format_var, len);
++ memset (buffer + len, ' ', buffer_len - len);
++ }
++ else
++ memcpy (buffer, gomp_affinity_format_var, buffer_len);
++ }
++ return len;
++}
++
++void
++omp_display_affinity_ (const char *format, size_t format_len)
++{
++ char *fmt = NULL, fmt_buf[256];
++ char buf[512];
++ if (format_len)
++ {
++ fmt = format_len < 256 ? fmt_buf : gomp_malloc (format_len + 1);
++ memcpy (fmt, format, format_len);
++ fmt[format_len] = '\0';
++ }
++ struct gomp_thread *thr = gomp_thread ();
++ size_t ret
++ = gomp_display_affinity (buf, sizeof buf,
++ format_len ? fmt : gomp_affinity_format_var,
++ gomp_thread_self (), &thr->ts, thr->place);
++ if (ret < sizeof buf)
++ {
++ buf[ret] = '\n';
++ gomp_print_string (buf, ret + 1);
++ }
++ else
++ {
++ char *b = gomp_malloc (ret + 1); /* BUF was too small; redo on the heap. */
++ gomp_display_affinity (b, ret + 1,
++ format_len ? fmt : gomp_affinity_format_var,
++ gomp_thread_self (), &thr->ts, thr->place);
++ b[ret] = '\n';
++ gomp_print_string (b, ret + 1);
++ free (b);
++ }
++ if (fmt && fmt != fmt_buf)
++ free (fmt);
++}
++
++int32_t
++omp_capture_affinity_ (char *buffer, const char *format,
++ size_t buffer_len, size_t format_len)
++{
++ char *fmt = NULL, fmt_buf[256];
++ if (format_len)
++ {
++ fmt = format_len < 256 ? fmt_buf : gomp_malloc (format_len + 1);
++ memcpy (fmt, format, format_len);
++ fmt[format_len] = '\0';
++ }
++ struct gomp_thread *thr = gomp_thread ();
++ size_t ret
++ = gomp_display_affinity (buffer, buffer_len,
++ format_len ? fmt : gomp_affinity_format_var,
++ gomp_thread_self (), &thr->ts, thr->place);
++ if (fmt && fmt != fmt_buf)
++ free (fmt);
++ if (ret < buffer_len)
++ memset (buffer + ret, ' ', buffer_len - ret);
++ return ret;
++}
++
++int32_t
++omp_pause_resource_ (const int32_t *kind, const int32_t *device_num)
++{
++ return omp_pause_resource (*kind, *device_num);
++}
++
++int32_t
++omp_pause_resource_all_ (const int32_t *kind)
++{
++ return omp_pause_resource_all (*kind);
++}
+--- libgomp/configure.tgt.jj 2018-04-25 09:40:31.925655587 +0200
++++ libgomp/configure.tgt 2019-05-07 18:46:36.479110486 +0200
+@@ -18,7 +18,7 @@ if test $gcc_cv_have_tls = yes ; then
+ ;;
+
+ *-*-linux* | *-*-gnu*)
+- XCFLAGS="${XCFLAGS} -ftls-model=initial-exec"
++ XCFLAGS="${XCFLAGS} -ftls-model=initial-exec -DUSING_INITIAL_EXEC_TLS"
+ ;;
+
+ *-*-rtems*)
+--- libgomp/icv-device.c.jj 2018-04-25 09:40:31.925655587 +0200
++++ libgomp/icv-device.c 2019-05-07 18:46:36.513109943 +0200
+@@ -49,20 +49,6 @@ omp_get_num_devices (void)
+ }
+
+ int
+-omp_get_num_teams (void)
+-{
+- /* Hardcoded to 1 on host, MIC, HSAIL? Maybe variable on PTX. */
+- return 1;
+-}
+-
+-int
+-omp_get_team_num (void)
+-{
+- /* Hardcoded to 0 on host, MIC, HSAIL? Maybe variable on PTX. */
+- return 0;
+-}
+-
+-int
+ omp_is_initial_device (void)
+ {
+ /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX. */
+@@ -72,6 +58,4 @@ omp_is_initial_device (void)
+ ialias (omp_set_default_device)
+ ialias (omp_get_default_device)
+ ialias (omp_get_num_devices)
+-ialias (omp_get_num_teams)
+-ialias (omp_get_team_num)
+ ialias (omp_is_initial_device)
+--- libgomp/Makefile.in.jj 2018-04-25 09:40:31.320655306 +0200
++++ libgomp/Makefile.in 2019-05-07 20:00:01.082077522 +0200
+@@ -90,7 +90,7 @@ DIST_COMMON = $(top_srcdir)/plugin/Makef
+ $(srcdir)/libgomp.spec.in $(srcdir)/../depcomp
+ @PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la
+ @PLUGIN_HSA_TRUE@am__append_2 = libgomp-plugin-hsa.la
+-@USE_FORTRAN_TRUE@am__append_3 = openacc.f90
++@USE_FORTRAN_TRUE@am__append_3 = openacc2.f90
+ subdir = .
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
+@@ -172,7 +172,7 @@ libgomp_plugin_nvptx_la_LINK = $(LIBTOOL
+ @PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_rpath = -rpath \
+ @PLUGIN_NVPTX_TRUE@ $(toolexeclibdir)
+ libgomp_la_LIBADD =
+-@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo
++@USE_FORTRAN_TRUE@am__objects_1 = openacc2.lo
+ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
+ env.lo error.lo icv.lo icv-device.lo iter.lo iter_ull.lo \
+ loop.lo loop_ull.lo ordered.lo parallel.lo sections.lo \
+@@ -180,7 +180,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.
+ sem.lo bar.lo ptrlock.lo time.lo fortran.lo affinity.lo \
+ target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
+ oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
+- oacc-plugin.lo oacc-cuda.lo priority_queue.lo $(am__objects_1)
++ oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
++ teams.lo $(am__objects_1)
+ libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
+ DEFAULT_INCLUDES = -I.@am__isrc@
+ depcomp = $(SHELL) $(top_srcdir)/../depcomp
+@@ -380,6 +381,7 @@ mkdir_p = @mkdir_p@
+ multi_basedir = @multi_basedir@
+ offload_additional_lib_paths = @offload_additional_lib_paths@
+ offload_additional_options = @offload_additional_options@
++offload_plugins = @offload_plugins@
+ offload_targets = @offload_targets@
+ oldincludedir = @oldincludedir@
+ pdfdir = @pdfdir@
+@@ -436,7 +438,7 @@ libgomp_la_SOURCES = alloc.c atomic.c ba
+ affinity.c target.c splay-tree.c libgomp-plugin.c \
+ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
+ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
+- $(am__append_3)
++ affinity-fmt.c teams.c $(am__append_3)
+
+ # Nvidia PTX OpenACC plugin.
+ @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
+@@ -599,6 +601,7 @@ mostlyclean-compile:
+ distclean-compile:
+ -rm -f *.tab.c
+
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity-fmt.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic.Plo@am__quote@
+@@ -638,6 +641,7 @@ distclean-compile:
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/teams.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/work.Plo@am__quote@
+
+@@ -1292,8 +1296,6 @@ omp_lib_kinds.mod: omp_lib.mod
+ :
+ openacc_kinds.mod: openacc.mod
+ :
+-openacc.mod: openacc.lo
+- :
+ %.mod: %.f90
+ $(FC) $(FCFLAGS) -fsyntax-only $<
+ fortran.lo: libgomp_f.h
+--- libgomp/plugin/cuda/cuda.h.jj 2018-04-25 09:40:31.914655581 +0200
++++ libgomp/plugin/cuda/cuda.h 2019-05-07 18:46:36.533109624 +0200
+@@ -44,6 +44,7 @@ typedef void *CUevent;
+ typedef void *CUfunction;
+ typedef void *CUlinkState;
+ typedef void *CUmodule;
++typedef size_t (*CUoccupancyB2DSize)(int);
+ typedef void *CUstream;
+
+ typedef enum {
+@@ -88,6 +89,7 @@ typedef enum {
+ CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4,
+ CU_JIT_ERROR_LOG_BUFFER = 5,
+ CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,
++ CU_JIT_OPTIMIZATION_LEVEL = 7,
+ CU_JIT_LOG_VERBOSE = 12
+ } CUjit_option;
+
+@@ -169,6 +171,8 @@ CUresult cuModuleGetGlobal (CUdeviceptr
+ CUresult cuModuleLoad (CUmodule *, const char *);
+ CUresult cuModuleLoadData (CUmodule *, const void *);
+ CUresult cuModuleUnload (CUmodule);
++CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
++ CUoccupancyB2DSize, size_t, int);
+ CUresult cuStreamCreate (CUstream *, unsigned);
+ #define cuStreamDestroy cuStreamDestroy_v2
+ CUresult cuStreamDestroy (CUstream);
+--- libgomp/plugin/cuda-lib.def.jj 2019-05-07 18:46:36.533109624 +0200
++++ libgomp/plugin/cuda-lib.def 2019-05-07 18:46:36.533109624 +0200
+@@ -0,0 +1,49 @@
++CUDA_ONE_CALL (cuCtxCreate)
++CUDA_ONE_CALL (cuCtxDestroy)
++CUDA_ONE_CALL (cuCtxGetCurrent)
++CUDA_ONE_CALL (cuCtxGetDevice)
++CUDA_ONE_CALL (cuCtxPopCurrent)
++CUDA_ONE_CALL (cuCtxPushCurrent)
++CUDA_ONE_CALL (cuCtxSynchronize)
++CUDA_ONE_CALL (cuDeviceGet)
++CUDA_ONE_CALL (cuDeviceGetAttribute)
++CUDA_ONE_CALL (cuDeviceGetCount)
++CUDA_ONE_CALL (cuEventCreate)
++CUDA_ONE_CALL (cuEventDestroy)
++CUDA_ONE_CALL (cuEventElapsedTime)
++CUDA_ONE_CALL (cuEventQuery)
++CUDA_ONE_CALL (cuEventRecord)
++CUDA_ONE_CALL (cuEventSynchronize)
++CUDA_ONE_CALL (cuFuncGetAttribute)
++CUDA_ONE_CALL_MAYBE_NULL (cuGetErrorString)
++CUDA_ONE_CALL (cuInit)
++CUDA_ONE_CALL (cuLaunchKernel)
++CUDA_ONE_CALL (cuLinkAddData)
++CUDA_ONE_CALL_MAYBE_NULL (cuLinkAddData_v2)
++CUDA_ONE_CALL (cuLinkComplete)
++CUDA_ONE_CALL (cuLinkCreate)
++CUDA_ONE_CALL_MAYBE_NULL (cuLinkCreate_v2)
++CUDA_ONE_CALL (cuLinkDestroy)
++CUDA_ONE_CALL (cuMemAlloc)
++CUDA_ONE_CALL (cuMemAllocHost)
++CUDA_ONE_CALL (cuMemcpy)
++CUDA_ONE_CALL (cuMemcpyDtoDAsync)
++CUDA_ONE_CALL (cuMemcpyDtoH)
++CUDA_ONE_CALL (cuMemcpyDtoHAsync)
++CUDA_ONE_CALL (cuMemcpyHtoD)
++CUDA_ONE_CALL (cuMemcpyHtoDAsync)
++CUDA_ONE_CALL (cuMemFree)
++CUDA_ONE_CALL (cuMemFreeHost)
++CUDA_ONE_CALL (cuMemGetAddressRange)
++CUDA_ONE_CALL (cuMemHostGetDevicePointer)
++CUDA_ONE_CALL (cuModuleGetFunction)
++CUDA_ONE_CALL (cuModuleGetGlobal)
++CUDA_ONE_CALL (cuModuleLoad)
++CUDA_ONE_CALL (cuModuleLoadData)
++CUDA_ONE_CALL (cuModuleUnload)
++CUDA_ONE_CALL_MAYBE_NULL (cuOccupancyMaxPotentialBlockSize)
++CUDA_ONE_CALL (cuStreamCreate)
++CUDA_ONE_CALL (cuStreamDestroy)
++CUDA_ONE_CALL (cuStreamQuery)
++CUDA_ONE_CALL (cuStreamSynchronize)
++CUDA_ONE_CALL (cuStreamWaitEvent)
+--- libgomp/plugin/plugin-nvptx.c.jj 2018-04-25 09:40:31.915655582 +0200
++++ libgomp/plugin/plugin-nvptx.c 2019-05-07 18:46:36.535109592 +0200
+@@ -31,6 +31,7 @@
+ is not clear as to what that state might be. Or how one might
+ propagate it from one thread to another. */
+
++#define _GNU_SOURCE
+ #include "openacc.h"
+ #include "config.h"
+ #include "libgomp-plugin.h"
+@@ -48,60 +49,41 @@
+ #include
+ #include
+
++#if CUDA_VERSION < 6000
++extern CUresult cuGetErrorString (CUresult, const char **);
++#define CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR 82
++#endif
++
++#if CUDA_VERSION >= 6050
++#undef cuLinkCreate
++#undef cuLinkAddData
++CUresult cuLinkAddData (CUlinkState, CUjitInputType, void *, size_t,
++ const char *, unsigned, CUjit_option *, void **);
++CUresult cuLinkCreate (unsigned, CUjit_option *, void **, CUlinkState *);
++#else
++typedef size_t (*CUoccupancyB2DSize)(int);
++CUresult cuLinkAddData_v2 (CUlinkState, CUjitInputType, void *, size_t,
++ const char *, unsigned, CUjit_option *, void **);
++CUresult cuLinkCreate_v2 (unsigned, CUjit_option *, void **, CUlinkState *);
++CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
++ CUoccupancyB2DSize, size_t, int);
++#endif
++
++#define DO_PRAGMA(x) _Pragma (#x)
++
+ #if PLUGIN_NVPTX_DYNAMIC
+ # include
+
+-# define CUDA_CALLS \
+-CUDA_ONE_CALL (cuCtxCreate) \
+-CUDA_ONE_CALL (cuCtxDestroy) \
+-CUDA_ONE_CALL (cuCtxGetCurrent) \
+-CUDA_ONE_CALL (cuCtxGetDevice) \
+-CUDA_ONE_CALL (cuCtxPopCurrent) \
+-CUDA_ONE_CALL (cuCtxPushCurrent) \
+-CUDA_ONE_CALL (cuCtxSynchronize) \
+-CUDA_ONE_CALL (cuDeviceGet) \
+-CUDA_ONE_CALL (cuDeviceGetAttribute) \
+-CUDA_ONE_CALL (cuDeviceGetCount) \
+-CUDA_ONE_CALL (cuEventCreate) \
+-CUDA_ONE_CALL (cuEventDestroy) \
+-CUDA_ONE_CALL (cuEventElapsedTime) \
+-CUDA_ONE_CALL (cuEventQuery) \
+-CUDA_ONE_CALL (cuEventRecord) \
+-CUDA_ONE_CALL (cuEventSynchronize) \
+-CUDA_ONE_CALL (cuFuncGetAttribute) \
+-CUDA_ONE_CALL (cuGetErrorString) \
+-CUDA_ONE_CALL (cuInit) \
+-CUDA_ONE_CALL (cuLaunchKernel) \
+-CUDA_ONE_CALL (cuLinkAddData) \
+-CUDA_ONE_CALL (cuLinkComplete) \
+-CUDA_ONE_CALL (cuLinkCreate) \
+-CUDA_ONE_CALL (cuLinkDestroy) \
+-CUDA_ONE_CALL (cuMemAlloc) \
+-CUDA_ONE_CALL (cuMemAllocHost) \
+-CUDA_ONE_CALL (cuMemcpy) \
+-CUDA_ONE_CALL (cuMemcpyDtoDAsync) \
+-CUDA_ONE_CALL (cuMemcpyDtoH) \
+-CUDA_ONE_CALL (cuMemcpyDtoHAsync) \
+-CUDA_ONE_CALL (cuMemcpyHtoD) \
+-CUDA_ONE_CALL (cuMemcpyHtoDAsync) \
+-CUDA_ONE_CALL (cuMemFree) \
+-CUDA_ONE_CALL (cuMemFreeHost) \
+-CUDA_ONE_CALL (cuMemGetAddressRange) \
+-CUDA_ONE_CALL (cuMemHostGetDevicePointer)\
+-CUDA_ONE_CALL (cuModuleGetFunction) \
+-CUDA_ONE_CALL (cuModuleGetGlobal) \
+-CUDA_ONE_CALL (cuModuleLoad) \
+-CUDA_ONE_CALL (cuModuleLoadData) \
+-CUDA_ONE_CALL (cuModuleUnload) \
+-CUDA_ONE_CALL (cuStreamCreate) \
+-CUDA_ONE_CALL (cuStreamDestroy) \
+-CUDA_ONE_CALL (cuStreamQuery) \
+-CUDA_ONE_CALL (cuStreamSynchronize) \
+-CUDA_ONE_CALL (cuStreamWaitEvent)
+-# define CUDA_ONE_CALL(call) \
+- __typeof (call) *call;
+ struct cuda_lib_s {
+- CUDA_CALLS
++
++# define CUDA_ONE_CALL(call) \
++ __typeof (call) *call;
++# define CUDA_ONE_CALL_MAYBE_NULL(call) \
++ CUDA_ONE_CALL (call)
++#include "cuda-lib.def"
++# undef CUDA_ONE_CALL
++# undef CUDA_ONE_CALL_MAYBE_NULL
++
+ } cuda_lib;
+
+ /* -1 if init_cuda_lib has not been called yet, false
+@@ -120,24 +102,41 @@ init_cuda_lib (void)
+ cuda_lib_inited = false;
+ if (h == NULL)
+ return false;
+-# undef CUDA_ONE_CALL
+-# define CUDA_ONE_CALL(call) CUDA_ONE_CALL_1 (call)
+-# define CUDA_ONE_CALL_1(call) \
++
++# define CUDA_ONE_CALL(call) CUDA_ONE_CALL_1 (call, false)
++# define CUDA_ONE_CALL_MAYBE_NULL(call) CUDA_ONE_CALL_1 (call, true)
++# define CUDA_ONE_CALL_1(call, allow_null) \
+ cuda_lib.call = dlsym (h, #call); \
+- if (cuda_lib.call == NULL) \
++ if (!allow_null && cuda_lib.call == NULL) \
+ return false;
+- CUDA_CALLS
++#include "cuda-lib.def"
++# undef CUDA_ONE_CALL
++# undef CUDA_ONE_CALL_1
++# undef CUDA_ONE_CALL_MAYBE_NULL
++
+ cuda_lib_inited = true;
+ return true;
+ }
+-# undef CUDA_ONE_CALL
+-# undef CUDA_ONE_CALL_1
+ # define CUDA_CALL_PREFIX cuda_lib.
+ #else
++
++# define CUDA_ONE_CALL(call)
++# define CUDA_ONE_CALL_MAYBE_NULL(call) DO_PRAGMA (weak call)
++#include "cuda-lib.def"
++#undef CUDA_ONE_CALL_MAYBE_NULL
++#undef CUDA_ONE_CALL
++
+ # define CUDA_CALL_PREFIX
+ # define init_cuda_lib() true
+ #endif
+
++#include "secure_getenv.h"
++
++#undef MIN
++#undef MAX
++#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
++#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
++
+ /* Convenience macros for the frequently used CUDA library call and
+ error handling sequence as well as CUDA library calls that
+ do the error checking themselves or don't do it at all. */
+@@ -171,40 +170,42 @@ init_cuda_lib (void)
+ #define CUDA_CALL_NOCHECK(FN, ...) \
+ CUDA_CALL_PREFIX FN (__VA_ARGS__)
+
++#define CUDA_CALL_EXISTS(FN) \
++ CUDA_CALL_PREFIX FN
++
+ static const char *
+ cuda_error (CUresult r)
+ {
+-#if CUDA_VERSION < 7000
+- /* Specified in documentation and present in library from at least
+- 5.5. Not declared in header file prior to 7.0. */
+- extern CUresult cuGetErrorString (CUresult, const char **);
+-#endif
++ const char *fallback = "unknown cuda error";
+ const char *desc;
+
++ if (!CUDA_CALL_EXISTS (cuGetErrorString))
++ return fallback;
++
+ r = CUDA_CALL_NOCHECK (cuGetErrorString, r, &desc);
+- if (r != CUDA_SUCCESS)
+- desc = "unknown cuda error";
++ if (r == CUDA_SUCCESS)
++ return desc;
+
+- return desc;
++ return fallback;
+ }
+
+ static unsigned int instantiated_devices = 0;
+ static pthread_mutex_t ptx_dev_lock = PTHREAD_MUTEX_INITIALIZER;
+
++struct cuda_map
++{
++ CUdeviceptr d;
++ size_t size;
++ bool active;
++ struct cuda_map *next;
++};
++
+ struct ptx_stream
+ {
+ CUstream stream;
+ pthread_t host_thread;
+ bool multithreaded;
+-
+- CUdeviceptr d;
+- void *h;
+- void *h_begin;
+- void *h_end;
+- void *h_next;
+- void *h_prev;
+- void *h_tail;
+-
++ struct cuda_map *map;
+ struct ptx_stream *next;
+ };
+
+@@ -216,12 +217,64 @@ struct nvptx_thread
+ struct ptx_device *ptx_dev;
+ };
+
+-struct map
++static struct cuda_map *
++cuda_map_create (size_t size)
+ {
+- int async;
+- size_t size;
+- char mappings[0];
+-};
++ struct cuda_map *map = GOMP_PLUGIN_malloc (sizeof (struct cuda_map));
++
++ assert (map);
++
++ map->next = NULL;
++ map->size = size;
++ map->active = false;
++
++ CUDA_CALL_ERET (NULL, cuMemAlloc, &map->d, size);
++ assert (map->d);
++
++ return map;
++}
++
++static void
++cuda_map_destroy (struct cuda_map *map)
++{
++ if (map->active)
++ /* Possible reasons for the map to be still active:
++ - the associated async kernel might still be running.
++ - the associated async kernel might have finished, but the
++ corresponding event that should trigger the pop_map has not been
++ processed by event_gc.
++ - the associated sync kernel might have aborted
++
++ The async cases could happen if the user specified an async region
++ without adding a corresponding wait that is guaranteed to be executed
++ (before returning from main, or in an atexit handler).
++ We do not want to deallocate a device pointer that is still being
++ used, so skip it.
++
++ In the sync case, the device pointer is no longer used, but deallocating
++ it using cuMemFree will not succeed, so skip it.
++
++ TODO: Handle this in a more constructive way, by f.i. waiting for streams
++ to finish before de-allocating them (PR88981), or by ensuring the CUDA
++ lib atexit handler is called before rather than after the libgomp plugin
++ atexit handler (PR83795). */
++ ;
++ else
++ CUDA_CALL_NOCHECK (cuMemFree, map->d);
++
++ free (map);
++}
++
++/* The following map_* routines manage the CUDA device memory that
++ contains the data mapping arguments for cuLaunchKernel. Each
++ asynchronous PTX stream may have multiple pending kernel
++ invocations, which are launched in a FIFO order. As such, the map
++ routines maintain a queue of cuLaunchKernel arguments.
++
++ Calls to map_push and map_pop must be guarded by ptx_event_lock.
++ Likewise, calls to map_init and map_fini are guarded by
++ ptx_dev_lock inside GOMP_OFFLOAD_init_device and
++ GOMP_OFFLOAD_fini_device, respectively. */
+
+ static bool
+ map_init (struct ptx_stream *s)
+@@ -229,109 +282,83 @@ map_init (struct ptx_stream *s)
+ int size = getpagesize ();
+
+ assert (s);
+- assert (!s->d);
+- assert (!s->h);
+-
+- CUDA_CALL (cuMemAllocHost, &s->h, size);
+- CUDA_CALL (cuMemHostGetDevicePointer, &s->d, s->h, 0);
+
+- assert (s->h);
++ s->map = cuda_map_create (size);
+
+- s->h_begin = s->h;
+- s->h_end = s->h_begin + size;
+- s->h_next = s->h_prev = s->h_tail = s->h_begin;
+-
+- assert (s->h_next);
+- assert (s->h_end);
+ return true;
+ }
+
+ static bool
+ map_fini (struct ptx_stream *s)
+ {
+- CUDA_CALL (cuMemFreeHost, s->h);
++ assert (s->map->next == NULL);
++
++ cuda_map_destroy (s->map);
++
+ return true;
+ }
+
+ static void
+ map_pop (struct ptx_stream *s)
+ {
+- struct map *m;
++ struct cuda_map *next;
+
+ assert (s != NULL);
+- assert (s->h_next);
+- assert (s->h_prev);
+- assert (s->h_tail);
+-
+- m = s->h_tail;
+-
+- s->h_tail += m->size;
+-
+- if (s->h_tail >= s->h_end)
+- s->h_tail = s->h_begin + (int) (s->h_tail - s->h_end);
+-
+- if (s->h_next == s->h_tail)
+- s->h_prev = s->h_next;
+
+- assert (s->h_next >= s->h_begin);
+- assert (s->h_tail >= s->h_begin);
+- assert (s->h_prev >= s->h_begin);
++ if (s->map->next == NULL)
++ {
++ s->map->active = false;
++ return;
++ }
+
+- assert (s->h_next <= s->h_end);
+- assert (s->h_tail <= s->h_end);
+- assert (s->h_prev <= s->h_end);
++ next = s->map->next;
++ cuda_map_destroy (s->map);
++ s->map = next;
+ }
+
+-static void
+-map_push (struct ptx_stream *s, int async, size_t size, void **h, void **d)
++static CUdeviceptr
++map_push (struct ptx_stream *s, size_t size)
+ {
+- int left;
+- int offset;
+- struct map *m;
++ struct cuda_map *map = NULL;
++ struct cuda_map **t;
+
+- assert (s != NULL);
+-
+- left = s->h_end - s->h_next;
+- size += sizeof (struct map);
+-
+- assert (s->h_prev);
+- assert (s->h_next);
++ assert (s);
++ assert (s->map);
+
+- if (size >= left)
++ /* Select an element to push. */
++ if (s->map->active)
++ map = cuda_map_create (size);
++ else
+ {
+- m = s->h_prev;
+- m->size += left;
+- s->h_next = s->h_begin;
+-
+- if (s->h_next + size > s->h_end)
+- GOMP_PLUGIN_fatal ("unable to push map");
+- }
+-
+- assert (s->h_next);
+-
+- m = s->h_next;
+- m->async = async;
+- m->size = size;
++ /* Pop the inactive front element. */
++ struct cuda_map *pop = s->map;
++ s->map = pop->next;
++ pop->next = NULL;
+
+- offset = (void *)&m->mappings[0] - s->h;
++ if (pop->size < size)
++ {
++ cuda_map_destroy (pop);
+
+- *d = (void *)(s->d + offset);
+- *h = (void *)(s->h + offset);
++ map = cuda_map_create (size);
++ }
++ else
++ map = pop;
++ }
+
+- s->h_prev = s->h_next;
+- s->h_next += size;
++ /* Check that the element is as expected. */
++ assert (map->next == NULL);
++ assert (!map->active);
+
+- assert (s->h_prev);
+- assert (s->h_next);
++ /* Mark the element active. */
++ map->active = true;
+
+- assert (s->h_next >= s->h_begin);
+- assert (s->h_tail >= s->h_begin);
+- assert (s->h_prev >= s->h_begin);
+- assert (s->h_next <= s->h_end);
+- assert (s->h_tail <= s->h_end);
+- assert (s->h_prev <= s->h_end);
++ /* Push the element to the back of the list. */
++ for (t = &s->map; (*t) != NULL; t = &(*t)->next)
++ ;
++ assert (t != NULL && *t == NULL);
++ *t = map;
+
+- return;
++ return map->d;
+ }
+
+ /* Target data function launch information. */
+@@ -411,6 +438,10 @@ struct ptx_device
+ int num_sms;
+ int regs_per_block;
+ int regs_per_sm;
++ int warp_size;
++ int max_threads_per_block;
++ int max_threads_per_multiprocessor;
++ int default_dims[GOMP_DIM_MAX];
+
+ struct ptx_image_data *images; /* Images loaded on device. */
+ pthread_mutex_t image_lock; /* Lock for above list. */
+@@ -458,8 +489,6 @@ init_streams_for_device (struct ptx_devi
+ null_stream->stream = NULL;
+ null_stream->host_thread = pthread_self ();
+ null_stream->multithreaded = true;
+- null_stream->d = (CUdeviceptr) NULL;
+- null_stream->h = NULL;
+ if (!map_init (null_stream))
+ return false;
+
+@@ -594,8 +623,6 @@ select_stream_for_async (int async, pthr
+ s->host_thread = thread;
+ s->multithreaded = false;
+
+- s->d = (CUdeviceptr) NULL;
+- s->h = NULL;
+ if (!map_init (s))
+ {
+ pthread_mutex_unlock (&ptx_dev->stream_lock);
+@@ -777,9 +804,11 @@ nvptx_open_device (int n)
+ &pi, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, dev);
+ ptx_dev->regs_per_block = pi;
+
+- /* CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82 is defined only
++ /* CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR is defined only
+ in CUDA 6.0 and newer. */
+- r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi, 82, dev);
++ r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
++ CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR,
++ dev);
+ /* Fallback: use limit of registers per block, which is usually equal. */
+ if (r == CUDA_ERROR_INVALID_VALUE)
+ pi = ptx_dev->regs_per_block;
+@@ -797,12 +826,24 @@ nvptx_open_device (int n)
+ GOMP_PLUGIN_error ("Only warp size 32 is supported");
+ return NULL;
+ }
++ ptx_dev->warp_size = pi;
++
++ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
++ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev);
++ ptx_dev->max_threads_per_block = pi;
++
++ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
++ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, dev);
++ ptx_dev->max_threads_per_multiprocessor = pi;
+
+ r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &async_engines,
+ CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, dev);
+ if (r != CUDA_SUCCESS)
+ async_engines = 1;
+
++ for (int i = 0; i != GOMP_DIM_MAX; i++)
++ ptx_dev->default_dims[i] = 0;
++
+ ptx_dev->images = NULL;
+ pthread_mutex_init (&ptx_dev->image_lock, NULL);
+
+@@ -876,12 +917,42 @@ notify_var (const char *var_name, const
+ GOMP_PLUGIN_debug (0, "%s: '%s'\n", var_name, env_var);
+ }
+
++static void
++process_GOMP_NVPTX_JIT (intptr_t *gomp_nvptx_o)
++{
++ const char *var_name = "GOMP_NVPTX_JIT";
++ const char *env_var = secure_getenv (var_name);
++ notify_var (var_name, env_var);
++
++ if (env_var == NULL)
++ return;
++
++ const char *c = env_var;
++ while (*c != '\0')
++ {
++ while (*c == ' ')
++ c++;
++
++ if (c[0] == '-' && c[1] == 'O'
++ && '0' <= c[2] && c[2] <= '4'
++ && (c[3] == '\0' || c[3] == ' '))
++ {
++ *gomp_nvptx_o = c[2] - '0';
++ c += 3;
++ continue;
++ }
++
++ GOMP_PLUGIN_error ("Error parsing %s", var_name);
++ break;
++ }
++}
++
+ static bool
+ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
+ unsigned num_objs)
+ {
+- CUjit_option opts[6];
+- void *optvals[6];
++ CUjit_option opts[7];
++ void *optvals[7];
+ float elapsed = 0.0;
+ char elog[1024];
+ char ilog[16384];
+@@ -908,16 +979,41 @@ link_ptx (CUmodule *module, const struct
+ opts[5] = CU_JIT_LOG_VERBOSE;
+ optvals[5] = (void *) 1;
+
+- CUDA_CALL (cuLinkCreate, 6, opts, optvals, &linkstate);
++ static intptr_t gomp_nvptx_o = -1;
++
++ static bool init_done = false;
++ if (!init_done)
++ {
++ process_GOMP_NVPTX_JIT (&gomp_nvptx_o);
++ init_done = true;
++ }
++
++ int nopts = 6;
++ if (gomp_nvptx_o != -1)
++ {
++ opts[nopts] = CU_JIT_OPTIMIZATION_LEVEL;
++ optvals[nopts] = (void *) gomp_nvptx_o;
++ nopts++;
++ }
++
++ if (CUDA_CALL_EXISTS (cuLinkCreate_v2))
++ CUDA_CALL (cuLinkCreate_v2, nopts, opts, optvals, &linkstate);
++ else
++ CUDA_CALL (cuLinkCreate, nopts, opts, optvals, &linkstate);
+
+ for (; num_objs--; ptx_objs++)
+ {
+ /* cuLinkAddData's 'data' argument erroneously omits the const
+ qualifier. */
+ GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_objs->code);
+- r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX,
+- (char *) ptx_objs->code, ptx_objs->size,
+- 0, 0, 0, 0);
++ if (CUDA_CALL_EXISTS (cuLinkAddData_v2))
++ r = CUDA_CALL_NOCHECK (cuLinkAddData_v2, linkstate, CU_JIT_INPUT_PTX,
++ (char *) ptx_objs->code, ptx_objs->size,
++ 0, 0, 0, 0);
++ else
++ r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX,
++ (char *) ptx_objs->code, ptx_objs->size,
++ 0, 0, 0, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
+@@ -1067,8 +1163,10 @@ nvptx_exec (void (*fn), size_t mapnum, v
+ int i;
+ struct ptx_stream *dev_str;
+ void *kargs[1];
+- void *hp, *dp;
++ void *hp;
++ CUdeviceptr dp = 0;
+ struct nvptx_thread *nvthd = nvptx_thread ();
++ int warp_size = nvthd->ptx_dev->warp_size;
+ const char *maybe_abort_msg = "(perhaps abort was called)";
+
+ function = targ_fn->fn;
+@@ -1090,68 +1188,36 @@ nvptx_exec (void (*fn), size_t mapnum, v
+
+ if (seen_zero)
+ {
+- /* See if the user provided GOMP_OPENACC_DIM environment
+- variable to specify runtime defaults. */
+- static int default_dims[GOMP_DIM_MAX];
+-
+ pthread_mutex_lock (&ptx_dev_lock);
+- if (!default_dims[0])
+- {
+- const char *var_name = "GOMP_OPENACC_DIM";
+- /* We only read the environment variable once. You can't
+- change it in the middle of execution. The syntax is
+- the same as for the -fopenacc-dim compilation option. */
+- const char *env_var = getenv (var_name);
+- notify_var (var_name, env_var);
+- if (env_var)
+- {
+- const char *pos = env_var;
+
+- for (i = 0; *pos && i != GOMP_DIM_MAX; i++)
+- {
+- if (i && *pos++ != ':')
+- break;
+- if (*pos != ':')
+- {
+- const char *eptr;
+-
+- errno = 0;
+- long val = strtol (pos, (char **)&eptr, 10);
+- if (errno || val < 0 || (unsigned)val != val)
+- break;
+- default_dims[i] = (int)val;
+- pos = eptr;
+- }
+- }
+- }
++ static int gomp_openacc_dims[GOMP_DIM_MAX];
++ if (!gomp_openacc_dims[0])
++ {
++ /* See if the user provided GOMP_OPENACC_DIM environment
++ variable to specify runtime defaults. */
++ for (int i = 0; i < GOMP_DIM_MAX; ++i)
++ gomp_openacc_dims[i] = GOMP_PLUGIN_acc_default_dim (i);
++ }
+
+- int warp_size, block_size, dev_size, cpu_size;
+- CUdevice dev = nvptx_thread()->ptx_dev->dev;
+- /* 32 is the default for known hardware. */
+- int gang = 0, worker = 32, vector = 32;
+- CUdevice_attribute cu_tpb, cu_ws, cu_mpc, cu_tpm;
+-
+- cu_tpb = CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK;
+- cu_ws = CU_DEVICE_ATTRIBUTE_WARP_SIZE;
+- cu_mpc = CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT;
+- cu_tpm = CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR;
+-
+- if (CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &block_size, cu_tpb,
+- dev) == CUDA_SUCCESS
+- && CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &warp_size, cu_ws,
+- dev) == CUDA_SUCCESS
+- && CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &dev_size, cu_mpc,
+- dev) == CUDA_SUCCESS
+- && CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &cpu_size, cu_tpm,
+- dev) == CUDA_SUCCESS)
+- {
+- GOMP_PLUGIN_debug (0, " warp_size=%d, block_size=%d,"
+- " dev_size=%d, cpu_size=%d\n",
+- warp_size, block_size, dev_size, cpu_size);
+- gang = (cpu_size / block_size) * dev_size;
+- worker = block_size / warp_size;
+- vector = warp_size;
+- }
++ if (!nvthd->ptx_dev->default_dims[0])
++ {
++ int default_dims[GOMP_DIM_MAX];
++ for (int i = 0; i < GOMP_DIM_MAX; ++i)
++ default_dims[i] = gomp_openacc_dims[i];
++
++ int gang, worker, vector;
++ {
++ int block_size = nvthd->ptx_dev->max_threads_per_block;
++ int cpu_size = nvthd->ptx_dev->max_threads_per_multiprocessor;
++ int dev_size = nvthd->ptx_dev->num_sms;
++ GOMP_PLUGIN_debug (0, " warp_size=%d, block_size=%d,"
++ " dev_size=%d, cpu_size=%d\n",
++ warp_size, block_size, dev_size, cpu_size);
++
++ gang = (cpu_size / block_size) * dev_size;
++ worker = block_size / warp_size;
++ vector = warp_size;
++ }
+
+ /* There is no upper bound on the gang size. The best size
+ matches the hardware configuration. Logical gangs are
+@@ -1172,29 +1238,150 @@ nvptx_exec (void (*fn), size_t mapnum, v
+ default_dims[GOMP_DIM_GANG],
+ default_dims[GOMP_DIM_WORKER],
+ default_dims[GOMP_DIM_VECTOR]);
++
++ for (i = 0; i != GOMP_DIM_MAX; i++)
++ nvthd->ptx_dev->default_dims[i] = default_dims[i];
+ }
+ pthread_mutex_unlock (&ptx_dev_lock);
+
+- for (i = 0; i != GOMP_DIM_MAX; i++)
+- if (!dims[i])
+- dims[i] = default_dims[i];
+- }
+-
+- /* This reserves a chunk of a pre-allocated page of memory mapped on both
+- the host and the device. HP is a host pointer to the new chunk, and DP is
+- the corresponding device pointer. */
+- map_push (dev_str, async, mapnum * sizeof (void *), &hp, &dp);
+-
+- GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
+-
+- /* Copy the array of arguments to the mapped page. */
+- for (i = 0; i < mapnum; i++)
+- ((void **) hp)[i] = devaddrs[i];
+-
+- /* Copy the (device) pointers to arguments to the device (dp and hp might in
+- fact have the same value on a unified-memory system). */
+- CUDA_CALL_ASSERT (cuMemcpy, (CUdeviceptr) dp, (CUdeviceptr) hp,
+- mapnum * sizeof (void *));
++ {
++ bool default_dim_p[GOMP_DIM_MAX];
++ for (i = 0; i != GOMP_DIM_MAX; i++)
++ default_dim_p[i] = !dims[i];
++
++ if (!CUDA_CALL_EXISTS (cuOccupancyMaxPotentialBlockSize))
++ {
++ for (i = 0; i != GOMP_DIM_MAX; i++)
++ if (default_dim_p[i])
++ dims[i] = nvthd->ptx_dev->default_dims[i];
++
++ if (default_dim_p[GOMP_DIM_VECTOR])
++ dims[GOMP_DIM_VECTOR]
++ = MIN (dims[GOMP_DIM_VECTOR],
++ (targ_fn->max_threads_per_block / warp_size
++ * warp_size));
++
++ if (default_dim_p[GOMP_DIM_WORKER])
++ dims[GOMP_DIM_WORKER]
++ = MIN (dims[GOMP_DIM_WORKER],
++ targ_fn->max_threads_per_block / dims[GOMP_DIM_VECTOR]);
++ }
++ else
++ {
++ /* Handle the case that the compiler allows the runtime to choose
++ the vector-length conservatively, by ignoring
++ gomp_openacc_dims[GOMP_DIM_VECTOR]. TODO: actually handle
++ it. */
++ int vectors = 0;
++ /* TODO: limit gomp_openacc_dims[GOMP_DIM_WORKER] such that
++ gomp_openacc_dims[GOMP_DIM_WORKER] * actual_vectors does not
++ exceed targ_fn->max_threads_per_block. */
++ int workers = gomp_openacc_dims[GOMP_DIM_WORKER];
++ int gangs = gomp_openacc_dims[GOMP_DIM_GANG];
++ int grids, blocks;
++
++ CUDA_CALL_ASSERT (cuOccupancyMaxPotentialBlockSize, &grids,
++ &blocks, function, NULL, 0,
++ dims[GOMP_DIM_WORKER] * dims[GOMP_DIM_VECTOR]);
++ GOMP_PLUGIN_debug (0, "cuOccupancyMaxPotentialBlockSize: "
++ "grid = %d, block = %d\n", grids, blocks);
++
++ /* Keep the num_gangs proportional to the block size. In
++ the case where a block size is limited by shared-memory
++ or the register file capacity, the runtime will not
++ excessively over-assign gangs to the multiprocessor
++ units if their state is going to be swapped out even
++ more than necessary. The constant factor 2 is there to
++ prevent threads from idling when there is insufficient
++ work for them. */
++ if (gangs == 0)
++ gangs = 2 * grids * (blocks / warp_size);
++
++ if (vectors == 0)
++ vectors = warp_size;
++
++ if (workers == 0)
++ {
++ int actual_vectors = (default_dim_p[GOMP_DIM_VECTOR]
++ ? vectors
++ : dims[GOMP_DIM_VECTOR]);
++ workers = blocks / actual_vectors;
++ workers = MAX (workers, 1);
++ /* If we need a per-worker barrier ... . */
++ if (actual_vectors > 32)
++ /* Don't use more barriers than available. */
++ workers = MIN (workers, 15);
++ }
++
++ for (i = 0; i != GOMP_DIM_MAX; i++)
++ if (default_dim_p[i])
++ switch (i)
++ {
++ case GOMP_DIM_GANG: dims[i] = gangs; break;
++ case GOMP_DIM_WORKER: dims[i] = workers; break;
++ case GOMP_DIM_VECTOR: dims[i] = vectors; break;
++ default: GOMP_PLUGIN_fatal ("invalid dim");
++ }
++ }
++ }
++ }
++
++ /* Check if the accelerator has sufficient hardware resources to
++ launch the offloaded kernel. */
++ if (dims[GOMP_DIM_WORKER] * dims[GOMP_DIM_VECTOR]
++ > targ_fn->max_threads_per_block)
++ {
++ const char *msg
++ = ("The Nvidia accelerator has insufficient resources to launch '%s'"
++ " with num_workers = %d and vector_length = %d"
++ "; "
++ "recompile the program with 'num_workers = x and vector_length = y'"
++ " on that offloaded region or '-fopenacc-dim=:x:y' where"
++ " x * y <= %d"
++ ".\n");
++ GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
++ dims[GOMP_DIM_VECTOR], targ_fn->max_threads_per_block);
++ }
++
++ /* Check if the accelerator has sufficient barrier resources to
++ launch the offloaded kernel. */
++ if (dims[GOMP_DIM_WORKER] > 15 && dims[GOMP_DIM_VECTOR] > 32)
++ {
++ const char *msg
++ = ("The Nvidia accelerator has insufficient barrier resources to launch"
++ " '%s' with num_workers = %d and vector_length = %d"
++ "; "
++ "recompile the program with 'num_workers = x' on that offloaded"
++ " region or '-fopenacc-dim=:x:' where x <= 15"
++ "; "
++ "or, recompile the program with 'vector_length = 32' on that"
++ " offloaded region or '-fopenacc-dim=::32'"
++ ".\n");
++ GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
++ dims[GOMP_DIM_VECTOR]);
++ }
++
++ if (mapnum > 0)
++ {
++ /* This reserves a block of device memory from the stream's queue of
++ cuLaunchKernel argument maps. DP is the device pointer to that block;
++ the arguments are staged in a host buffer HP and then copied to DP. */
++ pthread_mutex_lock (&ptx_event_lock);
++ dp = map_push (dev_str, mapnum * sizeof (void *));
++ pthread_mutex_unlock (&ptx_event_lock);
++
++ GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
++
++ /* Copy the array of arguments to the host staging buffer. */
++ hp = alloca(sizeof(void *) * mapnum);
++ for (i = 0; i < mapnum; i++)
++ ((void **) hp)[i] = devaddrs[i];
++
++ /* Copy the (device) pointers to arguments to the device */
++ CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, hp,
++ mapnum * sizeof (void *));
++ }
++
+ GOMP_PLUGIN_debug (0, " %s: kernel %s: launch"
+ " gangs=%u, workers=%u, vectors=%u\n",
+ __FUNCTION__, targ_fn->launch->fn, dims[GOMP_DIM_GANG],
+@@ -1239,7 +1426,8 @@ nvptx_exec (void (*fn), size_t mapnum, v
+
+ CUDA_CALL_ASSERT (cuEventRecord, *e, dev_str->stream);
+
+- event_add (PTX_EVT_KNL, e, (void *)dev_str, 0);
++ if (mapnum > 0)
++ event_add (PTX_EVT_KNL, e, (void *)dev_str, 0);
+ }
+ #else
+ r = CUDA_CALL_NOCHECK (cuCtxSynchronize, );
+@@ -1256,7 +1444,10 @@ nvptx_exec (void (*fn), size_t mapnum, v
+ #ifndef DISABLE_ASYNC
+ if (async < acc_async_noval)
+ #endif
+- map_pop (dev_str);
++ {
++ if (mapnum > 0)
++ map_pop (dev_str);
++ }
+ }
+
+ void * openacc_get_current_cuda_context (void);
+@@ -1415,9 +1606,8 @@ nvptx_async_test (int async)
+ struct ptx_stream *s;
+
+ s = select_stream_for_async (async, pthread_self (), false, NULL);
+-
+ if (!s)
+- GOMP_PLUGIN_fatal ("unknown async %d", async);
++ return 1;
+
+ r = CUDA_CALL_NOCHECK (cuStreamQuery, s->stream);
+ if (r == CUDA_SUCCESS)
+@@ -1472,7 +1662,7 @@ nvptx_wait (int async)
+
+ s = select_stream_for_async (async, pthread_self (), false, NULL);
+ if (!s)
+- GOMP_PLUGIN_fatal ("unknown async %d", async);
++ return;
+
+ CUDA_CALL_ASSERT (cuStreamSynchronize, s->stream);
+
+@@ -1486,16 +1676,17 @@ nvptx_wait_async (int async1, int async2
+ struct ptx_stream *s1, *s2;
+ pthread_t self = pthread_self ();
+
++ s1 = select_stream_for_async (async1, self, false, NULL);
++ if (!s1)
++ return;
++
+ /* The stream that is waiting (rather than being waited for) doesn't
+ necessarily have to exist already. */
+ s2 = select_stream_for_async (async2, self, true, NULL);
+
+- s1 = select_stream_for_async (async1, self, false, NULL);
+- if (!s1)
+- GOMP_PLUGIN_fatal ("invalid async 1\n");
+-
++ /* A stream is always synchronized with itself. */
+ if (s1 == s2)
+- GOMP_PLUGIN_fatal ("identical parameters");
++ return;
+
+ e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
+
+@@ -1629,8 +1820,14 @@ nvptx_set_cuda_stream (int async, void *
+ pthread_t self = pthread_self ();
+ struct nvptx_thread *nvthd = nvptx_thread ();
+
+- if (async < 0)
+- GOMP_PLUGIN_fatal ("bad async %d", async);
++ /* Due to the "null_stream" usage for "acc_async_sync", this cannot be used
++ to change the stream handle associated with "acc_async_sync". */
++ if (async == acc_async_sync)
++ {
++ GOMP_PLUGIN_debug (0, "Refusing request to set CUDA stream associated"
++ " with \"acc_async_sync\"\n");
++ return 0;
++ }
+
+ pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
+
+@@ -1739,6 +1936,12 @@ GOMP_OFFLOAD_fini_device (int n)
+ instantiated_devices--;
+ }
+
++ if (instantiated_devices == 0)
++ {
++ free (ptx_devices);
++ ptx_devices = NULL;
++ }
++
+ pthread_mutex_unlock (&ptx_dev_lock);
+ return true;
+ }
+--- libgomp/plugin/configfrag.ac.jj 2018-04-25 09:40:31.914655581 +0200
++++ libgomp/plugin/configfrag.ac 2019-05-07 18:46:36.533109624 +0200
+@@ -26,8 +26,6 @@
+ # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ # .
+
+-offload_targets=
+-AC_SUBST(offload_targets)
+ plugin_support=yes
+ AC_CHECK_LIB(dl, dlsym, , [plugin_support=no])
+ if test x"$plugin_support" = xyes; then
+@@ -59,7 +57,11 @@ AC_ARG_WITH(cuda-driver-lib,
+ [AS_HELP_STRING([--with-cuda-driver-lib=PATH],
+ [specify directory for the installed CUDA driver library])])
+ case "x$with_cuda_driver" in
+- x | xno) ;;
++ x) ;;
++ xno)
++ CUDA_DRIVER_INCLUDE=no
++ CUDA_DRIVER_LIB=no
++ ;;
+ *) CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
+ CUDA_DRIVER_LIB=$with_cuda_driver/lib
+ ;;
+@@ -70,10 +72,12 @@ fi
+ if test "x$with_cuda_driver_lib" != x; then
+ CUDA_DRIVER_LIB=$with_cuda_driver_lib
+ fi
+-if test "x$CUDA_DRIVER_INCLUDE" != x; then
++if test "x$CUDA_DRIVER_INCLUDE" != x \
++ && test "x$CUDA_DRIVER_INCLUDE" != xno; then
+ CUDA_DRIVER_CPPFLAGS=-I$CUDA_DRIVER_INCLUDE
+ fi
+-if test "x$CUDA_DRIVER_LIB" != x; then
++if test "x$CUDA_DRIVER_LIB" != x \
++ && test "x$CUDA_DRIVER_LIB" != xno; then
+ CUDA_DRIVER_LDFLAGS=-L$CUDA_DRIVER_LIB
+ fi
+
+@@ -133,7 +137,13 @@ AC_SUBST(PLUGIN_HSA_CPPFLAGS)
+ AC_SUBST(PLUGIN_HSA_LDFLAGS)
+ AC_SUBST(PLUGIN_HSA_LIBS)
+
+-# Get offload targets and path to install tree of offloading compiler.
++# Parse '--enable-offload-targets', figure out the corresponding libgomp
++# plugins, and configure to find the corresponding offload compilers.
++# 'offload_plugins' and 'offload_targets' will be populated in the same order.
++offload_plugins=
++offload_targets=
++AC_SUBST(offload_plugins)
++AC_SUBST(offload_targets)
+ offload_additional_options=
+ offload_additional_lib_paths=
+ AC_SUBST(offload_additional_options)
+@@ -152,10 +152,10 @@ if test x"$enable_offload_targets" != x;
+ for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do
+ tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'`
+ tgt=`echo $tgt | sed 's/=.*//'`
+- tgt_name=
++ tgt_plugin=
+ case $tgt in
+ *-intelmic-* | *-intelmicemul-*)
+- tgt_name=intelmic
++ tgt_plugin=intelmic
+ ;;
+ nvptx*)
+ case "${target}" in
+@@ -167,30 +167,35 @@ if test x"$enable_offload_targets" != x;
+ PLUGIN_NVPTX=0
+ ;;
+ *)
+- tgt_name=nvptx
++ tgt_plugin=nvptx
+ PLUGIN_NVPTX=$tgt
+- PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
+- PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
+- PLUGIN_NVPTX_LIBS='-lcuda'
++ if test "x$CUDA_DRIVER_INCLUDE" != xno \
++ && test "x$CUDA_DRIVER_LIB" != xno; then
++ PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
++ PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
++ PLUGIN_NVPTX_LIBS='-lcuda'
+
+- PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
+- CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
+- PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
+- LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
+- PLUGIN_NVPTX_save_LIBS=$LIBS
+- LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
+- AC_LINK_IFELSE(
+- [AC_LANG_PROGRAM(
+- [#include "cuda.h"],
+- [CUresult r = cuCtxPushCurrent (NULL);])],
+- [PLUGIN_NVPTX=1])
+- CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
+- LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
+- LIBS=$PLUGIN_NVPTX_save_LIBS
++ PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
++ CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
++ PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
++ LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
++ PLUGIN_NVPTX_save_LIBS=$LIBS
++ LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
++ AC_LINK_IFELSE(
++ [AC_LANG_PROGRAM(
++ [#include "cuda.h"],
++ [CUresult r = cuCtxPushCurrent (NULL);])],
++ [PLUGIN_NVPTX=1])
++ CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
++ LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
++ LIBS=$PLUGIN_NVPTX_save_LIBS
++ fi
+ case $PLUGIN_NVPTX in
+ nvptx*)
+- if test "x$CUDA_DRIVER_INCLUDE" = x \
+- && test "x$CUDA_DRIVER_LIB" = x; then
++ if (test "x$CUDA_DRIVER_INCLUDE" = x \
++ || test "x$CUDA_DRIVER_INCLUDE" = xno) \
++ && (test "x$CUDA_DRIVER_LIB" = x \
++ || test "x$CUDA_DRIVER_LIB" = xno); then
+ PLUGIN_NVPTX=1
+ PLUGIN_NVPTX_CPPFLAGS='-I$(srcdir)/plugin/cuda'
+ PLUGIN_NVPTX_LIBS='-ldl'
+@@ -191,7 +206,7 @@ if test x"$enable_offload_targets" != x;
+ PLUGIN_HSA=0
+ ;;
+ *)
+- tgt_name=hsa
++ tgt_plugin=hsa
+ PLUGIN_HSA=$tgt
+ PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
+ PLUGIN_HSA_LDFLAGS="$HSA_RUNTIME_LDFLAGS"
+@@ -209,7 +224,7 @@ if test x"$enable_offload_targets" != x;
+ LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
+ LIBS=$PLUGIN_HSA_save_LIBS
+ case $PLUGIN_HSA in
+- hsa*)
++ hsa*)
+ HSA_PLUGIN=0
+ AC_MSG_ERROR([HSA run-time package required for HSA support])
+ ;;
+@@ -226,16 +241,19 @@ if test x"$enable_offload_targets" != x;
+ AC_MSG_ERROR([unknown offload target specified])
+ ;;
+ esac
+- if test x"$tgt_name" = x; then
+- # Don't configure libgomp for this offloading target if we don't build
+- # the corresponding plugin.
++ if test x"$tgt_plugin" = x; then
++ # Not configuring libgomp for this offload target if we're not building
++ # the corresponding offload plugin.
+ continue
+- elif test x"$offload_targets" = x; then
+- offload_targets=$tgt_name
++ elif test x"$offload_plugins" = x; then
++ offload_plugins=$tgt_plugin
++ offload_targets=$tgt
+ else
+- offload_targets=$offload_targets,$tgt_name
++ offload_plugins=$offload_plugins,$tgt_plugin
++ offload_targets=$offload_targets,$tgt
+ fi
+- if test "$tgt_name" = hsa; then
++ # Configure additional search paths.
++ if test "$tgt_plugin" = hsa; then
+ # Offloading compilation is all handled by the target compiler.
+ :
+ elif test x"$tgt_dir" != x; then
+@@ -247,8 +265,8 @@ if test x"$enable_offload_targets" != x;
+ fi
+ done
+ fi
+-AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
+- [Define to offload targets, separated by commas.])
++AC_DEFINE_UNQUOTED(OFFLOAD_PLUGINS, "$offload_plugins",
++ [Define to offload plugins, separated by commas.])
+ AM_CONDITIONAL([PLUGIN_NVPTX], [test $PLUGIN_NVPTX = 1])
+ AC_DEFINE_UNQUOTED([PLUGIN_NVPTX], [$PLUGIN_NVPTX],
+ [Define to 1 if the NVIDIA plugin is built, 0 if not.])
+--- libgomp/affinity-fmt.c.jj 2019-05-07 18:46:36.285113585 +0200
++++ libgomp/affinity-fmt.c 2019-05-07 18:46:36.285113585 +0200
+@@ -0,0 +1,495 @@
++/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
++ Contributed by Jakub Jelinek .
++
++ This file is part of the GNU Offloading and Multi Processing Library
++ (libgomp).
++
++ Libgomp is free software; you can redistribute it and/or modify it
++ under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 3, or (at your option)
++ any later version.
++
++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ more details.
++
++ Under Section 7 of GPL version 3, you are granted additional
++ permissions described in the GCC Runtime Library Exception, version
++ 3.1, as published by the Free Software Foundation.
++
++ You should have received a copy of the GNU General Public License and
++ a copy of the GCC Runtime Library Exception along with this program;
++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
++ . */
++
++#include "libgomp.h"
++#include
++#include
++#include
++#ifdef HAVE_UNISTD_H
++#include
++#endif
++#ifdef HAVE_INTTYPES_H
++# include /* For PRIx64. */
++#endif
++#ifdef HAVE_UNAME
++#include
++#endif
++
++void
++gomp_print_string (const char *str, size_t len)
++{
++ fwrite (str, 1, len, stderr);
++}
++
++void
++gomp_set_affinity_format (const char *format, size_t len)
++{
++ if (len < gomp_affinity_format_len)
++ memcpy (gomp_affinity_format_var, format, len);
++ else
++ {
++ char *p;
++ if (gomp_affinity_format_len)
++ p = gomp_realloc (gomp_affinity_format_var, len + 1);
++ else
++ p = gomp_malloc (len + 1);
++ memcpy (p, format, len);
++ gomp_affinity_format_var = p;
++ gomp_affinity_format_len = len + 1;
++ }
++ gomp_affinity_format_var[len] = '\0';
++}
++
++void
++omp_set_affinity_format (const char *format)
++{
++ gomp_set_affinity_format (format, strlen (format));
++}
++
++size_t
++omp_get_affinity_format (char *buffer, size_t size)
++{
++ size_t len = strlen (gomp_affinity_format_var);
++ if (size)
++ {
++ if (len < size)
++ memcpy (buffer, gomp_affinity_format_var, len + 1);
++ else
++ {
++ memcpy (buffer, gomp_affinity_format_var, size - 1);
++ buffer[size - 1] = '\0';
++ }
++ }
++ return len;
++}
++
++void
++gomp_display_string (char *buffer, size_t size, size_t *ret,
++ const char *str, size_t len)
++{
++ size_t r = *ret;
++ if (size && r < size)
++ {
++ size_t l = len;
++ if (size - r < len)
++ l = size - r;
++ memcpy (buffer + r, str, l);
++ }
++ *ret += len;
++ if (__builtin_expect (r > *ret, 0))
++ gomp_fatal ("overflow in omp_capture_affinity");
++}
++
++static void
++gomp_display_repeat (char *buffer, size_t size, size_t *ret,
++ char c, size_t len)
++{
++ size_t r = *ret;
++ if (size && r < size)
++ {
++ size_t l = len;
++ if (size - r < len)
++ l = size - r;
++ memset (buffer + r, c, l);
++ }
++ *ret += len;
++ if (__builtin_expect (r > *ret, 0))
++ gomp_fatal ("overflow in omp_capture_affinity");
++}
++
++static void
++gomp_display_num (char *buffer, size_t size, size_t *ret,
++ bool zero, bool right, size_t sz, char *buf)
++{
++ size_t l = strlen (buf);
++ if (sz == (size_t) -1 || l >= sz)
++ {
++ gomp_display_string (buffer, size, ret, buf, l);
++ return;
++ }
++ if (zero)
++ {
++ if (buf[0] == '-')
++ gomp_display_string (buffer, size, ret, buf, 1);
++ else if (buf[0] == '0' && buf[1] == 'x')
++ gomp_display_string (buffer, size, ret, buf, 2);
++ gomp_display_repeat (buffer, size, ret, '0', sz - l);
++ if (buf[0] == '-')
++ gomp_display_string (buffer, size, ret, buf + 1, l - 1);
++ else if (buf[0] == '0' && buf[1] == 'x')
++ gomp_display_string (buffer, size, ret, buf + 2, l - 2);
++ else
++ gomp_display_string (buffer, size, ret, buf, l);
++ }
++ else if (right)
++ {
++ gomp_display_repeat (buffer, size, ret, ' ', sz - l);
++ gomp_display_string (buffer, size, ret, buf, l);
++ }
++ else
++ {
++ gomp_display_string (buffer, size, ret, buf, l);
++ gomp_display_repeat (buffer, size, ret, ' ', sz - l);
++ }
++}
++
++static void
++gomp_display_int (char *buffer, size_t size, size_t *ret,
++ bool zero, bool right, size_t sz, int num)
++{
++ char buf[3 * sizeof (int) + 2];
++ sprintf (buf, "%d", num);
++ gomp_display_num (buffer, size, ret, zero, right, sz, buf);
++}
++
++static void
++gomp_display_string_len (char *buffer, size_t size, size_t *ret,
++ bool right, size_t sz, char *str, size_t len)
++{
++ if (sz == (size_t) -1 || len >= sz)
++ {
++ gomp_display_string (buffer, size, ret, str, len);
++ return;
++ }
++
++ if (right)
++ {
++ gomp_display_repeat (buffer, size, ret, ' ', sz - len);
++ gomp_display_string (buffer, size, ret, str, len);
++ }
++ else
++ {
++ gomp_display_string (buffer, size, ret, str, len);
++ gomp_display_repeat (buffer, size, ret, ' ', sz - len);
++ }
++}
++
++/* Emit the host name, padded per RIGHT and SZ.  Try gethostname with a
++   buffer doubling from 256 bytes up to 1 MiB, then uname's nodename, and
++   fall back to the literal "node" when neither is available or works.  */
++static void
++gomp_display_hostname (char *buffer, size_t size, size_t *ret,
++                       bool right, size_t sz)
++{
++#ifdef HAVE_GETHOSTNAME
++  {
++    char stack_buf[256];
++    char *name = stack_buf;
++    size_t cap = sizeof stack_buf;
++    for (;;)
++      {
++        name[cap - 1] = '\0';
++        if (gethostname (name, cap - 1) == 0)
++          {
++            size_t n = strlen (name);
++            /* Presumably a name filling the whole buffer may be cut; retry.  */
++            if (n < cap - 1)
++              {
++                gomp_display_string_len (buffer, size, ret, right, sz,
++                                         name, n);
++                if (name != stack_buf)
++                  free (name);
++                return;
++              }
++          }
++        if (cap == 1048576)
++          break;
++        cap *= 2;
++        name = (cap == 512 ? gomp_malloc (cap) : gomp_realloc (name, cap));
++      }
++    if (name != stack_buf)
++      free (name);
++  }
++#endif
++#ifdef HAVE_UNAME
++  {
++    struct utsname uts;
++    if (uname (&uts) == 0)
++      {
++        gomp_display_string_len (buffer, size, ret, right, sz,
++                                 uts.nodename, strlen (uts.nodename));
++        return;
++      }
++  }
++#endif
++  gomp_display_string_len (buffer, size, ret, right, sz, "node", 4);
++}
++
++struct affinity_types_struct {
++  char long_str[18];    /* Name accepted between "%{" and "}".  */
++  char long_len;        /* Length of long_str, excluding the NUL.  */
++  char short_c;         /* Equivalent one-letter type specifier.  */
++};
++
++static struct affinity_types_struct affinity_types[] =
++{
++#define AFFINITY_TYPE(l, s) { #l, sizeof (#l) - 1, s }
++  AFFINITY_TYPE (team_num, 't'),
++  AFFINITY_TYPE (num_teams, 'T'),
++  AFFINITY_TYPE (nesting_level, 'L'),
++  AFFINITY_TYPE (thread_num, 'n'),
++  AFFINITY_TYPE (num_threads, 'N'),
++  AFFINITY_TYPE (ancestor_tnum, 'a'),
++  AFFINITY_TYPE (host, 'H'),
++  AFFINITY_TYPE (process_id, 'P'),
++  AFFINITY_TYPE (native_thread_id, 'i'),
++  AFFINITY_TYPE (thread_affinity, 'A')
++#undef AFFINITY_TYPE
++};
++
++/* Expand FORMAT for the thread given by HANDLE, TS and PLACE into BUFFER of
++   SIZE bytes.  A field is "%%" or %[0][.][width] plus a type letter or a
++   {long_name} from affinity_types; malformed fields end in gomp_fatal.
++   Returns the length accumulated by the display helpers.  */
++size_t
++gomp_display_affinity (char *buffer, size_t size,
++                       const char *format, gomp_thread_handle handle,
++                       struct gomp_team_state *ts, unsigned int place)
++{
++  size_t used = 0;
++  for (;;)
++    {
++      const char *cur = strchr (format, '%');
++      bool zero = false, right = false;
++      size_t width = -1;
++      char type;
++      int num;
++      if (cur == NULL)
++        cur = strchr (format, '\0');
++      if (cur != format)
++        gomp_display_string (buffer, size, &used, format, cur - format);
++      if (*cur == '\0')
++        break;
++      if (*++cur == '%')
++        {
++          gomp_display_string (buffer, size, &used, "%", 1);
++          format = cur + 1;
++          continue;
++        }
++      /* Flags: '0' must be followed by '.', and either flag needs a width.  */
++      if (*cur == '0')
++        {
++          zero = true;
++          if (*++cur != '.')
++            gomp_fatal ("leading zero not followed by dot in affinity format");
++        }
++      if (*cur == '.')
++        {
++          right = true;
++          cur++;
++        }
++      if (*cur >= '1' && *cur <= '9')
++        {
++          char *end;
++          width = strtoul (cur, &end, 10);
++          cur = end;
++        }
++      else if (zero || right)
++        gomp_fatal ("leading zero or right justification in affinity format "
++                    "requires size");
++      type = *cur;
++      if (type == '{')
++        {
++          /* Translate a {long_name} to its letter, leaving CUR on the '}'.  */
++          size_t i, n = sizeof (affinity_types) / sizeof (affinity_types[0]);
++          for (i = 0; i < n; ++i)
++            {
++              const struct affinity_types_struct *t = &affinity_types[i];
++              if (strncmp (cur + 1, t->long_str, t->long_len) == 0
++                  && cur[t->long_len + 1] == '}')
++                {
++                  type = t->short_c;
++                  cur += t->long_len + 1;
++                  break;
++                }
++            }
++          if (type == '{')
++            {
++              char *q = strchr (cur + 1, '}');
++              if (q)
++                gomp_fatal ("unsupported long type name '%.*s' in affinity "
++                            "format", (int) (q - (cur + 1)), cur + 1);
++              else
++                gomp_fatal ("unterminated long type name '%s' in affinity "
++                            "format", cur + 1);
++            }
++        }
++      switch (type)
++        {
++        case 't':
++          num = omp_get_team_num ();
++          goto do_int;
++        case 'T':
++          num = omp_get_num_teams ();
++          goto do_int;
++        case 'L':
++          num = ts->level;
++          goto do_int;
++        case 'n':
++          num = ts->team_id;
++          goto do_int;
++        case 'N':
++          num = ts->team ? ts->team->nthreads : 1;
++          goto do_int;
++        case 'a':
++          num = ts->team ? ts->team->prev_ts.team_id : -1;
++          goto do_int;
++        case 'H':
++          gomp_display_hostname (buffer, size, &used, right, width);
++          break;
++        case 'P':
++#ifdef HAVE_GETPID
++          num = getpid ();
++#else
++          num = 0;
++#endif
++          goto do_int;
++        case 'i':
++#if defined(LIBGOMP_USE_PTHREADS) && defined(__GNUC__)
++          {
++            char hex[3 * (sizeof (handle) + sizeof (uintptr_t) + sizeof (int))
++                     + 4];
++            /* Yields EXPR itself when it has integral or pointer type and
++               0 when it is anything else, such as an aggregate.  */
++#define gomp_nonaggregate(expr) \
++  __builtin_choose_expr (__builtin_classify_type (expr) == 1 \
++                         || __builtin_classify_type (expr) == 5, expr, 0)
++            /* Yields EXPR itself for integral types, (uintptr_t) (EXPR)
++               for pointer types and 0 for anything else, such as an
++               aggregate.  */
++#define gomp_integral(expr) \
++  __builtin_choose_expr (__builtin_classify_type (expr) == 5, \
++                         (uintptr_t) gomp_nonaggregate (expr), \
++                         gomp_nonaggregate (expr))
++            if (sizeof (gomp_integral (handle)) == sizeof (unsigned long))
++              sprintf (hex, "0x%lx", (unsigned long) gomp_integral (handle));
++#if defined (HAVE_INTTYPES_H) && defined (PRIx64)
++            else if (sizeof (gomp_integral (handle)) == sizeof (uint64_t))
++              sprintf (hex, "0x%" PRIx64, (uint64_t) gomp_integral (handle));
++#else
++            else if (sizeof (gomp_integral (handle))
++                     == sizeof (unsigned long long))
++              sprintf (hex, "0x%llx",
++                       (unsigned long long) gomp_integral (handle));
++#endif
++            else
++              sprintf (hex, "0x%x", (unsigned int) gomp_integral (handle));
++            gomp_display_num (buffer, size, &used, zero, right, width, hex);
++            break;
++          }
++#else
++          num = 0;
++          goto do_int;
++#endif
++        case 'A':
++          if (width == (size_t) -1)
++            gomp_display_affinity_place (buffer, size, &used, place - 1);
++          else if (right)
++            {
++              size_t len = 0;
++              gomp_display_affinity_place (NULL, 0, &len, place - 1);
++              if (len < width)
++                gomp_display_repeat (buffer, size, &used, ' ', width - len);
++              gomp_display_affinity_place (buffer, size, &used, place - 1);
++            }
++          else
++            {
++              size_t start = used;
++              gomp_display_affinity_place (buffer, size, &used, place - 1);
++              if (used - start < width)
++                gomp_display_repeat (buffer, size, &used, ' ',
++                                     width - (used - start));
++            }
++          break;
++        do_int:
++          gomp_display_int (buffer, size, &used, zero, right, width, num);
++          break;
++        default:
++          gomp_fatal ("unsupported type %c in affinity format", type);
++        }
++      format = cur + 1;
++    }
++  return used;
++}
++
++/* Expand FORMAT, or gomp_affinity_format_var when FORMAT is NULL or empty,
++   for the calling thread into BUFFER and NUL-terminate it within SIZE
++   bytes.  Returns the length reported by gomp_display_affinity; no
++   terminator is stored when SIZE is zero.  */
++size_t
++omp_capture_affinity (char *buffer, size_t size, const char *format)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  const char *fmt = gomp_affinity_format_var;
++  size_t ret;
++  if (format && *format)
++    fmt = format;
++  ret = gomp_display_affinity (buffer, size, fmt, gomp_thread_self (),
++                               &thr->ts, thr->place);
++  if (size)
++    buffer[ret >= size ? size - 1 : ret] = '\0';
++  return ret;
++}
++ialias (omp_capture_affinity)
++
++/* Print the calling thread's affinity per FORMAT plus a newline, using a
++   heap buffer when the text does not fit the 512-byte stack buffer.  */
++void
++omp_display_affinity (const char *format)
++{
++  char local[512];
++  char *out = local;
++  size_t n = ialias_call (omp_capture_affinity) (local, sizeof local, format);
++  if (n >= sizeof local)
++    {
++      out = gomp_malloc (n + 1);
++      ialias_call (omp_capture_affinity) (out, n + 1, format);
++    }
++  out[n] = '\n';        /* The newline goes at the reported length.  */
++  gomp_print_string (out, n + 1);
++  if (out != local)
++    free (out);
++}
++
++/* Print gomp_affinity_format_var expanded for the thread given by HANDLE,
++   TS and PLACE, plus a newline; output of 512 bytes or more uses the heap.  */
++void
++gomp_display_affinity_thread (gomp_thread_handle handle,
++                              struct gomp_team_state *ts, unsigned int place)
++{
++  char local[512], *out = local;
++  size_t n
++    = gomp_display_affinity (local, sizeof local, gomp_affinity_format_var,
++                             handle, ts, place);
++  if (n >= sizeof local)
++    {
++      out = gomp_malloc (n + 1);
++      gomp_display_affinity (out, n + 1, gomp_affinity_format_var,
++                             handle, ts, place);
++    }
++  out[n] = '\n';
++  gomp_print_string (out, n + 1);
++  if (out != local)
++    free (out);
++}
+--- libgomp/single.c.jj 2018-04-25 09:40:31.870655561 +0200
++++ libgomp/single.c 2019-05-07 18:46:36.536109576 +0200
+@@ -47,7 +47,7 @@ GOMP_single_start (void)
+ return __sync_bool_compare_and_swap (&team->single_count, single_count,
+ single_count + 1L);
+ #else
+- bool ret = gomp_work_share_start (false);
++ bool ret = gomp_work_share_start (0);
+ if (ret)
+ gomp_work_share_init_done ();
+ gomp_work_share_end_nowait ();
+@@ -68,7 +68,7 @@ GOMP_single_copy_start (void)
+ bool first;
+ void *ret;
+
+- first = gomp_work_share_start (false);
++ first = gomp_work_share_start (0);
+
+ if (first)
+ {
+--- libgomp/oacc-cuda.c.jj 2018-04-25 09:40:31.321655307 +0200
++++ libgomp/oacc-cuda.c 2019-05-07 18:46:36.528109704 +0200
+@@ -58,7 +58,7 @@ acc_get_cuda_stream (int async)
+ {
+ struct goacc_thread *thr = goacc_thread ();
+
+- if (async < 0)
++ if (!async_valid_p (async))
+ return NULL;
+
+ if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func)
+@@ -72,7 +72,7 @@ acc_set_cuda_stream (int async, void *st
+ {
+ struct goacc_thread *thr;
+
+- if (async < 0 || stream == NULL)
++ if (!async_valid_p (async) || stream == NULL)
+ return 0;
+
+ goacc_lazy_initialize ();
+--- libgomp/work.c.jj 2018-04-25 09:40:31.925655587 +0200
++++ libgomp/work.c 2019-05-07 18:46:36.548109384 +0200
+@@ -76,7 +76,15 @@ alloc_work_share (struct gomp_team *team
+ #endif
+
+ team->work_share_chunk *= 2;
++ /* Allocating gomp_work_share structures aligned is just an
++ optimization, don't do it when using the fallback method. */
++#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
++ ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
++ team->work_share_chunk
++ * sizeof (struct gomp_work_share));
++#else
+ ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
++#endif
+ ws->next_alloc = team->work_shares[0].next_alloc;
+ team->work_shares[0].next_alloc = ws;
+ team->work_share_list_alloc = &ws[1];
+@@ -90,30 +98,35 @@ alloc_work_share (struct gomp_team *team
+ This shouldn't touch the next_alloc field. */
+
+ void
+-gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
++gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
+ unsigned nthreads)
+ {
+ gomp_mutex_init (&ws->lock);
+ if (__builtin_expect (ordered, 0))
+ {
+-#define INLINE_ORDERED_TEAM_IDS_CNT \
+- ((sizeof (struct gomp_work_share) \
+- - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
+- / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))
+-
+- if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)
+- ws->ordered_team_ids
+- = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
++#define INLINE_ORDERED_TEAM_IDS_SIZE \
++ (sizeof (struct gomp_work_share) \
++ - offsetof (struct gomp_work_share, inline_ordered_team_ids))
++
++ if (__builtin_expect (ordered != 1, 0))
++ {
++ ordered += nthreads * sizeof (*ws->ordered_team_ids) - 1;
++ ordered = ordered + __alignof__ (long long) - 1;
++ ordered &= ~(__alignof__ (long long) - 1);
++ }
++ else
++ ordered = nthreads * sizeof (*ws->ordered_team_ids);
++ if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
++ ws->ordered_team_ids = gomp_malloc (ordered);
+ else
+ ws->ordered_team_ids = ws->inline_ordered_team_ids;
+- memset (ws->ordered_team_ids, '\0',
+- nthreads * sizeof (*ws->ordered_team_ids));
++ memset (ws->ordered_team_ids, '\0', ordered);
+ ws->ordered_num_used = 0;
+ ws->ordered_owner = -1;
+ ws->ordered_cur = 0;
+ }
+ else
+- ws->ordered_team_ids = NULL;
++ ws->ordered_team_ids = ws->inline_ordered_team_ids;
+ gomp_ptrlock_init (&ws->next_ws, NULL);
+ ws->threads_completed = 0;
+ }
+@@ -166,7 +179,7 @@ free_work_share (struct gomp_team *team,
+ if this was the first thread to reach this point. */
+
+ bool
+-gomp_work_share_start (bool ordered)
++gomp_work_share_start (size_t ordered)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+@@ -178,7 +191,7 @@ gomp_work_share_start (bool ordered)
+ ws = gomp_malloc (sizeof (*ws));
+ gomp_init_work_share (ws, ordered, 1);
+ thr->ts.work_share = ws;
+- return ws;
++ return true;
+ }
+
+ ws = thr->ts.work_share;
+--- include/gomp-constants.h.jj 2018-04-25 09:40:39.757659209 +0200
++++ include/gomp-constants.h 2019-05-07 18:57:33.333627031 +0200
+@@ -189,6 +189,7 @@ enum gomp_map_kind
+ #define GOMP_TASK_FLAG_GRAINSIZE (1 << 9)
+ #define GOMP_TASK_FLAG_IF (1 << 10)
+ #define GOMP_TASK_FLAG_NOGROUP (1 << 11)
++#define GOMP_TASK_FLAG_REDUCTION (1 << 12)
+
+ /* GOMP_target{_ext,update_ext,enter_exit_data} flags argument. */
+ #define GOMP_TARGET_FLAG_NOWAIT (1 << 0)
+@@ -196,6 +197,18 @@ enum gomp_map_kind
+ /* Internal to libgomp. */
+ #define GOMP_TARGET_FLAG_UPDATE (1U << 31)
+
++
++/* OpenACC construct flags. */
++
++/* Force host fallback execution. */
++#define GOACC_FLAG_HOST_FALLBACK (1 << 0)
++
++/* For legacy reasons, in the ABI, the GOACC_FLAGs are encoded as an inverted
++ bitmask. */
++#define GOACC_FLAGS_MARSHAL_OP BIT_NOT_EXPR
++#define GOACC_FLAGS_UNMARSHAL(X) (~(X))
++
++
+ /* Versions of libgomp and device-specific plugins. GOMP_VERSION
+ should be incremented whenever an ABI-incompatible change is introduced
+ to the plugin interface defined in libgomp/libgomp.h. */
+@@ -251,6 +264,12 @@ enum gomp_map_kind
+ at most and shifted by this many bits. */
+ #define GOMP_TARGET_ARG_VALUE_SHIFT 16
+
++/* Dependence types in omp_depend_t objects. */
++#define GOMP_DEPEND_IN 1
++#define GOMP_DEPEND_OUT 2
++#define GOMP_DEPEND_INOUT 3
++#define GOMP_DEPEND_MUTEXINOUTSET 4
++
+ /* HSA specific data structures. */
+
+ /* Identifiers of device-specific target arguments. */
diff --git a/SOURCES/gcc8-libgomp-omp_h-multilib.patch b/SOURCES/gcc8-libgomp-omp_h-multilib.patch
new file mode 100644
index 0000000..d0e98d1
--- /dev/null
+++ b/SOURCES/gcc8-libgomp-omp_h-multilib.patch
@@ -0,0 +1,17 @@
+2008-06-09 Jakub Jelinek
+
+ * omp.h.in (omp_nest_lock_t): Fix up for Linux multilibs.
+
+--- libgomp/omp.h.in.jj 2008-06-09 13:34:05.000000000 +0200
++++ libgomp/omp.h.in 2008-06-09 13:34:48.000000000 +0200
+@@ -42,8 +42,8 @@ typedef struct
+
+ typedef struct
+ {
+- unsigned char _x[@OMP_NEST_LOCK_SIZE@]
+- __attribute__((__aligned__(@OMP_NEST_LOCK_ALIGN@)));
++ unsigned char _x[8 + sizeof (void *)]
++ __attribute__((__aligned__(sizeof (void *))));
+ } omp_nest_lock_t;
+ #endif
+
diff --git a/SOURCES/gcc8-libgomp-testsuite.patch b/SOURCES/gcc8-libgomp-testsuite.patch
new file mode 100644
index 0000000..502ee22
--- /dev/null
+++ b/SOURCES/gcc8-libgomp-testsuite.patch
@@ -0,0 +1,41 @@
+--- libgomp/testsuite/libgomp-test-support.exp.in.jj 2018-04-25 09:40:31.323655308 +0200
++++ libgomp/testsuite/libgomp-test-support.exp.in 2019-04-25 20:01:50.028243827 +0200
+@@ -2,4 +2,5 @@ set cuda_driver_include "@CUDA_DRIVER_IN
+ set cuda_driver_lib "@CUDA_DRIVER_LIB@"
+ set hsa_runtime_lib "@HSA_RUNTIME_LIB@"
+
++set offload_plugins "@offload_plugins@"
+ set offload_targets "@offload_targets@"
+--- libgomp/testsuite/lib/libgomp.exp.jj 2018-04-25 09:40:31.584655429 +0200
++++ libgomp/testsuite/lib/libgomp.exp 2019-05-24 11:41:51.015822702 +0200
+@@ -40,7 +40,7 @@ load_file libgomp-test-support.exp
+ # Populate offload_targets_s (offloading targets separated by a space), and
+ # offload_targets_s_openacc (the same, but with OpenACC names; OpenACC spells
+ # some of them a little differently).
+-set offload_targets_s [split $offload_targets ","]
++set offload_targets_s [split $offload_plugins ","]
+ set offload_targets_s_openacc {}
+ foreach offload_target_openacc $offload_targets_s {
+ # Translate to OpenACC names, or skip if not yet supported.
+@@ -137,8 +137,8 @@ proc libgomp_init { args } {
+
+ # Add liboffloadmic build directory in LD_LIBRARY_PATH to support
+ # non-fallback testing for Intel MIC targets
+- global offload_targets
+- if { [string match "*,intelmic,*" ",$offload_targets,"] } {
++ global offload_plugins
++ if { [string match "*,intelmic,*" ",$offload_plugins,"] } {
+ append always_ld_library_path ":${blddir}/../liboffloadmic/.libs"
+ append always_ld_library_path ":${blddir}/../liboffloadmic/plugin/.libs"
+ # libstdc++ is required by liboffloadmic
+@@ -362,8 +362,8 @@ proc check_effective_target_offload_devi
+ # Return 1 if configured for nvptx offloading.
+
+ proc check_effective_target_openacc_nvidia_accel_configured { } {
+- global offload_targets
+- if { ![string match "*,nvptx,*" ",$offload_targets,"] } {
++ global offload_plugins
++ if { ![string match "*,nvptx,*" ",$offload_plugins,"] } {
+ return 0
+ }
+ # PR libgomp/65099: Currently, we only support offloading in 64-bit
diff --git a/SOURCES/gcc8-libstdc++-docs.patch b/SOURCES/gcc8-libstdc++-docs.patch
new file mode 100644
index 0000000..20fd28e
--- /dev/null
+++ b/SOURCES/gcc8-libstdc++-docs.patch
@@ -0,0 +1,24 @@
+--- libstdc++-v3/doc/html/index.html.jj 2011-01-03 12:53:21.282829010 +0100
++++ libstdc++-v3/doc/html/index.html 2011-01-04 18:06:28.999851145 +0100
+@@ -5,6 +5,8 @@
+ FSF
+
+
++ Release 8.5.0
++
+ Permission is granted to copy, distribute and/or modify this
+ document under the terms of the GNU Free Documentation
+ License, Version 1.2 or any later version published by the
+--- libstdc++-v3/doc/html/api.html.jj 2011-01-03 12:53:21.000000000 +0100
++++ libstdc++-v3/doc/html/api.html 2011-01-04 18:12:01.672757784 +0100
+@@ -20,7 +20,9 @@
+ member functions for the library classes, finding out what is in a
+ particular include file, looking at inheritance diagrams, etc.
+