From 2bc0c7b0aebbeee7eee00d8fcf6f5da2cd446cc9 Mon Sep 17 00:00:00 2001 From: Christian Hergert Date: Thu, 15 Jul 2021 16:18:26 -0700 Subject: [PATCH] language-specs: use \N{U+} escape sequences We seem to be having problems with \x{} on PCRE2 for reasons that are still elusive. This works around the problem by using the \N{U+val} style which appears to fix the issue. The best explanation I've come across is possible compilation settings in PCRE2 related to how these escape sequences work. Fixes #202 --- data/language-specs/css.lang | 10 +++++----- data/language-specs/def.lang | 8 ++++---- data/language-specs/groovy.lang | 2 +- data/language-specs/html.lang | 4 ++-- data/language-specs/j.lang | 2 +- data/language-specs/javascript-expressions.lang | 2 +- data/language-specs/javascript-functions-classes.lang | 2 +- data/language-specs/javascript-literals.lang | 2 +- data/language-specs/javascript-modules.lang | 2 +- data/language-specs/javascript-statements.lang | 2 +- data/language-specs/javascript-values.lang | 2 +- data/language-specs/javascript.lang | 8 ++++---- data/language-specs/jsdoc.lang | 2 +- data/language-specs/jsx.lang | 2 +- data/language-specs/less.lang | 2 +- data/language-specs/objj.lang | 2 +- data/language-specs/php.lang | 2 +- data/language-specs/scss.lang | 2 +- data/language-specs/typescript-js-expressions.lang | 2 +- .../typescript-js-functions-classes.lang | 2 +- data/language-specs/typescript-js-literals.lang | 2 +- data/language-specs/typescript-js-modules.lang | 2 +- data/language-specs/typescript-js-statements.lang | 2 +- data/language-specs/typescript-jsx.lang | 2 +- data/language-specs/typescript-type-expressions.lang | 2 +- data/language-specs/typescript-type-generics.lang | 2 +- data/language-specs/typescript-type-literals.lang | 2 +- data/language-specs/typescript.lang | 2 +- 28 files changed, 39 insertions(+), 39 deletions(-) diff --git a/data/language-specs/css.lang b/data/language-specs/css.lang index 7d49cfbc..e9c9369d 100644 --- 
a/data/language-specs/css.lang +++ b/data/language-specs/css.lang @@ -97,7 +97,7 @@ - [^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}\x{5D}\x{5E}\x{60}\x{7B}-\x{7F}] + [^\N{U+0}-\N{U+2C}\N{U+2E}\N{U+2F}\N{U+3A}-\N{U+40}\N{U+5B}\N{U+5D}\N{U+5E}\N{U+60}\N{U+7B}-\N{U+7F}] @@ -136,7 +136,7 @@ U+0080- Non-ASCII --> - [^\x{0}-\x{40}\x{5B}-\x{5E}\x{60}\x{7B}-\x{7F}] + [^\N{U+0}-\N{U+40}\N{U+5B}-\N{U+5E}\N{U+60}\N{U+7B}-\N{U+7F}] - [^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}-\x{5E}\x{60}\x{7B}-\x{7F}] + [^\N{U+0}-\N{U+2C}\N{U+2E}\N{U+2F}\N{U+3A}-\N{U+40}\N{U+5B}-\N{U+5E}\N{U+60}\N{U+7B}-\N{U+7F}] - [^\x{0}-\x{2F}\x{3A}-\x{40}\x{5B}-\x{5E}\x{60}\x{7B}-\x{7F}] + [^\N{U+0}-\N{U+2F}\N{U+3A}-\N{U+40}\N{U+5B}-\N{U+5E}\N{U+60}\N{U+7B}-\N{U+7F}] - [^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}\x{5D}\x{5E}\x{60}\x{7B}-\x{7F}] + [^\N{U+0}-\N{U+2C}\N{U+2E}\N{U+2F}\N{U+3A}-\N{U+40}\N{U+5B}\N{U+5D}\N{U+5E}\N{U+60}\N{U+7B}-\N{U+7F}] - (?!\x{2E2F}) [\p{L}\p{Nl}\x{1885}-\x{1886}\x{2118}\x{212E}\x{309B}-\x{309C}] + (?!\N{U+2E2F}) [\p{L}\p{Nl}\N{U+1885}-\N{U+1886}\N{U+2118}\N{U+212E}\N{U+309B}-\N{U+309C}] - (?!\x{2E2F}) [\p{L}\p{Nl}\x{1885}-\x{1886}\x{2118}\x{212E}\x{309B}-\x{309C}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{00B7}\x{0387}\x{1369}-\x{1371}\x{19DA}] + (?!\N{U+2E2F}) [\p{L}\p{Nl}\N{U+1885}-\N{U+1886}\N{U+2118}\N{U+212E}\N{U+309B}-\N{U+309C}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\N{U+00B7}\N{U+0387}\N{U+1369}-\N{U+1371}\N{U+19DA}] - (?![\x{037A}\x{0E33}\x{0EB3}\x{309B}-\x{309C}\x{FC5E}-\x{FC63}\x{FDFA}-\x{FDFB}\x{FE70}\x{FE72}\x{FE74}\x{FE76}\x{FE78}\x{FE7A}\x{FE7C}\x{FE7E}\x{FF9E}-\x{FF9F}]) + (?![\N{U+037A}\N{U+0E33}\N{U+0EB3}\N{U+309B}-\N{U+309C}\N{U+FC5E}-\N{U+FC63}\N{U+FDFA}-\N{U+FDFB}\N{U+FE70}\N{U+FE72}\N{U+FE74}\N{U+FE76}\N{U+FE78}\N{U+FE7A}\N{U+FE7C}\N{U+FE7E}\N{U+FF9E}-\N{U+FF9F}]) \%{unicode-id-start} - (?![\x{037A}\x{309B}-\x{309C}\x{FC5E}-\x{FC63}\x{FDFA}-\x{FDFB}\x{FE70}\x{FE72}\x{FE74}\x{FE76}\x{FE78}\x{FE7A}\x{FE7C}\x{FE7E}]) + 
(?![\N{U+037A}\N{U+309B}-\N{U+309C}\N{U+FC5E}-\N{U+FC63}\N{U+FDFA}-\N{U+FDFB}\N{U+FE70}\N{U+FE72}\N{U+FE74}\N{U+FE76}\N{U+FE78}\N{U+FE7A}\N{U+FE7C}\N{U+FE7E}]) \%{unicode-id-continue} diff --git a/data/language-specs/groovy.lang b/data/language-specs/groovy.lang index 538f4d5c..a1fb9c24 100644 --- a/data/language-specs/groovy.lang +++ b/data/language-specs/groovy.lang @@ -61,7 +61,7 @@ - [a-zA-Z\x{c0}-\x{d6}\x{d8}-\x{f6}\x{f8}-\x{ff}\x{100}-\x{fffe}_] + [a-zA-Z\N{U+c0}-\N{U+d6}\N{U+d8}-\N{U+f6}\N{U+f8}-\N{U+ff}\N{U+100}-\N{U+fffe}_] diff --git a/data/language-specs/html.lang b/data/language-specs/html.lang index effd373b..85c0864a 100644 --- a/data/language-specs/html.lang +++ b/data/language-specs/html.lang @@ -267,7 +267,7 @@ - [^\x00\t\n\f\r "'/<=>\x{007F}-\x{009F}\x{FDD0}-\x{FDEF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}\x{10FFFE}\x{10FFFF}]+ + [^\N{U+00}\t\n\f\r "'/<=>\N{U+007F}-\N{U+009F}\N{U+FDD0}-\N{U+FDEF}\N{U+FFFE}\N{U+FFFF}\N{U+1FFFE}\N{U+1FFFF}\N{U+2FFFE}\N{U+2FFFF}\N{U+3FFFE}\N{U+3FFFF}\N{U+4FFFE}\N{U+4FFFF}\N{U+5FFFE}\N{U+5FFFF}\N{U+6FFFE}\N{U+6FFFF}\N{U+7FFFE}\N{U+7FFFF}\N{U+8FFFE}\N{U+8FFFF}\N{U+9FFFE}\N{U+9FFFF}\N{U+AFFFE}\N{U+AFFFF}\N{U+BFFFE}\N{U+BFFFF}\N{U+CFFFE}\N{U+CFFFF}\N{U+DFFFE}\N{U+DFFFF}\N{U+EFFFE}\N{U+EFFFF}\N{U+FFFFE}\N{U+FFFFF}\N{U+10FFFE}\N{U+10FFFF}]+ @@ -382,7 +382,7 @@ - </?[a-z][^\x00\t\n\f\r />\x{007F}-\x{009F}\x{FDD0}-\x{FDEF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}\x{10FFFE}\x{10FFFF}]* + </?[a-z][^\N{U+00}\t\n\f\r 
/>\N{U+007F}-\N{U+009F}\N{U+FDD0}-\N{U+FDEF}\N{U+FFFE}\N{U+FFFF}\N{U+1FFFE}\N{U+1FFFF}\N{U+2FFFE}\N{U+2FFFF}\N{U+3FFFE}\N{U+3FFFF}\N{U+4FFFE}\N{U+4FFFF}\N{U+5FFFE}\N{U+5FFFF}\N{U+6FFFE}\N{U+6FFFF}\N{U+7FFFE}\N{U+7FFFF}\N{U+8FFFE}\N{U+8FFFF}\N{U+9FFFE}\N{U+9FFFF}\N{U+AFFFE}\N{U+AFFFF}\N{U+BFFFE}\N{U+BFFFF}\N{U+CFFFE}\N{U+CFFFF}\N{U+DFFFE}\N{U+DFFFF}\N{U+EFFFE}\N{U+EFFFF}\N{U+FFFFE}\N{U+FFFFF}\N{U+10FFFE}\N{U+10FFFF}]* /?> diff --git a/data/language-specs/j.lang b/data/language-specs/j.lang index 44d3b5e8..34c8910d 100644 --- a/data/language-specs/j.lang +++ b/data/language-specs/j.lang @@ -189,7 +189,7 @@ - ^\s*[\x{2500}|-]{2,} + ^\s*[\N{U+2500}|-]{2,} \%{close-expdef} diff --git a/data/language-specs/javascript-expressions.lang b/data/language-specs/javascript-expressions.lang index 3b3cc350..9ddaf129 100644 --- a/data/language-specs/javascript-expressions.lang +++ b/data/language-specs/javascript-expressions.lang @@ -26,7 +26,7 @@ -->