Compare commits

..

No commits in common. 'c9' and 'i8c' have entirely different histories.
c9 ... i8c

2
.gitignore vendored

@ -1 +1 @@
SOURCES/pcre-8.44.tar.bz2
SOURCES/pcre-8.42.tar.bz2

@ -1 +1 @@
8179b083053fce9b4a766513fa1f14807aabee42 SOURCES/pcre-8.44.tar.bz2
df0d1c2ff04c359220cb902539a6e134af4497f4 SOURCES/pcre-8.42.tar.bz2

@ -1,39 +0,0 @@
This is Philip Hazel's public GPG key.
-----BEGIN PGP PUBLIC KEY BLOCK-----
Version: GnuPG v1.2.0 (SunOS)
mQEMBD2z9g0BCACpGWJ4K2mjR6K9nv7/K+HLjJtDO0E0ckudeNjg2JvnuUU7m1bK
87nSklHTSMNiqmuVU5hFAuGiMGBMj31GPpce3VbABdm6TmAJB8ZndvJXyf8lC7Ko
5iZX/gCYU6817UI5pFwYR3MTIUSSxTGqk1y62pYLKUq9keSU5Cew4q1KMwl46Bn3
SwlfQeST4WGwL2w9b8mWNltBBY23bgzwYu7vFBD4GQnfzJsvMC9lj825wob2nKy5
CL4bY6tiYUqALzDYaIVCBqJ1Ky1rpPzRcDZUeeF8Ta3njh18Y0KzPEj+Xu5Ksexg
7LUy/2wFzVRtJgqvAFZo43KIOptsj/63zxJjAAkBAbQhUGhpbGlwIEhhemVsIDxw
aDEwQGN1cy5jYW0uYWMudWs+iQEyBBMBAgAcBQI9s/YNAhsDBAsHAwIDFQIDAxYC
AQIeAQIXgAAKCRCXZuCE+w9D2JbrCACBHrxybV+9t3TGR3+uk2QnUuct90a+udcB
cbWgKQgX345ZgwIfyoPKAbG84mN/+SFUN/Ie4ZHZxVsbN//l2P/Osx0CNQBfem4e
HUWegkFGzC5DxQhN5nsJ/GR6pnxG1l/DbNhupNY73oTajWFdi7K8jYfamU/zS/Da
VCwUKxWtGqyEqOxvbdqcjsnTMmLVfXtefx7CbKyhaClPP8Pe4FL+eqyfHJF7uywK
VWlUNmQa4E+ZAK8tkoK9sZAc0ImWwZMumiKZDEpr2D8Ty+Gg2umTS2OMIcbY8QF1
r6DeubFabdPIe1kn0BGXtdAurhbdJCIbcAEQS0eEeWC4A4LiDprntB1QaGlsaXAg
SGF6ZWwgPHBoMTBAY2FtLmFjLnVrPokBMgQTAQIAHAUCPbaC8QIbAwQLBwMCAxUC
AwMWAgECHgECF4AACgkQl2bghPsPQ9ho5Qf+I2HUhZeXOUneeqSa+cITsIELJMrZ
UWcydY1z+L68yOqURVHB1jF4aC6QSlq0pLjozwF6KfZO5AfN9FvkRQ0DzCEXH48W
OXqzbjzgNxRMdaP5+49Axl0UQuhupGJg66T4FiGnSVBhK8kTURPCSpLLgkCJqNcY
t5zuNwk3e7JvleT59EVpk/kw3a5p9oeKYBln57pHwq+HdPLSCdkedQBKdsbNrazy
qduYEXen4ogsIoTpA9lLH1Xsi9wL+soLAlWXtl/GNa1h7Jpwn41zp1hqIZe1ebIM
dSGbtMEaGJlqgDauYJSl0lkVgaPtZCTmfAf57TsGfD0IckN2XNGLuwb6DLkBCwQ9
s/eJAQgAuXq2I1VTDB108O0NAB6QKfA2gMHafoj3OcwEUHnJgXrHqjYOpFr0r5lC
YjwMRMvHO7r4jgtwKjGACI/1IE2hiGXkKFOWZFkCf7Qjoi13a78myC/VRwe4uEIs
xdz++w8WfzuC1sYw8d/rlybVzHTfTsKfmFOQamuyLCTm3Kdx/wZyGM7JMmgVn2zF
HWasdID0n7QJFZGR3yEfqis5zq1t3w28AaXlTZG7QtFj9V0cWIhZRjjjVe1biPA8
Btp+eFkmcat79N9hHbiEiAY3u2qmQCcn1fiBBC0Og09cY4dRyX0/bXUo4n8lHysp
JcL/sIZ7Ahd1LOfg9h+yQfoaey78LQAGKYkBHwQYAQIACQUCPbP3iQIbDAAKCRCX
ZuCE+w9D2P0BCACXsbwFerd4wlnt8PxxQlRGXi1t4EGBFf2zSdgQvIXJ3ntPzcDU
+8hnIWQJhCTZ3PfVmoqB6ZOM1RWI/IcP6b4qVb6vJGI/VxRICraAczw+4bdFxFiB
n2+SyD4MxZg8t66FcH3fHugQqsApHTmtKmahv5BXB+0dPmP/BRUmzxYUv8hdnhHl
91S4R4BDu84DvKYyJK9jlqUl06pvqXrBSNsbWKgAJnmnDe8FaLUk+p0IXeMSavUn
T5E26MRIioxAz/G4FZI8IdO92GHTz82O7/mQMhveAdpspDvpV3M4wnldU425GlxZ
nmq1bbjAU4QVuIS9nkk2GrGx5+N6cYMy8JqG
=EHsz
-----END PGP PUBLIC KEY BLOCK-----

@ -1,161 +0,0 @@
From f1e9a32ee7fad2263636a51536ce0f9f13f09949 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com>
Date: Wed, 23 Jan 2019 10:16:20 +0100
Subject: [PATCH] Declare POSIX regex function names as macros to PCRE
functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
POSIX regex libraries differ in regex_t size. If a program includes
<pcreposix.h>, but is not linked to pcreposix library at run-time
(either in effect of --as-needed or a lazy binding in dlopen)
other implementation touches memory out of the structure and the
program can crash.
That means once a program includes <pcreposix.h>, it must link to the
pcreposix library.
This patch replaces the POSIX regex declaration with macros to the
PCRE uniquely-named functions. This ensures that the PCRE's regex_t
structure is always handled by the PCRE functions.
This patch still preserves the POSIX regex definitions in order to
preserve ABI with applications compiled before this change. The
definition can be removed in the future.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
pcreposix.c | 50 +++++++++++++++++++++++++++++++++++++++++++++-----
pcreposix.h | 20 ++++++++++++++------
2 files changed, 59 insertions(+), 11 deletions(-)
diff --git a/pcreposix.c b/pcreposix.c
index a76d6bf..3f2f3ef 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -39,7 +39,10 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module is a wrapper that provides a POSIX API to the underlying PCRE
-functions. */
+functions. The operative functions are called pcre_regcomp(), etc., with
+wrappers that use the plain POSIX names. This makes it easier for an
+application to be sure it gets the PCRE versions in the presence of other
+POSIX regex libraries. */
#ifdef HAVE_CONFIG_H
@@ -204,12 +207,49 @@ static const char *const pstring[] = {
/*************************************************
-* Translate error code to string *
+* Wrappers with traditional POSIX names *
*************************************************/
+/* Keep defining them to preserve ABI with applications linked to pcreposix
+ * library before they were changed into macros. */
+
+#undef regerror
PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
+return pcre_regerror(errcode, preg, errbuf, errbuf_size);
+}
+
+#undef regfree
+PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION
+regfree(regex_t *preg)
+{
+pcre_regfree(preg);
+}
+
+#undef regcomp
+PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
+regcomp(regex_t *preg, const char *pattern, int cflags)
+{
+return pcre_regcomp(preg, pattern, cflags);
+}
+
+#undef regexec
+PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
+regexec(const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags)
+{
+return pcre_regexec(preg, string, nmatch, pmatch, eflags);
+}
+
+
+/*************************************************
+* Translate error code to string *
+*************************************************/
+
+PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION
+pcre_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
+{
const char *message, *addmessage;
size_t length, addlength;
@@ -243,7 +283,7 @@ return length + addlength;
*************************************************/
PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION
-regfree(regex_t *preg)
+pcre_regfree(regex_t *preg)
{
(PUBL(free))(preg->re_pcre);
}
@@ -266,7 +306,7 @@ Returns: 0 on success
*/
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
-regcomp(regex_t *preg, const char *pattern, int cflags)
+pcre_regcomp(regex_t *preg, const char *pattern, int cflags)
{
const char *errorptr;
int erroffset;
@@ -320,7 +360,7 @@ be set. When this is the case, the nmatch and pmatch arguments are ignored, and
the only result is yes/no/error. */
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
-regexec(const regex_t *preg, const char *string, size_t nmatch,
+pcre_regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
{
int rc, so, eo;
diff --git a/pcreposix.h b/pcreposix.h
index c77c0b0..6f108b8 100644
--- a/pcreposix.h
+++ b/pcreposix.h
@@ -131,13 +131,21 @@ file. */
# endif
#endif
-/* The functions */
-
-PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
-PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
+/* The functions. The actual code is in functions with pcre_xxx names for
+uniqueness. POSIX names are provided for API compatibility with POSIX regex
+functions. It's done this way to ensure that they are always obtained from the
+PCRE library and not by accident from elsewhere. (regex_t differs in size
+elsewhere.) */
+
+PCREPOSIX_EXP_DECL int pcre_regcomp(regex_t *, const char *, int);
+#define regcomp pcre_regcomp
+PCREPOSIX_EXP_DECL int pcre_regexec(const regex_t *, const char *, size_t,
regmatch_t *, int);
-PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
-PCREPOSIX_EXP_DECL void regfree(regex_t *);
+#define regexec pcre_regexec
+PCREPOSIX_EXP_DECL size_t pcre_regerror(int, const regex_t *, char *, size_t);
+#define regerror pcre_regerror
+PCREPOSIX_EXP_DECL void pcre_regfree(regex_t *);
+#define regfree pcre_regfree
#ifdef __cplusplus
} /* extern "C" */
--
2.17.2

@ -0,0 +1,70 @@
From 7abc4de8303e8908eeb96714dac53ae10ff465e3 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Sun, 2 Sep 2018 17:05:38 +0000
Subject: [PATCH] Fix anchoring bug in conditional subexpression.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1739 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.42
---
pcre_compile.c | 12 ++++++++++--
testdata/testinput2 | 3 +++
testdata/testoutput2 | 4 ++++
Version 8.42 20-March-2018
--------------------------
diff --git a/pcre_compile.c b/pcre_compile.c
index 3991d6c..6141fb3 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -8682,10 +8682,18 @@ do {
if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE;
}
- /* Positive forward assertions and conditions */
+ /* Positive forward assertion */
- else if (op == OP_ASSERT || op == OP_COND)
+ else if (op == OP_ASSERT)
+ {
+ if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
+ }
+
+ /* Condition; not anchored if no second branch */
+
+ else if (op == OP_COND)
{
+ if (scode[GET(scode,1)] != OP_ALT) return FALSE;
if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
}
diff --git a/testdata/testinput2 b/testdata/testinput2
index 8ba4dc4..3528de1 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4257,4 +4257,7 @@ backtracking verbs. --/
ab
aaab
+/(?(?=^))b/
+ abc
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 61ed8d9..4ccda27 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14721,4 +14721,8 @@ No need char
0: ab
1: a
+/(?(?=^))b/
+ abc
+ 0: b
+
/-- End of testinput2 --/
--
2.14.4

@ -0,0 +1,65 @@
From 952cac5f4a17e52aec7d0536f405b25428367840 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Fri, 17 Aug 2018 14:50:21 +0000
Subject: [PATCH] Fix bad auto-possessify for certain classes.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1738 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.42.
---
pcre_compile.c | 4 ++--
testdata/testinput4 | 3 +++
testdata/testoutput4 | 4 ++++
diff --git a/pcre_compile.c b/pcre_compile.c
index 6dd8886..3991d6c 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2016 University of Cambridge
+ Copyright (c) 1997-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -3299,7 +3299,7 @@ for(;;)
if ((*xclass_flags & XCL_MAP) == 0)
{
/* No bits are set for characters < 256. */
- if (list[1] == 0) return TRUE;
+ if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
/* Might be an empty repeat. */
continue;
}
diff --git a/testdata/testinput4 b/testdata/testinput4
index 8bdbdac..63368c0 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -727,4 +727,7 @@
/\C(\W?ſ)'?{{/8
\\C(\\W?ſ)'?{{
+/[^\x{100}-\x{ffff}]*[\x80-\xff]/8
+ \x{99}\x{99}\x{99}
+
/-- End of testinput4 --/
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index d43c123..69e812c 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -1277,4 +1277,8 @@ No match
\\C(\\W?ſ)'?{{
No match
+/[^\x{100}-\x{ffff}]*[\x80-\xff]/8
+ \x{99}\x{99}\x{99}
+ 0: \x{99}\x{99}\x{99}
+
/-- End of testinput4 --/
--
2.14.4

@ -0,0 +1,29 @@
From 1aa76cb33f04fcea3127a0859450e5d18369e5e2 Mon Sep 17 00:00:00 2001
From: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Fri, 21 Sep 2018 07:34:10 +0000
Subject: [PATCH] Fix subject buffer overread in JIT.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1740 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.42.
---
pcre_jit_compile.c | 2 +-
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 2bad74b..bc5f9c0 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -9002,7 +9002,7 @@ if (exact > 1)
#ifdef SUPPORT_UTF
&& !common->utf
#endif
- )
+ && type != OP_ANYNL && type != OP_EXTUNI)
{
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
--
2.17.2

@ -0,0 +1,178 @@
From 2ede5a4b4a98add3bbf982f5805e015e8c61c565 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Tue, 26 Jun 2018 16:51:43 +0000
Subject: [PATCH] Fix two C++ wrapper bugs, unnoticed for years.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1735 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.42.
diff --git a/pcrecpp.cc b/pcrecpp.cc
index d09c9ab..77a2fed 100644
--- a/pcrecpp.cc
+++ b/pcrecpp.cc
@@ -80,6 +80,24 @@ static const string empty_string;
// If the user doesn't ask for any options, we just use this one
static RE_Options default_options;
+// Specials for the start of patterns. See comments where start_options is used
+// below. (PH June 2018)
+static const char *start_options[] = {
+ "(*UTF8)",
+ "(*UTF)",
+ "(*UCP)",
+ "(*NO_START_OPT)",
+ "(*NO_AUTO_POSSESS)",
+ "(*LIMIT_RECURSION=",
+ "(*LIMIT_MATCH=",
+ "(*CRLF)",
+ "(*CR)",
+ "(*BSR_UNICODE)",
+ "(*BSR_ANYCRLF)",
+ "(*ANYCRLF)",
+ "(*ANY)",
+ "" };
+
void RE::Init(const string& pat, const RE_Options* options) {
pattern_ = pat;
if (options == NULL) {
@@ -135,7 +153,49 @@ pcre* RE::Compile(Anchor anchor) {
} else {
// Tack a '\z' at the end of RE. Parenthesize it first so that
// the '\z' applies to all top-level alternatives in the regexp.
- string wrapped = "(?:"; // A non-counting grouping operator
+
+ /* When this code was written (for PCRE 6.0) it was enough just to
+ parenthesize the entire pattern. Unfortunately, when the feature of
+ starting patterns with (*UTF8) or (*CR) etc. was added to PCRE patterns,
+ this code was never updated. This bug was not noticed till 2018, long after
+ PCRE became obsolescent and its maintainer no longer around. Since PCRE is
+ frozen, I have added a hack to check for all the existing "start of
+ pattern" specials - knowing that no new ones will ever be added. I am not a
+ C++ programmer, so the code style is no doubt crude. It is also
+ inefficient, but is only run when the pattern starts with "(*".
+ PH June 2018. */
+
+ string wrapped = "";
+
+ if (pattern_.c_str()[0] == '(' && pattern_.c_str()[1] == '*') {
+ int kk, klen, kmat;
+ for (;;) { // Loop for any number of leading items
+
+ for (kk = 0; start_options[kk][0] != 0; kk++) {
+ klen = strlen(start_options[kk]);
+ kmat = strncmp(pattern_.c_str(), start_options[kk], klen);
+ if (kmat >= 0) break;
+ }
+ if (kmat != 0) break; // Not found
+
+ // If the item ended in "=" we must copy digits up to ")".
+
+ if (start_options[kk][klen-1] == '=') {
+ while (isdigit(pattern_.c_str()[klen])) klen++;
+ if (pattern_.c_str()[klen] != ')') break; // Syntax error
+ klen++;
+ }
+
+ // Move the item from the pattern to the start of the wrapped string.
+
+ wrapped += pattern_.substr(0, klen);
+ pattern_.erase(0, klen);
+ }
+ }
+
+ // Wrap the rest of the pattern.
+
+ wrapped += "(?:"; // A non-counting grouping operator
wrapped += pattern_;
wrapped += ")\\z";
re = pcre_compile(wrapped.c_str(), pcre_options,
@@ -415,7 +475,7 @@ int RE::GlobalReplace(const StringPiece& rewrite,
matchend++;
}
// We also need to advance more than one char if we're in utf8 mode.
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (options_.utf8()) {
while (matchend < static_cast<int>(str->length()) &&
((*str)[matchend] & 0xc0) == 0x80)
diff --git a/pcrecpp_unittest.cc b/pcrecpp_unittest.cc
index 4b15fbe..255066f 100644
--- a/pcrecpp_unittest.cc
+++ b/pcrecpp_unittest.cc
@@ -309,7 +309,7 @@ static void TestReplace() {
"@aa",
"@@@",
3 },
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
{ "b*",
"bb",
"\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
@@ -327,7 +327,7 @@ static void TestReplace() {
{ "", NULL, NULL, NULL, NULL, 0 }
};
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
const bool support_utf8 = true;
#else
const bool support_utf8 = false;
@@ -535,7 +535,7 @@ static void TestQuoteMetaLatin1() {
}
static void TestQuoteMetaUtf8() {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8
TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol)
@@ -1178,7 +1178,7 @@ int main(int argc, char** argv) {
CHECK(re.error().empty()); // Must have no error
}
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
// Check UTF-8 handling
{
printf("Testing UTF-8 handling\n");
@@ -1202,6 +1202,24 @@ int main(int argc, char** argv) {
CHECK(re_test1.FullMatch(utf8_string));
RE re_test2("...", pcrecpp::UTF8());
CHECK(re_test2.FullMatch(utf8_string));
+
+ // PH added these tests for leading option settings
+
+ RE re_testZ1("(*UTF8)...");
+ CHECK(re_testZ1.FullMatch(utf8_string));
+
+ RE re_testZ2("(*UTF)...");
+ CHECK(re_testZ2.FullMatch(utf8_string));
+
+ RE re_testZ3("(*UCP)(*UTF)...");
+ CHECK(re_testZ3.FullMatch(utf8_string));
+
+ RE re_testZ4("(*UCP)(*LIMIT_MATCH=1000)(*UTF)...");
+ CHECK(re_testZ4.FullMatch(utf8_string));
+
+ RE re_testZ5("(*UCP)(*LIMIT_MATCH=1000)(*ANY)(*UTF)...");
+ CHECK(re_testZ5.FullMatch(utf8_string));
+
// Check that '.' matches one byte or UTF-8 character
// according to the mode.
@@ -1248,7 +1266,7 @@ int main(int argc, char** argv) {
CHECK(!match_sentence.FullMatch(target));
CHECK(!match_sentence_re.FullMatch(target));
}
-#endif /* def SUPPORT_UTF8 */
+#endif /* def SUPPORT_UTF */
printf("Testing error reporting\n");
--
2.14.4

@ -0,0 +1,36 @@
From 6d8fa98d5167bdd915ce1e32bcb6aaed078e5938 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Tue, 26 Jun 2018 17:04:02 +0000
Subject: [PATCH] Fix typos in pcrgrep.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1736 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.42.
diff --git a/pcregrep.c b/pcregrep.c
index a406be9..5982406 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -2252,7 +2252,7 @@ if (isdirectory(pathname))
int fnlength = strlen(pathname) + strlen(nextfile) + 2;
if (fnlength > 2048)
{
- fprintf(stderr, "pcre2grep: recursive filename is too long\n");
+ fprintf(stderr, "pcregrep: recursive filename is too long\n");
rc = 2;
break;
}
@@ -3034,7 +3034,7 @@ LC_ALL environment variable is set, and if so, use it. */
if (locale == NULL)
{
locale = getenv("LC_ALL");
- locale_from = "LCC_ALL";
+ locale_from = "LC_ALL";
}
if (locale == NULL)
--
2.14.4

@ -0,0 +1,55 @@
From 3a9026509f9c1745f378595e55e5024361ad152d Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Mon, 10 Feb 2020 17:17:34 +0000
Subject: [PATCH] Check the size of the number after (?C as it is read, in
order to avoid integer overflow.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1761 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.43.
---
pcre_compile.c | 14 ++++++++------
diff --git a/pcre_compile.c b/pcre_compile.c
index 079d30a..1e3d6c3 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2018 University of Cambridge
+ Copyright (c) 1997-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -7130,17 +7130,19 @@ for (;; ptr++)
int n = 0;
ptr++;
while(IS_DIGIT(*ptr))
+ {
n = n * 10 + *ptr++ - CHAR_0;
+ if (n > 255)
+ {
+ *errorcodeptr = ERR38;
+ goto FAILED;
+ }
+ }
if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR39;
goto FAILED;
}
- if (n > 255)
- {
- *errorcodeptr = ERR38;
- goto FAILED;
- }
*code++ = n;
PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */
PUT(code, LINK_SIZE, 0); /* Default length */
--
2.21.1

@ -1,44 +0,0 @@
From f0bb9e8baf3157e0a84f484f194984295b2db23a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com>
Date: Mon, 19 Oct 2020 16:15:14 +0200
Subject: [PATCH] Inicialize name table memory region
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Name table entry values are accessed past their ends in add_name()
when comparing the values. Also a size of the entries could grow
later. It's safer to initialize just after the allocation than to hunt
the gaps later.
Reproducer:
pcre_compile2("(?<f>)(?<fir>)", PCRE_NO_AUTO_CAPTURE | PCRE_CASELESS, &ec, &eb, &eo, NULL);
built with clang++ -fsanitize=memory -fsanitize=fuzzer-no-link.
https://bugs.exim.org/show_bug.cgi?id=2661
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
pcre_compile.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/pcre_compile.c b/pcre_compile.c
index 3be0fbf..75309e0 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -9423,6 +9423,11 @@ if (re == NULL)
goto PCRE_EARLY_ERROR_RETURN;
}
+/* Initialize the memory. Name table entry values are accessed past their ends
+ * (e.g. in add_name()) when comparing the values. Also a size of the entry can
+ * grow later. It's safer to initialize here than to hunt the gaps later. */
+memset(re, 0, size);
+
/* Put in the magic number, and save the sizes, initial options, internal
flags, and character table pointer. NULL is used for the default character
tables. The nullpad field is at the end; it's there to help in the case when a
--
2.25.4

File diff suppressed because it is too large Load Diff

@ -1,98 +0,0 @@
From 70fcff23652339438cabef86f2d3d9c645678687 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 20 Dec 2020 13:55:40 -0800
Subject: [PATCH 2/2] Pass -mshstk to the compiler when Intel CET is enable
Copied from PCRE2.
---
CMakeLists.txt | 18 ++++++++++++++++++
Makefile.am | 3 +++
configure.ac | 15 +++++++++++++++
3 files changed, 36 insertions(+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 858a34b..74c27e9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -112,6 +112,24 @@ CHECK_FUNCTION_EXISTS(_strtoi64 HAVE__STRTOI64)
CHECK_TYPE_SIZE("long long" LONG_LONG)
CHECK_TYPE_SIZE("unsigned long long" UNSIGNED_LONG_LONG)
+# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
+# code was written by PH, trying to imitate the logic from the autotools
+# configuration.
+
+CHECK_C_SOURCE_COMPILES(
+ "#ifndef __CET__
+ #error CET is not enabled
+ #endif
+ int main() { return 0; }"
+ INTEL_CET_ENABLED
+)
+
+IF (INTEL_CET_ENABLED)
+ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
+ENDIF(INTEL_CET_ENABLED)
+
+
+
# User-configurable options
#
# (Note: CMakeSetup displays these in alphabetical order, regardless of
diff --git a/Makefile.am b/Makefile.am
index 22b6947..984c686 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -250,6 +250,7 @@ libpcre_la_SOURCES = \
libpcre_la_CFLAGS = \
$(VISIBILITY_CFLAGS) \
+ $(CET_CFLAGS) \
$(AM_CFLAGS)
libpcre_la_LIBADD =
@@ -289,6 +290,7 @@ libpcre16_la_SOURCES = \
libpcre16_la_CFLAGS = \
$(VISIBILITY_CFLAGS) \
+ $(CET_CFLAGS) \
$(AM_CFLAGS)
libpcre16_la_LIBADD =
@@ -328,6 +330,7 @@ libpcre32_la_SOURCES = \
libpcre32_la_CFLAGS = \
$(VISIBILITY_CFLAGS) \
+ $(CET_CFLAGS) \
$(AM_CFLAGS)
libpcre32_la_LIBADD =
diff --git a/configure.ac b/configure.ac
index 6a3e5f3..15f37d1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1043,6 +1043,21 @@ fi # enable_coverage
AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])
+AC_MSG_CHECKING([whether Intel CET is enabled])
+AC_LANG_PUSH([C])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
+ [[#ifndef __CET__
+# error CET is not enabled
+#endif]])],
+ [intel_cet_enabled=yes],
+ [intel_cet_enabled=no])
+AC_MSG_RESULT([$intel_cet_enabled])
+if test "$intel_cet_enabled" = yes; then
+ CET_CFLAGS="-mshstk"
+ AC_SUBST([CET_CFLAGS])
+fi
+AC_LANG_POP([C])
+
# Produce these files, in addition to config.h.
AC_CONFIG_FILES(
Makefile
--
2.29.2

Binary file not shown.

@ -1,8 +1,8 @@
# Is this a stable/testing release:
#%%global rcversion RC1
Name: pcre
Version: 8.44
Release: %{?rcversion:0.}3%{?rcversion:.%rcversion}%{?dist}.3
Version: 8.42
Release: %{?rcversion:0.}6%{?rcversion:.%rcversion}%{?dist}
%global myversion %{version}%{?rcversion:-%rcversion}
Summary: Perl-compatible regular expression library
## Source package only:
@ -27,37 +27,41 @@ Summary: Perl-compatible regular expression library
## Binary packages:
# other files: BSD
License: BSD
URL: https://www.pcre.org/
Source0: https://ftp.pcre.org/pub/%{name}/%{?rcversion:Testing/}%{name}-%{myversion}.tar.bz2
Source1: https://ftp.pcre.org/pub/%{name}/%{?rcversion:Testing/}%{name}-%{myversion}.tar.bz2.sig
Source2: https://ftp.pcre.org/pub/pcre/Public-Key
# Do not set RPATH if libdir is not /usr/lib
URL: http://www.pcre.org/
Source: ftp://ftp.csx.cam.ac.uk/pub/software/programming/%{name}/%{?rcversion:Testing/}%{name}-%{myversion}.tar.bz2
# Upstream thinks RPATH is a good idea.
Patch0: pcre-8.21-multilib.patch
# Refused by upstream, bug #675477
Patch1: pcre-8.32-refused_spelling_terminated.patch
# Fix recursion stack estimator, upstream bug #2173, refused by upstream
Patch2: pcre-8.41-fix_stack_estimator.patch
# Link applications to PCRE-specific symbols when using POSIX API, bug #1667614,
# upstream bug 1830, partially borrowed from PCRE2, proposed to upstream,
# This amends ABI, application built with this patch cannot run with
# previous libpcreposix builds.
Patch3: pcre-8.42-Declare-POSIX-regex-function-names-as-macros-to-PCRE.patch
# Fix reading an uninitialized memory when populating a name table,
# upstream bug #2661, proposed to the upstream
Patch4: pcre-8.44-Inicialize-name-table-memory-region.patch
# Implement CET, bug #1909554, proposed to the upstream
# <https://lists.exim.org/lurker/message/20201220.222016.d8cd6d61.en.html>
Patch5: pcre-8.44-JIT-compiler-update-for-Intel-CET.patch
Patch6: pcre-8.44-Pass-mshstk-to-the-compiler-when-Intel-CET-is-enable.patch
# Fix handling UTF and start-of-pattern options in C++ wrapper,
# upstream bug #2283, in upstream after 8.42
Patch3: pcre-8.42-Fix-two-C-wrapper-bugs-unnoticed-for-years.patch
# Fix an error message and locale handling in pcregrep tool,
# in upstream after 8.42
Patch4: pcre-8.42-Fix-typos-in-pcrgrep.patch
# Fix autopossessifying a repeated negative class with no characters less than
# 256 that is followed by a positive class with only characters less than 256,
# bug #1619228, upstream bug #2300, in upstream after 8.42
Patch5: pcre-8.42-Fix-bad-auto-possessify-for-certain-classes.patch
# Fix anchoring in conditionals with only one branch, bug #1619228,
# upstream bug #2307, in upstream after 8.42
Patch6: pcre-8.42-Fix-anchoring-bug-in-conditional-subexpression.patch
# Fix a subject buffer overread in JIT when UTF is disabled and \X or \R has
# a greater than 1 fixed quantifier, CVE-2019-20838, bug #1852252,
# in upstream after 8.42
Patch7: pcre-8.42-Fix-subject-buffer-overread-in-JIT.patch
# Fix an integer overflow when parsing numbers after "(?C", CVE-2020-14155,
# bug #1851552, upstream bug #2463, in upstream after 8.43
Patch8: pcre-8.43-Check-the-size-of-the-number-after-C-as-it-is-read-i.patch
BuildRequires: readline-devel
BuildRequires: autoconf
BuildRequires: automake
BuildRequires: coreutils
BuildRequires: gcc
BuildRequires: gcc-c++
# glibc-common for iconv
BuildRequires: glibc-common
BuildRequires: gnupg2
BuildRequires: libtool
BuildRequires: make
# perl not used because config.h.generic is pregenerated
@ -130,8 +134,8 @@ Requires: %{name}%{_isa} = %{version}-%{release}
Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
%prep
%{gpgverify} --keyring='%{SOURCE2}' --signature='%{SOURCE1}' --data='%{SOURCE0}'
%setup -q -n %{name}-%{myversion}
# Get rid of rpath
%patch0 -p1
%patch1 -p1
%patch2 -p2
@ -139,7 +143,9 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
%patch4 -p1
%patch5 -p1
%patch6 -p1
# Because of the multilib patch
%patch7 -p1
%patch8 -p1
# Because of rpath patch
libtoolize --copy --force
autoreconf -vif
# One contributor's name is non-UTF-8
@ -167,10 +173,10 @@ done
--enable-pcre16 \
--enable-pcre32 \
--disable-silent-rules
%{make_build}
make %{?_smp_mflags}
%install
%{make_install}
make install DESTDIR=$RPM_BUILD_ROOT
# Get rid of unneeded *.la files
rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
# These are handled by %%doc in %%files
@ -183,29 +189,30 @@ ulimit -s 10240
%endif
make %{?_smp_mflags} check VERBOSE=yes
%ldconfig_scriptlets
%ldconfig_scriptlets utf16
%ldconfig_scriptlets utf32
%ldconfig_scriptlets cpp
%files
%{_libdir}/libpcre.so.1
%{_libdir}/libpcre.so.1.*
%{_libdir}/libpcreposix.so.0
%{_libdir}/libpcreposix.so.0.*
%{_libdir}/libpcre.so.*
%{_libdir}/libpcreposix.so.*
%{!?_licensedir:%global license %%doc}
%license COPYING LICENCE
%doc AUTHORS NEWS
%files utf16
%{_libdir}/libpcre16.so.0
%{_libdir}/libpcre16.so.0.*
%{_libdir}/libpcre16.so.*
%license COPYING LICENCE
%doc AUTHORS NEWS
%files utf32
%{_libdir}/libpcre32.so.0
%{_libdir}/libpcre32.so.0.*
%{_libdir}/libpcre32.so.*
%license COPYING LICENCE
%doc AUTHORS NEWS
%files cpp
%{_libdir}/libpcrecpp.so.0
%{_libdir}/libpcrecpp.so.0.*
%{_libdir}/libpcrecpp.so.*
%files doc
%doc ChangeLog
@ -222,6 +229,7 @@ make %{?_smp_mflags} check VERBOSE=yes
%files static
%{_libdir}/*.a
%{!?_licensedir:%global license %%doc}
%license COPYING LICENCE
%files tools
@ -231,81 +239,25 @@ make %{?_smp_mflags} check VERBOSE=yes
%{_mandir}/man1/pcretest.*
%changelog
* Mon Aug 09 2021 Mohan Boddu <mboddu@redhat.com> - 8.44-3.3
- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags
Related: rhbz#1991688
* Fri Apr 16 2021 Mohan Boddu <mboddu@redhat.com> - 8.44-3.2
- Rebuilt for RHEL 9 BETA on Apr 15th 2021. Related: rhbz#1947937
* Tue Jan 26 2021 Fedora Release Engineering <releng@fedoraproject.org> - 8.44-3.1
- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild
* Mon Jan 11 2021 Petr Pisar <ppisar@redhat.com> - 8.44-3
- Implement CET (bug #1909554)
* Mon Oct 19 2020 Petr Pisar <ppisar@redhat.com> - 8.44-2
- Fix reading an uninitialized memory when populating a name table
(upstream bug #2661)
* Tue Jul 28 2020 Fedora Release Engineering <releng@fedoraproject.org> - 8.44-1.1
- Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild
* Fri Feb 14 2020 Petr Pisar <ppisar@redhat.com> - 8.44-1
- 8.44 bump
* Wed Feb 12 2020 Petr Pisar <ppisar@redhat.com> - 8.43-3
- Make erroroffset initialization in a POSIX wrapper thread-safe
(upstream bug #2447)
- Fix an integer overflow when parsing numbers after "(?C" (upstream bug #2463)
- Fix shifting integer bits and a NULL pointer dereference in pcretest tool
(upstream bug #2380)
* Wed Jan 29 2020 Fedora Release Engineering <releng@fedoraproject.org> - 8.43-2.2
- Rebuilt for https://fedoraproject.org/wiki/Fedora_32_Mass_Rebuild
* Wed Jul 26 2023 MSVSphere Packaging Team <packager@msvsphere.ru> - 8.42-6
- Rebuilt for MSVSphere 8.8
* Fri Jul 26 2019 Fedora Release Engineering <releng@fedoraproject.org> - 8.43-2.1
- Rebuilt for https://fedoraproject.org/wiki/Fedora_31_Mass_Rebuild
* Wed Jun 02 2021 Lukas Javorsky <ljavorsk@redhat.com> - 8.42-6
- Rebuild for BZ#1954441
* Fri May 24 2019 Petr Pisar <ppisar@redhat.com> - 8.43-2
- Add (*LF) to a list of start-of-pattern options in the C++ wrapper
(upstream bug #2400)
* Mon Feb 25 2019 Petr Pisar <ppisar@redhat.com> - 8.43-1
- 8.43 bump
* Sun Feb 17 2019 Igor Gnatenko <ignatenkobrain@fedoraproject.org> - 8.43-0.1.RC1.2
- Rebuild for readline 8.0
* Fri Feb 01 2019 Fedora Release Engineering <releng@fedoraproject.org> - 8.43-0.1.RC1.1
- Rebuilt for https://fedoraproject.org/wiki/Fedora_30_Mass_Rebuild
* Mon Jan 28 2019 Petr Pisar <ppisar@redhat.com> - 8.43-0.1.RC1
- 8.43-RC1 bump
* Wed Jan 23 2019 Petr Pisar <ppisar@redhat.com> - 8.42-7
- Link applications to PCRE-specific symbols when using POSIX API (bug #1667614)
* Thu Jan 03 2019 Petr Pisar <ppisar@redhat.com> - 8.42-6
- Fix OpenPOWER 64-bit ELFv2 ABI detection in JIT compiler (upstream bug #2353)
- Fix an undefined behavior in aarch64 JIT compiler (upstream bug #2355)
* Thu Nov 01 2018 Petr Pisar <ppisar@redhat.com> - 8.42-5
- Fix a subject buffer overread in JIT when UTF is disabled and \X or \R has
a greater than 1 fixed quantifier
- Fix matching a zero-repeated subroutine call at a start of a pattern
(upstream bug #2332)
* Wed Apr 14 2021 Petr Pisar <ppisar@redhat.com> - 8.42-5
- Fix CVE-2019-20838 (a subject buffer overread in JIT when UTF is disabled
and \X or \R has a greater than 1 fixed quantifier) (bug #1852252)
- Fix CVE-2020-14155 (an integer overflow when parsing numbers after "(?C"))
(bug #1851552)
* Mon Sep 03 2018 Petr Pisar <ppisar@redhat.com> - 8.42-4
- Fix anchoring in conditionals with only one branch (upstream bug #2307)
- Fix anchoring in conditionals with only one branch (bug #1619228)
* Mon Aug 20 2018 Petr Pisar <ppisar@redhat.com> - 8.42-3
- Fix autopossessifying a repeated negative class with no characters less than
256 that is followed by a positive class with only characters less than 256
(upstream bug #2300)
* Fri Jul 13 2018 Fedora Release Engineering <releng@fedoraproject.org> - 8.42-2.1
- Rebuilt for https://fedoraproject.org/wiki/Fedora_29_Mass_Rebuild
(bug #1619228)
* Thu Jun 28 2018 Petr Pisar <ppisar@redhat.com> - 8.42-2
- Fix handling UTF and start-of-pattern options in C++ wrapper

Loading…
Cancel
Save