Compare commits
No commits in common. 'i9c-beta' and 'c9' have entirely different histories.
@ -1,120 +0,0 @@
|
|||||||
From 12c313ce49b36160a7ca2e9b07ad5bd92ee4a010 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Karl Williamson <khw@cpan.org>
|
|
||||||
Date: Sat, 9 Sep 2023 11:59:09 -0600
|
|
||||||
Subject: [PATCH 1/2] Fix read/write past buffer end: perl-security#140
|
|
||||||
|
|
||||||
A package name may be specified in a \p{...} regular expression
|
|
||||||
construct. If unspecified, "utf8::" is assumed, which is the package
|
|
||||||
all official Unicode properties are in. By specifying a different
|
|
||||||
package, one can create a user-defined property with the same
|
|
||||||
unqualified name as a Unicode one. Such a property is defined by a sub
|
|
||||||
whose name begins with "Is" or "In", and if the sub wishes to refer to
|
|
||||||
an official Unicode property, it must explicitly specify the "utf8::".
|
|
||||||
S_parse_uniprop_string() is used to parse the interior of both \p{} and
|
|
||||||
the user-defined sub lines.
|
|
||||||
|
|
||||||
In S_parse_uniprop_string(), it parses the input "name" parameter,
|
|
||||||
creating a modified copy, "lookup_name", malloc'ed with the same size as
|
|
||||||
"name". The modifications are essentially to create a canonicalized
|
|
||||||
version of the input, with such things as extraneous white-space
|
|
||||||
stripped off. I found it convenient to strip off the package specifier
|
|
||||||
"utf8::". To to so, the code simply pretends "lookup_name" begins just
|
|
||||||
after the "utf8::", and adjusts various other values to compensate.
|
|
||||||
However, it missed the adjustment of one required one.
|
|
||||||
|
|
||||||
This is only a problem when the property name begins with "perl" and
|
|
||||||
isn't "perlspace" nor "perlword". All such ones are undocumented
|
|
||||||
internal properties.
|
|
||||||
|
|
||||||
What happens in this case is that the input is reparsed with slightly
|
|
||||||
different rules in effect as to what is legal versus illegal. The
|
|
||||||
problem is that "lookup_name" no longer is pointing to its initial
|
|
||||||
value, but "name" is. Thus the space allocated for filling "lookup_name"
|
|
||||||
is now shorter than "name", and as this shortened "lookup_name" is
|
|
||||||
filled by copying suitable portions of "name", the write can be to
|
|
||||||
unallocated space.
|
|
||||||
|
|
||||||
The solution is to skip the "utf8::" when reparsing "name". Then both
|
|
||||||
"lookup_name" and "name" are effectively shortened by the same amount,
|
|
||||||
and there is no going off the end.
|
|
||||||
|
|
||||||
This commit also does white-space adjustment so that things align
|
|
||||||
vertically for readability.
|
|
||||||
|
|
||||||
This can be easily backported to earlier Perl releases.
|
|
||||||
---
|
|
||||||
regcomp.c | 17 +++++++++++------
|
|
||||||
t/re/pat_advanced.t | 8 ++++++++
|
|
||||||
2 files changed, 19 insertions(+), 6 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/regcomp.c b/regcomp.c
|
|
||||||
index 9c6ccc2c1b..833f8644f7 100644
|
|
||||||
--- a/regcomp.c
|
|
||||||
+++ b/regcomp.c
|
|
||||||
@@ -23697,7 +23697,7 @@ S_parse_uniprop_string(pTHX_
|
|
||||||
* compile perl to know about them) */
|
|
||||||
bool is_nv_type = FALSE;
|
|
||||||
|
|
||||||
- unsigned int i, j = 0;
|
|
||||||
+ unsigned int i = 0, i_zero = 0, j = 0;
|
|
||||||
int equals_pos = -1; /* Where the '=' is found, or negative if none */
|
|
||||||
int slash_pos = -1; /* Where the '/' is found, or negative if none */
|
|
||||||
int table_index = 0; /* The entry number for this property in the table
|
|
||||||
@@ -23831,9 +23831,13 @@ S_parse_uniprop_string(pTHX_
|
|
||||||
* all of them are considered to be for that package. For the purposes of
|
|
||||||
* parsing the rest of the property, strip it off */
|
|
||||||
if (non_pkg_begin == STRLENs("utf8::") && memBEGINPs(name, name_len, "utf8::")) {
|
|
||||||
- lookup_name += STRLENs("utf8::");
|
|
||||||
- j -= STRLENs("utf8::");
|
|
||||||
- equals_pos -= STRLENs("utf8::");
|
|
||||||
+ lookup_name += STRLENs("utf8::");
|
|
||||||
+ j -= STRLENs("utf8::");
|
|
||||||
+ equals_pos -= STRLENs("utf8::");
|
|
||||||
+ i_zero = STRLENs("utf8::"); /* When resetting 'i' to reparse
|
|
||||||
+ from the beginning, it has to be
|
|
||||||
+ set past what we're stripping
|
|
||||||
+ off */
|
|
||||||
stripped_utf8_pkg = TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -24238,7 +24242,8 @@ S_parse_uniprop_string(pTHX_
|
|
||||||
|
|
||||||
/* We set the inputs back to 0 and the code below will reparse,
|
|
||||||
* using strict */
|
|
||||||
- i = j = 0;
|
|
||||||
+ i = i_zero;
|
|
||||||
+ j = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -24259,7 +24264,7 @@ S_parse_uniprop_string(pTHX_
|
|
||||||
* separates two digits */
|
|
||||||
if (cur == '_') {
|
|
||||||
if ( stricter
|
|
||||||
- && ( i == 0 || (int) i == equals_pos || i == name_len- 1
|
|
||||||
+ && ( i == i_zero || (int) i == equals_pos || i == name_len- 1
|
|
||||||
|| ! isDIGIT_A(name[i-1]) || ! isDIGIT_A(name[i+1])))
|
|
||||||
{
|
|
||||||
lookup_name[j++] = '_';
|
|
||||||
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
|
|
||||||
index 6152c7b85c..1db317fff9 100644
|
|
||||||
--- a/t/re/pat_advanced.t
|
|
||||||
+++ b/t/re/pat_advanced.t
|
|
||||||
@@ -2576,6 +2576,14 @@ EOF
|
|
||||||
{}, "GH #17278");
|
|
||||||
}
|
|
||||||
|
|
||||||
+ { # perl-security#140, read/write past buffer end
|
|
||||||
+ fresh_perl_like('qr/\p{utf8::perl x}/',
|
|
||||||
+ qr/Illegal user-defined property name "utf8::perl x" in regex/,
|
|
||||||
+ {}, "perl-security#140");
|
|
||||||
+ fresh_perl_is('qr/\p{utf8::_perl_surrogate}/', "",
|
|
||||||
+ {}, "perl-security#140");
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
|
|
||||||
# !!! NOTE that tests that aren't at all likely to crash perl should go
|
|
||||||
# a ways above, above these last ones. There's a comment there that, like
|
|
||||||
--
|
|
||||||
2.34.1
|
|
||||||
|
|
Loading…
Reference in new issue