You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
168 lines
6.9 KiB
168 lines
6.9 KiB
1 year ago
|
From 8efa99e7b5d5a37aefb476cc27ee24c2be4da0c7 Mon Sep 17 00:00:00 2001
|
||
|
From: Michael Saboff <msaboff@apple.com>
|
||
|
Date: Mon, 22 May 2023 13:40:46 -0700
|
||
|
Subject: [PATCH] Cherry-pick 264365@main (698c6e293734).
|
||
|
https://bugs.webkit.org/show_bug.cgi?id=254930
|
||
|
|
||
|
[JSC] RegExpGlobalData::performMatch issue leading to OOB read
|
||
|
https://bugs.webkit.org/show_bug.cgi?id=254930
|
||
|
rdar://107436732
|
||
|
|
||
|
Reviewed by Alexey Shvayka.
|
||
|
|
||
|
Fixed two issues:
|
||
|
1) In YarrInterpreter.cpp::matchAssertionBOL() we were advancing the string position for non-BMP
|
||
|
characters. Since it is an assertion, we shouldn't advance the character position.
|
||
|
Made the same fix to matchAssertionEOL().
|
||
|
2) In StringPrototype.cpp::replaceUsingRegExpSearch(), we need to advance past both elements of
|
||
|
a non-BMP character for the case where the RegExp match is empty.
|
||
|
|
||
|
* JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js: New test.
|
||
|
* Source/JavaScriptCore/runtime/StringPrototype.cpp:
|
||
|
(JSC::replaceUsingRegExpSearch):
|
||
|
* Source/JavaScriptCore/yarr/YarrInterpreter.cpp:
|
||
|
(JSC::Yarr::Interpreter::InputStream::readCheckedDontAdvance):
|
||
|
(JSC::Yarr::Interpreter::matchAssertionBOL):
|
||
|
(JSC::Yarr::Interpreter::matchAssertionEOL):
|
||
|
|
||
|
Originally-landed-as: 259548.551@safari-7615-branch (e34edaa74575). rdar://107436732
|
||
|
Canonical link: https://commits.webkit.org/264365@main
|
||
|
---
|
||
|
...place-regexp-matchBOL-correct-advancing.js | 35 ++++++++++++++++++
|
||
|
.../runtime/StringPrototype.cpp | 10 ++++++
|
||
|
.../JavaScriptCore/yarr/YarrInterpreter.cpp | 36 +++++++++++++++++--
|
||
|
3 files changed, 79 insertions(+), 2 deletions(-)
|
||
|
create mode 100644 JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
|
||
|
|
||
|
diff --git a/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js b/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
|
||
|
new file mode 100644
|
||
|
index 000000000000..25b1a70b81d2
|
||
|
--- /dev/null
|
||
|
+++ b/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
|
||
|
@@ -0,0 +1,35 @@
|
||
|
+// Check that we don't advance for BOL assertions when matching a non-BMP character in the YARR interpreter
|
||
|
+// and that we do advance in String.replace() when processing an empty match.
|
||
|
+
|
||
|
+let expected = "|";
|
||
|
+
|
||
|
+for (let i = 0; i < 11; ++i)
|
||
|
+ expected += String.fromCodePoint(128512) + '|';
|
||
|
+
|
||
|
+let str = String.fromCodePoint(128512).repeat(11);
|
||
|
+
|
||
|
+let result1 = str.replace(/(?!(?=^a|()+()+x)(abc))/gmu, r => {
|
||
|
+ return '|';
|
||
|
+});
|
||
|
+
|
||
|
+
|
||
|
+if (result1 !== expected)
|
||
|
+ print("FAILED: \"" + result1 + " !== " + expected + '"');
|
||
|
+
|
||
|
+let result2= str.replace(/(?!(?=^a|x)(abc))/gmu, r => {
|
||
|
+ return '|';
|
||
|
+});
|
||
|
+
|
||
|
+if (result2 !== expected)
|
||
|
+ print("FAILED: \"" + result2 + " !== " + expected + '"');
|
||
|
+
|
||
|
+expected = "|" + String.fromCodePoint(128512);
|
||
|
+
|
||
|
+str = String.fromCodePoint(128512).repeat(1);
|
||
|
+
|
||
|
+let result3= str.replace(/(?!(?=^a|x)(abc))/mu, r => {
|
||
|
+ return '|';
|
||
|
+});
|
||
|
+
|
||
|
+if (result3 !== expected)
|
||
|
+ print("FAILED: \"" + result3 + " !== " + expected + '"');
|
||
|
diff --git a/Source/JavaScriptCore/runtime/StringPrototype.cpp b/Source/JavaScriptCore/runtime/StringPrototype.cpp
|
||
|
index 08104b1dbfa9..459295f728a7 100644
|
||
|
--- a/Source/JavaScriptCore/runtime/StringPrototype.cpp
|
||
|
+++ b/Source/JavaScriptCore/runtime/StringPrototype.cpp
|
||
|
@@ -603,6 +603,11 @@ static ALWAYS_INLINE JSString* replaceUsingRegExpSearch(
|
||
|
startPosition++;
|
||
|
if (startPosition > sourceLen)
|
||
|
break;
|
||
|
+ if (U16_IS_LEAD(source[startPosition - 1]) && U16_IS_TRAIL(source[startPosition])) {
|
||
|
+ startPosition++;
|
||
|
+ if (startPosition > sourceLen)
|
||
|
+ break;
|
||
|
+ }
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
@@ -682,6 +687,11 @@ static ALWAYS_INLINE JSString* replaceUsingRegExpSearch(
|
||
|
startPosition++;
|
||
|
if (startPosition > sourceLen)
|
||
|
break;
|
||
|
+ if (U16_IS_LEAD(source[startPosition - 1]) && U16_IS_TRAIL(source[startPosition])) {
|
||
|
+ startPosition++;
|
||
|
+ if (startPosition > sourceLen)
|
||
|
+ break;
|
||
|
+ }
|
||
|
}
|
||
|
} while (global);
|
||
|
}
|
||
|
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
|
||
|
index 95a848a1a66d..b1a22b253866 100644
|
||
|
--- a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
|
||
|
+++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
|
||
|
@@ -209,6 +209,38 @@ public:
|
||
|
}
|
||
|
return result;
|
||
|
}
|
||
|
+
|
||
|
+ int readCheckedDontAdvance(unsigned negativePositionOffest)
|
||
|
+ {
|
||
|
+ RELEASE_ASSERT(pos >= negativePositionOffest);
|
||
|
+ unsigned p = pos - negativePositionOffest;
|
||
|
+ ASSERT(p < length);
|
||
|
+ int result = input[p];
|
||
|
+ if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
|
||
|
+ if (atEnd())
|
||
|
+ return -1;
|
||
|
+
|
||
|
+ result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
|
||
|
+ }
|
||
|
+ return result;
|
||
|
+ }
|
||
|
+
|
||
|
+ // readForCharacterDump() is only for use by the DUMP_CURR_CHAR macro.
|
||
|
+ // We don't want any side effects like the next() in readChecked() above.
|
||
|
+ int readForCharacterDump(unsigned negativePositionOffest)
|
||
|
+ {
|
||
|
+ RELEASE_ASSERT(pos >= negativePositionOffest);
|
||
|
+ unsigned p = pos - negativePositionOffest;
|
||
|
+ ASSERT(p < length);
|
||
|
+ int result = input[p];
|
||
|
+ if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
|
||
|
+ if (atEnd())
|
||
|
+ return -1;
|
||
|
+
|
||
|
+ result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
|
||
|
+ }
|
||
|
+ return result;
|
||
|
+ }
|
||
|
|
||
|
int readSurrogatePairChecked(unsigned negativePositionOffset)
|
||
|
{
|
||
|
@@ -482,13 +514,13 @@ public:
|
||
|
|
||
|
bool matchAssertionBOL(ByteTerm& term)
|
||
|
{
|
||
|
- return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
|
||
|
+ return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readCheckedDontAdvance(term.inputPosition + 1)));
|
||
|
}
|
||
|
|
||
|
bool matchAssertionEOL(ByteTerm& term)
|
||
|
{
|
||
|
if (term.inputPosition)
|
||
|
- return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
|
||
|
+ return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readCheckedDontAdvance(term.inputPosition)));
|
||
|
|
||
|
return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
|
||
|
}
|
||
|
--
|
||
|
2.40.1
|
||
|
|