From 8efa99e7b5d5a37aefb476cc27ee24c2be4da0c7 Mon Sep 17 00:00:00 2001
From: Michael Saboff <msaboff@apple.com>
Date: Mon, 22 May 2023 13:40:46 -0700
Subject: [PATCH] Cherry-pick 264365@main (698c6e293734).
https://bugs.webkit.org/show_bug.cgi?id=254930
[JSC] RegExpGlobalData::performMatch issue leading to OOB read
https://bugs.webkit.org/show_bug.cgi?id=254930
rdar://107436732
Reviewed by Alexey Shvayka.
Fixed two issues:
1) In YarrInterpreter.cpp::matchAssertionBOL() we were advancing the string position for non-BMP
characters. Since it is an assertion, we shouldn't advance the character position.
Made the same fix to matchAssertionEOL().
2) In StringPrototype.cpp::replaceUsingRegExpSearch(), we need to advance past both elements of
a non-BMP character for the case where the RegExp match is empty.
* JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js: New test.
* Source/JavaScriptCore/runtime/StringPrototype.cpp:
(JSC::replaceUsingRegExpSearch):
* Source/JavaScriptCore/yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::readCheckedDontAdvance):
(JSC::Yarr::Interpreter::matchAssertionBOL):
(JSC::Yarr::Interpreter::matchAssertionEOL):
Originally-landed-as: 259548.551@safari-7615-branch (e34edaa74575). rdar://107436732
Canonical link: https://commits.webkit.org/264365@main
---
...place-regexp-matchBOL-correct-advancing.js | 35 ++++++++++++++++++
.../runtime/StringPrototype.cpp | 10 ++++++
.../JavaScriptCore/yarr/YarrInterpreter.cpp | 36 +++++++++++++++++--
3 files changed, 79 insertions(+), 2 deletions(-)
create mode 100644 JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
diff --git a/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js b/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
new file mode 100644
index 000000000000..25b1a70b81d2
--- /dev/null
+++ b/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
@@ -0,0 +1,35 @@
+// Check that we don't advance for BOL assertions when matching a non-BMP character in the YARR interpreter
+// and that we do advance in String.replace() when processing an empty match.
+
+let expected = "|";
+
+for (let i = 0; i < 11; ++i)
+ expected += String.fromCodePoint(128512) + '|';
+
+let str = String.fromCodePoint(128512).repeat(11);
+
+let result1 = str.replace(/(?!(?=^a|()+()+x)(abc))/gmu, r => {
+ return '|';
+});
+
+
+if (result1 !== expected)
+ print("FAILED: \"" + result1 + " !== " + expected + '"');
+
+let result2= str.replace(/(?!(?=^a|x)(abc))/gmu, r => {
+ return '|';
+});
+
+if (result2 !== expected)
+ print("FAILED: \"" + result2 + " !== " + expected + '"');
+
+expected = "|" + String.fromCodePoint(128512);
+
+str = String.fromCodePoint(128512).repeat(1);
+
+let result3= str.replace(/(?!(?=^a|x)(abc))/mu, r => {
+ return '|';
+});
+
+if (result3 !== expected)
+ print("FAILED: \"" + result3 + " !== " + expected + '"');
diff --git a/Source/JavaScriptCore/runtime/StringPrototype.cpp b/Source/JavaScriptCore/runtime/StringPrototype.cpp
index 08104b1dbfa9..459295f728a7 100644
--- a/Source/JavaScriptCore/runtime/StringPrototype.cpp
+++ b/Source/JavaScriptCore/runtime/StringPrototype.cpp
@@ -603,6 +603,11 @@ static ALWAYS_INLINE JSString* replaceUsingRegExpSearch(
startPosition++;
if (startPosition > sourceLen)
break;
+ if (U16_IS_LEAD(source[startPosition - 1]) && U16_IS_TRAIL(source[startPosition])) {
+ startPosition++;
+ if (startPosition > sourceLen)
+ break;
+ }
}
}
} else {
@@ -682,6 +687,11 @@ static ALWAYS_INLINE JSString* replaceUsingRegExpSearch(
startPosition++;
if (startPosition > sourceLen)
break;
+ if (U16_IS_LEAD(source[startPosition - 1]) && U16_IS_TRAIL(source[startPosition])) {
+ startPosition++;
+ if (startPosition > sourceLen)
+ break;
+ }
}
} while (global);
}
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
index 95a848a1a66d..b1a22b253866 100644
--- a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
+++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
@@ -209,6 +209,38 @@ public:
}
return result;
}
+
+ int readCheckedDontAdvance(unsigned negativePositionOffest)
+ {
+ RELEASE_ASSERT(pos >= negativePositionOffest);
+ unsigned p = pos - negativePositionOffest;
+ ASSERT(p < length);
+ int result = input[p];
+ if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
+ if (atEnd())
+ return -1;
+
+ result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
+ }
+ return result;
+ }
+
+ // readForCharacterDump() is only for use by the DUMP_CURR_CHAR macro.
+ // We don't want any side effects like the next() in readChecked() above.
+ int readForCharacterDump(unsigned negativePositionOffest)
+ {
+ RELEASE_ASSERT(pos >= negativePositionOffest);
+ unsigned p = pos - negativePositionOffest;
+ ASSERT(p < length);
+ int result = input[p];
+ if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
+ if (atEnd())
+ return -1;
+
+ result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
+ }
+ return result;
+ }
int readSurrogatePairChecked(unsigned negativePositionOffset)
{
@@ -482,13 +514,13 @@ public:
bool matchAssertionBOL(ByteTerm& term)
{
- return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
+ return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readCheckedDontAdvance(term.inputPosition + 1)));
}
bool matchAssertionEOL(ByteTerm& term)
{
if (term.inputPosition)
- return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
+ return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readCheckedDontAdvance(term.inputPosition)));
return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
}
--
2.40.1