Blob Blame History Raw
From 8efa99e7b5d5a37aefb476cc27ee24c2be4da0c7 Mon Sep 17 00:00:00 2001
From: Michael Saboff <msaboff@apple.com>
Date: Mon, 22 May 2023 13:40:46 -0700
Subject: [PATCH] Cherry-pick 264365@main (698c6e293734).
 https://bugs.webkit.org/show_bug.cgi?id=254930

    [JSC] RegExpGlobalData::performMatch issue leading to OOB read
    https://bugs.webkit.org/show_bug.cgi?id=254930
    rdar://107436732

    Reviewed by Alexey Shvayka.

    Fixed two issues:
    1) In YarrInterpreter.cpp::matchAssertionBOL() we were advancing the string position for non-BMP
       characters.  Since it is an assertion, we shouldn't advance the character position.
       Made the same fix to matchAssertionEOL().
    2) In StringPrototype.cpp::replaceUsingRegExpSearch(), we need to advance past both elements of
       a non-BMP character for the case where the RegExp match is empty.

    * JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js: New test.
    * Source/JavaScriptCore/runtime/StringPrototype.cpp:
    (JSC::replaceUsingRegExpSearch):
    * Source/JavaScriptCore/yarr/YarrInterpreter.cpp:
    (JSC::Yarr::Interpreter::InputStream::readCheckedDontAdvance):
    (JSC::Yarr::Interpreter::matchAssertionBOL):
    (JSC::Yarr::Interpreter::matchAssertionEOL):

    Originally-landed-as: 259548.551@safari-7615-branch (e34edaa74575). rdar://107436732
    Canonical link: https://commits.webkit.org/264365@main
---
 ...place-regexp-matchBOL-correct-advancing.js | 35 ++++++++++++++++++
 .../runtime/StringPrototype.cpp               | 10 ++++++
 .../JavaScriptCore/yarr/YarrInterpreter.cpp   | 36 +++++++++++++++++--
 3 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js

diff --git a/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js b/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
new file mode 100644
index 000000000000..25b1a70b81d2
--- /dev/null
+++ b/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
@@ -0,0 +1,35 @@
+// Check that we don't advance for BOL assertions when matching a non-BMP character in the YARR interpreter
+// and that we do advance in String.replace() when processing an empty match.
+
+let expected = "|";
+
+for (let i = 0; i < 11; ++i)
+    expected += String.fromCodePoint(128512) + '|';
+
+let str = String.fromCodePoint(128512).repeat(11);
+
+let result1 = str.replace(/(?!(?=^a|()+()+x)(abc))/gmu, r => {
+    return '|';
+});
+
+
+if (result1 !== expected)
+    print("FAILED: \"" + result1 + " !== " + expected + '"');
+
+let result2= str.replace(/(?!(?=^a|x)(abc))/gmu, r => {
+    return '|';
+});
+
+if (result2 !== expected)
+    print("FAILED: \"" + result2 + " !== " + expected + '"');
+
+expected = "|" + String.fromCodePoint(128512);
+
+str = String.fromCodePoint(128512).repeat(1);
+
+let result3= str.replace(/(?!(?=^a|x)(abc))/mu, r => {
+    return '|';
+});
+
+if (result3 !== expected)
+    print("FAILED: \"" + result3 + " !== " + expected + '"');
diff --git a/Source/JavaScriptCore/runtime/StringPrototype.cpp b/Source/JavaScriptCore/runtime/StringPrototype.cpp
index 08104b1dbfa9..459295f728a7 100644
--- a/Source/JavaScriptCore/runtime/StringPrototype.cpp
+++ b/Source/JavaScriptCore/runtime/StringPrototype.cpp
@@ -603,6 +603,11 @@ static ALWAYS_INLINE JSString* replaceUsingRegExpSearch(
                 startPosition++;
                 if (startPosition > sourceLen)
                     break;
+                if (U16_IS_LEAD(source[startPosition - 1]) && U16_IS_TRAIL(source[startPosition])) {
+                    startPosition++;
+                    if (startPosition > sourceLen)
+                        break;
+                }
             }
         }
     } else {
@@ -682,6 +687,11 @@ static ALWAYS_INLINE JSString* replaceUsingRegExpSearch(
                 startPosition++;
                 if (startPosition > sourceLen)
                     break;
+                if (U16_IS_LEAD(source[startPosition - 1]) && U16_IS_TRAIL(source[startPosition])) {
+                    startPosition++;
+                    if (startPosition > sourceLen)
+                        break;
+                }
             }
         } while (global);
     }
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
index 95a848a1a66d..b1a22b253866 100644
--- a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
+++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
@@ -209,6 +209,38 @@ public:
             }
             return result;
         }
+
+        int readCheckedDontAdvance(unsigned negativePositionOffest)
+        {
+            RELEASE_ASSERT(pos >= negativePositionOffest);
+            unsigned p = pos - negativePositionOffest;
+            ASSERT(p < length);
+            int result = input[p];
+            if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
+                if (atEnd())
+                    return -1;
+
+                result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
+            }
+            return result;
+        }
+
+        // readForCharacterDump() is only for use by the DUMP_CURR_CHAR macro.
+        // We don't want any side effects like the next() in readChecked() above.
+        int readForCharacterDump(unsigned negativePositionOffest)
+        {
+            RELEASE_ASSERT(pos >= negativePositionOffest);
+            unsigned p = pos - negativePositionOffest;
+            ASSERT(p < length);
+            int result = input[p];
+            if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
+                if (atEnd())
+                    return -1;
+
+                result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
+            }
+            return result;
+        }
         
         int readSurrogatePairChecked(unsigned negativePositionOffset)
         {
@@ -482,13 +514,13 @@ public:
 
     bool matchAssertionBOL(ByteTerm& term)
     {
-        return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
+        return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readCheckedDontAdvance(term.inputPosition + 1)));
     }
 
     bool matchAssertionEOL(ByteTerm& term)
     {
         if (term.inputPosition)
-            return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
+            return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readCheckedDontAdvance(term.inputPosition)));
 
         return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
     }
-- 
2.40.1