Blame SOURCES/CVE-2023-28204.patch

f1679e
From 8efa99e7b5d5a37aefb476cc27ee24c2be4da0c7 Mon Sep 17 00:00:00 2001
f1679e
From: Michael Saboff <msaboff@apple.com>
f1679e
Date: Mon, 22 May 2023 13:40:46 -0700
f1679e
Subject: [PATCH] Cherry-pick 264365@main (698c6e293734).
f1679e
 https://bugs.webkit.org/show_bug.cgi?id=254930
f1679e
f1679e
    [JSC] RegExpGlobalData::performMatch issue leading to OOB read
f1679e
    https://bugs.webkit.org/show_bug.cgi?id=254930
f1679e
    rdar://107436732
f1679e
f1679e
    Reviewed by Alexey Shvayka.
f1679e
f1679e
    Fixed two issues:
f1679e
    1) In YarrInterpreter.cpp::matchAssertionBOL() we were advancing the string position for non-BMP
f1679e
       characters.  Since it is an assertion, we shouldn't advance the character position.
f1679e
       Made the same fix to matchAssertionEOL().
f1679e
    2) In StringPrototype.cpp::replaceUsingRegExpSearch(), we need to advance past both UTF-16 code
f1679e
       units (the surrogate pair) of a non-BMP character for the case where the RegExp match is empty.
f1679e
f1679e
    * JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js: New test.
f1679e
    * Source/JavaScriptCore/runtime/StringPrototype.cpp:
f1679e
    (JSC::replaceUsingRegExpSearch):
f1679e
    * Source/JavaScriptCore/yarr/YarrInterpreter.cpp:
f1679e
    (JSC::Yarr::Interpreter::InputStream::readCheckedDontAdvance):
f1679e
    (JSC::Yarr::Interpreter::matchAssertionBOL):
f1679e
    (JSC::Yarr::Interpreter::matchAssertionEOL):
f1679e
f1679e
    Originally-landed-as: 259548.551@safari-7615-branch (e34edaa74575). rdar://107436732
f1679e
    Canonical link: https://commits.webkit.org/264365@main
f1679e
---
f1679e
 ...place-regexp-matchBOL-correct-advancing.js | 35 ++++++++++++++++++
f1679e
 .../runtime/StringPrototype.cpp               | 10 ++++++
f1679e
 .../JavaScriptCore/yarr/YarrInterpreter.cpp   | 36 +++++++++++++++++--
f1679e
 3 files changed, 79 insertions(+), 2 deletions(-)
f1679e
 create mode 100644 JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
f1679e
f1679e
diff --git a/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js b/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
f1679e
new file mode 100644
f1679e
index 000000000000..25b1a70b81d2
f1679e
--- /dev/null
f1679e
+++ b/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js
f1679e
@@ -0,0 +1,35 @@
f1679e
+// Check that we don't advance for BOL assertions when matching a non-BMP character in the YARR interpreter
f1679e
+// and that we do advance in String.replace() when processing an empty match.
f1679e
+
f1679e
+let expected = "|";
f1679e
+
f1679e
+for (let i = 0; i < 11; ++i)
f1679e
+    expected += String.fromCodePoint(128512) + '|';
f1679e
+
f1679e
+let str = String.fromCodePoint(128512).repeat(11);
f1679e
+
f1679e
+let result1 = str.replace(/(?!(?=^a|()+()+x)(abc))/gmu, r => {
f1679e
+    return '|';
f1679e
+});
f1679e
+
f1679e
+
f1679e
+if (result1 !== expected)
f1679e
+    print("FAILED: \"" + result1 + " !== " + expected + '"');
f1679e
+
f1679e
+let result2= str.replace(/(?!(?=^a|x)(abc))/gmu, r => {
f1679e
+    return '|';
f1679e
+});
f1679e
+
f1679e
+if (result2 !== expected)
f1679e
+    print("FAILED: \"" + result2 + " !== " + expected + '"');
f1679e
+
f1679e
+expected = "|" + String.fromCodePoint(128512);
f1679e
+
f1679e
+str = String.fromCodePoint(128512).repeat(1);
f1679e
+
f1679e
+let result3= str.replace(/(?!(?=^a|x)(abc))/mu, r => {
f1679e
+    return '|';
f1679e
+});
f1679e
+
f1679e
+if (result3 !== expected)
f1679e
+    print("FAILED: \"" + result3 + " !== " + expected + '"');
f1679e
diff --git a/Source/JavaScriptCore/runtime/StringPrototype.cpp b/Source/JavaScriptCore/runtime/StringPrototype.cpp
f1679e
index 08104b1dbfa9..459295f728a7 100644
f1679e
--- a/Source/JavaScriptCore/runtime/StringPrototype.cpp
f1679e
+++ b/Source/JavaScriptCore/runtime/StringPrototype.cpp
f1679e
@@ -603,6 +603,11 @@ static ALWAYS_INLINE JSString* replaceUsingRegExpSearch(
f1679e
                 startPosition++;
f1679e
                 if (startPosition > sourceLen)
f1679e
                     break;
f1679e
+                if (U16_IS_LEAD(source[startPosition - 1]) && U16_IS_TRAIL(source[startPosition])) {
f1679e
+                    startPosition++;
f1679e
+                    if (startPosition > sourceLen)
f1679e
+                        break;
f1679e
+                }
f1679e
             }
f1679e
         }
f1679e
     } else {
f1679e
@@ -682,6 +687,11 @@ static ALWAYS_INLINE JSString* replaceUsingRegExpSearch(
f1679e
                 startPosition++;
f1679e
                 if (startPosition > sourceLen)
f1679e
                     break;
f1679e
+                if (U16_IS_LEAD(source[startPosition - 1]) && U16_IS_TRAIL(source[startPosition])) {
f1679e
+                    startPosition++;
f1679e
+                    if (startPosition > sourceLen)
f1679e
+                        break;
f1679e
+                }
f1679e
             }
f1679e
         } while (global);
f1679e
     }
f1679e
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
f1679e
index 95a848a1a66d..b1a22b253866 100644
f1679e
--- a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
f1679e
+++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
f1679e
@@ -209,6 +209,38 @@ public:
f1679e
             }
f1679e
             return result;
f1679e
         }
f1679e
+
f1679e
+        int readCheckedDontAdvance(unsigned negativePositionOffest)
f1679e
+        {
f1679e
+            RELEASE_ASSERT(pos >= negativePositionOffest);
f1679e
+            unsigned p = pos - negativePositionOffest;
f1679e
+            ASSERT(p < length);
f1679e
+            int result = input[p];
f1679e
+            if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
f1679e
+                if (atEnd())
f1679e
+                    return -1;
f1679e
+
f1679e
+                result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
f1679e
+            }
f1679e
+            return result;
f1679e
+        }
f1679e
+
f1679e
+        // readForCharacterDump() is only for use by the DUMP_CURR_CHAR macro.
f1679e
+        // We don't want any side effects like the next() in readChecked() above.
f1679e
+        int readForCharacterDump(unsigned negativePositionOffest)
f1679e
+        {
f1679e
+            RELEASE_ASSERT(pos >= negativePositionOffest);
f1679e
+            unsigned p = pos - negativePositionOffest;
f1679e
+            ASSERT(p < length);
f1679e
+            int result = input[p];
f1679e
+            if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
f1679e
+                if (atEnd())
f1679e
+                    return -1;
f1679e
+
f1679e
+                result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
f1679e
+            }
f1679e
+            return result;
f1679e
+        }
f1679e
         
f1679e
         int readSurrogatePairChecked(unsigned negativePositionOffset)
f1679e
         {
f1679e
@@ -482,13 +514,13 @@ public:
f1679e
 
f1679e
     bool matchAssertionBOL(ByteTerm& term)
f1679e
     {
f1679e
-        return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
f1679e
+        return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readCheckedDontAdvance(term.inputPosition + 1)));
f1679e
     }
f1679e
 
f1679e
     bool matchAssertionEOL(ByteTerm& term)
f1679e
     {
f1679e
         if (term.inputPosition)
f1679e
-            return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
f1679e
+            return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readCheckedDontAdvance(term.inputPosition)));
f1679e
 
f1679e
         return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
f1679e
     }
f1679e
-- 
f1679e
2.40.1
f1679e