Tree - rpms/pcre - CentOS Git server

rpms / pcre

Blame SOURCES/pcre-8.42-Fix-two-C-wrapper-bugs-unnoticed-for-years.patch

Blob History Raw

		22122f	`From 2ede5a4b4a98add3bbf982f5805e015e8c61c565 Mon Sep 17 00:00:00 2001`
		22122f	`From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>`
		22122f	`Date: Tue, 26 Jun 2018 16:51:43 +0000`
		22122f	`Subject: [PATCH] Fix two C++ wrapper bugs, unnoticed for years.`
		22122f	`MIME-Version: 1.0`
		22122f	`Content-Type: text/plain; charset=UTF-8`
		22122f	`Content-Transfer-Encoding: 8bit`
		22122f
		22122f	`git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1735 2f5784b3-3f2a-0410-8824-cb99058d5e15`
		22122f
		22122f	`Petr Písař: Ported to 8.42.`
		22122f
		22122f	`diff --git a/pcrecpp.cc b/pcrecpp.cc`
		22122f	`index d09c9ab..77a2fed 100644`
		22122f	`--- a/pcrecpp.cc`
		22122f	`+++ b/pcrecpp.cc`
		22122f	`@@ -80,6 +80,24 @@ static const string empty_string;`
		22122f	`// If the user doesn't ask for any options, we just use this one`
		22122f	`static RE_Options default_options;`
		22122f
		22122f	`+// Specials for the start of patterns. See comments where start_options is used`
		22122f	`+// below. (PH June 2018)`
		22122f	`+static const char *start_options[] = {`
		22122f	`+ "(*UTF8)",`
		22122f	`+ "(*UTF)",`
		22122f	`+ "(*UCP)",`
		22122f	`+ "(*NO_START_OPT)",`
		22122f	`+ "(*NO_AUTO_POSSESS)",`
		22122f	`+ "(*LIMIT_RECURSION=",`
		22122f	`+ "(*LIMIT_MATCH=",`
		22122f	`+ "(*CRLF)",`
		22122f	`+ "(*CR)",`
		22122f	`+ "(*BSR_UNICODE)",`
		22122f	`+ "(*BSR_ANYCRLF)",`
		22122f	`+ "(*ANYCRLF)",`
		22122f	`+ "(*ANY)",`
		22122f	`+ "" };`
		22122f	`+`
		22122f	`void RE::Init(const string& pat, const RE_Options* options) {`
		22122f	`pattern_ = pat;`
		22122f	`if (options == NULL) {`
		22122f	`@@ -135,7 +153,49 @@ pcre* RE::Compile(Anchor anchor) {`
		22122f	`} else {`
		22122f	`// Tack a '\z' at the end of RE. Parenthesize it first so that`
		22122f	`// the '\z' applies to all top-level alternatives in the regexp.`
		22122f	`- string wrapped = "(?:"; // A non-counting grouping operator`
		22122f	`+`
		22122f	`+ /* When this code was written (for PCRE 6.0) it was enough just to`
		22122f	`+ parenthesize the entire pattern. Unfortunately, when the feature of`
		22122f	`+ starting patterns with (*UTF8) or (*CR) etc. was added to PCRE patterns,`
		22122f	`+ this code was never updated. This bug was not noticed till 2018, long after`
		22122f	`+ PCRE became obsolescent and its maintainer no longer around. Since PCRE is`
		22122f	`+ frozen, I have added a hack to check for all the existing "start of`
		22122f	`+ pattern" specials - knowing that no new ones will ever be added. I am not a`
		22122f	`+ C++ programmer, so the code style is no doubt crude. It is also`
		22122f	`+ inefficient, but is only run when the pattern starts with "(*".`
		22122f	`+ PH June 2018. */`
		22122f	`+`
		22122f	`+ string wrapped = "";`
		22122f	`+`
		22122f	`+ if (pattern_.c_str()[0] == '(' && pattern_.c_str()[1] == '*') {`
		22122f	`+ int kk, klen, kmat;`
		22122f	`+ for (;;) { // Loop for any number of leading items`
		22122f	`+`
		22122f	`+ for (kk = 0; start_options[kk][0] != 0; kk++) {`
		22122f	`+ klen = strlen(start_options[kk]);`
		22122f	`+ kmat = strncmp(pattern_.c_str(), start_options[kk], klen);`
		22122f	`+ if (kmat >= 0) break;`
		22122f	`+ }`
		22122f	`+ if (kmat != 0) break; // Not found`
		22122f	`+`
		22122f	`+ // If the item ended in "=" we must copy digits up to ")".`
		22122f	`+`
		22122f	`+ if (start_options[kk][klen-1] == '=') {`
		22122f	`+ while (isdigit(pattern_.c_str()[klen])) klen++;`
		22122f	`+ if (pattern_.c_str()[klen] != ')') break; // Syntax error`
		22122f	`+ klen++;`
		22122f	`+ }`
		22122f	`+`
		22122f	`+ // Move the item from the pattern to the start of the wrapped string.`
		22122f	`+`
		22122f	`+ wrapped += pattern_.substr(0, klen);`
		22122f	`+ pattern_.erase(0, klen);`
		22122f	`+ }`
		22122f	`+ }`
		22122f	`+`
		22122f	`+ // Wrap the rest of the pattern.`
		22122f	`+`
		22122f	`+ wrapped += "(?:"; // A non-counting grouping operator`
		22122f	`wrapped += pattern_;`
		22122f	`wrapped += ")\\z";`
		22122f	`re = pcre_compile(wrapped.c_str(), pcre_options,`
		22122f	`@@ -415,7 +475,7 @@ int RE::GlobalReplace(const StringPiece& rewrite,`
		22122f	`matchend++;`
		22122f	`}`
		22122f	`// We also need to advance more than one char if we're in utf8 mode.`
		22122f	`-#ifdef SUPPORT_UTF8`
		22122f	`+#ifdef SUPPORT_UTF`
		22122f	`if (options_.utf8()) {`
		22122f	`while (matchend < static_cast<int>(str->length()) &&`
		22122f	`((*str)[matchend] & 0xc0) == 0x80)`
		22122f	`diff --git a/pcrecpp_unittest.cc b/pcrecpp_unittest.cc`
		22122f	`index 4b15fbe..255066f 100644`
		22122f	`--- a/pcrecpp_unittest.cc`
		22122f	`+++ b/pcrecpp_unittest.cc`
		22122f	`@@ -309,7 +309,7 @@ static void TestReplace() {`
		22122f	`"@aa",`
		22122f	`"@@@",`
		22122f	`3 },`
		22122f	`-#ifdef SUPPORT_UTF8`
		22122f	`+#ifdef SUPPORT_UTF`
		22122f	`{ "b*",`
		22122f	`"bb",`
		22122f	`"\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8`
		22122f	`@@ -327,7 +327,7 @@ static void TestReplace() {`
		22122f	`{ "", NULL, NULL, NULL, NULL, 0 }`
		22122f	`};`
		22122f
		22122f	`-#ifdef SUPPORT_UTF8`
		22122f	`+#ifdef SUPPORT_UTF`
		22122f	`const bool support_utf8 = true;`
		22122f	`#else`
		22122f	`const bool support_utf8 = false;`
		22122f	`@@ -535,7 +535,7 @@ static void TestQuoteMetaLatin1() {`
		22122f	`}`
		22122f
		22122f	`static void TestQuoteMetaUtf8() {`
		22122f	`-#ifdef SUPPORT_UTF8`
		22122f	`+#ifdef SUPPORT_UTF`
		22122f	`TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());`
		22122f	`TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8`
		22122f	`TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol)`
		22122f	`@@ -1178,7 +1178,7 @@ int main(int argc, char** argv) {`
		22122f	`CHECK(re.error().empty()); // Must have no error`
		22122f	`}`
		22122f
		22122f	`-#ifdef SUPPORT_UTF8`
		22122f	`+#ifdef SUPPORT_UTF`
		22122f	`// Check UTF-8 handling`
		22122f	`{`
		22122f	`printf("Testing UTF-8 handling\n");`
		22122f	`@@ -1202,6 +1202,24 @@ int main(int argc, char** argv) {`
		22122f	`CHECK(re_test1.FullMatch(utf8_string));`
		22122f	`RE re_test2("...", pcrecpp::UTF8());`
		22122f	`CHECK(re_test2.FullMatch(utf8_string));`
		22122f	`+`
		22122f	`+ // PH added these tests for leading option settings`
		22122f	`+`
		22122f	`+ RE re_testZ1("(*UTF8)...");`
		22122f	`+ CHECK(re_testZ1.FullMatch(utf8_string));`
		22122f	`+`
		22122f	`+ RE re_testZ2("(*UTF)...");`
		22122f	`+ CHECK(re_testZ2.FullMatch(utf8_string));`
		22122f	`+`
		22122f	`+ RE re_testZ3("(*UCP)(*UTF)...");`
		22122f	`+ CHECK(re_testZ3.FullMatch(utf8_string));`
		22122f	`+`
		22122f	`+ RE re_testZ4("(*UCP)(*LIMIT_MATCH=1000)(*UTF)...");`
		22122f	`+ CHECK(re_testZ4.FullMatch(utf8_string));`
		22122f	`+`
		22122f	`+ RE re_testZ5("(*UCP)(*LIMIT_MATCH=1000)(*ANY)(*UTF)...");`
		22122f	`+ CHECK(re_testZ5.FullMatch(utf8_string));`
		22122f	`+`
		22122f
		22122f	`// Check that '.' matches one byte or UTF-8 character`
		22122f	`// according to the mode.`
		22122f	`@@ -1248,7 +1266,7 @@ int main(int argc, char** argv) {`
		22122f	`CHECK(!match_sentence.FullMatch(target));`
		22122f	`CHECK(!match_sentence_re.FullMatch(target));`
		22122f	`}`
		22122f	`-#endif /* def SUPPORT_UTF8 */`
		22122f	`+#endif /* def SUPPORT_UTF */`
		22122f
		22122f	`printf("Testing error reporting\n");`
		22122f
		22122f	`--`
		22122f	`2.14.4`
		22122f

rpms / pcre

Source Code

Blame SOURCES/pcre-8.42-Fix-two-C-wrapper-bugs-unnoticed-for-years.patch