From cb67f21529ca7be71d41a9bc67d55e5be83bc13f Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Nov 02 2019 22:19:36 +0000 Subject: import pcre-8.32-17.el7 --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..950bf3d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/pcre-8.32.tar.bz2 diff --git a/.pcre.metadata b/.pcre.metadata new file mode 100644 index 0000000..d4cc31d --- /dev/null +++ b/.pcre.metadata @@ -0,0 +1 @@ +dbd44267cf4d7c6464391003908d5a4342726700 SOURCES/pcre-8.32.tar.bz2 diff --git a/SOURCES/pcre-8.21-multilib.patch b/SOURCES/pcre-8.21-multilib.patch new file mode 100644 index 0000000..345e624 --- /dev/null +++ b/SOURCES/pcre-8.21-multilib.patch @@ -0,0 +1,37 @@ +From 1d5ce3e2e6341b01609aefab20786a8638fee17a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= +Date: Tue, 6 Dec 2011 18:44:11 +0100 +Subject: [PATCH] Fix multilib + +Do not set RPATH nor add explicit -L path to compiler. +--- + pcre-config.in | 12 ------------ + 1 files changed, 0 insertions(+), 12 deletions(-) + +diff --git a/pcre-config.in b/pcre-config.in +index ccbf210..aeee182 100644 +--- a/pcre-config.in ++++ b/pcre-config.in +@@ -16,19 +16,7 @@ if test $# -eq 0; then + fi + + libR= +-case `uname -s` in +- *SunOS*) +- libR=" -R@libdir@" +- ;; +- *BSD*) +- libR=" -Wl,-R@libdir@" +- ;; +-esac +- + libS= +-if test @libdir@ != /usr/lib ; then +- libS=-L@libdir@ +-fi + + while test $# -gt 0; do + case "$1" in +-- +1.7.7.4 + diff --git a/SOURCES/pcre-8.32-A-new-flag-is-set-when-property-checks-are-present-i.patch b/SOURCES/pcre-8.32-A-new-flag-is-set-when-property-checks-are-present-i.patch new file mode 100644 index 0000000..0760b64 --- /dev/null +++ b/SOURCES/pcre-8.32-A-new-flag-is-set-when-property-checks-are-present-i.patch @@ -0,0 +1,849 @@ +From d779878352fdce2ca955a5a3135d2c8f2b27ba13 Mon Sep 17 00:00:00 2001 +From: zherczeg +Date: Sun, 22 Dec 2013 16:27:35 +0000 +Subject: [PATCH] A new flag is set, when property checks are present in 
an + XCLASS. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported to 8.32: + +commit f928c7adccd8daa61e76c22130d79689ec41f21c +Author: zherczeg +Date: Sun Dec 22 16:27:35 2013 +0000 + + A new flag is set, when property checks are present in an XCLASS. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1414 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 52 +++++++++++++++-------- + pcre_exec.c | 14 +------ + pcre_internal.h | 5 ++- + pcre_jit_compile.c | 114 +++++++++++++++++++++++++++++++++++++------------- + pcre_printint.c | 15 ++++++- + pcre_study.c | 62 ++++++++++++++++++--------- + pcre_xclass.c | 5 +++ + testdata/saved16BE-1 | Bin 402 -> 402 bytes + testdata/saved16LE-1 | Bin 402 -> 402 bytes + testdata/saved32BE-1 | Bin 544 -> 552 bytes + testdata/saved32LE-1 | Bin 544 -> 552 bytes + testdata/testoutput17 | 4 +- + testdata/testoutput23 | 34 +++++++++++++-- + testdata/testoutput25 | 34 +++++++++++++-- + testdata/testoutput5 | 10 ++--- + testdata/testoutput7 | 4 +- + 16 files changed, 256 insertions(+), 97 deletions(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 3c75218..962b4d3 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -3512,6 +3512,7 @@ add_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options, + compile_data *cd, pcre_uint32 start, pcre_uint32 end) + { + pcre_uint32 c; ++pcre_uint32 classbits_end = (end <= 0xff ? end : 0xff); + int n8 = 0; + + /* If caseless matching is required, scan the range and process alternate +@@ -3555,7 +3556,7 @@ if ((options & PCRE_CASELESS) != 0) + + /* Not UTF-mode, or no UCP */ + +- for (c = start; c <= end && c < 256; c++) ++ for (c = start; c <= classbits_end; c++) + { + SETBIT(classbits, cd->fcc[c]); + n8++; +@@ -3580,20 +3581,19 @@ in all cases. */ + + #endif /* COMPILE_PCRE[8|16] */ + +-/* If all characters are less than 256, use the bit map. Otherwise use extra +-data. 
*/ ++/* Use the bitmap for characters < 256. Otherwise use extra data.*/ + +-if (end < 0x100) ++for (c = start; c <= classbits_end; c++) + { +- for (c = start; c <= end; c++) +- { +- n8++; +- SETBIT(classbits, c); +- } ++ /* Regardless of start, c will always be <= 255. */ ++ SETBIT(classbits, c); ++ n8++; + } + +-else +- { ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++if (start <= 0xff) start = 0xff + 1; ++ ++if (end >= start) { + pcre_uchar *uchardata = *uchardptr; + + #ifdef SUPPORT_UTF +@@ -3635,6 +3635,7 @@ else + + *uchardptr = uchardata; /* Updata extra data pointer */ + } ++#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ + + return n8; /* Number of 8-bit characters */ + } +@@ -3856,6 +3857,9 @@ for (;; ptr++) + BOOL reset_bracount; + int class_has_8bitchar; + int class_one_char; ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ BOOL xclass_has_prop; ++#endif + int newoptions; + int recno; + int refsign; +@@ -4161,13 +4165,26 @@ for (;; ptr++) + + should_flip_negation = FALSE; + ++ /* Extended class (xclass) will be used when characters > 255 ++ might match. */ ++ ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ xclass = FALSE; ++ class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ ++ class_uchardata_base = class_uchardata; /* Save the start */ ++#endif ++ + /* For optimization purposes, we track some properties of the class: + class_has_8bitchar will be non-zero if the class contains at least one < + 256 character; class_one_char will be 1 if the class contains just one +- character. */ ++ character; xclass_has_prop will be TRUE if unicode property checks ++ are present in the class. */ + + class_has_8bitchar = 0; + class_one_char = 0; ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ xclass_has_prop = FALSE; ++#endif + + /* Initialize the 32-char bit map to all zeros. 
We build the map in a + temporary bit of memory, in case the class contains fewer than two +@@ -4176,12 +4193,6 @@ for (;; ptr++) + + memset(classbits, 0, 32 * sizeof(pcre_uint8)); + +-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 +- xclass = FALSE; +- class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ +- class_uchardata_base = class_uchardata; /* Save the start */ +-#endif +- + /* Process characters until ] is reached. By writing this as a "do" it + means that an initial ] is taken as a data character. At the start of the + loop, c contains the first byte of the character. */ +@@ -4305,6 +4316,7 @@ for (;; ptr++) + *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; + *class_uchardata++ = ptype; + *class_uchardata++ = 0; ++ xclass_has_prop = TRUE; + ptr = tempptr + 1; + continue; + +@@ -4490,6 +4502,7 @@ for (;; ptr++) + XCL_PROP : XCL_NOTPROP; + *class_uchardata++ = ptype; + *class_uchardata++ = pdata; ++ xclass_has_prop = TRUE; + class_has_8bitchar--; /* Undo! */ + continue; + } +@@ -4767,6 +4780,7 @@ for (;; ptr++) + *code++ = OP_XCLASS; + code += LINK_SIZE; + *code = negate_class? XCL_NOT:0; ++ if (xclass_has_prop) *code |= XCL_HASPROP; + + /* If the map is required, move up the extra data to make room for it; + otherwise just move the code pointer to the end of the extra data. 
*/ +@@ -4776,6 +4790,8 @@ for (;; ptr++) + *code++ |= XCL_MAP; + memmove(code + (32 / sizeof(pcre_uchar)), code, + IN_UCHARS(class_uchardata - code)); ++ if (negate_class && !xclass_has_prop) ++ for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; + memcpy(code, classbits, 32); + code = class_uchardata + (32 / sizeof(pcre_uchar)); + } +diff --git a/pcre_exec.c b/pcre_exec.c +index 74a2b49..48d9199 100644 +--- a/pcre_exec.c ++++ b/pcre_exec.c +@@ -6732,18 +6732,8 @@ for(;;) + #ifndef COMPILE_PCRE8 + if (c > 255) c = 255; + #endif +- if ((start_bits[c/8] & (1 << (c&7))) == 0) +- { +- start_match++; +-#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +- /* In non 8-bit mode, the iteration will stop for +- characters > 255 at the beginning or not stop at all. */ +- if (utf) +- ACROSSCHAR(start_match < end_subject, *start_match, +- start_match++); +-#endif +- } +- else break; ++ if ((start_bits[c/8] & (1 << (c&7))) != 0) break; ++ start_match++; + } + } + } /* Starting optimizations */ +diff --git a/pcre_internal.h b/pcre_internal.h +index 389848f..10bd911 100644 +--- a/pcre_internal.h ++++ b/pcre_internal.h +@@ -1849,8 +1849,9 @@ table. */ + /* Flag bits and data types for the extended class (OP_XCLASS) for classes that + contain characters with values greater than 255. */ + +-#define XCL_NOT 0x01 /* Flag: this is a negative class */ +-#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ ++#define XCL_NOT 0x01 /* Flag: this is a negative class */ ++#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ ++#define XCL_HASPROP 0x04 /* Flag: property checks are present. 
*/ + + #define XCL_END 0 /* Marks end of individual items */ + #define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ +diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c +index 5f74833..e425b91 100644 +--- a/pcre_jit_compile.c ++++ b/pcre_jit_compile.c +@@ -2877,7 +2877,7 @@ if (firstline) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + } + +-static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline) ++static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline) + { + DEFINE_COMPILER; + struct sljit_label *start; +@@ -2908,7 +2908,7 @@ JUMPHERE(jump); + #endif + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); +-OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits); ++OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + found = JUMP(SLJIT_C_NOT_ZERO); +@@ -3194,8 +3194,40 @@ switch(ranges[0]) + case 2: + if (readch) + read_char(common); +- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); +- add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); ++ if (ranges[2] + 1 != ranges[3]) ++ { ++ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); ++ add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); ++ } ++ else ++ add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? 
SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); ++ return TRUE; ++ ++ case 3: ++ if (readch) ++ read_char(common); ++ if (ranges[1] != 0) ++ { ++ add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); ++ if (ranges[2] + 1 != ranges[3]) ++ { ++ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); ++ add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); ++ } ++ else ++ add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); ++ } ++ else ++ { ++ add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2])); ++ if (ranges[3] + 1 != ranges[4]) ++ { ++ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3]); ++ add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3])); ++ } ++ else ++ add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3])); ++ } + return TRUE; + + case 4: +@@ -3264,15 +3296,15 @@ if (bit != 0) + ranges[0] = length; + } + +-static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks) ++static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) + { + int ranges[2 + MAX_RANGE_SIZE]; + pcre_uint8 bit, cbit, all; + int i, byte, length = 0; + + bit = bits[0] & 0x1; +-ranges[1] = bit; +-/* Can be 0 or 255. */ ++ranges[1] = !invert ? bit : (bit ^ 0x1); ++/* All bits will be zero or one (since bit is zero or one). */ + all = -bit; + + for (i = 0; i < 256; ) +@@ -3693,7 +3725,7 @@ static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, + { + DEFINE_COMPILER; + jump_list *found = NULL; +-jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks; ++jump_list **list = (cc[0] & XCL_NOT) == 0 ? 
&found : backtracks; + pcre_int32 c, charoffset; + const pcre_uint32 *other_cases; + struct sljit_jump *jump = NULL; +@@ -3712,36 +3744,62 @@ pcre_int32 typeoffset; + detect_partial_match(common, backtracks); + read_char(common); + +-if ((*cc++ & XCL_MAP) != 0) ++cc++; ++if ((cc[-1] & XCL_HASPROP) == 0) + { +- OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); +-#ifndef COMPILE_PCRE8 +- jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); +-#elif defined SUPPORT_UTF +- if (common->utf) +- jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); ++ if ((cc[-1] & XCL_MAP) != 0) ++ { ++ OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); ++#ifdef SUPPORT_UCP ++ charsaved = TRUE; + #endif ++ if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks)) ++ { ++ jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); ++ ++ OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); ++ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); ++ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); ++ OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); ++ OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); ++ add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO)); ++ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); ++ ++ JUMPHERE(jump); ++ } ++ else ++ add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff)); + +- if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list)) ++ OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); ++ cc += 32 / sizeof(pcre_uchar); ++ } ++ else ++ add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? 
backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff)); ++ } ++else if ((cc[-1] & XCL_MAP) != 0) ++ { ++ OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); ++#ifdef SUPPORT_UCP ++ charsaved = TRUE; ++#endif ++ if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list)) + { ++#ifdef COMPILE_PCRE8 ++ SLJIT_ASSERT(common->utf); ++#endif ++ jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); ++ + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO)); +- } + +-#ifndef COMPILE_PCRE8 +- JUMPHERE(jump); +-#elif defined SUPPORT_UTF +- if (common->utf) + JUMPHERE(jump); +-#endif ++ } ++ + OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); +-#ifdef SUPPORT_UCP +- charsaved = TRUE; +-#endif + cc += 32 / sizeof(pcre_uchar); + } + +@@ -4278,7 +4336,7 @@ switch(type) + #ifdef SUPPORT_UCP + case OP_NOTPROP: + case OP_PROP: +- propdata[0] = 0; ++ propdata[0] = XCL_HASPROP; + propdata[1] = type == OP_NOTPROP ? 
XCL_NOTPROP : XCL_PROP; + propdata[2] = cc[0]; + propdata[3] = cc[1]; +@@ -4636,7 +4694,7 @@ switch(type) + case OP_NCLASS: + detect_partial_match(common, backtracks); + read_char(common); +- if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks)) ++ if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks)) + return cc + 32 / sizeof(pcre_uchar); + + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 +@@ -8033,7 +8091,7 @@ if ((re->options & PCRE_ANCHORED) == 0) + else if ((re->flags & PCRE_STARTLINE) != 0) + fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); + else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) +- fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); ++ fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); + } + } + if (common->req_char_ptr != 0) +diff --git a/pcre_printint.c b/pcre_printint.c +index c6dcbe6..a71b3b6 100644 +--- a/pcre_printint.c ++++ b/pcre_printint.c +@@ -619,7 +619,9 @@ for(;;) + int i; + unsigned int min, max; + BOOL printmap; ++ BOOL invertmap = FALSE; + pcre_uint8 *map; ++ pcre_uint8 inverted_map[32]; + + fprintf(f, " ["); + +@@ -628,7 +630,12 @@ for(;;) + extra = GET(code, 1); + ccode = code + LINK_SIZE + 1; + printmap = (*ccode & XCL_MAP) != 0; +- if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^"); ++ if ((*ccode & XCL_NOT) != 0) ++ { ++ invertmap = (*ccode & XCL_HASPROP) == 0; ++ fprintf(f, "^"); ++ } ++ ccode++; + } + else + { +@@ -641,6 +648,12 @@ for(;;) + if (printmap) + { + map = (pcre_uint8 *)ccode; ++ if (invertmap) ++ { ++ for (i = 0; i < 32; i++) inverted_map[i] = ~map[i]; ++ map = inverted_map; ++ } ++ + for (i = 0; i < 256; i++) + { + if ((map[i/8] & (1 << (i&7))) != 0) +diff --git a/pcre_study.c b/pcre_study.c +index 12d2a66..2d11d87 100644 +--- a/pcre_study.c ++++ b/pcre_study.c +@@ -835,9 +835,6 @@ do + 
case OP_SOM: + case OP_THEN: + case OP_THEN_ARG: +-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 +- case OP_XCLASS: +-#endif + return SSB_FAIL; + + /* We can ignore word boundary tests. */ +@@ -1221,6 +1218,16 @@ do + with a value >= 0xc4 is a potentially valid starter because it starts a + character with a value > 255. */ + ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ case OP_XCLASS: ++ if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0) ++ return SSB_FAIL; ++ /* All bits are set. */ ++ if ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0 && (tcode[1 + LINK_SIZE] & XCL_NOT) != 0) ++ return SSB_FAIL; ++#endif ++ /* Fall through */ ++ + case OP_NCLASS: + #if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (utf) +@@ -1237,8 +1244,21 @@ do + case OP_CLASS: + { + pcre_uint8 *map; +- tcode++; +- map = (pcre_uint8 *)tcode; ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ map = NULL; ++ if (*tcode == OP_XCLASS) ++ { ++ if ((tcode[1 + LINK_SIZE] & XCL_MAP) != 0) ++ map = (pcre_uint8 *)(tcode + 1 + LINK_SIZE + 1); ++ tcode += GET(tcode, 1); ++ } ++ else ++#endif ++ { ++ tcode++; ++ map = (pcre_uint8 *)tcode; ++ tcode += 32 / sizeof(pcre_uchar); ++ } + + /* In UTF-8 mode, the bits in a bit map correspond to character + values, not to byte values. However, the bit map we are constructing is +@@ -1246,31 +1266,35 @@ do + value is > 127. In fact, there are only two possible starting bytes for + characters in the range 128 - 255. 
*/ + +-#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +- if (utf) ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ if (map != NULL) ++#endif + { +- for (c = 0; c < 16; c++) start_bits[c] |= map[c]; +- for (c = 128; c < 256; c++) ++#if defined SUPPORT_UTF && defined COMPILE_PCRE8 ++ if (utf) + { +- if ((map[c/8] && (1 << (c&7))) != 0) ++ for (c = 0; c < 16; c++) start_bits[c] |= map[c]; ++ for (c = 128; c < 256; c++) + { +- int d = (c >> 6) | 0xc0; /* Set bit for this starter */ +- start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ +- c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ ++ if ((map[c/8] && (1 << (c&7))) != 0) ++ { ++ int d = (c >> 6) | 0xc0; /* Set bit for this starter */ ++ start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ ++ c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ ++ } + } + } +- } +- else ++ else + #endif +- { +- /* In non-UTF-8 mode, the two bit maps are completely compatible. */ +- for (c = 0; c < 32; c++) start_bits[c] |= map[c]; ++ { ++ /* In non-UTF-8 mode, the two bit maps are completely compatible. */ ++ for (c = 0; c < 32; c++) start_bits[c] |= map[c]; ++ } + } + + /* Advance past the bit map, and act on what follows. For a zero + minimum repeat, continue; otherwise stop processing. */ + +- tcode += 32 / sizeof(pcre_uchar); + switch (*tcode) + { + case OP_CRSTAR: +diff --git a/pcre_xclass.c b/pcre_xclass.c +index dd7008a..bbde1c3 100644 +--- a/pcre_xclass.c ++++ b/pcre_xclass.c +@@ -81,6 +81,11 @@ additional data. 
*/ + + if (c < 256) + { ++ if ((*data & XCL_HASPROP) == 0) ++ { ++ if ((*data & XCL_MAP) == 0) return negated; ++ return (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0; ++ } + if ((*data & XCL_MAP) != 0 && + (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0) + return !negated; /* char found */ +diff --git a/testdata/saved16BE-1 b/testdata/saved16BE-1 +index 297f2f2f06300780b83aa64af93e56e7174bc745..4e9a5fffcd14701d14611fa7e47116175c9e152d 100644 +GIT binary patch +delta 97 +zcmbQlJc)V2kFftx@b~}m{~(MG7#KMi7?^G_-2czOz);Gd&XB_(!NC0g-~a#L(Ul_c +JC;KtF004^`M^*p; + +delta 74 +zcmbQlJc)V24{HV}U}CUf0AUQk$icwCbc5mkf1pe$gE~VFg9HOJ10w?i$K*mrnaTc) +F&Hz>236=l= + +diff --git a/testdata/saved16LE-1 b/testdata/saved16LE-1 +index deb44919bbbc263c227c51454f2dd25d582c7a6b..a3dfe05565b5e3f8e8fffd4791531165ca3fa95c 100644 +GIT binary patch +delta 97 +zcmbQlJc)V2kFftx@b~}m{~(MG7#SEim>3vt{J+n@P|Bdrki#Isz|8Rf-~a#L(Ul_c +JC;KtF004`CM^*p; + +delta 74 +zcmbQlJc)V24{HV}U}CUf0AX~%$iTqC#K3Ul|9u9AQU-N~90mynW(Ee3@Z>^9>B)YK +FE&x_M36%f< + +diff --git a/testdata/saved32BE-1 b/testdata/saved32BE-1 +index 42af7b42b026869ac20fae8a78430470c298a8e3..3da404b8cd5982b17eb3e1e84baf8f83d3b4971e 100644 +GIT binary patch +delta 120 +zcmZ3$vVui`fr0T00|SFjfOC*5h&@q2l$DQxoq=gGBa`GrYf+Z}K(H}{g;4>-0D-^% +dkApFUhae|cFzP7$|M&m@cZ3)QYw{7s7yw1mPJ93W + +delta 84 +zcmZ3%vVcW^fr0T20|SFjfOC*5h&@q2l$D2poq=hhks1q7f?;C}3nM>>0Rl`679c!X +ckV#~6GovOe10w?i2aqQ=S%Hat@;SyR05;DJ9{>OV + +diff --git a/testdata/saved32LE-1 b/testdata/saved32LE-1 +index a4044fd17483cc002bb94b3ba83e7cb347ec3a49..6ba74dafd9606361f94e89b2c0cc16892108f6e7 100644 +GIT binary patch +delta 109 +zcmZ3$vVw(|fr0T00|SGOYmjrmmx;XMtb7dY3`~<5nZ!3*C@?Dghl0QVkN*c@bTGMs +VQAgqbzyJTgqst)iCm&&q0RVZWNt6Hp + +delta 71 +zcmZ3%vVet`fr0T20|SGOYmjrmn~A*QtUL_t3``ph^%?nr!eGF}V8H;y6ZJ(Ww=in3 +VGB7eQa7<=olAh?mGWh~y1OUEm3|0UD + +diff --git a/testdata/testoutput17 b/testdata/testoutput17 +index ef82dc9..071539a 100644 +--- a/testdata/testoutput17 ++++ 
b/testdata/testoutput17 +@@ -290,7 +290,7 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x09 \x20 \xa0 \xff + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + 0: \x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\xa0\x{2000} +@@ -346,7 +346,7 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d +diff --git a/testdata/testoutput23 b/testdata/testoutput23 +index 7b3634c..f7b6f31 100644 +--- a/testdata/testoutput23 ++++ b/testdata/testoutput23 +@@ -14,7 +14,7 @@ Failed: character value in \x{...} sequence is too large at offset 8 + /[\H]/BZSI + ------------------------------------------------------------------ + Bra +- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}] ++ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}] + Ket + End + ------------------------------------------------------------------ +@@ -23,12 +23,25 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b ++ \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a ++ \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 ++ : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ ++ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 ++ \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f ++ \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e ++ \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae ++ \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd ++ \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc ++ \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb ++ \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea ++ \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 ++ \xfa \xfb \xfc \xfd \xfe \xff + + /[\V]/BZSI + ------------------------------------------------------------------ + Bra +- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}] ++ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}] + Ket + End + ------------------------------------------------------------------ +@@ -37,6 +50,19 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e ++ \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d ++ \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ++ ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c ++ d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 ++ \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 ++ \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 ++ \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 ++ \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf ++ \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce ++ \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd ++ \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec ++ \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb ++ \xfc \xfd \xfe \xff + + /-- End of testinput23 --/ +diff --git a/testdata/testoutput25 b/testdata/testoutput25 +index 2a4066d..16f375b 100644 +--- a/testdata/testoutput25 ++++ b/testdata/testoutput25 +@@ -51,7 +51,7 @@ Need char = \x{800000} + /[\H]/BZSI + ------------------------------------------------------------------ + Bra +- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}] ++ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}] + Ket + End + ------------------------------------------------------------------ +@@ -60,12 +60,25 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b ++ \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a ++ \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 ++ : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ ++ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 ++ \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f ++ \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e ++ \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae ++ \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd ++ \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc ++ \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb ++ \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea ++ \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 ++ \xfa \xfb \xfc \xfd \xfe \xff + + /[\V]/BZSI + ------------------------------------------------------------------ + Bra +- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffffffff}] ++ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}] + Ket + End + ------------------------------------------------------------------ +@@ -74,6 +87,19 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e ++ \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d ++ \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ++ ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c ++ d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 ++ \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 ++ \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 ++ \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 ++ \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf ++ \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce ++ \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd ++ \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec ++ \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb ++ \xfc \xfd \xfe \xff + + /-- End of testinput25 --/ +diff --git a/testdata/testoutput5 b/testdata/testoutput5 +index 0e84054..f8578c0 100644 +--- a/testdata/testoutput5 ++++ b/testdata/testoutput5 +@@ -248,7 +248,7 @@ No match + /[z-\x{100}]/8DZ + ------------------------------------------------------------------ + Bra +- [z-\x{100}] ++ [z-\xff\x{100}] + Ket + End + ------------------------------------------------------------------ +@@ -786,7 +786,7 @@ No match + /[\H]/8BZ + ------------------------------------------------------------------ + Bra +- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] ++ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] + Ket + End + ------------------------------------------------------------------ +@@ -794,7 +794,7 @@ No match + /[\V]/8BZ + ------------------------------------------------------------------ + Bra +- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{10ffff}] ++ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}] + Ket + End + 
------------------------------------------------------------------ +@@ -1594,7 +1594,7 @@ Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7 + /[\H\x{d7ff}]+/8BZ + ------------------------------------------------------------------ + Bra +- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]+ ++ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]+ + Ket + End + ------------------------------------------------------------------ +@@ -1634,7 +1634,7 @@ Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7 + /[\V\x{d7ff}]+/8BZ + ------------------------------------------------------------------ + Bra +- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]+ ++ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]+ + Ket + End + ------------------------------------------------------------------ +diff --git a/testdata/testoutput7 b/testdata/testoutput7 +index e3f607c..b02f923 100644 +--- a/testdata/testoutput7 ++++ b/testdata/testoutput7 +@@ -124,7 +124,7 @@ No match + /[z-\x{100}]/8iDZ + ------------------------------------------------------------------ + Bra +- [Z\x{39c}\x{3bc}\x{1e9e}\x{178}z-\x{101}] ++ [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}] + Ket + End + ------------------------------------------------------------------ +@@ -162,7 +162,7 @@ No match + /[z-\x{100}]/8DZi + ------------------------------------------------------------------ + Bra +- [Z\x{39c}\x{3bc}\x{1e9e}\x{178}z-\x{101}] ++ [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}] + Ket + End + ------------------------------------------------------------------ +-- +2.7.4 + diff --git a/SOURCES/pcre-8.32-Add-support-for-PT_PXGRAPH-PT_PXPRINT-and-PT_PXPUNCT.patch 
b/SOURCES/pcre-8.32-Add-support-for-PT_PXGRAPH-PT_PXPRINT-and-PT_PXPUNCT.patch new file mode 100644 index 0000000..2b150c7 --- /dev/null +++ b/SOURCES/pcre-8.32-Add-support-for-PT_PXGRAPH-PT_PXPRINT-and-PT_PXPUNCT.patch @@ -0,0 +1,117 @@ +From 5c8999dd06c88ec49157f59dab561ce7d5a7c911 Mon Sep 17 00:00:00 2001 +From: zherczeg +Date: Fri, 15 Nov 2013 12:04:55 +0000 +Subject: [PATCH 2/2] Add support for PT_PXGRAPH, PT_PXPRINT, and PT_PXPUNCT in + JIT. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Petr Pisar: Ported to 8.32: + +commit 9885cc24e4771dbe6daadd2107e4552bb92aafa2 +Author: zherczeg +Date: Fri Nov 15 12:04:55 2013 +0000 + + Add support for PT_PXGRAPH, PT_PXPRINT, and PT_PXPUNCT in JIT. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1402 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + pcre_jit_compile.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 65 insertions(+) + +diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c +index c61c688..875947a 100644 +--- a/pcre_jit_compile.c ++++ b/pcre_jit_compile.c +@@ -3699,6 +3699,7 @@ const pcre_uint32 *other_cases; + struct sljit_jump *jump = NULL; + pcre_uchar *ccbegin; + int compares, invertcmp, numberofcmps; ++ + #ifdef SUPPORT_UCP + BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; + BOOL charsaved = FALSE; +@@ -3798,6 +3799,9 @@ while (*cc != XCL_END) + case PT_SPACE: + case PT_PXSPACE: + case PT_WORD: ++ case PT_PXGRAPH: ++ case PT_PXPRINT: ++ case PT_PXPUNCT: + needstype = TRUE; + needschar = TRUE; + break; +@@ -4068,6 +4072,67 @@ while (*cc != XCL_END) + } + jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + break; ++ ++ case PT_PXGRAPH: ++ /* C and Z groups are the farthest two groups. 
*/ ++ SET_TYPE_OFFSET(ucp_Ll); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); ++ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); ++ ++ jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); ++ ++ /* In case of ucp_Cf, we overwrite the result. */ ++ SET_CHAR_OFFSET(0x2066); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); ++ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); ++ ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); ++ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); ++ ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); ++ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); ++ ++ JUMPHERE(jump); ++ jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); ++ break; ++ ++ case PT_PXPRINT: ++ /* C and Z groups are the farthest two groups. */ ++ SET_TYPE_OFFSET(ucp_Ll); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); ++ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); ++ ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); ++ OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL); ++ ++ jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); ++ ++ /* In case of ucp_Cf, we overwrite the result. 
*/ ++ SET_CHAR_OFFSET(0x2066); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); ++ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); ++ ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); ++ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); ++ ++ JUMPHERE(jump); ++ jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); ++ break; ++ ++ case PT_PXPUNCT: ++ SET_TYPE_OFFSET(ucp_Sc); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); ++ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); ++ ++ SET_CHAR_OFFSET(0); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff); ++ OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); ++ ++ SET_TYPE_OFFSET(ucp_Pc); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); ++ OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); ++ jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); ++ break; + } + cc += 2; + } +-- +2.7.4 + diff --git a/SOURCES/pcre-8.32-Fix-bad-compile-of-Qx-.-where-x-is-any-character.patch b/SOURCES/pcre-8.32-Fix-bad-compile-of-Qx-.-where-x-is-any-character.patch new file mode 100644 index 0000000..77a7de8 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-bad-compile-of-Qx-.-where-x-is-any-character.patch @@ -0,0 +1,68 @@ +From 5561a6a57fc7f663e9d88e9d1beab4de8725f49a Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Wed, 18 Jun 2014 17:17:03 +0000 +Subject: [PATCH] Fix bad compile of [\Qx]... where x is any character. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1487 2f5784b3-3f2a-0410-8824-cb99058d5e15 +Signed-off-by: Petr Písař + +Petr Pisar: Ported to 8.32. 
+ +Signed-off-by: Petr Písař +--- + pcre_compile.c | 2 +- + testdata/testinput1 | 6 ++++++ + testdata/testoutput1 | 8 ++++++++ + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 7a2d7c7..8926099 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -4543,7 +4543,7 @@ for (;; ptr++) + whatever repeat count may follow. In the case of reqchar, save the + previous value for reinstating. */ + +- if (class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ++ if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) + { + ptr++; + zeroreqchar = reqchar; +diff --git a/testdata/testinput1 b/testdata/testinput1 +index e6d048a..a125dee 100644 +--- a/testdata/testinput1 ++++ b/testdata/testinput1 +@@ -5303,4 +5303,10 @@ name were given. ---/ + "(?>.*?)foo" + abcdfooxyz + ++/[\Qa]\E]+/ ++ aa]] ++ ++/[\Q]a\E]+/ ++ aa]] ++ + /-- End of testinput1 --/ +diff --git a/testdata/testoutput1 b/testdata/testoutput1 +index 8310e94..69b3d37 100644 +--- a/testdata/testoutput1 ++++ b/testdata/testoutput1 +@@ -8795,4 +8795,12 @@ No match + abcdfooxyz + 0: foo + ++/[\Qa]\E]+/ ++ aa]] ++ 0: aa]] ++ ++/[\Q]a\E]+/ ++ aa]] ++ 0: aa]] ++ + /-- End of testinput1 --/ +-- +1.9.3 + diff --git a/SOURCES/pcre-8.32-Fix-bad-starting-data-when-char-with-more-than-one-o.patch b/SOURCES/pcre-8.32-Fix-bad-starting-data-when-char-with-more-than-one-o.patch new file mode 100644 index 0000000..c033953 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-bad-starting-data-when-char-with-more-than-one-o.patch @@ -0,0 +1,94 @@ +From 17b16354612fa15b0d385e0ac57d0e0b4494ddac Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Wed, 18 Jun 2014 16:31:32 +0000 +Subject: [PATCH] Fix bad starting data when char with more than one other case + follows circumflex in multiline UTF mode. 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1485 2f5784b3-3f2a-0410-8824-cb99058d5e15 +Signed-off-by: Petr Písař + +Petr Pisar: Ported to 8.32. + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 3 ++- + testdata/testinput6 | 3 +++ + testdata/testinput7 | 3 +++ + testdata/testoutput6 | 4 ++++ + testdata/testoutput7 | 8 ++++++++ + 5 files changed, 20 insertions(+), 1 deletion(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 5f0c8ed..7a2d7c7 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -3991,7 +3991,8 @@ for (;; ptr++) + previous = NULL; + if ((options & PCRE_MULTILINE) != 0) + { +- if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; ++ if (firstcharflags == REQ_UNSET) ++ zerofirstcharflags = firstcharflags = REQ_NONE; + *code++ = OP_CIRCM; + } + else *code++ = OP_CIRC; +diff --git a/testdata/testinput6 b/testdata/testinput6 +index a4bfb3c..219a30e 100644 +--- a/testdata/testinput6 ++++ b/testdata/testinput6 +@@ -1316,4 +1316,7 @@ + /\x{017f}+/8i + \x{0053}\x{0073}\x{017f} + ++/^s?c/mi8 ++ scat ++ + /-- End of testinput6 --/ +diff --git a/testdata/testinput7 b/testdata/testinput7 +index b265f1f..252d246 100644 +--- a/testdata/testinput7 ++++ b/testdata/testinput7 +@@ -669,4 +669,7 @@ of case for anything other than the ASCII letters. 
--/ + /is{2}t/8i + iskt + ++/^s?c/mi8I ++ scat ++ + /-- End of testinput7 --/ +diff --git a/testdata/testoutput6 b/testdata/testoutput6 +index 0182746..090d23f 100644 +--- a/testdata/testoutput6 ++++ b/testdata/testoutput6 +@@ -2134,4 +2134,8 @@ No match + \x{0053}\x{0073}\x{017f} + 0: Ss\x{17f} + ++/^s?c/mi8 ++ scat ++ 0: sc ++ + /-- End of testinput6 --/ +diff --git a/testdata/testoutput7 b/testdata/testoutput7 +index 4f8b7b9..5f0f546 100644 +--- a/testdata/testoutput7 ++++ b/testdata/testoutput7 +@@ -1470,4 +1470,12 @@ No match + iskt + No match + ++/^s?c/mi8I ++Capturing subpattern count = 0 ++Options: caseless multiline utf ++First char at start or follows newline ++Need char = 'c' (caseless) ++ scat ++ 0: sc ++ + /-- End of testinput7 --/ +-- +1.9.3 + diff --git a/SOURCES/pcre-8.32-Fix-buffer-overflow-for-forward-reference-within-bac.patch b/SOURCES/pcre-8.32-Fix-buffer-overflow-for-forward-reference-within-bac.patch new file mode 100644 index 0000000..2ea98c8 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-buffer-overflow-for-forward-reference-within-bac.patch @@ -0,0 +1,68 @@ +From 644e923aee63090fd4f68169940327d9eebedc33 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Tue, 23 Jun 2015 16:34:53 +0000 +Subject: [PATCH] Fix buffer overflow for forward reference within backward + assertion with excess closing parenthesis. Bugzilla 1651. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This is upstream commit ported to 8.32: + +commit 764692f9aea9eab50fdba6cb537441d8b34c6c37 +Author: ph10 +Date: Tue Jun 23 16:34:53 2015 +0000 + + Fix buffer overflow for forward reference within backward assertion with excess + closing parenthesis. Bugzilla 1651. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1571 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +It fixes CVE-2015-5073. 
+ +Signed-off-by: Petr Písař +--- + pcre_compile.c | 2 +- + testdata/testinput2 | 2 ++ + testdata/testoutput2 | 3 +++ + 3 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index d570447..aa7f4d6 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -8275,7 +8275,7 @@ OP_RECURSE that are not fixed length get a diagnosic with a useful offset. The + exceptional ones forgo this. We scan the pattern to check that they are fixed + length, and set their lengths. */ + +-if (cd->check_lookbehind) ++if (errorcode == 0 && cd->check_lookbehind) + { + pcre_uchar *cc = (pcre_uchar *)codestart; + +diff --git a/testdata/testinput2 b/testdata/testinput2 +index fc6fe2f..2f460dc 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -3827,4 +3827,6 @@ settings of the anchored and startline bits. --/ + + /^(?:(?(1)x|)+)+$()/BZ + ++/(?=di(?<=(?1))|(?=(.))))/ ++ + /-- End of testinput2 --/ +diff --git a/testdata/testoutput2 b/testdata/testoutput2 +index 63b39c2..3c09fdf 100644 +--- a/testdata/testoutput2 ++++ b/testdata/testoutput2 +@@ -12540,4 +12540,7 @@ No match + End + ------------------------------------------------------------------ + ++/(?=di(?<=(?1))|(?=(.))))/ ++Failed: unmatched parentheses at offset 23 ++ + /-- End of testinput2 --/ +-- +2.5.5 + diff --git a/SOURCES/pcre-8.32-Fix-buffer-overflow-for-lookbehind-within-mutually-r.patch b/SOURCES/pcre-8.32-Fix-buffer-overflow-for-lookbehind-within-mutually-r.patch new file mode 100644 index 0000000..b5450c8 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-buffer-overflow-for-lookbehind-within-mutually-r.patch @@ -0,0 +1,66 @@ +From eb1ab619f3f36539e53e6a481a0aa168afa10596 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Tue, 19 May 2015 16:02:06 +0000 +Subject: [PATCH] Fix buffer overflow for lookbehind within mutually recursive + subroutines. 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported to 8.32: + +commit 9f2cf82ed9380bb4a726250833d6a0d295be8747 +Author: ph10 +Date: Tue May 19 16:02:06 2015 +0000 + + Fix buffer overflow for lookbehind within mutually recursive subroutines. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1560 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 2 +- + testdata/testinput2 | 3 +++ + testdata/testoutput2 | 3 +++ + 3 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 0215861..7d203eb 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -1811,7 +1811,7 @@ for (;;) + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + do cc += GET(cc, 1); while (*cc == OP_ALT); +- cc += PRIV(OP_lengths)[*cc]; ++ cc += 1 + LINK_SIZE; + break; + + /* Skip over things that don't match chars */ +diff --git a/testdata/testinput2 b/testdata/testinput2 +index bb11212..1b83cf2 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -3822,4 +3822,7 @@ settings of the anchored and startline bits. --/ + + /(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/ + ++".*?\h.+.\.+\R*?\xd(?i)(?=!(?=b`b`b`\`b\xa9b!)`\a`bbbbbbbbbbbbb`bbbbbbbbbbbb*R\x85bbbbbbb\C?{((?2)(?))(( ++\H){8(?<=(?1){29}\xa8bbbb\x16\xd\xc6^($(? +Date: Wed, 5 Aug 2015 15:38:32 +0000 +Subject: [PATCH 3/3] Fix buffer overflow for named references in (?| + situations. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported for 8.32: + +commit 7af8e8717def179fd7b69e173abd347c1a3547cb +Author: ph10 +Date: Wed Aug 5 15:38:32 2015 +0000 + + Fix buffer overflow for named references in (?| situations. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1585 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +This increases allocation size because of missing "Refactor the code +for creating the name/number table" patch. 
+ +Signed-off-by: Petr Písař +--- + pcre_compile.c | 47 +++++++++++++++++++++++++++++------------------ + pcre_internal.h | 1 + + testdata/testinput2 | 2 ++ + testdata/testoutput11-16 | 2 +- + testdata/testoutput11-32 | 2 +- + testdata/testoutput11-8 | 2 +- + testdata/testoutput2 | 2 ++ + 7 files changed, 37 insertions(+), 21 deletions(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 6777542..0215861 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -5796,6 +5796,7 @@ for (;; ptr++) + /* ------------------------------------------------------------ */ + case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */ + reset_bracount = TRUE; ++ cd->dupgroups = TRUE; /* Record (?| encountered */ + /* Fall through */ + + /* ------------------------------------------------------------ */ +@@ -6262,6 +6263,7 @@ for (;; ptr++) + if (lengthptr != NULL) + { + const pcre_uchar *temp; ++ recno = 0; + + if (namelen == 0) + { +@@ -6279,22 +6281,6 @@ for (;; ptr++) + goto FAILED; + } + +- /* The name table does not exist in the first pass, so we cannot +- do a simple search as in the code below. Instead, we have to scan the +- pattern to find the number. It is important that we scan it only as +- far as we have got because the syntax of named subpatterns has not +- been checked for the rest of the pattern, and find_parens() assumes +- correct syntax. In any case, it's a waste of resources to scan +- further. We stop the scan at the current point by temporarily +- adjusting the value of cd->endpattern. */ +- +- temp = cd->end_pattern; +- cd->end_pattern = ptr; +- recno = find_parens(cd, name, namelen, +- (options & PCRE_EXTENDED) != 0, utf); +- cd->end_pattern = temp; +- if (recno < 0) recno = 0; /* Forward ref; set dummy number */ +- + /* We have to allow for a named reference to a duplicated name (this + cannot be determined until the second pass). This needs an extra + 16-bit data item. 
*/ +@@ -6307,7 +6293,31 @@ for (;; ptr++) + real compile this will be picked up and the reference wrapped with + OP_ONCE to make it atomic, so we must space in case this occurs. */ + +- if (recno == 0) *lengthptr += 2 + 2*LINK_SIZE; ++ *lengthptr += 2 + 2*LINK_SIZE; ++ ++ /* It is even worse than that. The current reference may be to an ++ existing named group with a different number (so apparently not ++ recursive) but which later on is also attached to a group with the ++ current number. This can only happen if $(| has been previous ++ encountered. In that case, we allow yet more memory, just in case. ++ (Again, this is fixed "properly" in PCRE2. */ ++ ++ if (cd->dupgroups) *lengthptr += 2 + 2*LINK_SIZE; ++ ++ /* Otherwise, check for recursion here. The name table does not exist ++ in the first pass; instead we must scan the list of names encountered ++ so far in order to get the number. If the name is not found, leave ++ the value of recno as 0 for a forward reference. */ ++ ++ else ++ { ++ temp = cd->end_pattern; ++ cd->end_pattern = ptr; ++ recno = find_parens(cd, name, namelen, ++ (options & PCRE_EXTENDED) != 0, utf); ++ cd->end_pattern = temp; ++ if (recno < 0) recno = 0; /* Forward ref; set dummy number */ ++ } + } + + /* In the real compile, seek the name in the table. 
We check the name +@@ -8087,6 +8097,7 @@ cd->bracount = cd->final_bracount = 0; + cd->names_found = 0; + cd->name_entry_size = 0; + cd->name_table = NULL; ++cd->dupgroups = FALSE; + cd->start_code = cworkspace; + cd->hwm = cworkspace; + cd->start_workspace = cworkspace; +@@ -8116,7 +8127,7 @@ if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; + + DPRINTF(("end pre-compile: length=%d workspace=%d\n", length, + (int)(cd->hwm - cworkspace))); +- ++ + if (length > MAX_PATTERN_SIZE) + { + errorcode = ERR20; +diff --git a/pcre_internal.h b/pcre_internal.h +index f3cb001..536b3d8 100644 +--- a/pcre_internal.h ++++ b/pcre_internal.h +@@ -2410,6 +2410,7 @@ typedef struct compile_data { + BOOL had_accept; /* (*ACCEPT) encountered */ + BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ + BOOL check_lookbehind; /* Lookbehinds need later checking */ ++ BOOL dupgroups; /* Duplicate groups exist: (?| found */ + int nltype; /* Newline type */ + int nllen; /* Newline string length */ + pcre_uchar nl[4]; /* Newline string when fixed length */ +diff --git a/testdata/testinput2 b/testdata/testinput2 +index 53c4718..bb11212 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -3820,4 +3820,6 @@ settings of the anchored and startline bits. 
--/ + + /(?(?J)(?1(111111)11|)1|1|)(?()1)/ + ++/(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/ ++ + /-- End of testinput2 --/ +diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 +index 4115877..3cb3049 100644 +--- a/testdata/testoutput11-16 ++++ b/testdata/testoutput11-16 +@@ -232,7 +232,7 @@ Memory allocation (code space): 73 + ------------------------------------------------------------------ + + /(?Pa)...(?P=a)bbb(?P>a)d/BM +-Memory allocation (code space): 61 ++Memory allocation (code space): 77 + ------------------------------------------------------------------ + 0 24 Bra + 2 5 CBra 1 +diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 +index 3f66acd..10dee82 100644 +--- a/testdata/testoutput11-32 ++++ b/testdata/testoutput11-32 +@@ -232,7 +232,7 @@ Memory allocation (code space): 155 + ------------------------------------------------------------------ + + /(?Pa)...(?P=a)bbb(?P>a)d/BM +-Memory allocation (code space): 125 ++Memory allocation (code space): 157 + ------------------------------------------------------------------ + 0 24 Bra + 2 5 CBra 1 +diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8 +index 27e2c65..a1bd60a 100644 +--- a/testdata/testoutput11-8 ++++ b/testdata/testoutput11-8 +@@ -232,7 +232,7 @@ Memory allocation (code space): 45 + ------------------------------------------------------------------ + + /(?Pa)...(?P=a)bbb(?P>a)d/BM +-Memory allocation (code space): 38 ++Memory allocation (code space): 50 + ------------------------------------------------------------------ + 0 30 Bra + 3 7 CBra 1 +diff --git a/testdata/testoutput2 b/testdata/testoutput2 +index b0b46d7..2dd2381 100644 +--- a/testdata/testoutput2 ++++ b/testdata/testoutput2 +@@ -12517,4 +12517,6 @@ No match + + /(?(?J)(?1(111111)11|)1|1|)(?()1)/ + ++/(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/ ++ + /-- End of testinput2 --/ +-- +2.5.5 + diff --git 
a/SOURCES/pcre-8.32-Fix-bug-when-there-are-unset-groups-prior-to-ACCEPT-.patch b/SOURCES/pcre-8.32-Fix-bug-when-there-are-unset-groups-prior-to-ACCEPT-.patch new file mode 100644 index 0000000..243f189 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-bug-when-there-are-unset-groups-prior-to-ACCEPT-.patch @@ -0,0 +1,78 @@ +From 69285b9e6fa30790d5fc0bcb3d2bcb1713836093 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Wed, 5 Nov 2014 15:08:03 +0000 +Subject: [PATCH] Fix bug when there are unset groups prior to (*ACCEPT) within + a capturing group. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1510 2f5784b3-3f2a-0410-8824-cb99058d5e15 +Signed-off-by: Petr Písař + +Petr Pisar: Ported to 8.32 + +Signed-off-by: Petr Písař +--- + pcre_exec.c | 13 ++++++++++++- + testdata/testinput1 | 3 +++ + testdata/testoutput1 | 9 +++++++++ + 3 files changed, 24 insertions(+), 1 deletion(-) + +diff --git a/pcre_exec.c b/pcre_exec.c +index 478a026..74a2b49 100644 +--- a/pcre_exec.c ++++ b/pcre_exec.c +@@ -1539,7 +1539,18 @@ for (;;) + md->offset_vector[offset] = + md->offset_vector[md->offset_end - number]; + md->offset_vector[offset+1] = (int)(eptr - md->start_subject); +- if (offset_top <= offset) offset_top = offset + 2; ++ ++ /* If this group is at or above the current highwater mark, ensure that ++ any groups between the current high water mark and this group are marked ++ unset and then update the high water mark. */ ++ ++ if (offset >= offset_top) ++ { ++ register int *iptr = md->offset_vector + offset_top; ++ register int *iend = md->offset_vector + offset; ++ while (iptr < iend) *iptr++ = -1; ++ offset_top = offset + 2; ++ } + } + ecode += 1 + IMM2_SIZE; + break; +diff --git a/testdata/testinput1 b/testdata/testinput1 +index c248758..3e1061e 100644 +--- a/testdata/testinput1 ++++ b/testdata/testinput1 +@@ -5318,4 +5318,7 @@ name were given. 
---/ + '\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++' + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + ++/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ ++ 1234abcd ++ + /-- End of testinput1 --/ +diff --git a/testdata/testoutput1 b/testdata/testoutput1 +index 9741d1a..5015448 100644 +--- a/testdata/testoutput1 ++++ b/testdata/testoutput1 +@@ -8815,4 +8815,13 @@ No match + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + 0: NON QUOTED "QUOT""ED" AFTER + ++/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ ++ 1234abcd ++ 0: ++ 1: ++ 2: ++ 3: ++ 4: ++ 5: ++ + /-- End of testinput1 --/ +-- +2.1.0 + diff --git a/SOURCES/pcre-8.32-Fix-checking-whether-a-group-could-match-an-empty-st.patch b/SOURCES/pcre-8.32-Fix-checking-whether-a-group-could-match-an-empty-st.patch new file mode 100644 index 0000000..2d89008 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-checking-whether-a-group-could-match-an-empty-st.patch @@ -0,0 +1,239 @@ +From e3406ec06426fb9a7342541127d4c591d2446b6b Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Fri, 5 Jul 2013 10:38:37 +0000 +Subject: [PATCH 1/2] Fix checking whether a group could match an empty string +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In UTF mode, the code for checking whether a group could match an empty +string (which is used for indefinitely repeated groups to allow for +breaking an infinite loop) was broken when the group contained a repeated +negated single-character class with a character that occupied more than one +data item and had a minimum repetition of zero (for example, [^\x{100}]* in +UTF-8 mode). The effect was undefined: the group might or might not be +deemed as matching an empty string, or the program might have crashed. 
+ +Based on: + +commit 74d96caf6251eff2f6c6a3e879268ce2d2a6c9be +Author: ph10 +Date: Fri Jul 5 10:38:37 2013 +0000 + + Implement PCRE_INFO_MATCH_EMPTY and fix 2 bugs concerned with scanning for + empty string matching. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1348 2f5784b3-3f2a-0410-8824- +cb99058d5e15 + +Ported to 8.32. Needed for CVE-2015-2328 (bug #1285399). + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++----------- + 1 file changed, 81 insertions(+), 18 deletions(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 0de3747..ce72527 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -2353,15 +2353,23 @@ Arguments: + endcode points to where to stop + utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode + cd contains pointers to tables etc. ++ recurses chain of recurse_check to catch mutual recursion + + Returns: TRUE if what is matched could be empty + */ + ++typedef struct recurse_check { ++ struct recurse_check *prev; ++ const pcre_uchar *group; ++} recurse_check; ++ + static BOOL + could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode, +- BOOL utf, compile_data *cd) ++ BOOL utf, compile_data *cd, recurse_check *recurses) + { + register pcre_uchar c; ++recurse_check this_recurse; ++ + for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); + code < endcode; + code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE)) +@@ -2369,7 +2377,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); + const pcre_uchar *ccode; + + c = *code; +- ++ + /* Skip over forward assertions; the other assertions are skipped by + first_significant_code() with a TRUE final argument. 
*/ + +@@ -2389,25 +2397,50 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); + + if (c == OP_RECURSE) + { +- const pcre_uchar *scode; ++ const pcre_uchar *scode = cd->start_code + GET(code, 1); + BOOL empty_branch; + +- /* Test for forward reference */ ++ /* Test for forward reference or uncompleted reference. This is disabled ++ when called to scan a completed pattern by setting cd->start_workspace to ++ NULL. */ + +- for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE) +- if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE; ++ if (cd->start_workspace != NULL) ++ { ++ const pcre_uchar *tcode; ++ for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE) ++ if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE; ++ if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ ++ } ++ ++ /* If we are scanning a completed pattern, there are no forward references ++ and all groups are complete. We need to detect whether this is a recursive ++ call, as otherwise there will be an infinite loop. If it is a recursion, ++ just skip over it. Simple recursions are easily detected. For mutual ++ recursions we keep a chain on the stack. */ ++ ++ else ++ { ++ recurse_check *r = recurses; ++ const pcre_uchar *endgroup = scode; ++ ++ do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT); ++ if (code >= scode && code <= endgroup) continue; /* Simple recursion */ ++ ++ for (r = recurses; r != NULL; r = r->prev) ++ if (r->group == scode) break; ++ if (r != NULL) continue; /* Mutual recursion */ ++ } + +- /* Not a forward reference, test for completed backward reference */ ++ /* Completed reference; scan the referenced group, remembering it on the ++ stack chain to detect mutual recursions. 
*/ + + empty_branch = FALSE; +- scode = cd->start_code + GET(code, 1); +- if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ +- +- /* Completed backwards reference */ +- ++ this_recurse.prev = recurses; ++ this_recurse.group = scode; ++ + do + { +- if (could_be_empty_branch(scode, endcode, utf, cd)) ++ if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse)) + { + empty_branch = TRUE; + break; +@@ -2463,7 +2496,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); + empty_branch = FALSE; + do + { +- if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd)) ++ if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL)) + empty_branch = TRUE; + code += GET(code, 1); + } +@@ -2582,30 +2615,58 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); + return TRUE; + + /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO, +- MINUPTO, and POSUPTO may be followed by a multibyte character */ ++ MINUPTO, and POSUPTO and their caseless and negative versions may be ++ followed by a multibyte character. 
*/ + + #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 + case OP_STAR: + case OP_STARI: ++ case OP_NOTSTAR: ++ case OP_NOTSTARI: ++ + case OP_MINSTAR: + case OP_MINSTARI: ++ case OP_NOTMINSTAR: ++ case OP_NOTMINSTARI: ++ + case OP_POSSTAR: + case OP_POSSTARI: ++ case OP_NOTPOSSTAR: ++ case OP_NOTPOSSTARI: ++ + case OP_QUERY: + case OP_QUERYI: ++ case OP_NOTQUERY: ++ case OP_NOTQUERYI: ++ + case OP_MINQUERY: + case OP_MINQUERYI: ++ case OP_NOTMINQUERY: ++ case OP_NOTMINQUERYI: ++ + case OP_POSQUERY: + case OP_POSQUERYI: ++ case OP_NOTPOSQUERY: ++ case OP_NOTPOSQUERYI: ++ + if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]); + break; + + case OP_UPTO: + case OP_UPTOI: ++ case OP_NOTUPTO: ++ case OP_NOTUPTOI: ++ + case OP_MINUPTO: + case OP_MINUPTOI: ++ case OP_NOTMINUPTO: ++ case OP_NOTMINUPTOI: ++ + case OP_POSUPTO: + case OP_POSUPTOI: ++ case OP_NOTPOSUPTO: ++ case OP_NOTPOSUPTOI: ++ + if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]); + break; + #endif +@@ -2662,7 +2723,7 @@ could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode, + { + while (bcptr != NULL && bcptr->current_branch >= code) + { +- if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd)) ++ if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL)) + return FALSE; + bcptr = bcptr->outer; + } +@@ -5416,7 +5477,7 @@ for (;; ptr++) + pcre_uchar *scode = bracode; + do + { +- if (could_be_empty_branch(scode, ketcode, utf, cd)) ++ if (could_be_empty_branch(scode, ketcode, utf, cd, NULL)) + { + *bracode += OP_SBRA - OP_BRA; + break; +@@ -8172,10 +8233,12 @@ if (cd->hwm > cd->start_workspace) + } + } + +-/* If the workspace had to be expanded, free the new memory. */ ++/* If the workspace had to be expanded, free the new memory. Set the pointer to ++NULL to indicate that forward references have been filled in. 
*/ + + if (cd->workspace_size > COMPILE_WORK_SIZE) + (PUBL(free))((void *)cd->start_workspace); ++cd->start_workspace = NULL; + + /* Give an error if there's back reference to a non-existent capturing + subpattern. */ +-- +2.5.5 + diff --git a/SOURCES/pcre-8.32-Fix-compile-time-loop-for-recursive-reference-within.patch b/SOURCES/pcre-8.32-Fix-compile-time-loop-for-recursive-reference-within.patch new file mode 100644 index 0000000..2a6a25f --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-compile-time-loop-for-recursive-reference-within.patch @@ -0,0 +1,101 @@ +From 58c834052f0985406919de157297e0c340c5b2ed Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Fri, 8 Aug 2014 15:22:51 +0000 +Subject: [PATCH 2/2] Fix compile-time loop for recursive reference within a + group with an indefinite repeat. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1498 2f5784b3-3f2a-0410-8824-cb99058d5e15 +Signed-off-by: Petr Písař + +Petr Pisar: Ported to 8.32. + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 17 +++++++---------- + testdata/testinput1 | 6 ++++++ + testdata/testoutput1 | 10 ++++++++++ + 3 files changed, 23 insertions(+), 10 deletions(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index ce72527..86cd0c8 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -2398,6 +2398,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); + if (c == OP_RECURSE) + { + const pcre_uchar *scode = cd->start_code + GET(code, 1); ++ const pcre_uchar *endgroup = scode; + BOOL empty_branch; + + /* Test for forward reference or uncompleted reference. This is disabled +@@ -2412,20 +2413,16 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); + if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ + } + +- /* If we are scanning a completed pattern, there are no forward references +- and all groups are complete. 
We need to detect whether this is a recursive +- call, as otherwise there will be an infinite loop. If it is a recursion, +- just skip over it. Simple recursions are easily detected. For mutual +- recursions we keep a chain on the stack. */ ++ /* If the reference is to a completed group, we need to detect whether this ++ is a recursive call, as otherwise there will be an infinite loop. If it is ++ a recursion, just skip over it. Simple recursions are easily detected. For ++ mutual recursions we keep a chain on the stack. */ + ++ do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT); ++ if (code >= scode && code <= endgroup) continue; /* Simple recursion */ + else + { + recurse_check *r = recurses; +- const pcre_uchar *endgroup = scode; +- +- do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT); +- if (code >= scode && code <= endgroup) continue; /* Simple recursion */ +- + for (r = recurses; r != NULL; r = r->prev) + if (r->group == scode) break; + if (r != NULL) continue; /* Mutual recursion */ +diff --git a/testdata/testinput1 b/testdata/testinput1 +index 3e1061e..c45e1ba 100644 +--- a/testdata/testinput1 ++++ b/testdata/testinput1 +@@ -4967,6 +4967,12 @@ however, we need the complication for Perl. 
---/ + + /((?(R1)a+|(?1)b))/ + aaaabcde ++ ++/((?(R)a|(?1)))*/ ++ aaa ++ ++/((?(R)a|(?1)))+/ ++ aaa + + /a(*:any + name)/K +diff --git a/testdata/testoutput1 b/testdata/testoutput1 +index 5015448..f0eae49 100644 +--- a/testdata/testoutput1 ++++ b/testdata/testoutput1 +@@ -8271,6 +8271,16 @@ MK: M + aaaabcde + 0: aaaab + 1: aaaab ++ ++/((?(R)a|(?1)))*/ ++ aaa ++ 0: aaa ++ 1: a ++ ++/((?(R)a|(?1)))+/ ++ aaa ++ 0: aaa ++ 1: a + + /a(*:any + name)/K +-- +2.5.5 + diff --git a/SOURCES/pcre-8.32-Fix-compiler-crash-misbehaviour-for-zero-repeated-gr.patch b/SOURCES/pcre-8.32-Fix-compiler-crash-misbehaviour-for-zero-repeated-gr.patch new file mode 100644 index 0000000..e5f9309 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-compiler-crash-misbehaviour-for-zero-repeated-gr.patch @@ -0,0 +1,206 @@ +From a9d4fbce8772d129902eeb3d9a643e3f5943d818 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sat, 12 Jul 2014 18:22:54 +0000 +Subject: [PATCH] Fix compiler crash/misbehaviour for zero-repeated groups that + include a recursive back reference. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1495 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Petr Pisar: Ported to 8.32. +Note: The testoutput2 expects 'a?+', while it returns 'a?'. I raised +the anomaly to the upstream +.
+ +Signed-off-by: Petr Písař +--- + pcre_compile.c | 6 +++++- + testdata/testinput11 | 2 ++ + testdata/testinput2 | 2 ++ + testdata/testoutput11-16 | 24 ++++++++++++++++++++++++ + testdata/testoutput11-32 | 24 ++++++++++++++++++++++++ + testdata/testoutput11-8 | 24 ++++++++++++++++++++++++ + testdata/testoutput2 | 24 ++++++++++++++++++++++++ + 7 files changed, 105 insertions(+), 1 deletion(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 8926099..0de3747 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -7316,12 +7316,16 @@ for (;;) + + /* If it was a capturing subpattern, check to see if it contained any + recursive back references. If so, we must wrap it in atomic brackets. +- In any event, remove the block from the chain. */ ++ Because we are moving code along, we must ensure that any pending recursive ++ references are updated. In any event, remove the block from the chain. */ + + if (capnumber > 0) + { + if (cd->open_caps->flag) + { ++ *code = OP_END; ++ adjust_recurse(start_bracket, 1 + LINK_SIZE, ++ (options & PCRE_UTF8) != 0, cd, cd->hwm); + memmove(start_bracket + 1 + LINK_SIZE, start_bracket, + IN_UCHARS(code - start_bracket)); + *start_bracket = OP_ONCE; +diff --git a/testdata/testinput11 b/testdata/testinput11 +index 391ada7..7e8e542 100644 +--- a/testdata/testinput11 ++++ b/testdata/testinput11 +@@ -132,4 +132,6 @@ is required for these tests. --/ + + /abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/B + ++/(((a\2)|(a*)\g<-1>))*a?/B ++ + /-- End of testinput11 --/ +diff --git a/testdata/testinput2 b/testdata/testinput2 +index 0835a98..e6d0e87 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -3816,4 +3816,6 @@ settings of the anchored and startline bits. 
--/ + "(?(?=)?==)(((((((((?=)))))))))" + a + ++/(((a\2)|(a*)\g<-1>))*a?/BZ ++ + /-- End of testinput2 --/ +diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 +index dff72b9..de64e37 100644 +--- a/testdata/testoutput11-16 ++++ b/testdata/testoutput11-16 +@@ -710,4 +710,28 @@ Memory allocation (code space): 14 + 62 End + ------------------------------------------------------------------ + ++/(((a\2)|(a*)\g<-1>))*a?/B ++------------------------------------------------------------------ ++ 0 39 Bra ++ 2 Brazero ++ 3 32 SCBra 1 ++ 6 27 Once ++ 8 12 CBra 2 ++ 11 7 CBra 3 ++ 14 a ++ 16 \2 ++ 18 7 Ket ++ 20 11 Alt ++ 22 5 CBra 4 ++ 25 a* ++ 27 5 Ket ++ 29 22 Recurse ++ 31 23 Ket ++ 33 27 Ket ++ 35 32 KetRmax ++ 37 a? ++ 39 39 Ket ++ 41 End ++------------------------------------------------------------------ ++ + /-- End of testinput11 --/ +diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 +index 8335fb8..0d32e2f 100644 +--- a/testdata/testoutput11-32 ++++ b/testdata/testoutput11-32 +@@ -710,4 +710,28 @@ Memory allocation (code space): 28 + 62 End + ------------------------------------------------------------------ + ++/(((a\2)|(a*)\g<-1>))*a?/B ++------------------------------------------------------------------ ++ 0 39 Bra ++ 2 Brazero ++ 3 32 SCBra 1 ++ 6 27 Once ++ 8 12 CBra 2 ++ 11 7 CBra 3 ++ 14 a ++ 16 \2 ++ 18 7 Ket ++ 20 11 Alt ++ 22 5 CBra 4 ++ 25 a* ++ 27 5 Ket ++ 29 22 Recurse ++ 31 23 Ket ++ 33 27 Ket ++ 35 32 KetRmax ++ 37 a? 
++ 39 39 Ket ++ 41 End ++------------------------------------------------------------------ ++ + /-- End of testinput11 --/ +diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8 +index c1c85f9..9447fb6 100644 +--- a/testdata/testoutput11-8 ++++ b/testdata/testoutput11-8 +@@ -710,4 +710,28 @@ Memory allocation (code space): 10 + 76 End + ------------------------------------------------------------------ + ++/(((a\2)|(a*)\g<-1>))*a?/B ++------------------------------------------------------------------ ++ 0 57 Bra ++ 3 Brazero ++ 4 48 SCBra 1 ++ 9 40 Once ++ 12 18 CBra 2 ++ 17 10 CBra 3 ++ 22 a ++ 24 \2 ++ 27 10 Ket ++ 30 16 Alt ++ 33 7 CBra 4 ++ 38 a* ++ 40 7 Ket ++ 43 33 Recurse ++ 46 34 Ket ++ 49 40 Ket ++ 52 48 KetRmax ++ 55 a? ++ 57 57 Ket ++ 60 End ++------------------------------------------------------------------ ++ + /-- End of testinput11 --/ +diff --git a/testdata/testoutput2 b/testdata/testoutput2 +index 66c914f..2a2b577 100644 +--- a/testdata/testoutput2 ++++ b/testdata/testoutput2 +@@ -12491,4 +12491,28 @@ No set of starting bytes + a + No match + ++/(((a\2)|(a*)\g<-1>))*a?/BZ ++------------------------------------------------------------------ ++ Bra ++ Brazero ++ SCBra 1 ++ Once ++ CBra 2 ++ CBra 3 ++ a ++ \2 ++ Ket ++ Alt ++ CBra 4 ++ a* ++ Ket ++ Recurse ++ Ket ++ Ket ++ KetRmax ++ a? ++ Ket ++ End ++------------------------------------------------------------------ ++ + /-- End of testinput2 --/ +-- +2.1.0 + diff --git a/SOURCES/pcre-8.32-Fix-duplicate-names-memory-calculation-error.patch b/SOURCES/pcre-8.32-Fix-duplicate-names-memory-calculation-error.patch new file mode 100644 index 0000000..4fc6240 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-duplicate-names-memory-calculation-error.patch @@ -0,0 +1,125 @@ +From 7a8c5efada4bd9e9b625b041184299d4aff06bd5 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sun, 8 Feb 2015 16:43:13 +0000 +Subject: [PATCH 1/3] Fix duplicate names memory calculation error. 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported to 8.32: + +commit 46465068f53358d3ae6cca4d9db8d1ed3b9f1928 +Author: ph10 +Date: Sun Feb 8 16:43:13 2015 +0000 + + Fix duplicate names memory calculation error. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1521 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař + +XXX: Complete fix requires: + +commit b9a4e4087ac610792118c6dc7c8e83ab313da1ed +Author: ph10 +Date: Tue Sep 3 10:10:59 2013 +0000 + + Refactor the code for creating the name/number table. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1359 2f5784b3-3f2a-0410-8824- +cb99058d5e15 + +Otherwise the CVE-2015-8385 is fixed by `Fix buffer overflow for named +references in (?| situations' patch. + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 6 ++++++ + testdata/testinput2 | 2 ++ + testdata/testoutput11-16 | 2 +- + testdata/testoutput11-32 | 2 +- + testdata/testoutput11-8 | 2 +- + testdata/testoutput2 | 2 ++ + 6 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 86cd0c8..4506e56 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -6294,6 +6294,12 @@ for (;; ptr++) + (options & PCRE_EXTENDED) != 0, utf); + cd->end_pattern = temp; + if (recno < 0) recno = 0; /* Forward ref; set dummy number */ ++ ++ /* We have to allow for a named reference to a duplicated name (this ++ cannot be determined until the second pass). This needs an extra ++ 16-bit data item. */ ++ ++ *lengthptr += IMM2_SIZE; + } + + /* In the real compile, seek the name in the table. We check the name +diff --git a/testdata/testinput2 b/testdata/testinput2 +index e6d0e87..53c4718 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -3818,4 +3818,6 @@ settings of the anchored and startline bits. 
--/ + + /(((a\2)|(a*)\g<-1>))*a?/BZ + ++/(?(?J)(?1(111111)11|)1|1|)(?()1)/ ++ + /-- End of testinput2 --/ +diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 +index de64e37..4115877 100644 +--- a/testdata/testoutput11-16 ++++ b/testdata/testoutput11-16 +@@ -232,7 +232,7 @@ Memory allocation (code space): 73 + ------------------------------------------------------------------ + + /(?P<a>a)...(?P=a)bbb(?P>a)d/BM +-Memory allocation (code space): 57 ++Memory allocation (code space): 61 + ------------------------------------------------------------------ + 0 24 Bra + 2 5 CBra 1 +diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 +index 0d32e2f..3f66acd 100644 +--- a/testdata/testoutput11-32 ++++ b/testdata/testoutput11-32 +@@ -232,7 +232,7 @@ Memory allocation (code space): 155 + ------------------------------------------------------------------ + + /(?P<a>a)...(?P=a)bbb(?P>a)d/BM +-Memory allocation (code space): 117 ++Memory allocation (code space): 125 + ------------------------------------------------------------------ + 0 24 Bra + 2 5 CBra 1 +diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8 +index 9447fb6..27e2c65 100644 +--- a/testdata/testoutput11-8 ++++ b/testdata/testoutput11-8 +@@ -232,7 +232,7 @@ Memory allocation (code space): 45 + ------------------------------------------------------------------ + + /(?P<a>a)...(?P=a)bbb(?P>a)d/BM +-Memory allocation (code space): 34 ++Memory allocation (code space): 38 + ------------------------------------------------------------------ + 0 30 Bra + 3 7 CBra 1 +diff --git a/testdata/testoutput2 b/testdata/testoutput2 +index 2a2b577..b0b46d7 100644 +--- a/testdata/testoutput2 ++++ b/testdata/testoutput2 +@@ -12515,4 +12515,6 @@ No match + End + ------------------------------------------------------------------ + ++/(?(?J)(?1(111111)11|)1|1|)(?()1)/ ++ + /-- End of testinput2 --/ +-- +2.5.5 + diff --git a/SOURCES/pcre-8.32-Fix-forward-search-in-JIT-when-link-size-is-3-or-gre.patch
b/SOURCES/pcre-8.32-Fix-forward-search-in-JIT-when-link-size-is-3-or-gre.patch new file mode 100644 index 0000000..5998612 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-forward-search-in-JIT-when-link-size-is-3-or-gre.patch @@ -0,0 +1,50 @@ +From 693f81d9b37934fdb3a0b1de6d06cacbecaffb63 Mon Sep 17 00:00:00 2001 +From: zherczeg +Date: Fri, 18 Jan 2013 08:20:44 +0000 +Subject: [PATCH] Fix forward search in JIT when link size is 3 or greater. + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1239 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Petr Pisar: Ported to 8.32. + +diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c +index cc9f097..3b83340 100644 +--- a/pcre_jit_compile.c ++++ b/pcre_jit_compile.c +@@ -2573,7 +2573,7 @@ DEFINE_COMPILER; + struct sljit_label *start; + struct sljit_jump *quit; + pcre_uint32 chars[MAX_N_CHARS * 2]; +-pcre_uchar *cc = common->start + 1 + IMM2_SIZE; ++pcre_uchar *cc = common->start + 1 + LINK_SIZE; + int location = 0; + pcre_int32 len, c, bit, caseless; + int must_stop; +diff --git a/testdata/testinput12 b/testdata/testinput12 +index 7deba3c..92c9603 100644 +--- a/testdata/testinput12 ++++ b/testdata/testinput12 +@@ -86,4 +86,7 @@ and a couple of things that are different with JIT. 
--/ + + /.?(*THEN)/S!+I + ++/^12345678abcd/mS++ ++ 12345678abcd ++ + /-- End of testinput12 --/ +diff --git a/testdata/testoutput12 b/testdata/testoutput12 +index 559f48d..f3c0230 100644 +--- a/testdata/testoutput12 ++++ b/testdata/testoutput12 +@@ -178,4 +178,8 @@ Subject length lower bound = -1 + No set of starting bytes + JIT study was not successful + ++/^12345678abcd/mS++ ++ 12345678abcd ++ 0: 12345678abcd (JIT) ++ + /-- End of testinput12 --/ +-- +1.8.1 + diff --git a/SOURCES/pcre-8.32-Fix-group-empty-match-bug.patch b/SOURCES/pcre-8.32-Fix-group-empty-match-bug.patch new file mode 100644 index 0000000..3f8cdd6 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-group-empty-match-bug.patch @@ -0,0 +1,79 @@ +From 17a94e8c64fac96d8c95403f80b7ed61e8c8ff77 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Tue, 9 Jun 2015 17:45:25 +0000 +Subject: [PATCH] Fix group empty match bug. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported to 8.32: + +commit 382ca2bb4ffd423aceab05c4b1c1885cd3a48958 +Author: ph10 +Date: Tue Jun 9 17:45:25 2015 +0000 + + Fix group empty match bug. 
+ + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1566 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 2 +- + testdata/testinput2 | 2 ++ + testdata/testoutput2 | 18 ++++++++++++++++++ + 3 files changed, 21 insertions(+), 1 deletion(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 7d203eb..d570447 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -2477,7 +2477,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); + if (c == OP_BRA || c == OP_BRAPOS || + c == OP_CBRA || c == OP_CBRAPOS || + c == OP_ONCE || c == OP_ONCE_NC || +- c == OP_COND) ++ c == OP_COND || c == OP_SCOND) + { + BOOL empty_branch; + if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */ +diff --git a/testdata/testinput2 b/testdata/testinput2 +index 1b83cf2..fc6fe2f 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -3825,4 +3825,6 @@ settings of the anchored and startline bits. --/ + ".*?\h.+.\.+\R*?\xd(?i)(?=!(?=b`b`b`\`b\xa9b!)`\a`bbbbbbbbbbbbb`bbbbbbbbbbbb*R\x85bbbbbbb\C?{((?2)(?))(( + \H){8(?<=(?1){29}\xa8bbbb\x16\xd\xc6^($(? +Date: Sat, 16 May 2015 11:05:40 +0000 +Subject: [PATCH 2/3] Fix named forward reference to duplicate group number + overflow bug. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Port to 8.32: + +commit 2fa78aa4e42bcebf2d616c4ee89c012f29dc3447 +Author: ph10 +Date: Sat May 16 11:05:40 2015 +0000 + + Fix named forward reference to duplicate group number overflow bug. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1559 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 8 ++++++++ + testdata/testinput1 | 3 +++ + testdata/testoutput1 | 5 +++++ + 3 files changed, 16 insertions(+) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 4506e56..6777542 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -6300,6 +6300,14 @@ for (;; ptr++) + 16-bit data item. 
*/ + + *lengthptr += IMM2_SIZE; ++ ++ /* If this is a forward reference and we are within a (?|...) group, ++ the reference may end up as the number of a group which we are ++ currently inside, that is, it could be a recursive reference. In the ++ real compile this will be picked up and the reference wrapped with ++ OP_ONCE to make it atomic, so we must leave space in case this occurs. */ ++ ++ if (recno == 0) *lengthptr += 2 + 2*LINK_SIZE; + } + + /* In the real compile, seek the name in the table. We check the name +diff --git a/testdata/testinput1 b/testdata/testinput1 +index c45e1ba..9f513f6 100644 +--- a/testdata/testinput1 ++++ b/testdata/testinput1 +@@ -5327,4 +5327,7 @@ name were given. ---/ + /(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ + 1234abcd + ++"(?|(\k'Pm')|(?'Pm'))" ++ abcd ++ + /-- End of testinput1 --/ +diff --git a/testdata/testoutput1 b/testdata/testoutput1 +index f0eae49..0fbff41 100644 +--- a/testdata/testoutput1 ++++ b/testdata/testoutput1 +@@ -8834,4 +8834,9 @@ No match + 4: + 5: + ++"(?|(\k'Pm')|(?'Pm'))" ++ abcd ++ 0: ++ 1: ++ + /-- End of testinput1 --/ +-- +2.5.5 + diff --git a/SOURCES/pcre-8.32-Fix-run-for-ever-bug-for-deeply-nested-sequences.patch b/SOURCES/pcre-8.32-Fix-run-for-ever-bug-for-deeply-nested-sequences.patch new file mode 100644 index 0000000..82a5bff --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-run-for-ever-bug-for-deeply-nested-sequences.patch @@ -0,0 +1,80 @@ +From 9febe70a64d1669ec0151e51149af1e66bc04b5f Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Tue, 21 Jul 2015 13:47:22 +0000 +Subject: [PATCH] Fix "run for ever" bug for deeply nested [: sequences. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported to 8.32: + +commit 485a930d11bfd1ba8c292fe14976f0a3e12c2b93 +Author: ph10 +Date: Tue Jul 21 13:47:22 2015 +0000 + + Fix "run for ever" bug for deeply nested [: sequences.
+ + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1579 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 17 +++++------------ + testdata/testinput2 | 2 ++ + testdata/testoutput2 | 3 +++ + 3 files changed, 10 insertions(+), 12 deletions(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index aa7f4d6..8eb4b0f 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -2780,19 +2780,12 @@ for (++ptr; *ptr != CHAR_NULL; ptr++) + { + if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) + ptr++; +- else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; +- else ++ else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) || ++ *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; ++ else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) + { +- if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) +- { +- *endptr = ptr; +- return TRUE; +- } +- if (*ptr == CHAR_LEFT_SQUARE_BRACKET && +- (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || +- ptr[1] == CHAR_EQUALS_SIGN) && +- check_posix_syntax(ptr, endptr)) +- return FALSE; ++ *endptr = ptr; ++ return TRUE; + } + } + return FALSE; +diff --git a/testdata/testinput2 b/testdata/testinput2 +index 2f460dc..61535de 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -3829,4 +3829,6 @@ settings of the anchored and startline bits. 
--/ + + /(?=di(?<=(?1))|(?=(.))))/ + ++"[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" ++ + /-- End of testinput2 --/ +diff --git a/testdata/testoutput2 b/testdata/testoutput2 +index 3c09fdf..ab1b2ca 100644 +--- a/testdata/testoutput2 ++++ b/testdata/testoutput2 +@@ -12543,4 +12543,7 @@ No match + /(?=di(?<=(?1))|(?=(.))))/ + Failed: unmatched parentheses at offset 23 + ++"[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" ++Failed: missing terminating ] for character class at offset 353 ++ + /-- End of testinput2 --/ +-- +2.5.5 + diff --git a/SOURCES/pcre-8.32-Fix-two-buffer-over-read-issues-in-16-and-32-bit-mod.patch b/SOURCES/pcre-8.32-Fix-two-buffer-over-read-issues-in-16-and-32-bit-mod.patch new file mode 100644 index 0000000..39f1a54 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-two-buffer-over-read-issues-in-16-and-32-bit-mod.patch @@ -0,0 +1,107 @@ +From be692806851f4883b87fc814adc80ab1bb46455e Mon Sep 17 00:00:00 2001 +From: zherczeg +Date: Sat, 26 Jan 2013 17:51:43 +0000 +Subject: [PATCH] Fix two buffer over read issues in 16 and 32 bit modes. + Affects JIT only. + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1242 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Petr Pisar: Ported to 8.32. 
+ +diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c +index 3b83340..7dfe558 100644 +--- a/pcre_jit_compile.c ++++ b/pcre_jit_compile.c +@@ -2696,10 +2696,10 @@ if (firstline) + { + SLJIT_ASSERT(common->first_line_end != 0); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); +- OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, (location >> 1) - 1); ++ OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); + } + else +- OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1); ++ OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); + + start = LABEL(); + quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +@@ -2728,7 +2728,7 @@ JUMPHERE(quit); + if (firstline) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + else +- OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1); ++ OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); + return TRUE; + } + +@@ -3577,7 +3577,7 @@ do + #endif + + context->length -= IN_UCHARS(1); +-#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED ++#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16) + + /* Unaligned read is supported. 
*/ + if (othercasebit != 0 && othercasechar == cc) +@@ -3594,27 +3594,18 @@ do + + #if defined COMPILE_PCRE8 + if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) +-#elif defined COMPILE_PCRE16 ++#else + if (context->ucharptr >= 2 || context->length == 0) +-#elif defined COMPILE_PCRE32 +- if (1 /* context->ucharptr >= 1 || context->length == 0 */) + #endif + { +-#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 + if (context->length >= 4) + OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +-#if defined COMPILE_PCRE8 + else if (context->length >= 2) + OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); ++#if defined COMPILE_PCRE8 + else if (context->length >= 1) + OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +-#elif defined COMPILE_PCRE16 +- else if (context->length >= 2) +- OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +-#endif /* COMPILE_PCRE[8|16] */ +-#elif defined COMPILE_PCRE32 +- OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +-#endif /* COMPILE_PCRE[8|16|32] */ ++#endif /* COMPILE_PCRE8 */ + context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; + + switch(context->ucharptr) +@@ -3625,7 +3616,6 @@ do + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); + break; + +-#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 + case 2 / sizeof(pcre_uchar): + if (context->oc.asushort != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); +@@ -3640,8 +3630,6 @@ do + break; + #endif + +-#endif /* COMPILE_PCRE[8|16] */ +- + default: + SLJIT_ASSERT_STOP(); + break; +@@ -3651,8 +3639,8 @@ do + + #else + +- /* Unaligned read is unsupported. */ +- if (context->length > 0) ++ /* Unaligned read is unsupported or in 32 bit mode. 
*/ ++ if (context->length >= 1) + OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + + context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; +-- +1.8.1 + diff --git a/SOURCES/pcre-8.32-Fix-workspace-overflow-for-ACCEPT-with-deeply-nested.patch b/SOURCES/pcre-8.32-Fix-workspace-overflow-for-ACCEPT-with-deeply-nested.patch new file mode 100644 index 0000000..4ac57a7 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-workspace-overflow-for-ACCEPT-with-deeply-nested.patch @@ -0,0 +1,153 @@ +From 01611089a2be24b740e67d5fac8d7b44b2330302 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Wed, 10 Feb 2016 19:13:17 +0000 +Subject: [PATCH] Fix workspace overflow for (*ACCEPT) with deeply nested + parentheses. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported to 8.32: + +commit 943a5105b9fe2842851003f692c7077a6cdbeefe +Author: ph10 +Date: Wed Feb 10 19:13:17 2016 +0000 + + Fix workspace overflow for (*ACCEPT) with deeply nested parentheses. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1631 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 21 ++++++++++++++++++--- + pcre_internal.h | 2 +- + pcreposix.c | 3 ++- + testdata/testinput11 | 2 ++ + testdata/testoutput11-16 | 3 +++ + testdata/testoutput11-32 | 3 +++ + testdata/testoutput11-8 | 3 +++ + 7 files changed, 32 insertions(+), 5 deletions(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 8eb4b0f..746dc70 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -508,6 +508,7 @@ static const char error_texts[] = + "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" + "character value in \\u.... sequence is too large\0" + "invalid UTF-32 string\0" ++ "regular expression is too complicated\0" + ; + + /* Table to identify digits and hex digits. 
This is used when compiling +@@ -3881,7 +3882,8 @@ for (;; ptr++) + if (code > cd->start_workspace + cd->workspace_size - + WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ + { +- *errorcodeptr = ERR52; ++ *errorcodeptr = (code >= cd->start_workspace + cd->workspace_size)? ++ ERR52 : ERR87; + goto FAILED; + } + +@@ -5701,8 +5703,21 @@ for (;; ptr++) + cd->had_accept = TRUE; + for (oc = cd->open_caps; oc != NULL; oc = oc->next) + { +- *code++ = OP_CLOSE; +- PUT2INC(code, 0, oc->number); ++ if (lengthptr != NULL) ++ { ++#ifdef COMPILE_PCRE8 ++ *lengthptr += 1 + IMM2_SIZE; ++#elif defined COMPILE_PCRE16 ++ *lengthptr += 2 + IMM2_SIZE; ++#elif defined COMPILE_PCRE32 ++ *lengthptr += 4 + IMM2_SIZE; ++#endif ++ } ++ else ++ { ++ *code++ = OP_CLOSE; ++ PUT2INC(code, 0, oc->number); ++ } + } + setverb = *code++ = + (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT; +diff --git a/pcre_internal.h b/pcre_internal.h +index 536b3d8..157de08 100644 +--- a/pcre_internal.h ++++ b/pcre_internal.h +@@ -2270,7 +2270,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, + ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, + ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, + ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, +- ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT }; ++ ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR87, ERRCOUNT }; + + /* JIT compiling modes. The function list is indexed by them. */ + enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, +diff --git a/pcreposix.c b/pcreposix.c +index 15195c0..700676c 100644 +--- a/pcreposix.c ++++ b/pcreposix.c +@@ -162,7 +162,8 @@ static const int eint[] = { + /* 75 */ + REG_BADPAT, /* overlong MARK name */ + REG_BADPAT, /* character value in \u.... 
sequence is too large */ +- REG_BADPAT /* invalid UTF-32 string (should not occur) */ ++ REG_BADPAT, /* invalid UTF-32 string (should not occur) */ ++ REG_BADPAT /* pattern too complicated */ + }; + + /* Table of texts corresponding to POSIX error codes */ +diff --git a/testdata/testinput11 b/testdata/testinput11 +index 7e8e542..014c722 100644 +--- a/testdata/testinput11 ++++ b/testdata/testinput11 +@@ -134,4 +134,6 @@ is required for these tests. --/ + + /(((a\2)|(a*)\g<-1>))*a?/B + ++/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ ++ + /-- End of testinput11 --/ +diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 +index 3cb3049..6ae9e2f 100644 +--- a/testdata/testoutput11-16 ++++ b/testdata/testoutput11-16 +@@ -734,4 +734,7 @@ Memory allocation (code space): 14 + 41 End + ------------------------------------------------------------------ + ++/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ ++Failed: regular expression is too complicated at offset 490 ++ + /-- End of testinput11 --/ +diff --git a/testdata/testoutput11-32 
b/testdata/testoutput11-32 +index 10dee82..124e3d1 100644 +--- a/testdata/testoutput11-32 ++++ b/testdata/testoutput11-32 +@@ -734,4 +734,7 @@ Memory allocation (code space): 28 + 41 End + ------------------------------------------------------------------ + ++/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ ++Failed: missing ) at offset 509 ++ + /-- End of testinput11 --/ +diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8 +index a1bd60a..36f6e64 100644 +--- a/testdata/testoutput11-8 ++++ b/testdata/testoutput11-8 +@@ -734,4 +734,7 @@ Memory allocation (code space): 10 + 60 End + ------------------------------------------------------------------ + ++/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ ++Failed: missing ) at offset 509 ++ + /-- End of testinput11 --/ +-- +2.5.5 + diff --git a/SOURCES/pcre-8.32-Fix-zero-repeat-assertion-condition-bug.patch b/SOURCES/pcre-8.32-Fix-zero-repeat-assertion-condition-bug.patch new file mode 100644 index 0000000..6a37db8 --- /dev/null +++ b/SOURCES/pcre-8.32-Fix-zero-repeat-assertion-condition-bug.patch @@ 
-0,0 +1,72 @@ +From e2d6b400b13cde3cffc1933208399c223459b3ba Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Wed, 19 Nov 2014 20:57:13 +0000 +Subject: [PATCH] Fix zero-repeat assertion condition bug. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1513 2f5784b3-3f2a-0410-8824-cb99058d5e15 +Signed-off-by: Petr Písař + +Petr Pisar: Ported to 8.32. + +Signed-off-by: Petr Písař +--- + pcre_exec.c | 4 +++- + testdata/testinput2 | 6 ++++++ + testdata/testoutput2 | 10 ++++++++++ + 3 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/pcre_exec.c b/pcre_exec.c +index 05d0e52..a5326dc 100644 +--- a/pcre_exec.c ++++ b/pcre_exec.c +@@ -1459,7 +1459,9 @@ for (;;) + if (md->end_offset_top > offset_top) + offset_top = md->end_offset_top; /* Captures may have happened */ + condition = TRUE; +- ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); ++ ecode += 1 + LINK_SIZE; ++ if (*ecode == OP_BRAZERO) ecode++; ++ ecode += GET(ecode, 1); + while (*ecode == OP_ALT) ecode += GET(ecode, 1); + } + +diff --git a/testdata/testinput2 b/testdata/testinput2 +index 9670104..0835a98 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -3810,4 +3810,10 @@ settings of the anchored and startline bits. 
--/ + + /.?/S!I + ++"((?=(?(?=(?(?=(?(?=())))*)))))" ++ a ++ ++"(?(?=)?==)(((((((((?=)))))))))" ++ a ++ + /-- End of testinput2 --/ +diff --git a/testdata/testoutput2 b/testdata/testoutput2 +index e9cddf8..66c914f 100644 +--- a/testdata/testoutput2 ++++ b/testdata/testoutput2 +@@ -12481,4 +12481,14 @@ No need char + Subject length lower bound = -1 + No set of starting bytes + ++"((?=(?(?=(?(?=(?(?=())))*)))))" ++ a ++ 0: ++ 1: ++ 2: ++ ++"(?(?=)?==)(((((((((?=)))))))))" ++ a ++No match ++ + /-- End of testinput2 --/ +-- +1.9.3 + diff --git a/SOURCES/pcre-8.32-Update-POSIX-class-handling-in-UCP-mode.patch b/SOURCES/pcre-8.32-Update-POSIX-class-handling-in-UCP-mode.patch new file mode 100644 index 0000000..a17cc0f --- /dev/null +++ b/SOURCES/pcre-8.32-Update-POSIX-class-handling-in-UCP-mode.patch @@ -0,0 +1,1122 @@ +From e74dcd1eec9227fe23c06de2ff109e48695fd879 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sat, 2 Nov 2013 18:29:05 +0000 +Subject: [PATCH 1/2] Update POSIX class handling in UCP mode. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Petr Pisar: Ported to 8.32: + +commit fa3832825e3fe0d49f93658882775cdd6c26129e +Author: ph10 +Date: Sat Nov 2 18:29:05 2013 +0000 + + Update POSIX class handling in UCP mode. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1387 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +It also adjusts some test 7 outputs because 8.32 does not contain +auto-possessification improvement from + +commit 5f42224005b7d9a503903e3342ec7ada75590b07 +Author: ph10 +Date: Tue Oct 1 16:54:40 2013 +0000 + + Refactored auto-possessification code. 
+ + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1363 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + doc/pcrepattern.3 | 37 +++++-- + pcre_compile.c | 75 +++++++++++--- + pcre_internal.h | 16 ++- + pcre_printint.c | 59 ++++++++--- + pcre_xclass.c | 63 ++++++++++-- + testdata/testinput6 | 146 ++++++++++++++++++++++++++ + testdata/testinput7 | 10 ++ + testdata/testoutput6 | 286 ++++++++++++++++++++++++++++++++++++++++++++++++++- + testdata/testoutput7 | 117 ++++++++++++++++++++- + 9 files changed, 752 insertions(+), 57 deletions(-) + +diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3 +index c9c7b45..f638846 100644 +--- a/doc/pcrepattern.3 ++++ b/doc/pcrepattern.3 +@@ -861,8 +861,9 @@ the "mark" property always have the "extend" grapheme breaking property. + .sp + As well as the standard Unicode properties described above, PCRE supports four + more that make it possible to convert traditional escape sequences such as \ew +-and \es and POSIX character classes to use Unicode properties. PCRE uses these +-non-standard, non-Perl properties internally when PCRE_UCP is set. They are: ++and \es to use Unicode properties. PCRE uses these non-standard, non-Perl ++properties internally when PCRE_UCP is set. However, they may also be used ++explicitly. These properties are: + .sp + Xan Any alphanumeric character + Xps Any POSIX space character +@@ -873,6 +874,7 @@ Xan matches characters that have either the L (letter) or the N (number) + property. Xps matches the characters tab, linefeed, vertical tab, form feed, or + carriage return, and any other character that has the Z (separator) property. + Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the ++:qa + same characters as Xan, plus underscore. + . + . +@@ -1258,8 +1260,8 @@ supported, and an error is given if they are encountered. + By default, in UTF modes, characters with values greater than 128 do not match + any of the POSIX character classes. 
However, if the PCRE_UCP option is passed + to \fBpcre_compile()\fP, some of the classes are changed so that Unicode +-character properties are used. This is achieved by replacing the POSIX classes +-by other sequences, as follows: ++character properties are used. This is achieved by replacing certain POSIX ++classes by other sequences, as follows: + .sp + [:alnum:] becomes \ep{Xan} + [:alpha:] becomes \ep{L} +@@ -1270,9 +1272,30 @@ by other sequences, as follows: + [:upper:] becomes \ep{Lu} + [:word:] becomes \ep{Xwd} + .sp +-Negated versions, such as [:^alpha:] use \eP instead of \ep. The other POSIX +-classes are unchanged, and match only characters with code points less than +-128. ++Negated versions, such as [:^alpha:] use \eP instead of \ep. Three other POSIX ++classes are handled specially in UCP mode: ++.TP 10 ++[:graph:] ++This matches characters that have glyphs that mark the page when printed. In ++Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf ++properties, except for: ++.sp ++ U+061C Arabic Letter Mark ++ U+180E Mongolian Vowel Separator ++ U+2066 - U+2069 Various "isolate"s ++.sp ++.TP 10 ++[:print:] ++This matches the same characters as [:graph:] plus space characters that are ++not controls, that is, characters with the Zs property. ++.TP 10 ++[:punct:] ++This matches all characters that have the Unicode P (punctuation) property, ++plus those characters whose code points are less than 128 that have the S ++(Symbol) property. ++.P ++The other POSIX classes are unchanged, and match only characters with code ++points less than 128. + . + . + .SH "VERTICAL BAR" +diff --git a/pcre_compile.c b/pcre_compile.c +index 746dc70..3c75218 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -257,7 +257,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem); + now all in a single string, to reduce the number of relocations when a shared + library is dynamically loaded. 
The list of lengths is terminated by a zero + length entry. The first three must be alpha, lower, upper, as this is assumed +-for handling case independence. */ ++for handling case independence. The indices for graph, print, and punct are ++needed, so identify them. */ + + static const char posix_names[] = + STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 +@@ -268,6 +269,11 @@ static const char posix_names[] = + static const pcre_uint8 posix_name_lengths[] = { + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; + ++#define PC_GRAPH 8 ++#define PC_PRINT 9 ++#define PC_PUNCT 10 ++ ++ + /* Table of class bit maps for each POSIX class. Each class is formed from a + base map, with an optional addition or removal of another map. Then, for some + classes, there is some additional tweaking: for [:blank:] the vertical space +@@ -295,9 +301,8 @@ static const int posix_class_maps[] = { + cbit_xdigit,-1, 0 /* xdigit */ + }; + +-/* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class +-substitutes must be in the order of the names, defined above, and there are +-both positive and negative cases. NULL means no substitute. */ ++/* Table of substitutes for \d etc when PCRE_UCP is set. They are replaced by ++Unicode property escapes. */ + + #ifdef SUPPORT_UCP + static const pcre_uchar string_PNd[] = { +@@ -322,12 +327,18 @@ static const pcre_uchar string_pXwd[] = { + static const pcre_uchar *substitutes[] = { + string_PNd, /* \D */ + string_pNd, /* \d */ +- string_PXsp, /* \S */ /* NOTE: Xsp is Perl space */ +- string_pXsp, /* \s */ ++ string_PXsp, /* \S */ /* Xsp is Perl space, but from 8.34, Perl */ ++ string_pXsp, /* \s */ /* space and POSIX space are the same. */ + string_PXwd, /* \W */ + string_pXwd /* \w */ + }; + ++/* The POSIX class substitutes must be in the order of the POSIX class names, ++defined above, and there are both positive and negative cases. NULL means no ++general substitute of a Unicode property escape (\p or \P). 
However, for some ++POSIX classes (e.g. graph, print, punct) a special property code is compiled ++directly. */ ++ + static const pcre_uchar string_pL[] = { + CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, + CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +@@ -375,8 +386,8 @@ static const pcre_uchar *posix_substitutes[] = { + NULL, /* graph */ + NULL, /* print */ + NULL, /* punct */ +- string_pXps, /* space */ /* NOTE: Xps is POSIX space */ +- string_pXwd, /* word */ ++ string_pXps, /* space */ /* Xps is POSIX space, but from 8.34 */ ++ string_pXwd, /* word */ /* Perl and POSIX space are the same */ + NULL, /* xdigit */ + /* Negated cases */ + string_PL, /* ^alpha */ +@@ -390,8 +401,8 @@ static const pcre_uchar *posix_substitutes[] = { + NULL, /* ^graph */ + NULL, /* ^print */ + NULL, /* ^punct */ +- string_PXps, /* ^space */ /* NOTE: Xps is POSIX space */ +- string_PXwd, /* ^word */ ++ string_PXps, /* ^space */ /* Xps is POSIX space, but from 8.34 */ ++ string_PXwd, /* ^word */ /* Perl and POSIX space are the same */ + NULL /* ^xdigit */ + }; + #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *)) +@@ -4258,24 +4269,58 @@ for (;; ptr++) + posix_class = 0; + + /* When PCRE_UCP is set, some of the POSIX classes are converted to +- different escape sequences that use Unicode properties. */ ++ different escape sequences that use Unicode properties \p or \P. Others ++ that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP ++ directly. */ + + #ifdef SUPPORT_UCP + if ((options & PCRE_UCP) != 0) + { ++ unsigned int ptype = 0; + int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0); ++ ++ /* The posix_substitutes table specifies which POSIX classes can be ++ converted to \p or \P items. */ ++ + if (posix_substitutes[pc] != NULL) + { + nestptr = tempptr + 1; + ptr = posix_substitutes[pc] - 1; + continue; + } ++ ++ /* There are three other classes that generate special property calls ++ that are recognized only in an XCLASS. 
*/ ++ ++ else switch(posix_class) ++ { ++ case PC_GRAPH: ++ ptype = PT_PXGRAPH; ++ /* Fall through */ ++ case PC_PRINT: ++ if (ptype == 0) ptype = PT_PXPRINT; ++ /* Fall through */ ++ case PC_PUNCT: ++ if (ptype == 0) ptype = PT_PXPUNCT; ++ *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; ++ *class_uchardata++ = ptype; ++ *class_uchardata++ = 0; ++ ptr = tempptr + 1; ++ continue; ++ ++ /* For all other POSIX classes, no special action is taken in UCP ++ mode. Fall through to the non_UCP case. */ ++ ++ default: ++ break; ++ } + } + #endif +- /* In the non-UCP case, we build the bit map for the POSIX class in a +- chunk of local store because we may be adding and subtracting from it, +- and we don't want to subtract bits that may be in the main map already. +- At the end we or the result into the bit map that is being built. */ ++ /* In the non-UCP case, or when UCP makes no difference, we build the ++ bit map for the POSIX class in a chunk of local store because we may be ++ adding and subtracting from it, and we don't want to subtract bits that ++ may be in the main map already. At the end we or the result into the ++ bit map that is being built. */ + + posix_class *= 3; + +diff --git a/pcre_internal.h b/pcre_internal.h +index 157de08..389848f 100644 +--- a/pcre_internal.h ++++ b/pcre_internal.h +@@ -1836,6 +1836,16 @@ only. */ + #define PT_WORD 8 /* Word - L plus N plus underscore */ + #define PT_CLIST 9 /* Pseudo-property: match character list */ + ++/* The following special properties are used only in XCLASS items, when POSIX ++classes are specified and PCRE_UCP is set - in other words, for Unicode ++handling of these classes. They are not available via the \p or \P escapes like ++those in the above list, and so they do not take part in the autopossessifying ++table. 
*/ ++ ++#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */ ++#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */ ++#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */ ++ + /* Flag bits and data types for the extended class (OP_XCLASS) for classes that + contain characters with values greater than 255. */ + +@@ -1849,9 +1859,9 @@ contain characters with values greater than 255. */ + #define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ + + /* These are escaped items that aren't just an encoding of a particular data +-value such as \n. They must have non-zero values, as check_escape() returns +-0 for a data character. Also, they must appear in the same order as in the opcode +-definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it ++value such as \n. They must have non-zero values, as check_escape() returns 0 ++for a data character. Also, they must appear in the same order as in the ++opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it + corresponds to "." in DOTALL mode rather than an escape sequence. It is also + used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In + non-DOTALL mode, "." behaves like \N. +diff --git a/pcre_printint.c b/pcre_printint.c +index 10b5754..c6dcbe6 100644 +--- a/pcre_printint.c ++++ b/pcre_printint.c +@@ -608,9 +608,9 @@ for(;;) + print_prop(f, code, " ", ""); + break; + +- /* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no +- harm in having this code always here, and it makes it less messy without +- all those #ifdefs. */ ++ /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm ++ in having this code always here, and it makes it less messy without all ++ those #ifdefs. 
*/ + + case OP_CLASS: + case OP_NCLASS: +@@ -671,27 +671,52 @@ for(;;) + pcre_uchar ch; + while ((ch = *ccode++) != XCL_END) + { +- if (ch == XCL_PROP) +- { +- unsigned int ptype = *ccode++; +- unsigned int pvalue = *ccode++; +- fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue)); +- } +- else if (ch == XCL_NOTPROP) +- { +- unsigned int ptype = *ccode++; +- unsigned int pvalue = *ccode++; +- fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue)); +- } +- else ++ BOOL not = FALSE; ++ const char *notch = ""; ++ ++ switch(ch) + { ++ case XCL_NOTPROP: ++ not = TRUE; ++ notch = "^"; ++ /* Fall through */ ++ ++ case XCL_PROP: ++ { ++ unsigned int ptype = *ccode++; ++ unsigned int pvalue = *ccode++; ++ ++ switch(ptype) ++ { ++ case PT_PXGRAPH: ++ fprintf(f, "[:%sgraph:]", notch); ++ break; ++ ++ case PT_PXPRINT: ++ fprintf(f, "[:%sprint:]", notch); ++ break; ++ ++ case PT_PXPUNCT: ++ fprintf(f, "[:%spunct:]", notch); ++ break; ++ ++ default: ++ fprintf(f, "\\%c{%s}", (not? 'P':'p'), ++ get_ucpname(ptype, pvalue)); ++ break; ++ } ++ } ++ break; ++ ++ default: + ccode += 1 + print_char(f, ccode, utf); + if (ch == XCL_RANGE) + { + fprintf(f, "-"); + ccode += 1 + print_char(f, ccode, utf); + } +- } ++ break; ++ } + } + } + +diff --git a/pcre_xclass.c b/pcre_xclass.c +index fa73cd8..dd7008a 100644 +--- a/pcre_xclass.c ++++ b/pcre_xclass.c +@@ -128,57 +128,102 @@ while ((t = *data++) != XCL_END) + else /* XCL_PROP & XCL_NOTPROP */ + { + const ucd_record *prop = GET_UCD(c); ++ BOOL isprop = t == XCL_PROP; + + switch(*data) + { + case PT_ANY: +- if (t == XCL_PROP) return !negated; ++ if (isprop) return !negated; + break; + + case PT_LAMP: + if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || +- prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; ++ prop->chartype == ucp_Lt) == isprop) return !negated; + break; + + case PT_GC: +- if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP)) ++ if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop) + 
return !negated; + break; + + case PT_PC: +- if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated; ++ if ((data[1] == prop->chartype) == isprop) return !negated; + break; + + case PT_SC: +- if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated; ++ if ((data[1] == prop->script) == isprop) return !negated; + break; + + case PT_ALNUM: + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || +- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP)) ++ PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop) + return !negated; + break; + + case PT_SPACE: /* Perl space */ + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) +- == (t == XCL_PROP)) ++ == isprop) + return !negated; + break; + + case PT_PXSPACE: /* POSIX space */ + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || +- c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP)) ++ c == CHAR_FF || c == CHAR_CR) == isprop) + return !negated; + break; + + case PT_WORD: + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) +- == (t == XCL_PROP)) ++ == isprop) + return !negated; + break; ++ ++ /* The following three properties can occur only in an XCLASS, as there ++ is no \p or \P coding for them. */ ++ ++ /* Graphic character. Implement this as not Z (space or separator) and ++ not C (other), except for Cf (format) with a few exceptions. This seems ++ to be what Perl does. 
The exceptional characters are: ++ ++ U+061C Arabic Letter Mark ++ U+180E Mongolian Vowel Separator ++ U+2066 - U+2069 Various "isolate"s ++ */ ++ ++ case PT_PXGRAPH: ++ if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z && ++ (PRIV(ucp_gentype)[prop->chartype] != ucp_C || ++ (prop->chartype == ucp_Cf && ++ c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069)) ++ )) == isprop) ++ return !negated; ++ break; ++ ++ /* Printable character: same as graphic, with the addition of Zs, i.e. ++ not Zl and not Zp, and U+180E. */ ++ ++ case PT_PXPRINT: ++ if ((prop->chartype != ucp_Zl && ++ prop->chartype != ucp_Zp && ++ (PRIV(ucp_gentype)[prop->chartype] != ucp_C || ++ (prop->chartype == ucp_Cf && ++ c != 0x061c && (c < 0x2066 || c > 0x2069)) ++ )) == isprop) ++ return !negated; ++ break; ++ ++ /* Punctuation: all Unicode punctuation, plus ASCII characters that ++ Unicode treats as symbols rather than punctuation, for Perl ++ compatibility (these are $+<=>^`|~). */ ++ ++ case PT_PXPUNCT: ++ if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P || ++ (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop) ++ return !negated; ++ break; + + /* This should never occur, but compilers may mutter if there is no + default. 
*/ +diff --git a/testdata/testinput6 b/testdata/testinput6 +index 219a30e..adafb89 100644 +--- a/testdata/testinput6 ++++ b/testdata/testinput6 +@@ -1319,4 +1319,150 @@ + /^s?c/mi8 + scat + ++/^[[:graph:]]+$/8W ++ Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ** Failers ++ \x{09} ++ \x{0a} ++ \x{1D} ++ \x{20} ++ \x{85} ++ \x{a0} ++ \x{61c} ++ \x{1680} ++ \x{180e} ++ \x{2028} ++ \x{2029} ++ \x{202f} ++ \x{2065} ++ \x{2066} ++ \x{2067} ++ \x{2068} ++ \x{2069} ++ \x{3000} ++ \x{e0002} ++ \x{e001f} ++ \x{e0080} ++ ++/^[[:print:]]+$/8W ++ Space: \x{a0} ++ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} ++ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} ++ \x{202f}\x{205f} ++ \x{3000} ++ Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{180e} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{202f} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ** Failers ++ \x{09} ++ \x{1D} ++ \x{85} ++ \x{61c} ++ \x{2028} ++ \x{2029} ++ \x{2065} ++ \x{2066} ++ \x{2067} ++ \x{2068} ++ \x{2069} ++ \x{e0002} ++ 
\x{e001f} ++ \x{e0080} ++ ++/^[[:punct:]]+$/8W ++ \$+<=>^`|~ ++ !\"#%&'()*,-./:;?@[\\]_{} ++ \x{a1}\x{a7} ++ \x{37e} ++ ** Failers ++ abcde ++ ++/^[[:^graph:]]+$/8W ++ \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} ++ \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} ++ \x{3000}\x{e0002}\x{e001f}\x{e0080} ++ ** Failers ++ Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ++/^[[:^print:]]+$/8W ++ \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} ++ \x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080} ++ ** Failers ++ Space: \x{a0} ++ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} ++ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} ++ \x{202f}\x{205f} ++ \x{3000} ++ Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{180e} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{202f} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ++/^[[:^punct:]]+$/8W ++ abcde ++ ** Failers ++ \$+<=>^`|~ ++ !\"#%&'()*,-./:;?@[\\]_{} ++ \x{a1}\x{a7} ++ \x{37e} ++ + /-- End of testinput6 --/ 
+diff --git a/testdata/testinput7 b/testdata/testinput7 +index 252d246..bcdcef9 100644 +--- a/testdata/testinput7 ++++ b/testdata/testinput7 +@@ -672,4 +672,14 @@ of case for anything other than the ASCII letters. --/ + /^s?c/mi8I + scat + ++/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/BZx ++ ++/.+\X/BZxs ++ ++/\X+$/BZxm ++ ++/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/BZx ++ ++/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ ++ + /-- End of testinput7 --/ +diff --git a/testdata/testoutput6 b/testdata/testoutput6 +index 090d23f..c426efc 100644 +--- a/testdata/testoutput6 ++++ b/testdata/testoutput6 +@@ -1338,15 +1338,15 @@ No match + + /^[[:graph:]]*/8W + A\x{a1}\x{a0} +- 0: A ++ 0: A\x{a1} + + /^[[:print:]]*/8W + A z\x{a0}\x{a1} +- 0: A z ++ 0: A z\x{a0}\x{a1} + + /^[[:punct:]]*/8W + .+\x{a1}\x{a0} +- 0: .+ ++ 0: .+\x{a1} + + /\p{Zs}*?\R/ + ** Failers +@@ -2138,4 +2138,284 @@ No match + scat + 0: sc + ++/^[[:graph:]]+$/8W ++ Letter:ABC ++ 0: Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ 0: Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ 0: Number:9\x{660} ++ Punctuation:\x{66a},; ++ 0: Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ 0: Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ 0: Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ 0: \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ 0: \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ 0: \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ 0: \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ 0: \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ 0: \x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ 0: \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ 
0: \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ 0: \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ 0: \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ** Failers ++No match ++ \x{09} ++No match ++ \x{0a} ++No match ++ \x{1D} ++No match ++ \x{20} ++No match ++ \x{85} ++No match ++ \x{a0} ++No match ++ \x{61c} ++No match ++ \x{1680} ++No match ++ \x{180e} ++No match ++ \x{2028} ++No match ++ \x{2029} ++No match ++ \x{202f} ++No match ++ \x{2065} ++No match ++ \x{2066} ++No match ++ \x{2067} ++No match ++ \x{2068} ++No match ++ \x{2069} ++No match ++ \x{3000} ++No match ++ \x{e0002} ++No match ++ \x{e001f} ++No match ++ \x{e0080} ++No match ++ ++/^[[:print:]]+$/8W ++ Space: \x{a0} ++ 0: Space: \x{a0} ++ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} ++ 0: \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} ++ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} ++ 0: \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} ++ \x{202f}\x{205f} ++ 0: \x{202f}\x{205f} ++ \x{3000} ++ 0: \x{3000} ++ Letter:ABC ++ 0: Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ 0: Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ 0: Number:9\x{660} ++ Punctuation:\x{66a},; ++ 0: Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ 0: Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ 0: Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{180e} ++ 0: \x{180e} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ 0: \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ 0: \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{202f} ++ 0: \x{202f} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ 0: \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ 0: \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ 0: \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ 0: 
\x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ 0: \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ 0: \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ 0: \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ 0: \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ** Failers ++ 0: ** Failers ++ \x{09} ++No match ++ \x{1D} ++No match ++ \x{85} ++No match ++ \x{61c} ++No match ++ \x{2028} ++No match ++ \x{2029} ++No match ++ \x{2065} ++No match ++ \x{2066} ++No match ++ \x{2067} ++No match ++ \x{2068} ++No match ++ \x{2069} ++No match ++ \x{e0002} ++No match ++ \x{e001f} ++No match ++ \x{e0080} ++No match ++ ++/^[[:punct:]]+$/8W ++ \$+<=>^`|~ ++ 0: $+<=>^`|~ ++ !\"#%&'()*,-./:;?@[\\]_{} ++ 0: !"#%&'()*,-./:;?@[\]_{} ++ \x{a1}\x{a7} ++ 0: \x{a1}\x{a7} ++ \x{37e} ++ 0: \x{37e} ++ ** Failers ++No match ++ abcde ++No match ++ ++/^[[:^graph:]]+$/8W ++ \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} ++ 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}\x{180e} ++ \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} ++ 0: \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} ++ \x{3000}\x{e0002}\x{e001f}\x{e0080} ++ 0: \x{3000}\x{e0002}\x{e001f}\x{e0080} ++ ** Failers ++No match ++ Letter:ABC ++No match ++ Mark:\x{300}\x{1d172}\x{1d17b} ++No match ++ Number:9\x{660} ++No match ++ Punctuation:\x{66a},; ++No match ++ Symbol:\x{6de}<>\x{fffc} ++No match ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++No match ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++No match ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++No match ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++No match ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++No match ++ \x{feff} ++No match ++ \x{fff9}\x{fffa}\x{fffb} ++No match ++ \x{110bd} ++No match ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++No match ++ 
\x{e0001} ++No match ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++No match ++ ++/^[[:^print:]]+$/8W ++ \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} ++ 0: \x{09}\x{1d}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} ++ \x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080} ++ 0: \x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080} ++ ** Failers ++No match ++ Space: \x{a0} ++No match ++ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} ++No match ++ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} ++No match ++ \x{202f}\x{205f} ++No match ++ \x{3000} ++No match ++ Letter:ABC ++No match ++ Mark:\x{300}\x{1d172}\x{1d17b} ++No match ++ Number:9\x{660} ++No match ++ Punctuation:\x{66a},; ++No match ++ Symbol:\x{6de}<>\x{fffc} ++No match ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++No match ++ \x{180e} ++No match ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++No match ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++No match ++ \x{202f} ++No match ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++No match ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++No match ++ \x{feff} ++No match ++ \x{fff9}\x{fffa}\x{fffb} ++No match ++ \x{110bd} ++No match ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++No match ++ \x{e0001} ++No match ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++No match ++ ++/^[[:^punct:]]+$/8W ++ abcde ++ 0: abcde ++ ** Failers ++No match ++ \$+<=>^`|~ ++No match ++ !\"#%&'()*,-./:;?@[\\]_{} ++No match ++ \x{a1}\x{a7} ++No match ++ \x{37e} ++No match ++ + /-- End of testinput6 --/ +diff --git a/testdata/testoutput7 b/testdata/testoutput7 +index 5f0f546..e3f607c 100644 +--- a/testdata/testoutput7 ++++ b/testdata/testoutput7 +@@ -820,7 +820,7 @@ No match + /[[:graph:]]/WBZ + ------------------------------------------------------------------ + Bra +- [!-~] ++ [[:graph:]] + Ket + End + ------------------------------------------------------------------ +@@ 
-828,7 +828,7 @@ No match + /[[:print:]]/WBZ + ------------------------------------------------------------------ + Bra +- [ -~] ++ [[:print:]] + Ket + End + ------------------------------------------------------------------ +@@ -836,7 +836,7 @@ No match + /[[:punct:]]/WBZ + ------------------------------------------------------------------ + Bra +- [!-/:-@[-`{-~] ++ [[:punct:]] + Ket + End + ------------------------------------------------------------------ +@@ -1478,4 +1478,115 @@ Need char = 'c' (caseless) + scat + 0: sc + ++/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/BZx ++------------------------------------------------------------------ ++ Bra ++ \D+ ++ extuni ++ \d+ ++ extuni ++ \S+ ++ extuni ++ \s+ ++ extuni ++ \W+ ++ extuni ++ \w+ ++ extuni ++ AllAny+ ++ extuni ++ \R+ ++ extuni ++ \H+ ++ extuni ++ \h+ ++ extuni ++ \V+ ++ extuni ++ \v+ ++ extuni ++ a+ ++ extuni ++ \x0a+ ++ extuni ++ Any+ ++ extuni ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/.+\X/BZxs ++------------------------------------------------------------------ ++ Bra ++ AllAny+ ++ extuni ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\X+$/BZxm ++------------------------------------------------------------------ ++ Bra ++ extuni+ ++ /m $ ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. 
\X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/BZx ++------------------------------------------------------------------ ++ Bra ++ extuni+ ++ \D ++ extuni+ ++ \d ++ extuni+ ++ \S ++ extuni+ ++ \s ++ extuni+ ++ \W ++ extuni+ ++ \w ++ extuni+ ++ Any ++ extuni+ ++ AllAny ++ extuni+ ++ \R ++ extuni+ ++ \H ++ extuni+ ++ \h ++ extuni+ ++ \V ++ extuni+ ++ \v ++ extuni+ ++ extuni ++ extuni+ ++ \Z ++ extuni+ ++ \z ++ extuni+ ++ $ ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ ++------------------------------------------------------------------ ++ Bra ++ prop Nd + ++ prop Xsp {0,5} ++ = ++ prop Xsp * ++ notprop Xsp ? ++ = ++ prop Xwd {0,4} ++ notprop Xwd * ++ Ket ++ End ++------------------------------------------------------------------ ++ + /-- End of testinput7 --/ +-- +2.7.4 + diff --git a/SOURCES/pcre-8.32-refused_spelling_terminated.patch b/SOURCES/pcre-8.32-refused_spelling_terminated.patch new file mode 100644 index 0000000..32c9f7e --- /dev/null +++ b/SOURCES/pcre-8.32-refused_spelling_terminated.patch @@ -0,0 +1,48 @@ +From 6f8b68f2740f3100154342338ed3d26e676dae69 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= +Date: Mon, 7 Feb 2011 11:37:09 +0100 +Subject: [PATCH] Fix spelling in pcretest(1) manual + +I kept some non-dictionary words to be consistent. +Credits to John Bradshaw. + +Spelling refused by upstream : + +I have applied these patches, except for "termi-nated", because my +Collins dictionary of spelling and word division shows "termin-ate" and +"termin-ating". I do know that there is a cultural difference between +British and American hyphenation conventions; I try to follow the +British ones, as exemplified in the Collins. 
+--- + doc/pcretest.1 | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/doc/pcretest.1 b/doc/pcretest.1 +index 41ef6ac..258a62f 100644 +--- a/doc/pcretest.1 ++++ b/doc/pcretest.1 +@@ -601,8 +601,8 @@ recognized: + after a successful match (number less than 32) + .\" JOIN + \eCname call pcre[16|32]_copy_named_substring() for substring +- "name" after a successful match (name termin- +- ated by next non alphanumeric character) ++ "name" after a successful match (name terminated ++ by next non alphanumeric character) + .\" JOIN + \eC+ show the current captured substrings at callout + time +@@ -623,8 +623,8 @@ recognized: + after a successful match (number less than 32) + .\" JOIN + \eGname call pcre[16|32]_get_named_substring() for substring +- "name" after a successful match (name termin- +- ated by next non-alphanumeric character) ++ "name" after a successful match (name terminated ++ by next non-alphanumeric character) + .\" JOIN + \eJdd set up a JIT stack of dd kilobytes maximum (any + number of digits) +-- +1.7.11.7 + diff --git a/SOURCES/pcre-8.33-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch b/SOURCES/pcre-8.33-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch new file mode 100644 index 0000000..a2db8d0 --- /dev/null +++ b/SOURCES/pcre-8.33-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch @@ -0,0 +1,187 @@ +From 93c413c5fac105d90f77ab5d03e31e0f64fc6142 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Tue, 27 May 2014 13:18:31 +0000 +Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Petr Pisar: Ported to 8.33. 
+ +Signed-off-by: Petr Písař +--- + pcre_exec.c | 41 ++++++++++++++++++++++++++--------------- + testdata/testinput1 | 9 +++++++++ + testdata/testinput8 | 6 ++++++ + testdata/testoutput1 | 12 ++++++++++++ + testdata/testoutput8 | 8 ++++++++ + 5 files changed, 61 insertions(+), 15 deletions(-) + +diff --git a/pcre_exec.c b/pcre_exec.c +index ab76d02..481e899 100644 +--- a/pcre_exec.c ++++ b/pcre_exec.c +@@ -1169,10 +1169,15 @@ for (;;) + if (rrc == MATCH_KETRPOS) + { + offset_top = md->end_offset_top; +- eptr = md->end_match_ptr; + ecode = md->start_code + code_offset; + save_capture_last = md->capture_last; + matched_once = TRUE; ++ if (eptr == md->end_match_ptr) /* Matched an empty string */ ++ { ++ do ecode += GET(ecode, 1); while (*ecode == OP_ALT); ++ break; ++ } ++ eptr = md->end_match_ptr; + continue; + } + +@@ -1242,9 +1247,14 @@ for (;;) + if (rrc == MATCH_KETRPOS) + { + offset_top = md->end_offset_top; +- eptr = md->end_match_ptr; + ecode = md->start_code + code_offset; + matched_once = TRUE; ++ if (eptr == md->end_match_ptr) /* Matched an empty string */ ++ { ++ do ecode += GET(ecode, 1); while (*ecode == OP_ALT); ++ break; ++ } ++ eptr = md->end_match_ptr; + continue; + } + +@@ -1976,7 +1986,7 @@ for (;;) + case OP_KETRMAX: + case OP_KETRPOS: + prev = ecode - GET(ecode, 1); +- ++ + /* If this was a group that remembered the subject start, in order to break + infinite repeats of empty string matches, retrieve the subject start from + the chain. Otherwise, set it NULL. */ +@@ -2001,7 +2011,7 @@ for (;;) + md->start_match_ptr = mstart; + RRETURN(MATCH_MATCH); /* Sets md->mark */ + } +- ++ + /* For capturing groups we have to check the group number back at the start + and if necessary complete handling an extraction by setting the offsets and + bumping the high water mark. Whole-pattern recursion is coded as a recurse +@@ -2061,6 +2071,18 @@ for (;;) + } + } + ++ /* OP_KETRPOS is a possessive repeating ket. 
Remember the current position, ++ and return the MATCH_KETRPOS. This makes it possible to do the repeats one ++ at a time from the outer level, thus saving stack. This must precede the ++ empty string test - in this case that test is done at the outer level. */ ++ ++ if (*ecode == OP_KETRPOS) ++ { ++ md->end_match_ptr = eptr; ++ md->end_offset_top = offset_top; ++ RRETURN(MATCH_KETRPOS); ++ } ++ + /* For an ordinary non-repeating ket, just continue at this level. This + also happens for a repeating ket if no characters were matched in the + group. This is the forcible breaking of infinite loops as implemented in +@@ -2083,17 +2105,6 @@ for (;;) + break; + } + +- /* OP_KETRPOS is a possessive repeating ket. Remember the current position, +- and return the MATCH_KETRPOS. This makes it possible to do the repeats one +- at a time from the outer level, thus saving stack. */ +- +- if (*ecode == OP_KETRPOS) +- { +- md->end_match_ptr = eptr; +- md->end_offset_top = offset_top; +- RRETURN(MATCH_KETRPOS); +- } +- + /* The normal repeating kets try the rest of the pattern or restart from + the preceding bracket, in the appropriate order. 
In the second case, we can + use tail recursion to avoid using another stack frame, unless we have an +diff --git a/testdata/testinput1 b/testdata/testinput1 +index d77d8ac..6bde9ec 100644 +--- a/testdata/testinput1 ++++ b/testdata/testinput1 +@@ -5614,4 +5614,13 @@ AbcdCBefgBhiBqz + /[\Q]a\E]+/ + aa]] + ++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ + /-- End of testinput1 --/ +diff --git a/testdata/testinput8 b/testdata/testinput8 +index d91013b..98a0b38 100644 +--- a/testdata/testinput8 ++++ b/testdata/testinput8 +@@ -4801,4 +4801,10 @@ + /abcd/ + abcd\O0 + ++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ + /-- End of testinput8 --/ +diff --git a/testdata/testoutput1 b/testdata/testoutput1 +index 1b0b8dc..cb9592d 100644 +--- a/testdata/testoutput1 ++++ b/testdata/testoutput1 +@@ -9208,4 +9208,16 @@ No match + aa]] + 0: aa]] + ++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ 0: NON QUOTED "QUOT""ED" AFTER ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ 0: NON QUOTED "QUOT""ED" AFTER ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ 0: NON QUOTED "QUOT""ED" AFTER ++ + /-- End of testinput1 --/ +diff --git a/testdata/testoutput8 b/testdata/testoutput8 +index 75affbe..666b67e 100644 +--- a/testdata/testoutput8 ++++ b/testdata/testoutput8 +@@ -8020,4 +8020,12 @@ Error -30 (invalid data in workspace for DFA restart) + abcd\O0 + Matched, but offsets vector is too small to show all matches + ++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' ++ NON QUOTED 
\"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ 0: NON QUOTED "QUOT""ED" AFTER ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ 0: NON QUOTED "QUOT""ED" AFTER ++ + /-- End of testinput8 --/ +-- +1.9.3 + diff --git a/SOURCES/pcre-8.33-RC1-Fix-pcregrep-so-that-it-can-find-empty-lines.patch b/SOURCES/pcre-8.33-RC1-Fix-pcregrep-so-that-it-can-find-empty-lines.patch new file mode 100644 index 0000000..0231790 --- /dev/null +++ b/SOURCES/pcre-8.33-RC1-Fix-pcregrep-so-that-it-can-find-empty-lines.patch @@ -0,0 +1,147 @@ +From 038a52f90a30d93c5688a882620bfd392f386076 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Fri, 10 May 2013 11:40:06 +0000 +Subject: [PATCH] Fix pcregrep so that it can find empty lines. + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1324 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Petr Pisar: Ported to 8.33-RC1. + +diff --git a/RunGrepTest b/RunGrepTest +index 94fd808..daaf8af 100755 +--- a/RunGrepTest ++++ b/RunGrepTest +@@ -486,6 +486,22 @@ echo "---------------------------- Test 101 ------------------------------" >>te + (cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry + echo "RC=$?" >>testtry + ++echo "---------------------------- Test 102 -----------------------------" >>testtry ++(cd $srcdir; $valgrind $pcregrep -n "^$" ./testdata/grepinput3) >>testtry 2>&1 ++echo "RC=$?" >>testtry ++ ++echo "---------------------------- Test 103 -----------------------------" >>testtry ++(cd $srcdir; $valgrind $pcregrep --only-matching "^$" ./testdata/grepinput3) >>testtry 2>&1 ++echo "RC=$?" >>testtry ++ ++echo "---------------------------- Test 104 -----------------------------" >>testtry ++(cd $srcdir; $valgrind $pcregrep -n --only-matching "^$" ./testdata/grepinput3) >>testtry 2>&1 ++echo "RC=$?" 
>>testtry ++ ++echo "---------------------------- Test 105 -----------------------------" >>testtry ++(cd $srcdir; $valgrind $pcregrep --colour=always "ipsum|" ./testdata/grepinput3) >>testtry 2>&1 ++echo "RC=$?" >>testtry ++ + + # Now compare the results. + +diff --git a/pcregrep.c b/pcregrep.c +index 2e0dc03..1d20733 100644 +--- a/pcregrep.c ++++ b/pcregrep.c +@@ -1378,6 +1378,7 @@ to find all possible matches. + Arguments: + matchptr the start of the subject + length the length of the subject to match ++ options options for pcre_exec + startoffset where to start matching + offsets the offets vector to fill in + mrc address of where to put the result of pcre_exec() +@@ -1388,8 +1389,8 @@ Returns: TRUE if there was a match + */ + + static BOOL +-match_patterns(char *matchptr, size_t length, int startoffset, int *offsets, +- int *mrc) ++match_patterns(char *matchptr, size_t length, unsigned int options, ++ int startoffset, int *offsets, int *mrc) + { + int i; + size_t slen = length; +@@ -1404,7 +1405,7 @@ if (slen > 200) + for (i = 1; p != NULL; p = p->next, i++) + { + *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length, +- startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE); ++ startoffset, options, offsets, OFFSET_SIZE); + if (*mrc >= 0) return TRUE; + if (*mrc == PCRE_ERROR_NOMATCH) continue; + fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc); +@@ -1539,6 +1540,7 @@ while (ptr < endptr) + int endlinelength; + int mrc = 0; + int startoffset = 0; ++ unsigned int options = 0; + BOOL match; + char *matchptr = ptr; + char *t = ptr; +@@ -1628,9 +1630,12 @@ while (ptr < endptr) + + /* Run through all the patterns until one matches or there is an error other + than NOMATCH. This code is in a subroutine so that it can be re-used for +- finding subsequent matches when colouring matched lines. */ ++ finding subsequent matches when colouring matched lines. 
After finding one ++ match, set PCRE_NOTEMPTY to disable any further matches of null strings in ++ this line. */ + +- match = match_patterns(matchptr, length, startoffset, offsets, &mrc); ++ match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc); ++ options = PCRE_NOTEMPTY; + + /* If it's a match or a not-match (as required), do what's wanted. */ + +@@ -1871,7 +1876,8 @@ while (ptr < endptr) + { + startoffset = offsets[1]; + if (startoffset >= (int)linelength + endlinelength || +- !match_patterns(matchptr, length, startoffset, offsets, &mrc)) ++ !match_patterns(matchptr, length, options, startoffset, offsets, ++ &mrc)) + break; + FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout); + fprintf(stdout, "%c[%sm", 0x1b, colour_string); +diff --git a/testdata/grepoutput b/testdata/grepoutput +index 733b9d6..cf04091 100644 +--- a/testdata/grepoutput ++++ b/testdata/grepoutput +@@ -705,3 +705,38 @@ RC=0 + ./testdata/grepinput:zero|a + ./testdata/grepinput:.|zero|the|. + RC=0 ++---------------------------- Test 102 ----------------------------- ++2: ++5: ++7: ++9: ++12: ++14: ++RC=0 ++---------------------------- Test 103 ----------------------------- ++RC=0 ++---------------------------- Test 104 ----------------------------- ++2: ++5: ++7: ++9: ++12: ++14: ++RC=0 ++---------------------------- Test 105 ----------------------------- ++triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt ++ ++triple: t2_txt s1_tag s_txt p_tag p_txt o_tag ++Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
++ ++triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt ++ ++triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt ++ ++triple: t5_txt s1_tag s_txt p_tag p_txt o_tag ++o_txt ++ ++triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt ++ ++triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt ++RC=0 +-- +1.8.1.4 + diff --git a/SOURCES/pcre-8.33-RC1-Fix-pcretest-crash-with-a-data-line-longer-than-6553.patch b/SOURCES/pcre-8.33-RC1-Fix-pcretest-crash-with-a-data-line-longer-than-6553.patch new file mode 100644 index 0000000..f2d4865 --- /dev/null +++ b/SOURCES/pcre-8.33-RC1-Fix-pcretest-crash-with-a-data-line-longer-than-6553.patch @@ -0,0 +1,54 @@ +From bf2c63fda75cca4ab3006b6ccdf0f18fafe4fca5 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Fri, 10 May 2013 16:22:40 +0000 +Subject: [PATCH] Fix pcretest crash with a data line longer than 65536 bytes. + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1327 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Petr Pisar: Port to 8.31-RC1 + +diff --git a/pcretest.c b/pcretest.c +index 6ef3252..25f3853 100644 +--- a/pcretest.c ++++ b/pcretest.c +@@ -4411,7 +4411,8 @@ while (!done) + + #ifndef NOUTF + /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create +- invalid input to pcre_exec, you must use \x?? or \x{} sequences. */ ++ invalid input to pcre_exec, you must use \x?? or \x{} sequences. */ ++ + if (use_utf) + { + pcre_uint8 *q; +@@ -4429,21 +4430,23 @@ while (!done) + + #ifdef SUPPORT_VALGRIND + /* Mark the dbuffer as addressable but undefined again. */ ++ + if (dbuffer != NULL) + { + VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE); + } + #endif + +- /* Allocate a buffer to hold the data line. len+1 is an upper bound on +- the number of pcre_uchar units that will be needed. */ +- if (dbuffer == NULL || (size_t)len >= dbuffer_size) ++ /* Allocate a buffer to hold the data line; len+1 is an upper bound on ++ the number of pcre_uchar units that will be needed. 
*/ ++ ++ while (dbuffer == NULL || (size_t)len >= dbuffer_size) + { + dbuffer_size *= 2; + dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE); + if (dbuffer == NULL) + { +- fprintf(stderr, "pcretest: malloc(%d) failed\n", (int)dbuffer_size); ++ fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size); + exit(1); + } + } +-- +1.8.1.4 + diff --git a/SOURCES/pcre-8.33-RC1-Fix-segfault-when-pcre_dfa_exec-is-called-with-an-ou.patch b/SOURCES/pcre-8.33-RC1-Fix-segfault-when-pcre_dfa_exec-is-called-with-an-ou.patch new file mode 100644 index 0000000..080464f --- /dev/null +++ b/SOURCES/pcre-8.33-RC1-Fix-segfault-when-pcre_dfa_exec-is-called-with-an-ou.patch @@ -0,0 +1,267 @@ +From f4176cfb682170c5e9246949df653c82200d7259 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Wed, 15 May 2013 16:53:18 +0000 +Subject: [PATCH] Fix segfault when pcre_dfa_exec() is called with an output + vector of length less than 2. + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1334 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Petr Pisar: Port to 8.33-RC1. 
+ + +diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c +index 8211760..02bd3f0 100644 +--- a/pcre_dfa_exec.c ++++ b/pcre_dfa_exec.c +@@ -636,7 +636,7 @@ for (;;) + const pcre_uchar *code; + int state_offset = current_state->offset; + int codevalue, rrc; +- unsigned int count; ++ int count; + + #ifdef PCRE_DEBUG + printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); +@@ -1255,7 +1255,7 @@ for (;;) + (d != OP_ANY || !IS_NEWLINE(ptr)) && + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { +- if (++count >= GET2(code, 1)) ++ if (++count >= (int)GET2(code, 1)) + { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); } + else + { ADD_NEW(state_offset, count); } +@@ -1289,7 +1289,7 @@ for (;;) + active_count--; /* Remove non-match possibility */ + next_active_state--; + } +- if (++count >= GET2(code, 1)) ++ if (++count >= (int)GET2(code, 1)) + { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); } + else + { ADD_NEW(state_offset, count); } +@@ -1903,7 +1903,7 @@ for (;;) + active_count--; /* Remove non-match possibility */ + next_active_state--; + } +- if (++count >= GET2(code, 1)) ++ if (++count >= (int)GET2(code, 1)) + { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); } + else + { ADD_NEW(state_offset, count); } +@@ -1942,7 +1942,7 @@ for (;;) + } + if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0) + reset_could_continue = TRUE; +- if (++count >= GET2(code, 1)) ++ if (++count >= (int)GET2(code, 1)) + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } + else + { ADD_NEW_DATA(-state_offset, count, ncount); } +@@ -1984,7 +1984,7 @@ for (;;) + active_count--; /* Remove non-match possibility */ + next_active_state--; + } +- if (++count >= GET2(code, 1)) ++ if (++count >= (int)GET2(code, 1)) + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } + else + { ADD_NEW_DATA(-state_offset, count, ncount); } +@@ -2024,7 +2024,7 @@ for (;;) + active_count--; /* Remove non-match possibility */ + next_active_state--; + } +- if (++count >= 
GET2(code, 1)) ++ if (++count >= (int)GET2(code, 1)) + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } + else + { ADD_NEW_DATA(-state_offset, count, 0); } +@@ -2061,7 +2061,7 @@ for (;;) + active_count--; /* Remove non-match possibility */ + next_active_state--; + } +- if (++count >= GET2(code, 1)) ++ if (++count >= (int)GET2(code, 1)) + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } + else + { ADD_NEW_DATA(-state_offset, count, 0); } +@@ -2431,7 +2431,7 @@ for (;;) + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { +- if (++count >= GET2(code, 1)) ++ if (++count >= (int)GET2(code, 1)) + { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } + else + { ADD_NEW(state_offset, count); } +@@ -2480,7 +2480,7 @@ for (;;) + active_count--; /* Remove non-match possibility */ + next_active_state--; + } +- if (++count >= GET2(code, 1)) ++ if (++count >= (int)GET2(code, 1)) + { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } + else + { ADD_NEW(state_offset, count); } +@@ -2553,11 +2553,11 @@ for (;;) + case OP_CRRANGE: + case OP_CRMINRANGE: + count = current_state->count; /* Already matched */ +- if (count >= GET2(ecode, 1)) ++ if (count >= (int)GET2(ecode, 1)) + { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } + if (isinclass) + { +- unsigned int max = GET2(ecode, 1 + IMM2_SIZE); ++ int max = (int)GET2(ecode, 1 + IMM2_SIZE); + if (++count >= max && max != 0) /* Max 0 => no limit */ + { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } + else +diff --git a/pcretest.c b/pcretest.c +index 25f3853..20dc0f1 100644 +--- a/pcretest.c ++++ b/pcretest.c +@@ -5043,7 +5043,7 @@ while (!done) + DFA_WS_DIMENSION); + if (count == 0) + { +- fprintf(outfile, "Matched, but too many subsidiary matches\n"); ++ fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n"); + count = use_size_offsets/2; + } + } +diff --git a/testdata/testinput8 b/testdata/testinput8 +index e235445..d91013b 100644 +--- a/testdata/testinput8 ++++ 
b/testdata/testinput8 +@@ -4798,4 +4798,7 @@ + xxxxxxxxabcd + xx\xa0xxxxxabcd + ++/abcd/ ++ abcd\O0 ++ + /-- End of testinput8 --/ +diff --git a/testdata/testoutput10 b/testdata/testoutput10 +index 9ee0f76..0e04205 100644 +--- a/testdata/testoutput10 ++++ b/testdata/testoutput10 +@@ -813,7 +813,7 @@ No match + 11111111111111111111111111111111111111111111111111111111111111111111111 + No match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 2: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@@ -841,7 +841,7 @@ Matched, but too many subsidiary matches + 11111111111111111111111111111111111111111111111111111111111111111111111 + No match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 2: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@@ -869,7 +869,7 @@ Matched, but too many subsidiary matches + 11111111111111111111111111111111111111111111111111111111111111111111111 + No match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 2: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@@ -897,7 +897,7 @@ Matched, but too many subsidiary matches + 
11111111111111111111111111111111111111111111111111111111111111111111111 + No match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 2: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@@ -925,7 +925,7 @@ Matched, but too many subsidiary matches + 11111111111111111111111111111111111111111111111111111111111111111111111 + No match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 2: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +diff --git a/testdata/testoutput8 b/testdata/testoutput8 +index 527ba4d..75affbe 100644 +--- a/testdata/testoutput8 ++++ b/testdata/testoutput8 +@@ -49,7 +49,7 @@ No match + 16: a + 17: + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 2: aaaaaaaaaaaaaaaaaaaaaaaaaaaa +@@ -3260,7 +3260,7 @@ No match + + /(.*)(\d*)/ + I have 2 numbers: 53147 +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: 5314 + 2: I have 2 numbers: 531 +@@ -3295,7 +3295,7 @@ Matched, but too many subsidiary matches + + /(.*?)(\d*)/ + I have 2 numbers: 53147 +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: I have 2 numbers: 53147 + 1: I have 2 
numbers: 5314 + 2: I have 2 numbers: 531 +@@ -7848,7 +7848,7 @@ Error -26 (nested recursion at the same subject position) + + /(a+)/ + \O6aaaa +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: aaaa + 1: aaa + 2: aa +@@ -8016,4 +8016,8 @@ Error -30 (invalid data in workspace for DFA restart) + 0: xx\xa0xxxxxabcd + 1: xx\xa0xxxxxabc + ++/abcd/ ++ abcd\O0 ++Matched, but offsets vector is too small to show all matches ++ + /-- End of testinput8 --/ +diff --git a/testdata/testoutput9 b/testdata/testoutput9 +index 95cd618..0bb101a 100644 +--- a/testdata/testoutput9 ++++ b/testdata/testoutput9 +@@ -434,7 +434,7 @@ No match + + /\D*/8 + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 2: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@@ -460,7 +460,7 @@ Matched, but too many subsidiary matches + + /\D*/8 + 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} +-Matched, but too many subsidiary matches ++Matched, but offsets vector is too small to show all matches + 0: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 1: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 2: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} +-- +1.8.1.4 + diff --git a/SOURCES/pcre-8.34-Fix-range-check-in-JIT-path.patch b/SOURCES/pcre-8.34-Fix-range-check-in-JIT-path.patch new file mode 100644 index 0000000..6ee6103 --- /dev/null +++ b/SOURCES/pcre-8.34-Fix-range-check-in-JIT-path.patch @@ -0,0 +1,55 @@ +From c061f6a865a473cf93aae1e6a0ccd2325509a6b2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= +Date: Thu, 9 Jan 2014 13:39:28 +0100 +Subject: [PATCH] Fix range check in 
JIT path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Jitted range check was wrong: + + re> /[efij]/S+ +data> e +No match + +while interpretted path is correct: + + re> /[efij]/ +data> e + 0: e + +This fix is part of: + +commit c07887b22a83fba842e88889c9e57a622b9ee439 +Author: zherczeg +Date: Sun Dec 22 20:47:08 2013 +0000 + + The auto-possessification of character sets were improved. The JIT compiler also optimizes more character set checks. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1415 2f5784b3-3f2a-0410-8824-cb99058d5e15 + + + +Signed-off-by: Petr Písař +--- + pcre_jit_compile.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c +index d070263..47447da 100644 +--- a/pcre_jit_compile.c ++++ b/pcre_jit_compile.c +@@ -3573,7 +3573,9 @@ switch(ranges[0]) + } + return TRUE; + } +- if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2])) ++ if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) ++ && (ranges[2] | (ranges[4] - ranges[2])) == ranges[4] ++ && is_powerof2(ranges[4] - ranges[2])) + { + if (readch) + read_char(common); +-- +1.8.3.1 + diff --git a/SOURCES/pcre-8.34-RC1-Fix-XCLASS-POSIX-types-in-JIT.patch b/SOURCES/pcre-8.34-RC1-Fix-XCLASS-POSIX-types-in-JIT.patch new file mode 100644 index 0000000..8b2cc99 --- /dev/null +++ b/SOURCES/pcre-8.34-RC1-Fix-XCLASS-POSIX-types-in-JIT.patch @@ -0,0 +1,52 @@ +From 2cadfa0ec8900bb784aa4c4171ad5aec0e9b7edf Mon Sep 17 00:00:00 2001 +From: zherczeg +Date: Sat, 30 Nov 2013 07:05:00 +0000 +Subject: [PATCH] Fix XCLASS POSIX types in JIT. 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1406 2f5784b3-3f2a-0410-8824-cb99058d5e15 +Signed-off-by: Petr Písař +--- + pcre_jit_compile.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c +index c71b5a4..15555a6 100644 +--- a/pcre_jit_compile.c ++++ b/pcre_jit_compile.c +@@ -4431,10 +4431,10 @@ while (*cc != XCL_END) + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); + +- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); ++ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + +- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); ++ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + + JUMPHERE(jump); +@@ -4447,7 +4447,7 @@ while (*cc != XCL_END) + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); + +- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); ++ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); + OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL); + + jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); +@@ -4457,7 +4457,7 @@ while (*cc != XCL_END) + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); + +- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); ++ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, 
SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + + JUMPHERE(jump); +-- +2.7.4 + diff --git a/SOURCES/pcre-8.37-RC1-Fix-static-linking-issue-with-pkg-config.patch b/SOURCES/pcre-8.37-RC1-Fix-static-linking-issue-with-pkg-config.patch new file mode 100644 index 0000000..adf46ec --- /dev/null +++ b/SOURCES/pcre-8.37-RC1-Fix-static-linking-issue-with-pkg-config.patch @@ -0,0 +1,44 @@ +From 2765d0933715e6b2c78a4e74fa8d5cad27273f29 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Thu, 23 Apr 2015 17:34:33 +0000 +Subject: [PATCH] Fix static linking issue with pkg-config. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1552 2f5784b3-3f2a-0410-8824-cb99058d5e15 +Signed-off-by: Petr Písař + +diff --git a/libpcre.pc.in b/libpcre.pc.in +index 1f26b32..0a35da8 100644 +--- a/libpcre.pc.in ++++ b/libpcre.pc.in +@@ -9,4 +9,5 @@ Name: libpcre + Description: PCRE - Perl compatible regular expressions C library with 8 bit character support + Version: @PACKAGE_VERSION@ + Libs: -L${libdir} -lpcre ++Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ + Cflags: -I${includedir} @PCRE_STATIC_CFLAG@ +diff --git a/libpcre16.pc.in b/libpcre16.pc.in +index f589b75..080c9dc 100644 +--- a/libpcre16.pc.in ++++ b/libpcre16.pc.in +@@ -9,4 +9,5 @@ Name: libpcre16 + Description: PCRE - Perl compatible regular expressions C library with 16 bit character support + Version: @PACKAGE_VERSION@ + Libs: -L${libdir} -lpcre16 ++Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ + Cflags: -I${includedir} @PCRE_STATIC_CFLAG@ +diff --git a/libpcre32.pc.in b/libpcre32.pc.in +index 6582105..a3ae0e1 100644 +--- a/libpcre32.pc.in ++++ b/libpcre32.pc.in +@@ -9,4 +9,5 @@ Name: libpcre32 + Description: PCRE - Perl compatible regular expressions C library with 32 bit character support + Version: @PACKAGE_VERSION@ + Libs: -L${libdir} -lpcre32 ++Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ + Cflags: 
-I${includedir} @PCRE_STATIC_CFLAG@ +-- +2.1.0 + diff --git a/SPECS/pcre.spec b/SPECS/pcre.spec new file mode 100644 index 0000000..771cffd --- /dev/null +++ b/SPECS/pcre.spec @@ -0,0 +1,607 @@ +# This is stable release: +#%%global rcversion RC1 +Name: pcre +Version: 8.32 +Release: %{?rcversion:0.}17%{?rcversion:.%rcversion}%{?dist} +%global myversion %{version}%{?rcversion:-%rcversion} +Summary: Perl-compatible regular expression library +Group: System Environment/Libraries +License: BSD +URL: http://www.pcre.org/ +Source: ftp://ftp.csx.cam.ac.uk/pub/software/programming/%{name}/%{?rcversion:Testing/}%{name}-%{myversion}.tar.bz2 +# Upstream thinks RPATH is good idea. +Patch0: pcre-8.21-multilib.patch +# Refused by upstream, bug #675477 +Patch1: pcre-8.32-refused_spelling_terminated.patch +# In upstream after 8.32 +Patch2: pcre-8.32-Fix-forward-search-in-JIT-when-link-size-is-3-or-gre.patch +# In upstream after 8.32 +Patch3: pcre-8.32-Fix-two-buffer-over-read-issues-in-16-and-32-bit-mod.patch +# Fix pcregrep on empty line, in upstream after 8.33-RC1 +Patch4: pcre-8.33-RC1-Fix-pcregrep-so-that-it-can-find-empty-lines.patch +# Grow buffer in pcretest properly, in upstream after 8.33-RC1 +Patch5: pcre-8.33-RC1-Fix-pcretest-crash-with-a-data-line-longer-than-6553.patch +# Fix passing too small output vector to pcre_dfa_exec, in upstream after +# 8.33-RC1, bug #963284 +Patch6: pcre-8.33-RC1-Fix-segfault-when-pcre_dfa_exec-is-called-with-an-ou.patch +# Fix jitted range check, in upstream after 8.34, bug #1048101 +Patch7: pcre-8.34-Fix-range-check-in-JIT-path.patch +# Fix unused memory usage on zero-repeat assertion condition, bug #1169797, +# CVE-2014-8964, in upstream after 8.36 +Patch8: pcre-8.32-Fix-zero-repeat-assertion-condition-bug.patch +# Fix compiling expression where start-anchored character with more than one +# other case follows circumflex in multiline UTF mode, bug #1110621, +# in upstream 8.36 +Patch9: 
pcre-8.32-Fix-bad-starting-data-when-char-with-more-than-one-o.patch +# Fix character class with a literal quotation, bug #1111091, +# upstream bug #1494, in upstream after 8.35 +Patch10: pcre-8.32-Fix-bad-compile-of-Qx-.-where-x-is-any-character.patch +# Fix empty-matching possessive zero-repeat groups in interpreted mode, +# bug #1119320, upstream bug #1500, in upstream after 8.35 +Patch11: pcre-8.33-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch +# Fix compiler crash for zero-repeated groups with a recursive back reference, +# bug #1119356, upstream bug #1503, in upstream after 8.35 +Patch12: pcre-8.32-Fix-compiler-crash-misbehaviour-for-zero-repeated-gr.patch +# Reset non-matched groups within capturing group up to forced match, +# bug #1161597, in upstream after 8.36 +Patch13: pcre-8.32-Fix-bug-when-there-are-unset-groups-prior-to-ACCEPT-.patch +# Fix static linking, bug #1217111, in upstream after 8.37-RC1 +Patch14: pcre-8.37-RC1-Fix-static-linking-issue-with-pkg-config.patch +# Fix checking whether a group could match an empty string, bug #1330509, +# in upstream after 8.33, needed for +# Fix-compile-time-loop-for-recursive-reference-within.patch +Patch15: pcre-8.32-Fix-checking-whether-a-group-could-match-an-empty-st.patch +# Fix CVE-2015-2328 (infinite recursion compiling pattern with recursive +# reference in a group with indefinite repeat), bug #1330509, +# upstream bug #1515, in upstream after 8.35 +Patch16: pcre-8.32-Fix-compile-time-loop-for-recursive-reference-within.patch +# Fix duplicate names memory calculation error, bug #1330509, +# in upstream after 8.37, +# needed for Fix-buffer-overflow-for-named-references-in-situatio.patch +Patch17: pcre-8.32-Fix-duplicate-names-memory-calculation-error.patch +# Fix named forward reference to duplicate group number overflow bug, +# bug #1330509, in upstream after 8.37, +# needed for Fix-buffer-overflow-for-named-references-in-situatio.patch +Patch18: 
pcre-8.32-Fix-named-forward-reference-to-duplicate-group-numbe.patch +# Fix CVE-2015-8385 (buffer overflow caused by named forward reference to +# duplicate group number), bug #1330509, in upstream after 8.37 +Patch19: pcre-8.32-Fix-buffer-overflow-for-named-references-in-situatio.patch +# Fix CVE-2015-8386 (buffer overflow caused by lookbehind assertion), +# bug #1330509, in upstream after 8.37 +Patch20: pcre-8.32-Fix-buffer-overflow-for-lookbehind-within-mutually-r.patch +# Fix CVE-2015-3217 (stack overflow caused by mishandled group empty match), +# bug #1330509, in upstream after 8.37 +Patch21: pcre-8.32-Fix-group-empty-match-bug.patch +# Fix CVE-2015-5073 and CVE-2015-8388 (buffer overflow for forward reference +# within backward assertion with excess closing parenthesis), bug #1330509, +# in upstream after 8.37 +Patch22: pcre-8.32-Fix-buffer-overflow-for-forward-reference-within-bac.patch +# Fix CVE-2015-8391 (inefficient posix character class syntax check), +# bug #1330509, in upstream after 8.37 +Patch23: pcre-8.32-Fix-run-for-ever-bug-for-deeply-nested-sequences.patch +# Fix CVE-2016-3191 (workspace overflow for (*ACCEPT) with deeply nested +# parentheses), bug #1330509, in upstream after 8.38 +Patch24: pcre-8.32-Fix-workspace-overflow-for-ACCEPT-with-deeply-nested.patch +# 1/3 Let [:graph:], [:print:], and [:punct:] POSIX classes to handle Unicode +# characters in UCP mode to match Perl behavior, bug #1400267, +# in upstream 8.34 +Patch25: pcre-8.32-Update-POSIX-class-handling-in-UCP-mode.patch +# 2/3 Let [:graph:], [:print:], and [:punct:] POSIX classes to handle Unicode +# characters in UCP mode with JIT, bug #1400267, in upstream 8.34 +Patch26: pcre-8.32-Add-support-for-PT_PXGRAPH-PT_PXPRINT-and-PT_PXPUNCT.patch +# 3/3 Fix XCLASS POSIX JIT compilation, tests failed on 32-bit PowerPC, +# bug #1400267, in upstream 8.34 +Patch27: pcre-8.34-RC1-Fix-XCLASS-POSIX-types-in-JIT.patch +# Fix matching Unicode ranges in JIT mode, bug #1402288, in upstream 8.35 
+Patch28: pcre-8.32-A-new-flag-is-set-when-property-checks-are-present-i.patch +# git required for A-new-flag-is-set-when-property-checks-are-present-i.patch +BuildRequires: git +BuildRequires: readline-devel +# New libtool to get rid of rpath +BuildRequires: autoconf, automake, libtool + +%description +Perl-compatible regular expression library. +PCRE has its own native API, but a set of "wrapper" functions that are based on +the POSIX API are also supplied in the library libpcreposix. Note that this +just provides a POSIX calling interface to PCRE: the regular expressions +themselves still follow Perl syntax and semantics. The header file +for the POSIX-style functions is called pcreposix.h. + +%package devel +Summary: Development files for %{name} +Group: Development/Libraries +Requires: %{name}%{?_isa} = %{version}-%{release} + +%description devel +Development files (Headers, libraries for dynamic linking, etc) for %{name}. + +%package static +Summary: Static library for %{name} +Group: Development/Libraries +Requires: %{name}-devel%{_isa} = %{version}-%{release} + +%description static +Library for static linking for %{name}. + +%package tools +Summary: Auxiliary utilities for %{name} +Group: Development/Tools +Requires: %{name}%{_isa} = %{version}-%{release} + +%description tools +Utilities demonstrating PCRE capabilities like pcregrep or pcretest. 
+ +%prep +%setup -q -n %{name}-%{myversion} +# Get rid of rpath +%patch0 -p1 -b .multilib +%patch1 -p1 -b .terminated_typos +%patch2 -p1 -b .forward_jit +%patch3 -p1 -b .buffer_over_read +%patch4 -p1 -b .pcregrep_empty_line +%patch5 -p1 -b .pcretest_grow_buffer +%patch6 -p1 -b .vector_size +%patch7 -p1 -b .jitted_range_check +%patch8 -p1 -b .zero_repeat_assertion +%patch9 -p1 -b .starting_data +%patch10 -p1 -b .class_with_literal +%patch11 -p1 -b .empty_zero_repeat_group +%patch12 -p1 -b .compiler_crash_zero_group +%patch13 -p1 -b .reset_groups +%patch14 -p1 -b .static_linking +%patch15 -p1 -b .group_match_empty +%patch16 -p1 -b .compiler_loop_recursive_reference +%patch17 -p1 -b .duplicate_names_memory_calculation +%patch18 -p1 -b .forward_reference_to_duplicate_group_number +%patch19 -p1 -b .named_references_in_pqp +%patch20 -p1 -b .lookbehind_within_mutally_recusive_subroutines +%patch21 -p1 -b .group_empty_match +%patch22 -p1 -b .CVE-2015-5073 +%patch23 -p1 -b .deeply_nested_bracket_colon +%patch24 -p1 -b .accept_with_nested_parentheses +%patch25 -p1 -b .posix_classes_in_ucp +%patch26 -p1 -b .posix_classes_in_ucp_jit +%patch27 -p1 -b .posix_classes_in_ucp_jit_types +# Apply a Git binary patch +git --work-tree=. 
apply %{PATCH28} +# Because of rpath patch +libtoolize --copy --force && autoreconf -vif +# One contributor's name is non-UTF-8 +for F in ChangeLog; do + iconv -f latin1 -t utf8 "$F" >"${F}.utf8" + touch --reference "$F" "${F}.utf8" + mv "${F}.utf8" "$F" +done + +%build +# There is an explicit request to optimize PCRE more, bugs #1051072, #1123498 +%global _performance_build 1 +%ifarch ppc64 +# There is a strict-aliasing problem on PPC64, bug #881232 +%global optflags %{optflags} -fno-strict-aliasing +%endif +%configure \ +%ifarch aarch64 ppc64le s390 s390x sparc64 sparcv9 + --disable-jit \ +%else + --enable-jit \ +%endif + --enable-pcretest-libreadline --enable-utf --enable-unicode-properties \ + --enable-pcre8 --enable-pcre16 --enable-pcre32 +make %{?_smp_mflags} + +%install +make install DESTDIR=$RPM_BUILD_ROOT +# Get rid of unneeded *.la files +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la +# These are handled by %%doc in %%files +rm -rf $RPM_BUILD_ROOT%{_docdir}/pcre + +%check +%ifarch s390 ppc +# larger stack is needed on s390, ppc +ulimit -s 10240 +%endif +make check VERBOSE=yes + +%post -p /sbin/ldconfig + +%postun -p /sbin/ldconfig + +%files +%{_libdir}/*.so.* +%doc AUTHORS COPYING LICENCE NEWS README ChangeLog + +%files devel +%{_libdir}/*.so +%{_libdir}/pkgconfig/* +%{_includedir}/*.h +%{_mandir}/man1/pcre-config.* +%{_mandir}/man3/* +%{_bindir}/pcre-config +%doc doc/*.txt doc/html +%doc HACKING pcredemo.c + +%files static +%{_libdir}/*.a +%doc COPYING LICENCE + +%files tools +%{_bindir}/pcregrep +%{_bindir}/pcretest +%{_mandir}/man1/pcregrep.* +%{_mandir}/man1/pcretest.* + +%changelog +* Tue Dec 06 2016 Petr Pisar - 8.32-17 +- Let [:graph:], [:print:], and [:punct:] POSIX classes to handle Unicode + characters in UCP mode to match Perl behavior (bug #1400267) +- Fix matching Unicode ranges in JIT mode (bug #1402288) + +* Wed Apr 27 2016 Petr Pisar - 8.32-16 +- Fix CVE-2015-2328 (infinite recursion compiling pattern with recursive + reference in a group with 
indefinite repeat) (bug #1330509) +- Fix CVE-2015-8385 (buffer overflow caused by named forward reference to + duplicate group number) (bug #1330509) +- Fix CVE-2015-8386 (buffer overflow caused by lookbehind assertion) + (bug #1330509) +- Fix CVE-2015-3217 (stack overflow caused by mishandled group empty match) + (bug #1330509) +- Fix CVE-2015-5073 and CVE-2015-8388 (buffer overflow for forward reference + within backward assertion with excess closing parenthesis) (bug #1330509) +- Fix CVE-2015-8391 (inefficient posix character class syntax check) + (bug #1330509) +- Fix CVE-2016-3191 (workspace overflow for (*ACCEPT) with deeply nested + parentheses) (bug #1330509) + +* Wed Apr 29 2015 Petr Pisar - 8.32-15 +- Fix compiling expression where start-anchored character with more than one + other case follows circumflex in multiline UTF mode (bug #1110621) +- Fix character class with a literal quotation (bug #1111091) +- Fix empty-matching possessive zero-repeat groups in interpreted mode + (bug #1119320) +- Fix compiler crash for zero-repeated groups with a recursive back reference + (bug #1119356) +- Reset non-matched groups within capturing group up to forced match + (bug #1161597) +- Fix static linking (bug #1217111) +- Package pcredemo.c as a documentation for pcre-devel (bug #1217118) + +* Tue Dec 02 2014 Petr Pisar - 8.32-14 +- Fix CVE-2014-8964 (unused memory usage on zero-repeat assertion condition) + (bug #1169797) + +* Fri Aug 01 2014 Petr Pisar - 8.32-13 +- Disable unsupported JIT mode on little-endian 64-bit PowerPC platform + (bug #1125642) +- Raise optimization level to 3 on little-endian 64-bit PowerPC (bug #1123498) + +* Fri Jan 24 2014 Daniel Mach - 8.32-12 +- Mass rebuild 2014-01-24 + +* Fri Jan 10 2014 Petr Pisar - 8.32-11 +- Raise optimization to level 3 on 64-bit PowerPC (bug #1051072) + +* Thu Jan 09 2014 Petr Pisar - 8.32-10 +- Fix jitted range check (bug #1048101) + +* Fri Dec 27 2013 Daniel Mach - 8.32-9 +- Mass rebuild 2013-12-27 + +* Wed Oct 
16 2013 Petr Pisar - 8.32-8 +- Disable strict-aliasing on PPC64 (bug #881232) + +* Mon Jun 03 2013 Petr Pisar - 8.32-7 +- Disable unsupported JIT on aarch64 (bug #969693) + +* Thu May 16 2013 Petr Pisar - 8.32-6 +- Fix passing too small output vector to pcre_dfa_exec (bug #963284) + +* Mon May 13 2013 Petr Pisar - 8.32-5 +- Fix bad handling of empty lines in pcregrep tool (bug #961789) +- Fix possible pcretest crash with a data line longer than 65536 bytes + +* Mon Jan 28 2013 Petr Pisar - 8.32-4 +- Fix forward search in JIT when link size is 3 or greater +- Fix buffer over-read in UTF-16 and UTF-32 modes with JIT + +* Fri Jan 25 2013 Peter Robinson 8.32-3 +- Adjust autoreconf to fix FTBFS on F-19 + +* Mon Jan 07 2013 Petr Pisar - 8.32-2 +- Make inter-subpackage dependencies architecture specific (bug #892187) + +* Fri Nov 30 2012 Petr Pisar - 8.32-1 +- 8.32 bump + +* Thu Nov 29 2012 Petr Pisar - 8.32-0.2.RC1 +- Inter-depend sub-packages to prevent from mixing different versions + +* Tue Nov 13 2012 Petr Pisar - 8.32-0.1.RC1 +- 8.32-RC1 bump + +* Mon Sep 03 2012 Petr Pisar - 8.31-2 +- Set re_nsub in regcomp() properly (bug #853990) + +* Fri Jul 20 2012 Fedora Release Engineering - 8.31-1.1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild + +* Fri Jul 13 2012 Petr Pisar - 8.31-1 +- 8.31 bump + +* Tue Jun 05 2012 Petr Pisar - 8.31-0.1.RC1 +- 8.31-RC1 bump + +* Sat May 12 2012 Tom Callaway - 8.30-7 +- disable jit for sparcv9 and sparc64 + +* Fri May 11 2012 Petr Pisar - 8.30-6 +- Fix spelling in manual pages (bug #820978) + +* Mon Apr 23 2012 Petr Pisar - 8.30-5 +- Possessify high ASCII (bug #815217) +- Fix ovector overflow (bug #815214) + +* Fri Apr 20 2012 Petr Pisar - 8.30-4 +- Possessify \s*\R (bug #813237) + +* Thu Apr 05 2012 Petr Pisar - 8.30-3 +- Fix look-behind assertion in UTF-8 JIT mode (bug #810314) + +* Tue Feb 28 2012 Petr Pisar - 8.30-2 +- Remove old libpcre.so.0 from distribution +- Move library to /usr + +* Thu Feb 09 2012 Petr 
Pisar - 8.30-1 +- 8.30 bump +- Add old libpcre.so.0 to preserve compatibility temporarily + +* Fri Jan 27 2012 Petr Pisar - 8.30-0.1.RC1 +- 8.30 Release candidate 1 with UTF-16 support and *API change* +- Enable UTF-16 variant of PCRE library +- The pcre_info() function has been removed from pcre library. +- Loading compiled pattern does not fix endianness anymore. Instead an error + is returned and the application can use pcre_pattern_to_host_byte_order() to + convert the pattern. +- Surrogates (0xD800---0xDFFF) are forbidden in UTF-8 mode now. + +* Fri Jan 13 2012 Fedora Release Engineering - 8.21-2.1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_17_Mass_Rebuild + +* Mon Jan 02 2012 Petr Pisar - 8.21-2 +- Fix unmatched subpattern to not become wildcard (bug #769597) +- Fix NULL pointer dereference in pcre_free_study() (upstream bug #1186) + +* Mon Dec 12 2011 Petr Pisar - 8.21-1 +- 8.21 bump + +* Thu Dec 08 2011 Karsten Hopp 8.21-0.2.RC1 +- ppc needs a larger stack similar to s390 + +* Tue Dec 06 2011 Petr Pisar - 8.21-0.1.RC1 +- 8.21-RC1 bump + +* Fri Dec 02 2011 Petr Pisar - 8.20-7 +- Fix case-less match if cases differ in encoding length (bug #756675) + +* Fri Nov 25 2011 Petr Pisar - 8.20-6 +- Fix cache-flush in JIT on PPC + +* Tue Nov 22 2011 Petr Pisar - 8.20-5 +- Fix repeated forward reference (bug #755969) + +* Wed Nov 16 2011 Petr Pisar - 8.20-4 +- Fix other look-behind regressions + +* Tue Nov 15 2011 Petr Pisar - 8.20-3 +- Fix look-behind regression in 8.20 + +* Tue Nov 15 2011 Dan Horák - 8.20-2 +- fix build on s390(x) - disable jit and use larger stack for tests + +* Fri Oct 21 2011 Petr Pisar - 8.20-1 +- 8.20 bump + +* Tue Oct 11 2011 Petr Pisar - 8.20-0.1.RC3 +- 8.20-RC3 bump + +* Fri Sep 23 2011 Petr Pisar - 8.20-0.1.RC2 +- 8.20-RC2 bump + +* Mon Sep 12 2011 Petr Pisar - 8.20-0.1.RC1 +- 8.20-RC1 bump with JIT + +* Tue Sep 06 2011 Petr Pisar - 8.13-4 +- Fix infinite matching PRUNE (bug #735720) + +* Mon Aug 22 2011 Petr Pisar - 8.13-3 +- Fix 
parsing named class in expression (bug #732368) + +* Thu Aug 18 2011 Petr Pisar - 8.13-2 +- Separate utilities from libraries +- Move pcre-config(1) manual to pcre-devel sub-package +- Remove explicit defattr from spec code +- Compile pcretest with readline support + +* Thu Aug 18 2011 Petr Pisar - 8.13-1 +- 8.13 bump: Bug-fix version, Unicode tables updated to 6.0.0, new pcregrep + option --buffer-size to adjust to long lines, new feature is passing of + *MARK information to callouts. +- Should fix crash back-tracking over unicode sequence (bug #691319) + +* Mon May 09 2011 Petr Pisar - 8.12-4 +- Fix caseless reference matching in UTF-8 mode when the upper/lower case + characters have different lengths (bug #702623) + +* Mon May 09 2011 Petr Pisar - 8.12-3 +- Fix typos in manual pages (bugs #675476, #675477) +- Clean spec file up + +* Tue Feb 08 2011 Fedora Release Engineering - 8.12-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild + +* Mon Jan 17 2011 Petr Pisar - 8.12-1 +- 8.12 bump +- Remove accepted pcre-8.11-Fix-typo-in-pcreprecompile-3.patch + +* Mon Dec 13 2010 Petr Pisar - 8.11-1 +- 8.11 bump +- See ChangeLog for changes. Namely changes have been made to the way + PCRE_PARTIAL_HARD affects the matching of $, \z, \Z, \b, and \B. 
+- Fix typo in pcreprecompile(3) manual +- Document why shared library is not under /usr + +* Mon Jul 12 2010 Petr Pisar - 8.10-1 +- 8.10 bump (bug #612635) +- Add LICENCE to static subpackage because COPYING refers to it +- Remove useless rpath by using new libtool (simple sed does not work anymore + because tests need to link against just-compiled library in %%check phase) + +* Thu Jul 08 2010 Petr Pisar - 7.8-4 +- Add COPYING to static subpackage +- Remove useless rpath + +* Sat Jul 25 2009 Fedora Release Engineering - 7.8-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild + +* Thu Feb 26 2009 Fedora Release Engineering - 7.8-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_11_Mass_Rebuild + +* Wed Oct 1 2008 Lubomir Rintel - 7.8-1 +- Update to 7.8, drop upstreamed patches +- Fix destination of documentation (#427763) +- Use buildroot macro consistently +- Separate the static library, as per current Guidelines +- Satisfy rpmlint + +* Fri Jul 4 2008 Tomas Hoger - 7.3-4 +- Apply Tavis Ormandy's patch for CVE-2008-2371. + +* Tue Feb 12 2008 Tomas Hoger - 7.3-3 +- Backport patch from upstream pcre 7.6 to address buffer overflow + caused by "a character class containing a very large number of + characters with codepoints greater than 255 (in UTF-8 mode)" + CVE-2008-0674, #431660 +- Try re-enabling make check again. 
+ +* Fri Nov 16 2007 Stepan Kasal - 7.3-2 +- Remove obsolete ``reqs'' +- add dist tag +- update BuildRoot + +* Mon Sep 17 2007 Than Ngo - 7.3-1 +- bz292501, update to 7.3 + +* Mon Jan 22 2007 Than Ngo - 7.0-1 +- 7.0 + +* Mon Nov 27 2006 Than Ngo - 6.7-1 +- update to 6.7 +- fix #217303, enable-unicode-properties +- sane stack limit + +* Wed Jul 12 2006 Jesse Keating - 6.6-1.1 +- rebuild + +* Tue May 09 2006 Than Ngo 6.6-1 +- update to 6.6 +- fix multilib problem + +* Fri Feb 10 2006 Jesse Keating - 6.3-1.2.1 +- bump again for double-long bug on ppc(64) + +* Tue Feb 07 2006 Jesse Keating - 6.3-1.2 +- rebuilt for new gcc4.1 snapshot and glibc changes + +* Fri Dec 09 2005 Jesse Keating +- rebuilt + +* Wed Aug 24 2005 Than Ngo 6.3-1 +- update to 6.3 + +* Fri Mar 4 2005 Joe Orton 5.0-4 +- rebuild + +* Fri Feb 11 2005 Joe Orton 5.0-3 +- don't print $libdir in 'pcre-config --libs' output + +* Thu Nov 18 2004 Joe Orton 5.0-2 +- include LICENCE, AUTHORS in docdir +- run make check +- move %%configure to %%build + +* Thu Nov 18 2004 Than Ngo 5.0-1 +- update to 5.0 +- change License: BSD +- fix header location #64248 + +* Tue Jun 15 2004 Elliot Lee +- rebuilt + +* Tue Mar 23 2004 Than Ngo 4.5-2 +- add the correct pcre license, #118781 + +* Fri Mar 12 2004 Than Ngo 4.5-1 +- update to 4.5 + +* Tue Mar 02 2004 Elliot Lee +- rebuilt + +* Fri Feb 13 2004 Elliot Lee +- rebuilt + +* Fri Sep 26 2003 Harald Hoyer 4.4-1 +- 4.4 + +* Wed Jun 04 2003 Elliot Lee +- rebuilt + +* Wed May 7 2003 Than Ngo 4.2-1 +- update to 4.2 + +* Wed Jan 22 2003 Tim Powers +- rebuilt + +* Tue Jan 21 2003 Than Ngo 3.9-9 +- build with utf8, bug #81504 + +* Fri Nov 22 2002 Elliot Lee 3.9-8 +- Really remove .la files + +* Fri Oct 11 2002 Than Ngo 3.9-7 +- remove .la + +* Thu Oct 10 2002 Than Ngo 3.9-7 +- Typo bug + +* Wed Oct 9 2002 Than Ngo 3.9-6 +- Added missing so symlink + +* Thu Sep 19 2002 Than Ngo 3.9-5.1 +- Fixed to build s390/s390x/x86_64 + +* Thu Jun 27 2002 Bernhard Rosenkraenzer 3.9-5 +- Fix #65009 + 
+* Fri Jun 21 2002 Tim Powers +- automated rebuild + +* Thu May 23 2002 Tim Powers +- automated rebuild + +* Mon Mar 4 2002 Bernhard Rosenkraenzer 3.9-2 +- rebuild + +* Fri Jan 11 2002 Bernhard Rosenkraenzer 3.9-1 +- Update to 3.9 + +* Wed Nov 14 2001 Bernhard Rosenkraenzer 3.7-1 +- Update to 3.7 + +* Thu May 17 2001 Bernhard Rosenkraenzer 3.4-2 +- Move libpcre to /lib, grep uses it these days (#41104) + +* Wed Apr 18 2001 Bernhard Rosenkraenzer +- Move this to a separate package, used to be in kdesupport, but it's + generally useful...