From cb7dd8910cd161e843ddc11d65f58b3c3f377041 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 01 2017 02:43:51 +0000 Subject: import pcre-8.32-17.el7 --- diff --git a/SOURCES/pcre-8.32-A-new-flag-is-set-when-property-checks-are-present-i.patch b/SOURCES/pcre-8.32-A-new-flag-is-set-when-property-checks-are-present-i.patch new file mode 100644 index 0000000..0760b64 --- /dev/null +++ b/SOURCES/pcre-8.32-A-new-flag-is-set-when-property-checks-are-present-i.patch @@ -0,0 +1,849 @@ +From d779878352fdce2ca955a5a3135d2c8f2b27ba13 Mon Sep 17 00:00:00 2001 +From: zherczeg +Date: Sun, 22 Dec 2013 16:27:35 +0000 +Subject: [PATCH] A new flag is set, when property checks are present in an + XCLASS. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported to 8.32: + +commit f928c7adccd8daa61e76c22130d79689ec41f21c +Author: zherczeg +Date: Sun Dec 22 16:27:35 2013 +0000 + + A new flag is set, when property checks are present in an XCLASS. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1414 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + pcre_compile.c | 52 +++++++++++++++-------- + pcre_exec.c | 14 +------ + pcre_internal.h | 5 ++- + pcre_jit_compile.c | 114 +++++++++++++++++++++++++++++++++++++------------- + pcre_printint.c | 15 ++++++- + pcre_study.c | 62 ++++++++++++++++++--------- + pcre_xclass.c | 5 +++ + testdata/saved16BE-1 | Bin 402 -> 402 bytes + testdata/saved16LE-1 | Bin 402 -> 402 bytes + testdata/saved32BE-1 | Bin 544 -> 552 bytes + testdata/saved32LE-1 | Bin 544 -> 552 bytes + testdata/testoutput17 | 4 +- + testdata/testoutput23 | 34 +++++++++++++-- + testdata/testoutput25 | 34 +++++++++++++-- + testdata/testoutput5 | 10 ++--- + testdata/testoutput7 | 4 +- + 16 files changed, 256 insertions(+), 97 deletions(-) + +diff --git a/pcre_compile.c b/pcre_compile.c +index 3c75218..962b4d3 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -3512,6 +3512,7 @@ add_to_class(pcre_uint8 
*classbits, pcre_uchar **uchardptr, int options, + compile_data *cd, pcre_uint32 start, pcre_uint32 end) + { + pcre_uint32 c; ++pcre_uint32 classbits_end = (end <= 0xff ? end : 0xff); + int n8 = 0; + + /* If caseless matching is required, scan the range and process alternate +@@ -3555,7 +3556,7 @@ if ((options & PCRE_CASELESS) != 0) + + /* Not UTF-mode, or no UCP */ + +- for (c = start; c <= end && c < 256; c++) ++ for (c = start; c <= classbits_end; c++) + { + SETBIT(classbits, cd->fcc[c]); + n8++; +@@ -3580,20 +3581,19 @@ in all cases. */ + + #endif /* COMPILE_PCRE[8|16] */ + +-/* If all characters are less than 256, use the bit map. Otherwise use extra +-data. */ ++/* Use the bitmap for characters < 256. Otherwise use extra data.*/ + +-if (end < 0x100) ++for (c = start; c <= classbits_end; c++) + { +- for (c = start; c <= end; c++) +- { +- n8++; +- SETBIT(classbits, c); +- } ++ /* Regardless of start, c will always be <= 255. */ ++ SETBIT(classbits, c); ++ n8++; + } + +-else +- { ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++if (start <= 0xff) start = 0xff + 1; ++ ++if (end >= start) { + pcre_uchar *uchardata = *uchardptr; + + #ifdef SUPPORT_UTF +@@ -3635,6 +3635,7 @@ else + + *uchardptr = uchardata; /* Updata extra data pointer */ + } ++#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ + + return n8; /* Number of 8-bit characters */ + } +@@ -3856,6 +3857,9 @@ for (;; ptr++) + BOOL reset_bracount; + int class_has_8bitchar; + int class_one_char; ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ BOOL xclass_has_prop; ++#endif + int newoptions; + int recno; + int refsign; +@@ -4161,13 +4165,26 @@ for (;; ptr++) + + should_flip_negation = FALSE; + ++ /* Extended class (xclass) will be used when characters > 255 ++ might match. 
*/ ++ ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ xclass = FALSE; ++ class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ ++ class_uchardata_base = class_uchardata; /* Save the start */ ++#endif ++ + /* For optimization purposes, we track some properties of the class: + class_has_8bitchar will be non-zero if the class contains at least one < + 256 character; class_one_char will be 1 if the class contains just one +- character. */ ++ character; xclass_has_prop will be TRUE if unicode property checks ++ are present in the class. */ + + class_has_8bitchar = 0; + class_one_char = 0; ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ xclass_has_prop = FALSE; ++#endif + + /* Initialize the 32-char bit map to all zeros. We build the map in a + temporary bit of memory, in case the class contains fewer than two +@@ -4176,12 +4193,6 @@ for (;; ptr++) + + memset(classbits, 0, 32 * sizeof(pcre_uint8)); + +-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 +- xclass = FALSE; +- class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ +- class_uchardata_base = class_uchardata; /* Save the start */ +-#endif +- + /* Process characters until ] is reached. By writing this as a "do" it + means that an initial ] is taken as a data character. At the start of the + loop, c contains the first byte of the character. */ +@@ -4305,6 +4316,7 @@ for (;; ptr++) + *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; + *class_uchardata++ = ptype; + *class_uchardata++ = 0; ++ xclass_has_prop = TRUE; + ptr = tempptr + 1; + continue; + +@@ -4490,6 +4502,7 @@ for (;; ptr++) + XCL_PROP : XCL_NOTPROP; + *class_uchardata++ = ptype; + *class_uchardata++ = pdata; ++ xclass_has_prop = TRUE; + class_has_8bitchar--; /* Undo! */ + continue; + } +@@ -4767,6 +4780,7 @@ for (;; ptr++) + *code++ = OP_XCLASS; + code += LINK_SIZE; + *code = negate_class? 
XCL_NOT:0; ++ if (xclass_has_prop) *code |= XCL_HASPROP; + + /* If the map is required, move up the extra data to make room for it; + otherwise just move the code pointer to the end of the extra data. */ +@@ -4776,6 +4790,8 @@ for (;; ptr++) + *code++ |= XCL_MAP; + memmove(code + (32 / sizeof(pcre_uchar)), code, + IN_UCHARS(class_uchardata - code)); ++ if (negate_class && !xclass_has_prop) ++ for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; + memcpy(code, classbits, 32); + code = class_uchardata + (32 / sizeof(pcre_uchar)); + } +diff --git a/pcre_exec.c b/pcre_exec.c +index 74a2b49..48d9199 100644 +--- a/pcre_exec.c ++++ b/pcre_exec.c +@@ -6732,18 +6732,8 @@ for(;;) + #ifndef COMPILE_PCRE8 + if (c > 255) c = 255; + #endif +- if ((start_bits[c/8] & (1 << (c&7))) == 0) +- { +- start_match++; +-#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +- /* In non 8-bit mode, the iteration will stop for +- characters > 255 at the beginning or not stop at all. */ +- if (utf) +- ACROSSCHAR(start_match < end_subject, *start_match, +- start_match++); +-#endif +- } +- else break; ++ if ((start_bits[c/8] & (1 << (c&7))) != 0) break; ++ start_match++; + } + } + } /* Starting optimizations */ +diff --git a/pcre_internal.h b/pcre_internal.h +index 389848f..10bd911 100644 +--- a/pcre_internal.h ++++ b/pcre_internal.h +@@ -1849,8 +1849,9 @@ table. */ + /* Flag bits and data types for the extended class (OP_XCLASS) for classes that + contain characters with values greater than 255. */ + +-#define XCL_NOT 0x01 /* Flag: this is a negative class */ +-#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ ++#define XCL_NOT 0x01 /* Flag: this is a negative class */ ++#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ ++#define XCL_HASPROP 0x04 /* Flag: property checks are present. 
*/ + + #define XCL_END 0 /* Marks end of individual items */ + #define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ +diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c +index 5f74833..e425b91 100644 +--- a/pcre_jit_compile.c ++++ b/pcre_jit_compile.c +@@ -2877,7 +2877,7 @@ if (firstline) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + } + +-static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline) ++static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline) + { + DEFINE_COMPILER; + struct sljit_label *start; +@@ -2908,7 +2908,7 @@ JUMPHERE(jump); + #endif + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); +-OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits); ++OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + found = JUMP(SLJIT_C_NOT_ZERO); +@@ -3194,8 +3194,40 @@ switch(ranges[0]) + case 2: + if (readch) + read_char(common); +- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); +- add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); ++ if (ranges[2] + 1 != ranges[3]) ++ { ++ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); ++ add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); ++ } ++ else ++ add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? 
SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); ++ return TRUE; ++ ++ case 3: ++ if (readch) ++ read_char(common); ++ if (ranges[1] != 0) ++ { ++ add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); ++ if (ranges[2] + 1 != ranges[3]) ++ { ++ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); ++ add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); ++ } ++ else ++ add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); ++ } ++ else ++ { ++ add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2])); ++ if (ranges[3] + 1 != ranges[4]) ++ { ++ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3]); ++ add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3])); ++ } ++ else ++ add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3])); ++ } + return TRUE; + + case 4: +@@ -3264,15 +3296,15 @@ if (bit != 0) + ranges[0] = length; + } + +-static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks) ++static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) + { + int ranges[2 + MAX_RANGE_SIZE]; + pcre_uint8 bit, cbit, all; + int i, byte, length = 0; + + bit = bits[0] & 0x1; +-ranges[1] = bit; +-/* Can be 0 or 255. */ ++ranges[1] = !invert ? bit : (bit ^ 0x1); ++/* All bits will be zero or one (since bit is zero or one). */ + all = -bit; + + for (i = 0; i < 256; ) +@@ -3693,7 +3725,7 @@ static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, + { + DEFINE_COMPILER; + jump_list *found = NULL; +-jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks; ++jump_list **list = (cc[0] & XCL_NOT) == 0 ? 
&found : backtracks; + pcre_int32 c, charoffset; + const pcre_uint32 *other_cases; + struct sljit_jump *jump = NULL; +@@ -3712,36 +3744,62 @@ pcre_int32 typeoffset; + detect_partial_match(common, backtracks); + read_char(common); + +-if ((*cc++ & XCL_MAP) != 0) ++cc++; ++if ((cc[-1] & XCL_HASPROP) == 0) + { +- OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); +-#ifndef COMPILE_PCRE8 +- jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); +-#elif defined SUPPORT_UTF +- if (common->utf) +- jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); ++ if ((cc[-1] & XCL_MAP) != 0) ++ { ++ OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); ++#ifdef SUPPORT_UCP ++ charsaved = TRUE; + #endif ++ if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks)) ++ { ++ jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); ++ ++ OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); ++ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); ++ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); ++ OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); ++ OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); ++ add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO)); ++ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); ++ ++ JUMPHERE(jump); ++ } ++ else ++ add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff)); + +- if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list)) ++ OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); ++ cc += 32 / sizeof(pcre_uchar); ++ } ++ else ++ add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? 
backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff)); ++ } ++else if ((cc[-1] & XCL_MAP) != 0) ++ { ++ OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); ++#ifdef SUPPORT_UCP ++ charsaved = TRUE; ++#endif ++ if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list)) + { ++#ifdef COMPILE_PCRE8 ++ SLJIT_ASSERT(common->utf); ++#endif ++ jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); ++ + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO)); +- } + +-#ifndef COMPILE_PCRE8 +- JUMPHERE(jump); +-#elif defined SUPPORT_UTF +- if (common->utf) + JUMPHERE(jump); +-#endif ++ } ++ + OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); +-#ifdef SUPPORT_UCP +- charsaved = TRUE; +-#endif + cc += 32 / sizeof(pcre_uchar); + } + +@@ -4278,7 +4336,7 @@ switch(type) + #ifdef SUPPORT_UCP + case OP_NOTPROP: + case OP_PROP: +- propdata[0] = 0; ++ propdata[0] = XCL_HASPROP; + propdata[1] = type == OP_NOTPROP ? 
XCL_NOTPROP : XCL_PROP; + propdata[2] = cc[0]; + propdata[3] = cc[1]; +@@ -4636,7 +4694,7 @@ switch(type) + case OP_NCLASS: + detect_partial_match(common, backtracks); + read_char(common); +- if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks)) ++ if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks)) + return cc + 32 / sizeof(pcre_uchar); + + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 +@@ -8033,7 +8091,7 @@ if ((re->options & PCRE_ANCHORED) == 0) + else if ((re->flags & PCRE_STARTLINE) != 0) + fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); + else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) +- fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); ++ fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); + } + } + if (common->req_char_ptr != 0) +diff --git a/pcre_printint.c b/pcre_printint.c +index c6dcbe6..a71b3b6 100644 +--- a/pcre_printint.c ++++ b/pcre_printint.c +@@ -619,7 +619,9 @@ for(;;) + int i; + unsigned int min, max; + BOOL printmap; ++ BOOL invertmap = FALSE; + pcre_uint8 *map; ++ pcre_uint8 inverted_map[32]; + + fprintf(f, " ["); + +@@ -628,7 +630,12 @@ for(;;) + extra = GET(code, 1); + ccode = code + LINK_SIZE + 1; + printmap = (*ccode & XCL_MAP) != 0; +- if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^"); ++ if ((*ccode & XCL_NOT) != 0) ++ { ++ invertmap = (*ccode & XCL_HASPROP) == 0; ++ fprintf(f, "^"); ++ } ++ ccode++; + } + else + { +@@ -641,6 +648,12 @@ for(;;) + if (printmap) + { + map = (pcre_uint8 *)ccode; ++ if (invertmap) ++ { ++ for (i = 0; i < 32; i++) inverted_map[i] = ~map[i]; ++ map = inverted_map; ++ } ++ + for (i = 0; i < 256; i++) + { + if ((map[i/8] & (1 << (i&7))) != 0) +diff --git a/pcre_study.c b/pcre_study.c +index 12d2a66..2d11d87 100644 +--- a/pcre_study.c ++++ b/pcre_study.c +@@ -835,9 +835,6 @@ do + 
case OP_SOM: + case OP_THEN: + case OP_THEN_ARG: +-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 +- case OP_XCLASS: +-#endif + return SSB_FAIL; + + /* We can ignore word boundary tests. */ +@@ -1221,6 +1218,16 @@ do + with a value >= 0xc4 is a potentially valid starter because it starts a + character with a value > 255. */ + ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ case OP_XCLASS: ++ if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0) ++ return SSB_FAIL; ++ /* All bits are set. */ ++ if ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0 && (tcode[1 + LINK_SIZE] & XCL_NOT) != 0) ++ return SSB_FAIL; ++#endif ++ /* Fall through */ ++ + case OP_NCLASS: + #if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (utf) +@@ -1237,8 +1244,21 @@ do + case OP_CLASS: + { + pcre_uint8 *map; +- tcode++; +- map = (pcre_uint8 *)tcode; ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ map = NULL; ++ if (*tcode == OP_XCLASS) ++ { ++ if ((tcode[1 + LINK_SIZE] & XCL_MAP) != 0) ++ map = (pcre_uint8 *)(tcode + 1 + LINK_SIZE + 1); ++ tcode += GET(tcode, 1); ++ } ++ else ++#endif ++ { ++ tcode++; ++ map = (pcre_uint8 *)tcode; ++ tcode += 32 / sizeof(pcre_uchar); ++ } + + /* In UTF-8 mode, the bits in a bit map correspond to character + values, not to byte values. However, the bit map we are constructing is +@@ -1246,31 +1266,35 @@ do + value is > 127. In fact, there are only two possible starting bytes for + characters in the range 128 - 255. 
*/ + +-#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +- if (utf) ++#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 ++ if (map != NULL) ++#endif + { +- for (c = 0; c < 16; c++) start_bits[c] |= map[c]; +- for (c = 128; c < 256; c++) ++#if defined SUPPORT_UTF && defined COMPILE_PCRE8 ++ if (utf) + { +- if ((map[c/8] && (1 << (c&7))) != 0) ++ for (c = 0; c < 16; c++) start_bits[c] |= map[c]; ++ for (c = 128; c < 256; c++) + { +- int d = (c >> 6) | 0xc0; /* Set bit for this starter */ +- start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ +- c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ ++ if ((map[c/8] && (1 << (c&7))) != 0) ++ { ++ int d = (c >> 6) | 0xc0; /* Set bit for this starter */ ++ start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ ++ c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ ++ } + } + } +- } +- else ++ else + #endif +- { +- /* In non-UTF-8 mode, the two bit maps are completely compatible. */ +- for (c = 0; c < 32; c++) start_bits[c] |= map[c]; ++ { ++ /* In non-UTF-8 mode, the two bit maps are completely compatible. */ ++ for (c = 0; c < 32; c++) start_bits[c] |= map[c]; ++ } + } + + /* Advance past the bit map, and act on what follows. For a zero + minimum repeat, continue; otherwise stop processing. */ + +- tcode += 32 / sizeof(pcre_uchar); + switch (*tcode) + { + case OP_CRSTAR: +diff --git a/pcre_xclass.c b/pcre_xclass.c +index dd7008a..bbde1c3 100644 +--- a/pcre_xclass.c ++++ b/pcre_xclass.c +@@ -81,6 +81,11 @@ additional data. 
*/ + + if (c < 256) + { ++ if ((*data & XCL_HASPROP) == 0) ++ { ++ if ((*data & XCL_MAP) == 0) return negated; ++ return (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0; ++ } + if ((*data & XCL_MAP) != 0 && + (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0) + return !negated; /* char found */ +diff --git a/testdata/saved16BE-1 b/testdata/saved16BE-1 +index 297f2f2f06300780b83aa64af93e56e7174bc745..4e9a5fffcd14701d14611fa7e47116175c9e152d 100644 +GIT binary patch +delta 97 +zcmbQlJc)V2kFftx@b~}m{~(MG7#KMi7?^G_-2czOz);Gd&XB_(!NC0g-~a#L(Ul_c +JC;KtF004^`M^*p; + +delta 74 +zcmbQlJc)V24{HV}U}CUf0AUQk$icwCbc5mkf1pe$gE~VFg9HOJ10w?i$K*mrnaTc) +F&Hz>236=l= + +diff --git a/testdata/saved16LE-1 b/testdata/saved16LE-1 +index deb44919bbbc263c227c51454f2dd25d582c7a6b..a3dfe05565b5e3f8e8fffd4791531165ca3fa95c 100644 +GIT binary patch +delta 97 +zcmbQlJc)V2kFftx@b~}m{~(MG7#SEim>3vt{J+n@P|Bdrki#Isz|8Rf-~a#L(Ul_c +JC;KtF004`CM^*p; + +delta 74 +zcmbQlJc)V24{HV}U}CUf0AX~%$iTqC#K3Ul|9u9AQU-N~90mynW(Ee3@Z>^9>B)YK +FE&x_M36%f< + +diff --git a/testdata/saved32BE-1 b/testdata/saved32BE-1 +index 42af7b42b026869ac20fae8a78430470c298a8e3..3da404b8cd5982b17eb3e1e84baf8f83d3b4971e 100644 +GIT binary patch +delta 120 +zcmZ3$vVui`fr0T00|SFjfOC*5h&@q2l$DQxoq=gGBa`GrYf+Z}K(H}{g;4>-0D-^% +dkApFUhae|cFzP7$|M&m@cZ3)QYw{7s7yw1mPJ93W + +delta 84 +zcmZ3%vVcW^fr0T20|SFjfOC*5h&@q2l$D2poq=hhks1q7f?;C}3nM>>0Rl`679c!X +ckV#~6GovOe10w?i2aqQ=S%Hat@;SyR05;DJ9{>OV + +diff --git a/testdata/saved32LE-1 b/testdata/saved32LE-1 +index a4044fd17483cc002bb94b3ba83e7cb347ec3a49..6ba74dafd9606361f94e89b2c0cc16892108f6e7 100644 +GIT binary patch +delta 109 +zcmZ3$vVw(|fr0T00|SGOYmjrmmx;XMtb7dY3`~<5nZ!3*C@?Dghl0QVkN*c@bTGMs +VQAgqbzyJTgqst)iCm&&q0RVZWNt6Hp + +delta 71 +zcmZ3%vVet`fr0T20|SGOYmjrmn~A*QtUL_t3``ph^%?nr!eGF}V8H;y6ZJ(Ww=in3 +VGB7eQa7<=olAh?mGWh~y1OUEm3|0UD + +diff --git a/testdata/testoutput17 b/testdata/testoutput17 +index ef82dc9..071539a 100644 +--- a/testdata/testoutput17 ++++ 
b/testdata/testoutput17 +@@ -290,7 +290,7 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x09 \x20 \xa0 \xff + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + 0: \x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\xa0\x{2000} +@@ -346,7 +346,7 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d +diff --git a/testdata/testoutput23 b/testdata/testoutput23 +index 7b3634c..f7b6f31 100644 +--- a/testdata/testoutput23 ++++ b/testdata/testoutput23 +@@ -14,7 +14,7 @@ Failed: character value in \x{...} sequence is too large at offset 8 + /[\H]/BZSI + ------------------------------------------------------------------ + Bra +- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}] ++ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}] + Ket + End + ------------------------------------------------------------------ +@@ -23,12 +23,25 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b ++ \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a ++ \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 ++ : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ ++ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 ++ \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f ++ \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e ++ \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae ++ \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd ++ \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc ++ \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb ++ \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea ++ \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 ++ \xfa \xfb \xfc \xfd \xfe \xff + + /[\V]/BZSI + ------------------------------------------------------------------ + Bra +- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}] ++ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}] + Ket + End + ------------------------------------------------------------------ +@@ -37,6 +50,19 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e ++ \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d ++ \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ++ ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c ++ d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 ++ \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 ++ \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 ++ \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 ++ \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf ++ \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce ++ \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd ++ \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec ++ \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb ++ \xfc \xfd \xfe \xff + + /-- End of testinput23 --/ +diff --git a/testdata/testoutput25 b/testdata/testoutput25 +index 2a4066d..16f375b 100644 +--- a/testdata/testoutput25 ++++ b/testdata/testoutput25 +@@ -51,7 +51,7 @@ Need char = \x{800000} + /[\H]/BZSI + ------------------------------------------------------------------ + Bra +- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}] ++ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}] + Ket + End + ------------------------------------------------------------------ +@@ -60,12 +60,25 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b ++ \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a ++ \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 ++ : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ ++ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 ++ \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f ++ \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e ++ \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae ++ \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd ++ \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc ++ \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb ++ \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea ++ \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 ++ \xfa \xfb \xfc \xfd \xfe \xff + + /[\V]/BZSI + ------------------------------------------------------------------ + Bra +- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffffffff}] ++ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}] + Ket + End + ------------------------------------------------------------------ +@@ -74,6 +87,19 @@ No options + No first char + No need char + Subject length lower bound = 1 +-No set of starting bytes ++Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e ++ \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d ++ \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ++ ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c ++ d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 ++ \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 ++ \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 ++ \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 ++ \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf ++ \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce ++ \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd ++ \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec ++ \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb ++ \xfc \xfd \xfe \xff + + /-- End of testinput25 --/ +diff --git a/testdata/testoutput5 b/testdata/testoutput5 +index 0e84054..f8578c0 100644 +--- a/testdata/testoutput5 ++++ b/testdata/testoutput5 +@@ -248,7 +248,7 @@ No match + /[z-\x{100}]/8DZ + ------------------------------------------------------------------ + Bra +- [z-\x{100}] ++ [z-\xff\x{100}] + Ket + End + ------------------------------------------------------------------ +@@ -786,7 +786,7 @@ No match + /[\H]/8BZ + ------------------------------------------------------------------ + Bra +- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] ++ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] + Ket + End + ------------------------------------------------------------------ +@@ -794,7 +794,7 @@ No match + /[\V]/8BZ + ------------------------------------------------------------------ + Bra +- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{10ffff}] ++ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}] + Ket + End + 
------------------------------------------------------------------ +@@ -1594,7 +1594,7 @@ Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7 + /[\H\x{d7ff}]+/8BZ + ------------------------------------------------------------------ + Bra +- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]+ ++ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]+ + Ket + End + ------------------------------------------------------------------ +@@ -1634,7 +1634,7 @@ Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7 + /[\V\x{d7ff}]+/8BZ + ------------------------------------------------------------------ + Bra +- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]+ ++ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]+ + Ket + End + ------------------------------------------------------------------ +diff --git a/testdata/testoutput7 b/testdata/testoutput7 +index e3f607c..b02f923 100644 +--- a/testdata/testoutput7 ++++ b/testdata/testoutput7 +@@ -124,7 +124,7 @@ No match + /[z-\x{100}]/8iDZ + ------------------------------------------------------------------ + Bra +- [Z\x{39c}\x{3bc}\x{1e9e}\x{178}z-\x{101}] ++ [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}] + Ket + End + ------------------------------------------------------------------ +@@ -162,7 +162,7 @@ No match + /[z-\x{100}]/8DZi + ------------------------------------------------------------------ + Bra +- [Z\x{39c}\x{3bc}\x{1e9e}\x{178}z-\x{101}] ++ [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}] + Ket + End + ------------------------------------------------------------------ +-- +2.7.4 + diff --git a/SOURCES/pcre-8.32-Add-support-for-PT_PXGRAPH-PT_PXPRINT-and-PT_PXPUNCT.patch 
b/SOURCES/pcre-8.32-Add-support-for-PT_PXGRAPH-PT_PXPRINT-and-PT_PXPUNCT.patch new file mode 100644 index 0000000..2b150c7 --- /dev/null +++ b/SOURCES/pcre-8.32-Add-support-for-PT_PXGRAPH-PT_PXPRINT-and-PT_PXPUNCT.patch @@ -0,0 +1,117 @@ +From 5c8999dd06c88ec49157f59dab561ce7d5a7c911 Mon Sep 17 00:00:00 2001 +From: zherczeg +Date: Fri, 15 Nov 2013 12:04:55 +0000 +Subject: [PATCH 2/2] Add support for PT_PXGRAPH, PT_PXPRINT, and PT_PXPUNCT in + JIT. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Petr Pisar: Ported to 8.32: + +commit 9885cc24e4771dbe6daadd2107e4552bb92aafa2 +Author: zherczeg +Date: Fri Nov 15 12:04:55 2013 +0000 + + Add support for PT_PXGRAPH, PT_PXPRINT, and PT_PXPUNCT in JIT. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1402 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + pcre_jit_compile.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 65 insertions(+) + +diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c +index c61c688..875947a 100644 +--- a/pcre_jit_compile.c ++++ b/pcre_jit_compile.c +@@ -3699,6 +3699,7 @@ const pcre_uint32 *other_cases; + struct sljit_jump *jump = NULL; + pcre_uchar *ccbegin; + int compares, invertcmp, numberofcmps; ++ + #ifdef SUPPORT_UCP + BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; + BOOL charsaved = FALSE; +@@ -3798,6 +3799,9 @@ while (*cc != XCL_END) + case PT_SPACE: + case PT_PXSPACE: + case PT_WORD: ++ case PT_PXGRAPH: ++ case PT_PXPRINT: ++ case PT_PXPUNCT: + needstype = TRUE; + needschar = TRUE; + break; +@@ -4068,6 +4072,67 @@ while (*cc != XCL_END) + } + jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + break; ++ ++ case PT_PXGRAPH: ++ /* C and Z groups are the farthest two groups. 
*/ ++ SET_TYPE_OFFSET(ucp_Ll); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); ++ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); ++ ++ jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); ++ ++ /* In case of ucp_Cf, we overwrite the result. */ ++ SET_CHAR_OFFSET(0x2066); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); ++ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); ++ ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); ++ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); ++ ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); ++ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); ++ ++ JUMPHERE(jump); ++ jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); ++ break; ++ ++ case PT_PXPRINT: ++ /* C and Z groups are the farthest two groups. */ ++ SET_TYPE_OFFSET(ucp_Ll); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); ++ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); ++ ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); ++ OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL); ++ ++ jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); ++ ++ /* In case of ucp_Cf, we overwrite the result. 
*/ ++ SET_CHAR_OFFSET(0x2066); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); ++ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); ++ ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); ++ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); ++ ++ JUMPHERE(jump); ++ jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); ++ break; ++ ++ case PT_PXPUNCT: ++ SET_TYPE_OFFSET(ucp_Sc); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); ++ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); ++ ++ SET_CHAR_OFFSET(0); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff); ++ OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); ++ ++ SET_TYPE_OFFSET(ucp_Pc); ++ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); ++ OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); ++ jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); ++ break; + } + cc += 2; + } +-- +2.7.4 + diff --git a/SOURCES/pcre-8.32-Update-POSIX-class-handling-in-UCP-mode.patch b/SOURCES/pcre-8.32-Update-POSIX-class-handling-in-UCP-mode.patch new file mode 100644 index 0000000..a17cc0f --- /dev/null +++ b/SOURCES/pcre-8.32-Update-POSIX-class-handling-in-UCP-mode.patch @@ -0,0 +1,1122 @@ +From e74dcd1eec9227fe23c06de2ff109e48695fd879 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sat, 2 Nov 2013 18:29:05 +0000 +Subject: [PATCH 1/2] Update POSIX class handling in UCP mode. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Petr Pisar: Ported to 8.32: + +commit fa3832825e3fe0d49f93658882775cdd6c26129e +Author: ph10 +Date: Sat Nov 2 18:29:05 2013 +0000 + + Update POSIX class handling in UCP mode. 
+ + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1387 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +It also adjusts some test 7 outputs because 8.32 does not contain +auto-possessification improvement from + +commit 5f42224005b7d9a503903e3342ec7ada75590b07 +Author: ph10 +Date: Tue Oct 1 16:54:40 2013 +0000 + + Refactored auto-possessification code. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1363 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + doc/pcrepattern.3 | 37 +++++-- + pcre_compile.c | 75 +++++++++++--- + pcre_internal.h | 16 ++- + pcre_printint.c | 59 ++++++++--- + pcre_xclass.c | 63 ++++++++++-- + testdata/testinput6 | 146 ++++++++++++++++++++++++++ + testdata/testinput7 | 10 ++ + testdata/testoutput6 | 286 ++++++++++++++++++++++++++++++++++++++++++++++++++- + testdata/testoutput7 | 117 ++++++++++++++++++++- + 9 files changed, 752 insertions(+), 57 deletions(-) + +diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3 +index c9c7b45..f638846 100644 +--- a/doc/pcrepattern.3 ++++ b/doc/pcrepattern.3 +@@ -861,8 +861,9 @@ the "mark" property always have the "extend" grapheme breaking property. + .sp + As well as the standard Unicode properties described above, PCRE supports four + more that make it possible to convert traditional escape sequences such as \ew +-and \es and POSIX character classes to use Unicode properties. PCRE uses these +-non-standard, non-Perl properties internally when PCRE_UCP is set. They are: ++and \es to use Unicode properties. PCRE uses these non-standard, non-Perl ++properties internally when PCRE_UCP is set. However, they may also be used ++explicitly. These properties are: + .sp + Xan Any alphanumeric character + Xps Any POSIX space character +@@ -873,6 +874,7 @@ Xan matches characters that have either the L (letter) or the N (number) + property. Xps matches the characters tab, linefeed, vertical tab, form feed, or + carriage return, and any other character that has the Z (separator) property. 
+ Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the ++:qa + same characters as Xan, plus underscore. + . + . +@@ -1258,8 +1260,8 @@ supported, and an error is given if they are encountered. + By default, in UTF modes, characters with values greater than 128 do not match + any of the POSIX character classes. However, if the PCRE_UCP option is passed + to \fBpcre_compile()\fP, some of the classes are changed so that Unicode +-character properties are used. This is achieved by replacing the POSIX classes +-by other sequences, as follows: ++character properties are used. This is achieved by replacing certain POSIX ++classes by other sequences, as follows: + .sp + [:alnum:] becomes \ep{Xan} + [:alpha:] becomes \ep{L} +@@ -1270,9 +1272,30 @@ by other sequences, as follows: + [:upper:] becomes \ep{Lu} + [:word:] becomes \ep{Xwd} + .sp +-Negated versions, such as [:^alpha:] use \eP instead of \ep. The other POSIX +-classes are unchanged, and match only characters with code points less than +-128. ++Negated versions, such as [:^alpha:] use \eP instead of \ep. Three other POSIX ++classes are handled specially in UCP mode: ++.TP 10 ++[:graph:] ++This matches characters that have glyphs that mark the page when printed. In ++Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf ++properties, except for: ++.sp ++ U+061C Arabic Letter Mark ++ U+180E Mongolian Vowel Separator ++ U+2066 - U+2069 Various "isolate"s ++.sp ++.TP 10 ++[:print:] ++This matches the same characters as [:graph:] plus space characters that are ++not controls, that is, characters with the Zs property. ++.TP 10 ++[:punct:] ++This matches all characters that have the Unicode P (punctuation) property, ++plus those characters whose code points are less than 128 that have the S ++(Symbol) property. ++.P ++The other POSIX classes are unchanged, and match only characters with code ++points less than 128. + . + . 
+ .SH "VERTICAL BAR" +diff --git a/pcre_compile.c b/pcre_compile.c +index 746dc70..3c75218 100644 +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -257,7 +257,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem); + now all in a single string, to reduce the number of relocations when a shared + library is dynamically loaded. The list of lengths is terminated by a zero + length entry. The first three must be alpha, lower, upper, as this is assumed +-for handling case independence. */ ++for handling case independence. The indices for graph, print, and punct are ++needed, so identify them. */ + + static const char posix_names[] = + STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 +@@ -268,6 +269,11 @@ static const char posix_names[] = + static const pcre_uint8 posix_name_lengths[] = { + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; + ++#define PC_GRAPH 8 ++#define PC_PRINT 9 ++#define PC_PUNCT 10 ++ ++ + /* Table of class bit maps for each POSIX class. Each class is formed from a + base map, with an optional addition or removal of another map. Then, for some + classes, there is some additional tweaking: for [:blank:] the vertical space +@@ -295,9 +301,8 @@ static const int posix_class_maps[] = { + cbit_xdigit,-1, 0 /* xdigit */ + }; + +-/* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class +-substitutes must be in the order of the names, defined above, and there are +-both positive and negative cases. NULL means no substitute. */ ++/* Table of substitutes for \d etc when PCRE_UCP is set. They are replaced by ++Unicode property escapes. 
*/ + + #ifdef SUPPORT_UCP + static const pcre_uchar string_PNd[] = { +@@ -322,12 +327,18 @@ static const pcre_uchar string_pXwd[] = { + static const pcre_uchar *substitutes[] = { + string_PNd, /* \D */ + string_pNd, /* \d */ +- string_PXsp, /* \S */ /* NOTE: Xsp is Perl space */ +- string_pXsp, /* \s */ ++ string_PXsp, /* \S */ /* Xsp is Perl space, but from 8.34, Perl */ ++ string_pXsp, /* \s */ /* space and POSIX space are the same. */ + string_PXwd, /* \W */ + string_pXwd /* \w */ + }; + ++/* The POSIX class substitutes must be in the order of the POSIX class names, ++defined above, and there are both positive and negative cases. NULL means no ++general substitute of a Unicode property escape (\p or \P). However, for some ++POSIX classes (e.g. graph, print, punct) a special property code is compiled ++directly. */ ++ + static const pcre_uchar string_pL[] = { + CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, + CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +@@ -375,8 +386,8 @@ static const pcre_uchar *posix_substitutes[] = { + NULL, /* graph */ + NULL, /* print */ + NULL, /* punct */ +- string_pXps, /* space */ /* NOTE: Xps is POSIX space */ +- string_pXwd, /* word */ ++ string_pXps, /* space */ /* Xps is POSIX space, but from 8.34 */ ++ string_pXwd, /* word */ /* Perl and POSIX space are the same */ + NULL, /* xdigit */ + /* Negated cases */ + string_PL, /* ^alpha */ +@@ -390,8 +401,8 @@ static const pcre_uchar *posix_substitutes[] = { + NULL, /* ^graph */ + NULL, /* ^print */ + NULL, /* ^punct */ +- string_PXps, /* ^space */ /* NOTE: Xps is POSIX space */ +- string_PXwd, /* ^word */ ++ string_PXps, /* ^space */ /* Xps is POSIX space, but from 8.34 */ ++ string_PXwd, /* ^word */ /* Perl and POSIX space are the same */ + NULL /* ^xdigit */ + }; + #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *)) +@@ -4258,24 +4269,58 @@ for (;; ptr++) + posix_class = 0; + + /* When PCRE_UCP is set, some of the POSIX classes are converted to +- different escape 
sequences that use Unicode properties. */ ++ different escape sequences that use Unicode properties \p or \P. Others ++ that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP ++ directly. */ + + #ifdef SUPPORT_UCP + if ((options & PCRE_UCP) != 0) + { ++ unsigned int ptype = 0; + int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0); ++ ++ /* The posix_substitutes table specifies which POSIX classes can be ++ converted to \p or \P items. */ ++ + if (posix_substitutes[pc] != NULL) + { + nestptr = tempptr + 1; + ptr = posix_substitutes[pc] - 1; + continue; + } ++ ++ /* There are three other classes that generate special property calls ++ that are recognized only in an XCLASS. */ ++ ++ else switch(posix_class) ++ { ++ case PC_GRAPH: ++ ptype = PT_PXGRAPH; ++ /* Fall through */ ++ case PC_PRINT: ++ if (ptype == 0) ptype = PT_PXPRINT; ++ /* Fall through */ ++ case PC_PUNCT: ++ if (ptype == 0) ptype = PT_PXPUNCT; ++ *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; ++ *class_uchardata++ = ptype; ++ *class_uchardata++ = 0; ++ ptr = tempptr + 1; ++ continue; ++ ++ /* For all other POSIX classes, no special action is taken in UCP ++ mode. Fall through to the non_UCP case. */ ++ ++ default: ++ break; ++ } + } + #endif +- /* In the non-UCP case, we build the bit map for the POSIX class in a +- chunk of local store because we may be adding and subtracting from it, +- and we don't want to subtract bits that may be in the main map already. +- At the end we or the result into the bit map that is being built. */ ++ /* In the non-UCP case, or when UCP makes no difference, we build the ++ bit map for the POSIX class in a chunk of local store because we may be ++ adding and subtracting from it, and we don't want to subtract bits that ++ may be in the main map already. At the end we or the result into the ++ bit map that is being built. 
*/ + + posix_class *= 3; + +diff --git a/pcre_internal.h b/pcre_internal.h +index 157de08..389848f 100644 +--- a/pcre_internal.h ++++ b/pcre_internal.h +@@ -1836,6 +1836,16 @@ only. */ + #define PT_WORD 8 /* Word - L plus N plus underscore */ + #define PT_CLIST 9 /* Pseudo-property: match character list */ + ++/* The following special properties are used only in XCLASS items, when POSIX ++classes are specified and PCRE_UCP is set - in other words, for Unicode ++handling of these classes. They are not available via the \p or \P escapes like ++those in the above list, and so they do not take part in the autopossessifying ++table. */ ++ ++#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */ ++#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */ ++#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */ ++ + /* Flag bits and data types for the extended class (OP_XCLASS) for classes that + contain characters with values greater than 255. */ + +@@ -1849,9 +1859,9 @@ contain characters with values greater than 255. */ + #define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ + + /* These are escaped items that aren't just an encoding of a particular data +-value such as \n. They must have non-zero values, as check_escape() returns +-0 for a data character. Also, they must appear in the same order as in the opcode +-definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it ++value such as \n. They must have non-zero values, as check_escape() returns 0 ++for a data character. Also, they must appear in the same order as in the ++opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it + corresponds to "." in DOTALL mode rather than an escape sequence. It is also + used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In + non-DOTALL mode, "." behaves like \N. 
+diff --git a/pcre_printint.c b/pcre_printint.c +index 10b5754..c6dcbe6 100644 +--- a/pcre_printint.c ++++ b/pcre_printint.c +@@ -608,9 +608,9 @@ for(;;) + print_prop(f, code, " ", ""); + break; + +- /* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no +- harm in having this code always here, and it makes it less messy without +- all those #ifdefs. */ ++ /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm ++ in having this code always here, and it makes it less messy without all ++ those #ifdefs. */ + + case OP_CLASS: + case OP_NCLASS: +@@ -671,27 +671,52 @@ for(;;) + pcre_uchar ch; + while ((ch = *ccode++) != XCL_END) + { +- if (ch == XCL_PROP) +- { +- unsigned int ptype = *ccode++; +- unsigned int pvalue = *ccode++; +- fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue)); +- } +- else if (ch == XCL_NOTPROP) +- { +- unsigned int ptype = *ccode++; +- unsigned int pvalue = *ccode++; +- fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue)); +- } +- else ++ BOOL not = FALSE; ++ const char *notch = ""; ++ ++ switch(ch) + { ++ case XCL_NOTPROP: ++ not = TRUE; ++ notch = "^"; ++ /* Fall through */ ++ ++ case XCL_PROP: ++ { ++ unsigned int ptype = *ccode++; ++ unsigned int pvalue = *ccode++; ++ ++ switch(ptype) ++ { ++ case PT_PXGRAPH: ++ fprintf(f, "[:%sgraph:]", notch); ++ break; ++ ++ case PT_PXPRINT: ++ fprintf(f, "[:%sprint:]", notch); ++ break; ++ ++ case PT_PXPUNCT: ++ fprintf(f, "[:%spunct:]", notch); ++ break; ++ ++ default: ++ fprintf(f, "\\%c{%s}", (not? 
'P':'p'), ++ get_ucpname(ptype, pvalue)); ++ break; ++ } ++ } ++ break; ++ ++ default: + ccode += 1 + print_char(f, ccode, utf); + if (ch == XCL_RANGE) + { + fprintf(f, "-"); + ccode += 1 + print_char(f, ccode, utf); + } +- } ++ break; ++ } + } + } + +diff --git a/pcre_xclass.c b/pcre_xclass.c +index fa73cd8..dd7008a 100644 +--- a/pcre_xclass.c ++++ b/pcre_xclass.c +@@ -128,57 +128,102 @@ while ((t = *data++) != XCL_END) + else /* XCL_PROP & XCL_NOTPROP */ + { + const ucd_record *prop = GET_UCD(c); ++ BOOL isprop = t == XCL_PROP; + + switch(*data) + { + case PT_ANY: +- if (t == XCL_PROP) return !negated; ++ if (isprop) return !negated; + break; + + case PT_LAMP: + if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || +- prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; ++ prop->chartype == ucp_Lt) == isprop) return !negated; + break; + + case PT_GC: +- if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP)) ++ if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop) + return !negated; + break; + + case PT_PC: +- if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated; ++ if ((data[1] == prop->chartype) == isprop) return !negated; + break; + + case PT_SC: +- if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated; ++ if ((data[1] == prop->script) == isprop) return !negated; + break; + + case PT_ALNUM: + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || +- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP)) ++ PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop) + return !negated; + break; + + case PT_SPACE: /* Perl space */ + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) +- == (t == XCL_PROP)) ++ == isprop) + return !negated; + break; + + case PT_PXSPACE: /* POSIX space */ + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || +- c == CHAR_FF || c == CHAR_CR) == (t == 
XCL_PROP)) ++ c == CHAR_FF || c == CHAR_CR) == isprop) + return !negated; + break; + + case PT_WORD: + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) +- == (t == XCL_PROP)) ++ == isprop) + return !negated; + break; ++ ++ /* The following three properties can occur only in an XCLASS, as there ++ is no \p or \P coding for them. */ ++ ++ /* Graphic character. Implement this as not Z (space or separator) and ++ not C (other), except for Cf (format) with a few exceptions. This seems ++ to be what Perl does. The exceptional characters are: ++ ++ U+061C Arabic Letter Mark ++ U+180E Mongolian Vowel Separator ++ U+2066 - U+2069 Various "isolate"s ++ */ ++ ++ case PT_PXGRAPH: ++ if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z && ++ (PRIV(ucp_gentype)[prop->chartype] != ucp_C || ++ (prop->chartype == ucp_Cf && ++ c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069)) ++ )) == isprop) ++ return !negated; ++ break; ++ ++ /* Printable character: same as graphic, with the addition of Zs, i.e. ++ not Zl and not Zp, and U+180E. */ ++ ++ case PT_PXPRINT: ++ if ((prop->chartype != ucp_Zl && ++ prop->chartype != ucp_Zp && ++ (PRIV(ucp_gentype)[prop->chartype] != ucp_C || ++ (prop->chartype == ucp_Cf && ++ c != 0x061c && (c < 0x2066 || c > 0x2069)) ++ )) == isprop) ++ return !negated; ++ break; ++ ++ /* Punctuation: all Unicode punctuation, plus ASCII characters that ++ Unicode treats as symbols rather than punctuation, for Perl ++ compatibility (these are $+<=>^`|~). */ ++ ++ case PT_PXPUNCT: ++ if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P || ++ (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop) ++ return !negated; ++ break; + + /* This should never occur, but compilers may mutter if there is no + default. 
*/ +diff --git a/testdata/testinput6 b/testdata/testinput6 +index 219a30e..adafb89 100644 +--- a/testdata/testinput6 ++++ b/testdata/testinput6 +@@ -1319,4 +1319,150 @@ + /^s?c/mi8 + scat + ++/^[[:graph:]]+$/8W ++ Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ** Failers ++ \x{09} ++ \x{0a} ++ \x{1D} ++ \x{20} ++ \x{85} ++ \x{a0} ++ \x{61c} ++ \x{1680} ++ \x{180e} ++ \x{2028} ++ \x{2029} ++ \x{202f} ++ \x{2065} ++ \x{2066} ++ \x{2067} ++ \x{2068} ++ \x{2069} ++ \x{3000} ++ \x{e0002} ++ \x{e001f} ++ \x{e0080} ++ ++/^[[:print:]]+$/8W ++ Space: \x{a0} ++ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} ++ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} ++ \x{202f}\x{205f} ++ \x{3000} ++ Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{180e} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{202f} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ** Failers ++ \x{09} ++ \x{1D} ++ \x{85} ++ \x{61c} ++ \x{2028} ++ \x{2029} ++ \x{2065} ++ \x{2066} ++ \x{2067} ++ \x{2068} ++ \x{2069} ++ \x{e0002} ++ 
\x{e001f} ++ \x{e0080} ++ ++/^[[:punct:]]+$/8W ++ \$+<=>^`|~ ++ !\"#%&'()*,-./:;?@[\\]_{} ++ \x{a1}\x{a7} ++ \x{37e} ++ ** Failers ++ abcde ++ ++/^[[:^graph:]]+$/8W ++ \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} ++ \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} ++ \x{3000}\x{e0002}\x{e001f}\x{e0080} ++ ** Failers ++ Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ++/^[[:^print:]]+$/8W ++ \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} ++ \x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080} ++ ** Failers ++ Space: \x{a0} ++ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} ++ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} ++ \x{202f}\x{205f} ++ \x{3000} ++ Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{180e} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{202f} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ++/^[[:^punct:]]+$/8W ++ abcde ++ ** Failers ++ \$+<=>^`|~ ++ !\"#%&'()*,-./:;?@[\\]_{} ++ \x{a1}\x{a7} ++ \x{37e} ++ + /-- End of testinput6 --/ 
+diff --git a/testdata/testinput7 b/testdata/testinput7 +index 252d246..bcdcef9 100644 +--- a/testdata/testinput7 ++++ b/testdata/testinput7 +@@ -672,4 +672,14 @@ of case for anything other than the ASCII letters. --/ + /^s?c/mi8I + scat + ++/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/BZx ++ ++/.+\X/BZxs ++ ++/\X+$/BZxm ++ ++/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/BZx ++ ++/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ ++ + /-- End of testinput7 --/ +diff --git a/testdata/testoutput6 b/testdata/testoutput6 +index 090d23f..c426efc 100644 +--- a/testdata/testoutput6 ++++ b/testdata/testoutput6 +@@ -1338,15 +1338,15 @@ No match + + /^[[:graph:]]*/8W + A\x{a1}\x{a0} +- 0: A ++ 0: A\x{a1} + + /^[[:print:]]*/8W + A z\x{a0}\x{a1} +- 0: A z ++ 0: A z\x{a0}\x{a1} + + /^[[:punct:]]*/8W + .+\x{a1}\x{a0} +- 0: .+ ++ 0: .+\x{a1} + + /\p{Zs}*?\R/ + ** Failers +@@ -2138,4 +2138,284 @@ No match + scat + 0: sc + ++/^[[:graph:]]+$/8W ++ Letter:ABC ++ 0: Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ 0: Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ 0: Number:9\x{660} ++ Punctuation:\x{66a},; ++ 0: Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ 0: Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ 0: Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ 0: \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ 0: \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ 0: \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ 0: \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ 0: \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ 0: \x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ 0: \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ 
0: \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ 0: \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ 0: \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ** Failers ++No match ++ \x{09} ++No match ++ \x{0a} ++No match ++ \x{1D} ++No match ++ \x{20} ++No match ++ \x{85} ++No match ++ \x{a0} ++No match ++ \x{61c} ++No match ++ \x{1680} ++No match ++ \x{180e} ++No match ++ \x{2028} ++No match ++ \x{2029} ++No match ++ \x{202f} ++No match ++ \x{2065} ++No match ++ \x{2066} ++No match ++ \x{2067} ++No match ++ \x{2068} ++No match ++ \x{2069} ++No match ++ \x{3000} ++No match ++ \x{e0002} ++No match ++ \x{e001f} ++No match ++ \x{e0080} ++No match ++ ++/^[[:print:]]+$/8W ++ Space: \x{a0} ++ 0: Space: \x{a0} ++ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} ++ 0: \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} ++ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} ++ 0: \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} ++ \x{202f}\x{205f} ++ 0: \x{202f}\x{205f} ++ \x{3000} ++ 0: \x{3000} ++ Letter:ABC ++ 0: Letter:ABC ++ Mark:\x{300}\x{1d172}\x{1d17b} ++ 0: Mark:\x{300}\x{1d172}\x{1d17b} ++ Number:9\x{660} ++ 0: Number:9\x{660} ++ Punctuation:\x{66a},; ++ 0: Punctuation:\x{66a},; ++ Symbol:\x{6de}<>\x{fffc} ++ 0: Symbol:\x{6de}<>\x{fffc} ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ 0: Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++ \x{180e} ++ 0: \x{180e} ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ 0: \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ 0: \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++ \x{202f} ++ 0: \x{202f} ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ 0: \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ 0: \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++ \x{feff} ++ 0: \x{feff} ++ \x{fff9}\x{fffa}\x{fffb} ++ 0: 
\x{fff9}\x{fffa}\x{fffb} ++ \x{110bd} ++ 0: \x{110bd} ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ 0: \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++ \x{e0001} ++ 0: \x{e0001} ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ 0: \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++ ** Failers ++ 0: ** Failers ++ \x{09} ++No match ++ \x{1D} ++No match ++ \x{85} ++No match ++ \x{61c} ++No match ++ \x{2028} ++No match ++ \x{2029} ++No match ++ \x{2065} ++No match ++ \x{2066} ++No match ++ \x{2067} ++No match ++ \x{2068} ++No match ++ \x{2069} ++No match ++ \x{e0002} ++No match ++ \x{e001f} ++No match ++ \x{e0080} ++No match ++ ++/^[[:punct:]]+$/8W ++ \$+<=>^`|~ ++ 0: $+<=>^`|~ ++ !\"#%&'()*,-./:;?@[\\]_{} ++ 0: !"#%&'()*,-./:;?@[\]_{} ++ \x{a1}\x{a7} ++ 0: \x{a1}\x{a7} ++ \x{37e} ++ 0: \x{37e} ++ ** Failers ++No match ++ abcde ++No match ++ ++/^[[:^graph:]]+$/8W ++ \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} ++ 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}\x{180e} ++ \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} ++ 0: \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} ++ \x{3000}\x{e0002}\x{e001f}\x{e0080} ++ 0: \x{3000}\x{e0002}\x{e001f}\x{e0080} ++ ** Failers ++No match ++ Letter:ABC ++No match ++ Mark:\x{300}\x{1d172}\x{1d17b} ++No match ++ Number:9\x{660} ++No match ++ Punctuation:\x{66a},; ++No match ++ Symbol:\x{6de}<>\x{fffc} ++No match ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++No match ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++No match ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++No match ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++No match ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++No match ++ \x{feff} ++No match ++ \x{fff9}\x{fffa}\x{fffb} ++No match ++ \x{110bd} ++No match ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++No match ++ 
\x{e0001} ++No match ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++No match ++ ++/^[[:^print:]]+$/8W ++ \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} ++ 0: \x{09}\x{1d}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} ++ \x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080} ++ 0: \x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080} ++ ** Failers ++No match ++ Space: \x{a0} ++No match ++ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} ++No match ++ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} ++No match ++ \x{202f}\x{205f} ++No match ++ \x{3000} ++No match ++ Letter:ABC ++No match ++ Mark:\x{300}\x{1d172}\x{1d17b} ++No match ++ Number:9\x{660} ++No match ++ Punctuation:\x{66a},; ++No match ++ Symbol:\x{6de}<>\x{fffc} ++No match ++ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} ++No match ++ \x{180e} ++No match ++ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} ++No match ++ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} ++No match ++ \x{202f} ++No match ++ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} ++No match ++ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} ++No match ++ \x{feff} ++No match ++ \x{fff9}\x{fffa}\x{fffb} ++No match ++ \x{110bd} ++No match ++ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} ++No match ++ \x{e0001} ++No match ++ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} ++No match ++ ++/^[[:^punct:]]+$/8W ++ abcde ++ 0: abcde ++ ** Failers ++No match ++ \$+<=>^`|~ ++No match ++ !\"#%&'()*,-./:;?@[\\]_{} ++No match ++ \x{a1}\x{a7} ++No match ++ \x{37e} ++No match ++ + /-- End of testinput6 --/ +diff --git a/testdata/testoutput7 b/testdata/testoutput7 +index 5f0f546..e3f607c 100644 +--- a/testdata/testoutput7 ++++ b/testdata/testoutput7 +@@ -820,7 +820,7 @@ No match + /[[:graph:]]/WBZ + ------------------------------------------------------------------ + Bra +- [!-~] ++ [[:graph:]] + Ket + End + ------------------------------------------------------------------ +@@ 
-828,7 +828,7 @@ No match + /[[:print:]]/WBZ + ------------------------------------------------------------------ + Bra +- [ -~] ++ [[:print:]] + Ket + End + ------------------------------------------------------------------ +@@ -836,7 +836,7 @@ No match + /[[:punct:]]/WBZ + ------------------------------------------------------------------ + Bra +- [!-/:-@[-`{-~] ++ [[:punct:]] + Ket + End + ------------------------------------------------------------------ +@@ -1478,4 +1478,115 @@ Need char = 'c' (caseless) + scat + 0: sc + ++/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/BZx ++------------------------------------------------------------------ ++ Bra ++ \D+ ++ extuni ++ \d+ ++ extuni ++ \S+ ++ extuni ++ \s+ ++ extuni ++ \W+ ++ extuni ++ \w+ ++ extuni ++ AllAny+ ++ extuni ++ \R+ ++ extuni ++ \H+ ++ extuni ++ \h+ ++ extuni ++ \V+ ++ extuni ++ \v+ ++ extuni ++ a+ ++ extuni ++ \x0a+ ++ extuni ++ Any+ ++ extuni ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/.+\X/BZxs ++------------------------------------------------------------------ ++ Bra ++ AllAny+ ++ extuni ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\X+$/BZxm ++------------------------------------------------------------------ ++ Bra ++ extuni+ ++ /m $ ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. 
\X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/BZx ++------------------------------------------------------------------ ++ Bra ++ extuni+ ++ \D ++ extuni+ ++ \d ++ extuni+ ++ \S ++ extuni+ ++ \s ++ extuni+ ++ \W ++ extuni+ ++ \w ++ extuni+ ++ Any ++ extuni+ ++ AllAny ++ extuni+ ++ \R ++ extuni+ ++ \H ++ extuni+ ++ \h ++ extuni+ ++ \V ++ extuni+ ++ \v ++ extuni+ ++ extuni ++ extuni+ ++ \Z ++ extuni+ ++ \z ++ extuni+ ++ $ ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ ++------------------------------------------------------------------ ++ Bra ++ prop Nd + ++ prop Xsp {0,5} ++ = ++ prop Xsp * ++ notprop Xsp ? ++ = ++ prop Xwd {0,4} ++ notprop Xwd * ++ Ket ++ End ++------------------------------------------------------------------ ++ + /-- End of testinput7 --/ +-- +2.7.4 + diff --git a/SOURCES/pcre-8.34-RC1-Fix-XCLASS-POSIX-types-in-JIT.patch b/SOURCES/pcre-8.34-RC1-Fix-XCLASS-POSIX-types-in-JIT.patch new file mode 100644 index 0000000..8b2cc99 --- /dev/null +++ b/SOURCES/pcre-8.34-RC1-Fix-XCLASS-POSIX-types-in-JIT.patch @@ -0,0 +1,52 @@ +From 2cadfa0ec8900bb784aa4c4171ad5aec0e9b7edf Mon Sep 17 00:00:00 2001 +From: zherczeg +Date: Sat, 30 Nov 2013 07:05:00 +0000 +Subject: [PATCH] Fix XCLASS POSIX types in JIT. 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1406 2f5784b3-3f2a-0410-8824-cb99058d5e15 +Signed-off-by: Petr Písař +--- + pcre_jit_compile.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c +index c71b5a4..15555a6 100644 +--- a/pcre_jit_compile.c ++++ b/pcre_jit_compile.c +@@ -4431,10 +4431,10 @@ while (*cc != XCL_END) + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); + +- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); ++ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + +- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); ++ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + + JUMPHERE(jump); +@@ -4447,7 +4447,7 @@ while (*cc != XCL_END) + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); + +- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); ++ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); + OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL); + + jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); +@@ -4457,7 +4457,7 @@ while (*cc != XCL_END) + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); + +- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); ++ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, 
SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + + JUMPHERE(jump); +-- +2.7.4 + diff --git a/SPECS/pcre.spec b/SPECS/pcre.spec index 4f7d15f..771cffd 100644 --- a/SPECS/pcre.spec +++ b/SPECS/pcre.spec @@ -2,7 +2,7 @@ #%%global rcversion RC1 Name: pcre Version: 8.32 -Release: %{?rcversion:0.}15%{?rcversion:.%rcversion}%{?dist}.1 +Release: %{?rcversion:0.}17%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library Group: System Environment/Libraries @@ -47,41 +47,55 @@ Patch12: pcre-8.32-Fix-compiler-crash-misbehaviour-for-zero-repeated-gr.patch Patch13: pcre-8.32-Fix-bug-when-there-are-unset-groups-prior-to-ACCEPT-.patch # Fix static linking, bug #1217111, in upstream after 8.37-RC1 Patch14: pcre-8.37-RC1-Fix-static-linking-issue-with-pkg-config.patch -# Fix checking whether a group could match an empty string, bug #1330508, +# Fix checking whether a group could match an empty string, bug #1330509, # in upstream after 8.33, needed for # Fix-compile-time-loop-for-recursive-reference-within.patch Patch15: pcre-8.32-Fix-checking-whether-a-group-could-match-an-empty-st.patch # Fix CVE-2015-2328 (infinite recursion compiling pattern with recursive -# reference in a group with indefinite repeat), bug #1330508, +# reference in a group with indefinite repeat), bug #1330509, # upstream bug #1515, in upstream after 8.35 Patch16: pcre-8.32-Fix-compile-time-loop-for-recursive-reference-within.patch -# Fix duplicate names memory calculation error, bug #1330508, +# Fix duplicate names memory calculation error, bug #1330509, # in upstream after 8.37, # needed for Fix-buffer-overflow-for-named-references-in-situatio.patch Patch17: pcre-8.32-Fix-duplicate-names-memory-calculation-error.patch # Fix named forward reference to duplicate group number overflow bug, -# bug #1330508, in upstream after 8.37, +# bug #1330509, in upstream after 8.37, # needed for 
Fix-buffer-overflow-for-named-references-in-situatio.patch Patch18: pcre-8.32-Fix-named-forward-reference-to-duplicate-group-numbe.patch # Fix CVE-2015-8385 (buffer overflow caused by named forward reference to -# duplicate group number), bug #1330508, in upstream after 8.37 +# duplicate group number), bug #1330509, in upstream after 8.37 Patch19: pcre-8.32-Fix-buffer-overflow-for-named-references-in-situatio.patch # Fix CVE-2015-8386 (buffer overflow caused by lookbehind assertion), -# bug #1330508, in upstream after 8.37 +# bug #1330509, in upstream after 8.37 Patch20: pcre-8.32-Fix-buffer-overflow-for-lookbehind-within-mutually-r.patch # Fix CVE-2015-3217 (stack overflow caused by mishandled group empty match), -# bug #1330508, in upstream after 8.37 +# bug #1330509, in upstream after 8.37 Patch21: pcre-8.32-Fix-group-empty-match-bug.patch # Fix CVE-2015-5073 and CVE-2015-8388 (buffer overflow for forward reference -# within backward assertion with excess closing parenthesis), bug #1330508, +# within backward assertion with excess closing parenthesis), bug #1330509, # in upstream after 8.37 Patch22: pcre-8.32-Fix-buffer-overflow-for-forward-reference-within-bac.patch # Fix CVE-2015-8391 (inefficient posix character class syntax check), -# bug #1330508, in upstream after 8.37 +# bug #1330509, in upstream after 8.37 Patch23: pcre-8.32-Fix-run-for-ever-bug-for-deeply-nested-sequences.patch # Fix CVE-2016-3191 (workspace overflow for (*ACCEPT) with deeply nested -# parentheses), bug #1330508, in upstream after 8.38 +# parentheses), bug #1330509, in upstream after 8.38 Patch24: pcre-8.32-Fix-workspace-overflow-for-ACCEPT-with-deeply-nested.patch +# 1/3 Let [:graph:], [:print:], and [:punct:] POSIX classes handle Unicode +# characters in UCP mode to match Perl behavior, bug #1400267, +# in upstream 8.34 +Patch25: pcre-8.32-Update-POSIX-class-handling-in-UCP-mode.patch +# 2/3 Let [:graph:], [:print:], and [:punct:] POSIX classes handle Unicode +# characters in
UCP mode with JIT, bug #1400267, in upstream 8.34 +Patch26: pcre-8.32-Add-support-for-PT_PXGRAPH-PT_PXPRINT-and-PT_PXPUNCT.patch +# 3/3 Fix XCLASS POSIX JIT compilation, tests failed on 32-bit PowerPC, +# bug #1400267, in upstream 8.34 +Patch27: pcre-8.34-RC1-Fix-XCLASS-POSIX-types-in-JIT.patch +# Fix matching Unicode ranges in JIT mode, bug #1402288, in upstream 8.35 +Patch28: pcre-8.32-A-new-flag-is-set-when-property-checks-are-present-i.patch +# git required for A-new-flag-is-set-when-property-checks-are-present-i.patch +BuildRequires: git BuildRequires: readline-devel # New libtool to get rid of rpath BuildRequires: autoconf, automake, libtool @@ -146,6 +160,11 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest. %patch22 -p1 -b .CVE-2015-5073 %patch23 -p1 -b .deeply_nested_bracket_colon %patch24 -p1 -b .accept_with_nested_parentheses +%patch25 -p1 -b .posix_classes_in_ucp +%patch26 -p1 -b .posix_classes_in_ucp_jit +%patch27 -p1 -b .posix_classes_in_ucp_jit_types +# Apply a Git binary patch +git --work-tree=. 
apply %{PATCH28} # Because of rpath patch libtoolize --copy --force && autoreconf -vif # One contributor's name is non-UTF-8 @@ -215,21 +234,26 @@ make check VERBOSE=yes %{_mandir}/man1/pcretest.* %changelog -* Wed Apr 27 2016 Petr Pisar - 8.32-15.1 +* Tue Dec 06 2016 Petr Pisar - 8.32-17 +- Let [:graph:], [:print:], and [:punct:] POSIX classes handle Unicode + characters in UCP mode to match Perl behavior (bug #1400267) +- Fix matching Unicode ranges in JIT mode (bug #1402288) + +* Wed Apr 27 2016 Petr Pisar - 8.32-16 - Fix CVE-2015-2328 (infinite recursion compiling pattern with recursive - reference in a group with indefinite repeat) (bug #1330508) + reference in a group with indefinite repeat) (bug #1330509) - Fix CVE-2015-8385 (buffer overflow caused by named forward reference to - duplicate group number) (bug #1330508) + duplicate group number) (bug #1330509) - Fix CVE-2015-8386 (buffer overflow caused by lookbehind assertion) - (bug #1330508) + (bug #1330509) - Fix CVE-2015-3217 (stack overflow caused by mishandled group empty match) - (bug #1330508) + (bug #1330509) - Fix CVE-2015-5073 and CVE-2015-8388 (buffer overflow for forward reference - within backward assertion with excess closing parenthesis) (bug #1330508) + within backward assertion with excess closing parenthesis) (bug #1330509) - Fix CVE-2015-8391 (inefficient posix character class syntax check) - (bug #1330508) + (bug #1330509) - Fix CVE-2016-3191 (workspace overflow for (*ACCEPT) with deeply nested - parentheses) (bug #1330508) + parentheses) (bug #1330509) * Wed Apr 29 2015 Petr Pisar - 8.32-15 - Fix compiling expression where start-anchored character with more than one