|
|
cb7dd8 |
From e74dcd1eec9227fe23c06de2ff109e48695fd879 Mon Sep 17 00:00:00 2001
|
|
|
cb7dd8 |
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
|
|
|
cb7dd8 |
Date: Sat, 2 Nov 2013 18:29:05 +0000
|
|
|
cb7dd8 |
Subject: [PATCH 1/2] Update POSIX class handling in UCP mode.
|
|
|
cb7dd8 |
MIME-Version: 1.0
|
|
|
cb7dd8 |
Content-Type: text/plain; charset=UTF-8
|
|
|
cb7dd8 |
Content-Transfer-Encoding: 8bit
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
Petr Pisar: Ported to 8.32:
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
commit fa3832825e3fe0d49f93658882775cdd6c26129e
|
|
|
cb7dd8 |
Author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
|
|
|
cb7dd8 |
Date: Sat Nov 2 18:29:05 2013 +0000
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
Update POSIX class handling in UCP mode.
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1387 2f5784b3-3f2a-0410-8824-cb99058d5e15
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
It also adjusts some test 7 outputs because 8.32 does not contain
|
|
|
cb7dd8 |
the auto-possessification improvement from
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
commit 5f42224005b7d9a503903e3342ec7ada75590b07
|
|
|
cb7dd8 |
Author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
|
|
|
cb7dd8 |
Date: Tue Oct 1 16:54:40 2013 +0000
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
Refactored auto-possessification code.
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1363 2f5784b3-3f2a-0410-8824-cb99058d5e15
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
|
|
cb7dd8 |
---
|
|
|
cb7dd8 |
doc/pcrepattern.3 | 37 +++++--
|
|
|
cb7dd8 |
pcre_compile.c | 75 +++++++++++---
|
|
|
cb7dd8 |
pcre_internal.h | 16 ++-
|
|
|
cb7dd8 |
pcre_printint.c | 59 ++++++++---
|
|
|
cb7dd8 |
pcre_xclass.c | 63 ++++++++++--
|
|
|
cb7dd8 |
testdata/testinput6 | 146 ++++++++++++++++++++++++++
|
|
|
cb7dd8 |
testdata/testinput7 | 10 ++
|
|
|
cb7dd8 |
testdata/testoutput6 | 286 ++++++++++++++++++++++++++++++++++++++++++++++++++-
|
|
|
cb7dd8 |
testdata/testoutput7 | 117 ++++++++++++++++++++-
|
|
|
cb7dd8 |
9 files changed, 752 insertions(+), 57 deletions(-)
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
|
|
|
cb7dd8 |
index c9c7b45..f638846 100644
|
|
|
cb7dd8 |
--- a/doc/pcrepattern.3
|
|
|
cb7dd8 |
+++ b/doc/pcrepattern.3
|
|
|
cb7dd8 |
@@ -861,8 +861,9 @@ the "mark" property always have the "extend" grapheme breaking property.
|
|
|
cb7dd8 |
.sp
|
|
|
cb7dd8 |
As well as the standard Unicode properties described above, PCRE supports four
|
|
|
cb7dd8 |
more that make it possible to convert traditional escape sequences such as \ew
|
|
|
cb7dd8 |
-and \es and POSIX character classes to use Unicode properties. PCRE uses these
|
|
|
cb7dd8 |
-non-standard, non-Perl properties internally when PCRE_UCP is set. They are:
|
|
|
cb7dd8 |
+and \es to use Unicode properties. PCRE uses these non-standard, non-Perl
|
|
|
cb7dd8 |
+properties internally when PCRE_UCP is set. However, they may also be used
|
|
|
cb7dd8 |
+explicitly. These properties are:
|
|
|
cb7dd8 |
.sp
|
|
|
cb7dd8 |
Xan Any alphanumeric character
|
|
|
cb7dd8 |
Xps Any POSIX space character
|
|
|
cb7dd8 |
@@ -873,6 +874,7 @@ Xan matches characters that have either the L (letter) or the N (number)
|
|
|
cb7dd8 |
property. Xps matches the characters tab, linefeed, vertical tab, form feed, or
|
|
|
cb7dd8 |
carriage return, and any other character that has the Z (separator) property.
|
|
|
cb7dd8 |
Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the
|
|
|
cb7dd8 |
+.\"
|
|
|
cb7dd8 |
same characters as Xan, plus underscore.
|
|
|
cb7dd8 |
.
|
|
|
cb7dd8 |
.
|
|
|
cb7dd8 |
@@ -1258,8 +1260,8 @@ supported, and an error is given if they are encountered.
|
|
|
cb7dd8 |
By default, in UTF modes, characters with values greater than 128 do not match
|
|
|
cb7dd8 |
any of the POSIX character classes. However, if the PCRE_UCP option is passed
|
|
|
cb7dd8 |
to \fBpcre_compile()\fP, some of the classes are changed so that Unicode
|
|
|
cb7dd8 |
-character properties are used. This is achieved by replacing the POSIX classes
|
|
|
cb7dd8 |
-by other sequences, as follows:
|
|
|
cb7dd8 |
+character properties are used. This is achieved by replacing certain POSIX
|
|
|
cb7dd8 |
+classes by other sequences, as follows:
|
|
|
cb7dd8 |
.sp
|
|
|
cb7dd8 |
[:alnum:] becomes \ep{Xan}
|
|
|
cb7dd8 |
[:alpha:] becomes \ep{L}
|
|
|
cb7dd8 |
@@ -1270,9 +1272,30 @@ by other sequences, as follows:
|
|
|
cb7dd8 |
[:upper:] becomes \ep{Lu}
|
|
|
cb7dd8 |
[:word:] becomes \ep{Xwd}
|
|
|
cb7dd8 |
.sp
|
|
|
cb7dd8 |
-Negated versions, such as [:^alpha:] use \eP instead of \ep. The other POSIX
|
|
|
cb7dd8 |
-classes are unchanged, and match only characters with code points less than
|
|
|
cb7dd8 |
-128.
|
|
|
cb7dd8 |
+Negated versions, such as [:^alpha:] use \eP instead of \ep. Three other POSIX
|
|
|
cb7dd8 |
+classes are handled specially in UCP mode:
|
|
|
cb7dd8 |
+.TP 10
|
|
|
cb7dd8 |
+[:graph:]
|
|
|
cb7dd8 |
+This matches characters that have glyphs that mark the page when printed. In
|
|
|
cb7dd8 |
+Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf
|
|
|
cb7dd8 |
+properties, except for:
|
|
|
cb7dd8 |
+.sp
|
|
|
cb7dd8 |
+ U+061C Arabic Letter Mark
|
|
|
cb7dd8 |
+ U+180E Mongolian Vowel Separator
|
|
|
cb7dd8 |
+ U+2066 - U+2069 Various "isolate"s
|
|
|
cb7dd8 |
+.sp
|
|
|
cb7dd8 |
+.TP 10
|
|
|
cb7dd8 |
+[:print:]
|
|
|
cb7dd8 |
+This matches the same characters as [:graph:] plus space characters that are
|
|
|
cb7dd8 |
+not controls, that is, characters with the Zs property.
|
|
|
cb7dd8 |
+.TP 10
|
|
|
cb7dd8 |
+[:punct:]
|
|
|
cb7dd8 |
+This matches all characters that have the Unicode P (punctuation) property,
|
|
|
cb7dd8 |
+plus those characters whose code points are less than 128 that have the S
|
|
|
cb7dd8 |
+(Symbol) property.
|
|
|
cb7dd8 |
+.P
|
|
|
cb7dd8 |
+The other POSIX classes are unchanged, and match only characters with code
|
|
|
cb7dd8 |
+points less than 128.
|
|
|
cb7dd8 |
.
|
|
|
cb7dd8 |
.
|
|
|
cb7dd8 |
.SH "VERTICAL BAR"
|
|
|
cb7dd8 |
diff --git a/pcre_compile.c b/pcre_compile.c
|
|
|
cb7dd8 |
index 746dc70..3c75218 100644
|
|
|
cb7dd8 |
--- a/pcre_compile.c
|
|
|
cb7dd8 |
+++ b/pcre_compile.c
|
|
|
cb7dd8 |
@@ -257,7 +257,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem);
|
|
|
cb7dd8 |
now all in a single string, to reduce the number of relocations when a shared
|
|
|
cb7dd8 |
library is dynamically loaded. The list of lengths is terminated by a zero
|
|
|
cb7dd8 |
length entry. The first three must be alpha, lower, upper, as this is assumed
|
|
|
cb7dd8 |
-for handling case independence. */
|
|
|
cb7dd8 |
+for handling case independence. The indices for graph, print, and punct are
|
|
|
cb7dd8 |
+needed, so identify them. */
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
static const char posix_names[] =
|
|
|
cb7dd8 |
STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
|
|
|
cb7dd8 |
@@ -268,6 +269,11 @@ static const char posix_names[] =
|
|
|
cb7dd8 |
static const pcre_uint8 posix_name_lengths[] = {
|
|
|
cb7dd8 |
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
+#define PC_GRAPH 8
|
|
|
cb7dd8 |
+#define PC_PRINT 9
|
|
|
cb7dd8 |
+#define PC_PUNCT 10
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
/* Table of class bit maps for each POSIX class. Each class is formed from a
|
|
|
cb7dd8 |
base map, with an optional addition or removal of another map. Then, for some
|
|
|
cb7dd8 |
classes, there is some additional tweaking: for [:blank:] the vertical space
|
|
|
cb7dd8 |
@@ -295,9 +301,8 @@ static const int posix_class_maps[] = {
|
|
|
cb7dd8 |
cbit_xdigit,-1, 0 /* xdigit */
|
|
|
cb7dd8 |
};
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
-/* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class
|
|
|
cb7dd8 |
-substitutes must be in the order of the names, defined above, and there are
|
|
|
cb7dd8 |
-both positive and negative cases. NULL means no substitute. */
|
|
|
cb7dd8 |
+/* Table of substitutes for \d etc when PCRE_UCP is set. They are replaced by
|
|
|
cb7dd8 |
+Unicode property escapes. */
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
#ifdef SUPPORT_UCP
|
|
|
cb7dd8 |
static const pcre_uchar string_PNd[] = {
|
|
|
cb7dd8 |
@@ -322,12 +327,18 @@ static const pcre_uchar string_pXwd[] = {
|
|
|
cb7dd8 |
static const pcre_uchar *substitutes[] = {
|
|
|
cb7dd8 |
string_PNd, /* \D */
|
|
|
cb7dd8 |
string_pNd, /* \d */
|
|
|
cb7dd8 |
- string_PXsp, /* \S */ /* NOTE: Xsp is Perl space */
|
|
|
cb7dd8 |
- string_pXsp, /* \s */
|
|
|
cb7dd8 |
+ string_PXsp, /* \S */ /* Xsp is Perl space, but from 8.34, Perl */
|
|
|
cb7dd8 |
+ string_pXsp, /* \s */ /* space and POSIX space are the same. */
|
|
|
cb7dd8 |
string_PXwd, /* \W */
|
|
|
cb7dd8 |
string_pXwd /* \w */
|
|
|
cb7dd8 |
};
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
+/* The POSIX class substitutes must be in the order of the POSIX class names,
|
|
|
cb7dd8 |
+defined above, and there are both positive and negative cases. NULL means no
|
|
|
cb7dd8 |
+general substitute of a Unicode property escape (\p or \P). However, for some
|
|
|
cb7dd8 |
+POSIX classes (e.g. graph, print, punct) a special property code is compiled
|
|
|
cb7dd8 |
+directly. */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
static const pcre_uchar string_pL[] = {
|
|
|
cb7dd8 |
CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
|
|
|
cb7dd8 |
CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
|
|
|
cb7dd8 |
@@ -375,8 +386,8 @@ static const pcre_uchar *posix_substitutes[] = {
|
|
|
cb7dd8 |
NULL, /* graph */
|
|
|
cb7dd8 |
NULL, /* print */
|
|
|
cb7dd8 |
NULL, /* punct */
|
|
|
cb7dd8 |
- string_pXps, /* space */ /* NOTE: Xps is POSIX space */
|
|
|
cb7dd8 |
- string_pXwd, /* word */
|
|
|
cb7dd8 |
+ string_pXps, /* space */ /* Xps is POSIX space, but from 8.34 */
|
|
|
cb7dd8 |
+ string_pXwd, /* word */ /* Perl and POSIX space are the same */
|
|
|
cb7dd8 |
NULL, /* xdigit */
|
|
|
cb7dd8 |
/* Negated cases */
|
|
|
cb7dd8 |
string_PL, /* ^alpha */
|
|
|
cb7dd8 |
@@ -390,8 +401,8 @@ static const pcre_uchar *posix_substitutes[] = {
|
|
|
cb7dd8 |
NULL, /* ^graph */
|
|
|
cb7dd8 |
NULL, /* ^print */
|
|
|
cb7dd8 |
NULL, /* ^punct */
|
|
|
cb7dd8 |
- string_PXps, /* ^space */ /* NOTE: Xps is POSIX space */
|
|
|
cb7dd8 |
- string_PXwd, /* ^word */
|
|
|
cb7dd8 |
+ string_PXps, /* ^space */ /* Xps is POSIX space, but from 8.34 */
|
|
|
cb7dd8 |
+ string_PXwd, /* ^word */ /* Perl and POSIX space are the same */
|
|
|
cb7dd8 |
NULL /* ^xdigit */
|
|
|
cb7dd8 |
};
|
|
|
cb7dd8 |
#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
|
|
|
cb7dd8 |
@@ -4258,24 +4269,58 @@ for (;; ptr++)
|
|
|
cb7dd8 |
posix_class = 0;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
/* When PCRE_UCP is set, some of the POSIX classes are converted to
|
|
|
cb7dd8 |
- different escape sequences that use Unicode properties. */
|
|
|
cb7dd8 |
+ different escape sequences that use Unicode properties \p or \P. Others
|
|
|
cb7dd8 |
+ that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
|
|
|
cb7dd8 |
+ directly. */
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
#ifdef SUPPORT_UCP
|
|
|
cb7dd8 |
if ((options & PCRE_UCP) != 0)
|
|
|
cb7dd8 |
{
|
|
|
cb7dd8 |
+ unsigned int ptype = 0;
|
|
|
cb7dd8 |
int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ /* The posix_substitutes table specifies which POSIX classes can be
|
|
|
cb7dd8 |
+ converted to \p or \P items. */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
if (posix_substitutes[pc] != NULL)
|
|
|
cb7dd8 |
{
|
|
|
cb7dd8 |
nestptr = tempptr + 1;
|
|
|
cb7dd8 |
ptr = posix_substitutes[pc] - 1;
|
|
|
cb7dd8 |
continue;
|
|
|
cb7dd8 |
}
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ /* There are three other classes that generate special property calls
|
|
|
cb7dd8 |
+ that are recognized only in an XCLASS. */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ else switch(posix_class)
|
|
|
cb7dd8 |
+ {
|
|
|
cb7dd8 |
+ case PC_GRAPH:
|
|
|
cb7dd8 |
+ ptype = PT_PXGRAPH;
|
|
|
cb7dd8 |
+ /* Fall through */
|
|
|
cb7dd8 |
+ case PC_PRINT:
|
|
|
cb7dd8 |
+ if (ptype == 0) ptype = PT_PXPRINT;
|
|
|
cb7dd8 |
+ /* Fall through */
|
|
|
cb7dd8 |
+ case PC_PUNCT:
|
|
|
cb7dd8 |
+ if (ptype == 0) ptype = PT_PXPUNCT;
|
|
|
cb7dd8 |
+ *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
|
|
|
cb7dd8 |
+ *class_uchardata++ = ptype;
|
|
|
cb7dd8 |
+ *class_uchardata++ = 0;
|
|
|
cb7dd8 |
+ ptr = tempptr + 1;
|
|
|
cb7dd8 |
+ continue;
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ /* For all other POSIX classes, no special action is taken in UCP
|
|
|
cb7dd8 |
+ mode. Fall through to the non_UCP case. */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ default:
|
|
|
cb7dd8 |
+ break;
|
|
|
cb7dd8 |
+ }
|
|
|
cb7dd8 |
}
|
|
|
cb7dd8 |
#endif
|
|
|
cb7dd8 |
- /* In the non-UCP case, we build the bit map for the POSIX class in a
|
|
|
cb7dd8 |
- chunk of local store because we may be adding and subtracting from it,
|
|
|
cb7dd8 |
- and we don't want to subtract bits that may be in the main map already.
|
|
|
cb7dd8 |
- At the end we or the result into the bit map that is being built. */
|
|
|
cb7dd8 |
+ /* In the non-UCP case, or when UCP makes no difference, we build the
|
|
|
cb7dd8 |
+ bit map for the POSIX class in a chunk of local store because we may be
|
|
|
cb7dd8 |
+ adding and subtracting from it, and we don't want to subtract bits that
|
|
|
cb7dd8 |
+ may be in the main map already. At the end we or the result into the
|
|
|
cb7dd8 |
+ bit map that is being built. */
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
posix_class *= 3;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
diff --git a/pcre_internal.h b/pcre_internal.h
|
|
|
cb7dd8 |
index 157de08..389848f 100644
|
|
|
cb7dd8 |
--- a/pcre_internal.h
|
|
|
cb7dd8 |
+++ b/pcre_internal.h
|
|
|
cb7dd8 |
@@ -1836,6 +1836,16 @@ only. */
|
|
|
cb7dd8 |
#define PT_WORD 8 /* Word - L plus N plus underscore */
|
|
|
cb7dd8 |
#define PT_CLIST 9 /* Pseudo-property: match character list */
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
+/* The following special properties are used only in XCLASS items, when POSIX
|
|
|
cb7dd8 |
+classes are specified and PCRE_UCP is set - in other words, for Unicode
|
|
|
cb7dd8 |
+handling of these classes. They are not available via the \p or \P escapes like
|
|
|
cb7dd8 |
+those in the above list, and so they do not take part in the autopossessifying
|
|
|
cb7dd8 |
+table. */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */
|
|
|
cb7dd8 |
+#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */
|
|
|
cb7dd8 |
+#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
|
|
cb7dd8 |
contain characters with values greater than 255. */
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
@@ -1849,9 +1859,9 @@ contain characters with values greater than 255. */
|
|
|
cb7dd8 |
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
/* These are escaped items that aren't just an encoding of a particular data
|
|
|
cb7dd8 |
-value such as \n. They must have non-zero values, as check_escape() returns
|
|
|
cb7dd8 |
-0 for a data character. Also, they must appear in the same order as in the opcode
|
|
|
cb7dd8 |
-definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
|
|
|
cb7dd8 |
+value such as \n. They must have non-zero values, as check_escape() returns 0
|
|
|
cb7dd8 |
+for a data character. Also, they must appear in the same order as in the
|
|
|
cb7dd8 |
+opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
|
|
|
cb7dd8 |
corresponds to "." in DOTALL mode rather than an escape sequence. It is also
|
|
|
cb7dd8 |
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
|
|
|
cb7dd8 |
non-DOTALL mode, "." behaves like \N.
|
|
|
cb7dd8 |
diff --git a/pcre_printint.c b/pcre_printint.c
|
|
|
cb7dd8 |
index 10b5754..c6dcbe6 100644
|
|
|
cb7dd8 |
--- a/pcre_printint.c
|
|
|
cb7dd8 |
+++ b/pcre_printint.c
|
|
|
cb7dd8 |
@@ -608,9 +608,9 @@ for(;;)
|
|
|
cb7dd8 |
print_prop(f, code, " ", "");
|
|
|
cb7dd8 |
break;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
- /* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no
|
|
|
cb7dd8 |
- harm in having this code always here, and it makes it less messy without
|
|
|
cb7dd8 |
- all those #ifdefs. */
|
|
|
cb7dd8 |
+ /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
|
|
|
cb7dd8 |
+ in having this code always here, and it makes it less messy without all
|
|
|
cb7dd8 |
+ those #ifdefs. */
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
case OP_CLASS:
|
|
|
cb7dd8 |
case OP_NCLASS:
|
|
|
cb7dd8 |
@@ -671,27 +671,52 @@ for(;;)
|
|
|
cb7dd8 |
pcre_uchar ch;
|
|
|
cb7dd8 |
while ((ch = *ccode++) != XCL_END)
|
|
|
cb7dd8 |
{
|
|
|
cb7dd8 |
- if (ch == XCL_PROP)
|
|
|
cb7dd8 |
- {
|
|
|
cb7dd8 |
- unsigned int ptype = *ccode++;
|
|
|
cb7dd8 |
- unsigned int pvalue = *ccode++;
|
|
|
cb7dd8 |
- fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
|
|
|
cb7dd8 |
- }
|
|
|
cb7dd8 |
- else if (ch == XCL_NOTPROP)
|
|
|
cb7dd8 |
- {
|
|
|
cb7dd8 |
- unsigned int ptype = *ccode++;
|
|
|
cb7dd8 |
- unsigned int pvalue = *ccode++;
|
|
|
cb7dd8 |
- fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
|
|
|
cb7dd8 |
- }
|
|
|
cb7dd8 |
- else
|
|
|
cb7dd8 |
+ BOOL not = FALSE;
|
|
|
cb7dd8 |
+ const char *notch = "";
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ switch(ch)
|
|
|
cb7dd8 |
{
|
|
|
cb7dd8 |
+ case XCL_NOTPROP:
|
|
|
cb7dd8 |
+ not = TRUE;
|
|
|
cb7dd8 |
+ notch = "^";
|
|
|
cb7dd8 |
+ /* Fall through */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ case XCL_PROP:
|
|
|
cb7dd8 |
+ {
|
|
|
cb7dd8 |
+ unsigned int ptype = *ccode++;
|
|
|
cb7dd8 |
+ unsigned int pvalue = *ccode++;
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ switch(ptype)
|
|
|
cb7dd8 |
+ {
|
|
|
cb7dd8 |
+ case PT_PXGRAPH:
|
|
|
cb7dd8 |
+ fprintf(f, "[:%sgraph:]", notch);
|
|
|
cb7dd8 |
+ break;
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ case PT_PXPRINT:
|
|
|
cb7dd8 |
+ fprintf(f, "[:%sprint:]", notch);
|
|
|
cb7dd8 |
+ break;
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ case PT_PXPUNCT:
|
|
|
cb7dd8 |
+ fprintf(f, "[:%spunct:]", notch);
|
|
|
cb7dd8 |
+ break;
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ default:
|
|
|
cb7dd8 |
+ fprintf(f, "\\%c{%s}", (not? 'P':'p'),
|
|
|
cb7dd8 |
+ get_ucpname(ptype, pvalue));
|
|
|
cb7dd8 |
+ break;
|
|
|
cb7dd8 |
+ }
|
|
|
cb7dd8 |
+ }
|
|
|
cb7dd8 |
+ break;
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ default:
|
|
|
cb7dd8 |
ccode += 1 + print_char(f, ccode, utf);
|
|
|
cb7dd8 |
if (ch == XCL_RANGE)
|
|
|
cb7dd8 |
{
|
|
|
cb7dd8 |
fprintf(f, "-");
|
|
|
cb7dd8 |
ccode += 1 + print_char(f, ccode, utf);
|
|
|
cb7dd8 |
}
|
|
|
cb7dd8 |
- }
|
|
|
cb7dd8 |
+ break;
|
|
|
cb7dd8 |
+ }
|
|
|
cb7dd8 |
}
|
|
|
cb7dd8 |
}
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
diff --git a/pcre_xclass.c b/pcre_xclass.c
|
|
|
cb7dd8 |
index fa73cd8..dd7008a 100644
|
|
|
cb7dd8 |
--- a/pcre_xclass.c
|
|
|
cb7dd8 |
+++ b/pcre_xclass.c
|
|
|
cb7dd8 |
@@ -128,57 +128,102 @@ while ((t = *data++) != XCL_END)
|
|
|
cb7dd8 |
else /* XCL_PROP & XCL_NOTPROP */
|
|
|
cb7dd8 |
{
|
|
|
cb7dd8 |
const ucd_record *prop = GET_UCD(c);
|
|
|
cb7dd8 |
+ BOOL isprop = t == XCL_PROP;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
switch(*data)
|
|
|
cb7dd8 |
{
|
|
|
cb7dd8 |
case PT_ANY:
|
|
|
cb7dd8 |
- if (t == XCL_PROP) return !negated;
|
|
|
cb7dd8 |
+ if (isprop) return !negated;
|
|
|
cb7dd8 |
break;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
case PT_LAMP:
|
|
|
cb7dd8 |
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
|
|
cb7dd8 |
- prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
|
|
|
cb7dd8 |
+ prop->chartype == ucp_Lt) == isprop) return !negated;
|
|
|
cb7dd8 |
break;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
case PT_GC:
|
|
|
cb7dd8 |
- if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
|
|
|
cb7dd8 |
+ if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
|
|
|
cb7dd8 |
return !negated;
|
|
|
cb7dd8 |
break;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
case PT_PC:
|
|
|
cb7dd8 |
- if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
|
|
|
cb7dd8 |
+ if ((data[1] == prop->chartype) == isprop) return !negated;
|
|
|
cb7dd8 |
break;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
case PT_SC:
|
|
|
cb7dd8 |
- if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
|
|
|
cb7dd8 |
+ if ((data[1] == prop->script) == isprop) return !negated;
|
|
|
cb7dd8 |
break;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
case PT_ALNUM:
|
|
|
cb7dd8 |
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
|
|
cb7dd8 |
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
|
|
|
cb7dd8 |
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
|
|
|
cb7dd8 |
return !negated;
|
|
|
cb7dd8 |
break;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
case PT_SPACE: /* Perl space */
|
|
|
cb7dd8 |
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
|
|
cb7dd8 |
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
|
|
cb7dd8 |
- == (t == XCL_PROP))
|
|
|
cb7dd8 |
+ == isprop)
|
|
|
cb7dd8 |
return !negated;
|
|
|
cb7dd8 |
break;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
case PT_PXSPACE: /* POSIX space */
|
|
|
cb7dd8 |
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
|
|
cb7dd8 |
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
|
|
cb7dd8 |
- c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
|
|
|
cb7dd8 |
+ c == CHAR_FF || c == CHAR_CR) == isprop)
|
|
|
cb7dd8 |
return !negated;
|
|
|
cb7dd8 |
break;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
case PT_WORD:
|
|
|
cb7dd8 |
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
|
|
cb7dd8 |
PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
|
|
|
cb7dd8 |
- == (t == XCL_PROP))
|
|
|
cb7dd8 |
+ == isprop)
|
|
|
cb7dd8 |
return !negated;
|
|
|
cb7dd8 |
break;
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ /* The following three properties can occur only in an XCLASS, as there
|
|
|
cb7dd8 |
+ is no \p or \P coding for them. */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ /* Graphic character. Implement this as not Z (space or separator) and
|
|
|
cb7dd8 |
+ not C (other), except for Cf (format) with a few exceptions. This seems
|
|
|
cb7dd8 |
+ to be what Perl does. The exceptional characters are:
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ U+061C Arabic Letter Mark
|
|
|
cb7dd8 |
+ U+180E Mongolian Vowel Separator
|
|
|
cb7dd8 |
+ U+2066 - U+2069 Various "isolate"s
|
|
|
cb7dd8 |
+ */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ case PT_PXGRAPH:
|
|
|
cb7dd8 |
+ if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
|
|
|
cb7dd8 |
+ (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
|
|
|
cb7dd8 |
+ (prop->chartype == ucp_Cf &&
|
|
|
cb7dd8 |
+ c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
|
|
|
cb7dd8 |
+ )) == isprop)
|
|
|
cb7dd8 |
+ return !negated;
|
|
|
cb7dd8 |
+ break;
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ /* Printable character: same as graphic, with the addition of Zs, i.e.
|
|
|
cb7dd8 |
+ not Zl and not Zp, and U+180E. */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ case PT_PXPRINT:
|
|
|
cb7dd8 |
+ if ((prop->chartype != ucp_Zl &&
|
|
|
cb7dd8 |
+ prop->chartype != ucp_Zp &&
|
|
|
cb7dd8 |
+ (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
|
|
|
cb7dd8 |
+ (prop->chartype == ucp_Cf &&
|
|
|
cb7dd8 |
+ c != 0x061c && (c < 0x2066 || c > 0x2069))
|
|
|
cb7dd8 |
+ )) == isprop)
|
|
|
cb7dd8 |
+ return !negated;
|
|
|
cb7dd8 |
+ break;
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ /* Punctuation: all Unicode punctuation, plus ASCII characters that
|
|
|
cb7dd8 |
+ Unicode treats as symbols rather than punctuation, for Perl
|
|
|
cb7dd8 |
+ compatibility (these are $+<=>^`|~). */
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+ case PT_PXPUNCT:
|
|
|
cb7dd8 |
+ if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
|
|
|
cb7dd8 |
+ (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
|
|
|
cb7dd8 |
+ return !negated;
|
|
|
cb7dd8 |
+ break;
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
/* This should never occur, but compilers may mutter if there is no
|
|
|
cb7dd8 |
default. */
|
|
|
cb7dd8 |
diff --git a/testdata/testinput6 b/testdata/testinput6
|
|
|
cb7dd8 |
index 219a30e..adafb89 100644
|
|
|
cb7dd8 |
--- a/testdata/testinput6
|
|
|
cb7dd8 |
+++ b/testdata/testinput6
|
|
|
cb7dd8 |
@@ -1319,4 +1319,150 @@
|
|
|
cb7dd8 |
/^s?c/mi8
|
|
|
cb7dd8 |
scat
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
+/^[[:graph:]]+$/8W
|
|
|
cb7dd8 |
+ Letter:ABC
|
|
|
cb7dd8 |
+ Mark:\x{300}\x{1d172}\x{1d17b}
|
|
|
cb7dd8 |
+ Number:9\x{660}
|
|
|
cb7dd8 |
+ Punctuation:\x{66a},;
|
|
|
cb7dd8 |
+ Symbol:\x{6de}<>\x{fffc}
|
|
|
cb7dd8 |
+ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
|
|
cb7dd8 |
+ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
|
|
cb7dd8 |
+ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
|
|
cb7dd8 |
+ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064}
|
|
|
cb7dd8 |
+ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f}
|
|
|
cb7dd8 |
+ \x{feff}
|
|
|
cb7dd8 |
+ \x{fff9}\x{fffa}\x{fffb}
|
|
|
cb7dd8 |
+ \x{110bd}
|
|
|
cb7dd8 |
+ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a}
|
|
|
cb7dd8 |
+ \x{e0001}
|
|
|
cb7dd8 |
+ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+ \x{09}
|
|
|
cb7dd8 |
+ \x{0a}
|
|
|
cb7dd8 |
+ \x{1D}
|
|
|
cb7dd8 |
+ \x{20}
|
|
|
cb7dd8 |
+ \x{85}
|
|
|
cb7dd8 |
+ \x{a0}
|
|
|
cb7dd8 |
+ \x{61c}
|
|
|
cb7dd8 |
+ \x{1680}
|
|
|
cb7dd8 |
+ \x{180e}
|
|
|
cb7dd8 |
+ \x{2028}
|
|
|
cb7dd8 |
+ \x{2029}
|
|
|
cb7dd8 |
+ \x{202f}
|
|
|
cb7dd8 |
+ \x{2065}
|
|
|
cb7dd8 |
+ \x{2066}
|
|
|
cb7dd8 |
+ \x{2067}
|
|
|
cb7dd8 |
+ \x{2068}
|
|
|
cb7dd8 |
+ \x{2069}
|
|
|
cb7dd8 |
+ \x{3000}
|
|
|
cb7dd8 |
+ \x{e0002}
|
|
|
cb7dd8 |
+ \x{e001f}
|
|
|
cb7dd8 |
+ \x{e0080}
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/^[[:print:]]+$/8W
|
|
|
cb7dd8 |
+ Space: \x{a0}
|
|
|
cb7dd8 |
+ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005}
|
|
|
cb7dd8 |
+ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a}
|
|
|
cb7dd8 |
+ \x{202f}\x{205f}
|
|
|
cb7dd8 |
+ \x{3000}
|
|
|
cb7dd8 |
+ Letter:ABC
|
|
|
cb7dd8 |
+ Mark:\x{300}\x{1d172}\x{1d17b}
|
|
|
cb7dd8 |
+ Number:9\x{660}
|
|
|
cb7dd8 |
+ Punctuation:\x{66a},;
|
|
|
cb7dd8 |
+ Symbol:\x{6de}<>\x{fffc}
|
|
|
cb7dd8 |
+ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
|
|
cb7dd8 |
+ \x{180e}
|
|
|
cb7dd8 |
+ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
|
|
cb7dd8 |
+ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
|
|
cb7dd8 |
+ \x{202f}
|
|
|
cb7dd8 |
+ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064}
|
|
|
cb7dd8 |
+ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f}
|
|
|
cb7dd8 |
+ \x{feff}
|
|
|
cb7dd8 |
+ \x{fff9}\x{fffa}\x{fffb}
|
|
|
cb7dd8 |
+ \x{110bd}
|
|
|
cb7dd8 |
+ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a}
|
|
|
cb7dd8 |
+ \x{e0001}
|
|
|
cb7dd8 |
+ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+ \x{09}
|
|
|
cb7dd8 |
+ \x{1D}
|
|
|
cb7dd8 |
+ \x{85}
|
|
|
cb7dd8 |
+ \x{61c}
|
|
|
cb7dd8 |
+ \x{2028}
|
|
|
cb7dd8 |
+ \x{2029}
|
|
|
cb7dd8 |
+ \x{2065}
|
|
|
cb7dd8 |
+ \x{2066}
|
|
|
cb7dd8 |
+ \x{2067}
|
|
|
cb7dd8 |
+ \x{2068}
|
|
|
cb7dd8 |
+ \x{2069}
|
|
|
cb7dd8 |
+ \x{e0002}
|
|
|
cb7dd8 |
+ \x{e001f}
|
|
|
cb7dd8 |
+ \x{e0080}
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/^[[:punct:]]+$/8W
|
|
|
cb7dd8 |
+ \$+<=>^`|~
|
|
|
cb7dd8 |
+ !\"#%&'()*,-./:;?@[\\]_{}
|
|
|
cb7dd8 |
+ \x{a1}\x{a7}
|
|
|
cb7dd8 |
+ \x{37e}
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+ abcde
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/^[[:^graph:]]+$/8W
|
|
|
cb7dd8 |
+ \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
|
|
|
cb7dd8 |
+ \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
|
|
|
cb7dd8 |
+ \x{3000}\x{e0002}\x{e001f}\x{e0080}
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+ Letter:ABC
|
|
|
cb7dd8 |
+ Mark:\x{300}\x{1d172}\x{1d17b}
|
|
|
cb7dd8 |
+ Number:9\x{660}
|
|
|
cb7dd8 |
+ Punctuation:\x{66a},;
|
|
|
cb7dd8 |
+ Symbol:\x{6de}<>\x{fffc}
|
|
|
cb7dd8 |
+ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
|
|
cb7dd8 |
+ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
|
|
cb7dd8 |
+ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
|
|
cb7dd8 |
+ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064}
|
|
|
cb7dd8 |
+ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f}
|
|
|
cb7dd8 |
+ \x{feff}
|
|
|
cb7dd8 |
+ \x{fff9}\x{fffa}\x{fffb}
|
|
|
cb7dd8 |
+ \x{110bd}
|
|
|
cb7dd8 |
+ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a}
|
|
|
cb7dd8 |
+ \x{e0001}
|
|
|
cb7dd8 |
+ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/^[[:^print:]]+$/8W
|
|
|
cb7dd8 |
+ \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
|
|
|
cb7dd8 |
+ \x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080}
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+ Space: \x{a0}
|
|
|
cb7dd8 |
+ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005}
|
|
|
cb7dd8 |
+ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a}
|
|
|
cb7dd8 |
+ \x{202f}\x{205f}
|
|
|
cb7dd8 |
+ \x{3000}
|
|
|
cb7dd8 |
+ Letter:ABC
|
|
|
cb7dd8 |
+ Mark:\x{300}\x{1d172}\x{1d17b}
|
|
|
cb7dd8 |
+ Number:9\x{660}
|
|
|
cb7dd8 |
+ Punctuation:\x{66a},;
|
|
|
cb7dd8 |
+ Symbol:\x{6de}<>\x{fffc}
|
|
|
cb7dd8 |
+ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
|
|
cb7dd8 |
+ \x{180e}
|
|
|
cb7dd8 |
+ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
|
|
cb7dd8 |
+ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
|
|
cb7dd8 |
+ \x{202f}
|
|
|
cb7dd8 |
+ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064}
|
|
|
cb7dd8 |
+ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f}
|
|
|
cb7dd8 |
+ \x{feff}
|
|
|
cb7dd8 |
+ \x{fff9}\x{fffa}\x{fffb}
|
|
|
cb7dd8 |
+ \x{110bd}
|
|
|
cb7dd8 |
+ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a}
|
|
|
cb7dd8 |
+ \x{e0001}
|
|
|
cb7dd8 |
+ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/^[[:^punct:]]+$/8W
|
|
|
cb7dd8 |
+ abcde
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+ \$+<=>^`|~
|
|
|
cb7dd8 |
+ !\"#%&'()*,-./:;?@[\\]_{}
|
|
|
cb7dd8 |
+ \x{a1}\x{a7}
|
|
|
cb7dd8 |
+ \x{37e}
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
/-- End of testinput6 --/
|
|
|
cb7dd8 |
diff --git a/testdata/testinput7 b/testdata/testinput7
|
|
|
cb7dd8 |
index 252d246..bcdcef9 100644
|
|
|
cb7dd8 |
--- a/testdata/testinput7
|
|
|
cb7dd8 |
+++ b/testdata/testinput7
|
|
|
cb7dd8 |
@@ -672,4 +672,14 @@ of case for anything other than the ASCII letters. --/
|
|
|
cb7dd8 |
/^s?c/mi8I
|
|
|
cb7dd8 |
scat
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
+/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/BZx
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/.+\X/BZxs
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/\X+$/BZxm
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/BZx
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
/-- End of testinput7 --/
|
|
|
cb7dd8 |
diff --git a/testdata/testoutput6 b/testdata/testoutput6
|
|
|
cb7dd8 |
index 090d23f..c426efc 100644
|
|
|
cb7dd8 |
--- a/testdata/testoutput6
|
|
|
cb7dd8 |
+++ b/testdata/testoutput6
|
|
|
cb7dd8 |
@@ -1338,15 +1338,15 @@ No match
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
/^[[:graph:]]*/8W
|
|
|
cb7dd8 |
A\x{a1}\x{a0}
|
|
|
cb7dd8 |
- 0: A
|
|
|
cb7dd8 |
+ 0: A\x{a1}
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
/^[[:print:]]*/8W
|
|
|
cb7dd8 |
A z\x{a0}\x{a1}
|
|
|
cb7dd8 |
- 0: A z
|
|
|
cb7dd8 |
+ 0: A z\x{a0}\x{a1}
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
/^[[:punct:]]*/8W
|
|
|
cb7dd8 |
.+\x{a1}\x{a0}
|
|
|
cb7dd8 |
- 0: .+
|
|
|
cb7dd8 |
+ 0: .+\x{a1}
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
/\p{Zs}*?\R/
|
|
|
cb7dd8 |
** Failers
|
|
|
cb7dd8 |
@@ -2138,4 +2138,284 @@ No match
|
|
|
cb7dd8 |
scat
|
|
|
cb7dd8 |
0: sc
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
+/^[[:graph:]]+$/8W
|
|
|
cb7dd8 |
+ Letter:ABC
|
|
|
cb7dd8 |
+ 0: Letter:ABC
|
|
|
cb7dd8 |
+ Mark:\x{300}\x{1d172}\x{1d17b}
|
|
|
cb7dd8 |
+ 0: Mark:\x{300}\x{1d172}\x{1d17b}
|
|
|
cb7dd8 |
+ Number:9\x{660}
|
|
|
cb7dd8 |
+ 0: Number:9\x{660}
|
|
|
cb7dd8 |
+ Punctuation:\x{66a},;
|
|
|
cb7dd8 |
+ 0: Punctuation:\x{66a},;
|
|
|
cb7dd8 |
+ Symbol:\x{6de}<>\x{fffc}
|
|
|
cb7dd8 |
+ 0: Symbol:\x{6de}<>\x{fffc}
|
|
|
cb7dd8 |
+ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
|
|
cb7dd8 |
+ 0: Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
|
|
cb7dd8 |
+ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
|
|
cb7dd8 |
+ 0: \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
|
|
cb7dd8 |
+ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
|
|
cb7dd8 |
+ 0: \x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
|
|
cb7dd8 |
+ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064}
|
|
|
cb7dd8 |
+ 0: \x{2060}\x{2061}\x{2062}\x{2063}\x{2064}
|
|
|
cb7dd8 |
+ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f}
|
|
|
cb7dd8 |
+ 0: \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f}
|
|
|
cb7dd8 |
+ \x{feff}
|
|
|
cb7dd8 |
+ 0: \x{feff}
|
|
|
cb7dd8 |
+ \x{fff9}\x{fffa}\x{fffb}
|
|
|
cb7dd8 |
+ 0: \x{fff9}\x{fffa}\x{fffb}
|
|
|
cb7dd8 |
+ \x{110bd}
|
|
|
cb7dd8 |
+ 0: \x{110bd}
|
|
|
cb7dd8 |
+ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a}
|
|
|
cb7dd8 |
+ 0: \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a}
|
|
|
cb7dd8 |
+ \x{e0001}
|
|
|
cb7dd8 |
+ 0: \x{e0001}
|
|
|
cb7dd8 |
+ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
|
|
cb7dd8 |
+ 0: \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{09}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{0a}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{1D}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{20}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{85}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{a0}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{61c}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{1680}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{180e}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2028}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2029}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{202f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2065}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2066}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2067}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2068}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2069}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{3000}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{e0002}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{e001f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{e0080}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/^[[:print:]]+$/8W
|
|
|
cb7dd8 |
+ Space: \x{a0}
|
|
|
cb7dd8 |
+ 0: Space: \x{a0}
|
|
|
cb7dd8 |
+ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005}
|
|
|
cb7dd8 |
+ 0: \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005}
|
|
|
cb7dd8 |
+ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a}
|
|
|
cb7dd8 |
+ 0: \x{2006}\x{2007}\x{2008}\x{2009}\x{200a}
|
|
|
cb7dd8 |
+ \x{202f}\x{205f}
|
|
|
cb7dd8 |
+ 0: \x{202f}\x{205f}
|
|
|
cb7dd8 |
+ \x{3000}
|
|
|
cb7dd8 |
+ 0: \x{3000}
|
|
|
cb7dd8 |
+ Letter:ABC
|
|
|
cb7dd8 |
+ 0: Letter:ABC
|
|
|
cb7dd8 |
+ Mark:\x{300}\x{1d172}\x{1d17b}
|
|
|
cb7dd8 |
+ 0: Mark:\x{300}\x{1d172}\x{1d17b}
|
|
|
cb7dd8 |
+ Number:9\x{660}
|
|
|
cb7dd8 |
+ 0: Number:9\x{660}
|
|
|
cb7dd8 |
+ Punctuation:\x{66a},;
|
|
|
cb7dd8 |
+ 0: Punctuation:\x{66a},;
|
|
|
cb7dd8 |
+ Symbol:\x{6de}<>\x{fffc}
|
|
|
cb7dd8 |
+ 0: Symbol:\x{6de}<>\x{fffc}
|
|
|
cb7dd8 |
+ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
|
|
cb7dd8 |
+ 0: Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
|
|
cb7dd8 |
+ \x{180e}
|
|
|
cb7dd8 |
+ 0: \x{180e}
|
|
|
cb7dd8 |
+ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
|
|
cb7dd8 |
+ 0: \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
|
|
cb7dd8 |
+ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
|
|
cb7dd8 |
+ 0: \x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
|
|
cb7dd8 |
+ \x{202f}
|
|
|
cb7dd8 |
+ 0: \x{202f}
|
|
|
cb7dd8 |
+ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064}
|
|
|
cb7dd8 |
+ 0: \x{2060}\x{2061}\x{2062}\x{2063}\x{2064}
|
|
|
cb7dd8 |
+ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f}
|
|
|
cb7dd8 |
+ 0: \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f}
|
|
|
cb7dd8 |
+ \x{feff}
|
|
|
cb7dd8 |
+ 0: \x{feff}
|
|
|
cb7dd8 |
+ \x{fff9}\x{fffa}\x{fffb}
|
|
|
cb7dd8 |
+ 0: \x{fff9}\x{fffa}\x{fffb}
|
|
|
cb7dd8 |
+ \x{110bd}
|
|
|
cb7dd8 |
+ 0: \x{110bd}
|
|
|
cb7dd8 |
+ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a}
|
|
|
cb7dd8 |
+ 0: \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a}
|
|
|
cb7dd8 |
+ \x{e0001}
|
|
|
cb7dd8 |
+ 0: \x{e0001}
|
|
|
cb7dd8 |
+ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
|
|
cb7dd8 |
+ 0: \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+ 0: ** Failers
|
|
|
cb7dd8 |
+ \x{09}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{1D}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{85}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{61c}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2028}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2029}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2065}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2066}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2067}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2068}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2069}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{e0002}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{e001f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{e0080}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/^[[:punct:]]+$/8W
|
|
|
cb7dd8 |
+ \$+<=>^`|~
|
|
|
cb7dd8 |
+ 0: $+<=>^`|~
|
|
|
cb7dd8 |
+ !\"#%&'()*,-./:;?@[\\]_{}
|
|
|
cb7dd8 |
+ 0: !"#%&'()*,-./:;?@[\]_{}
|
|
|
cb7dd8 |
+ \x{a1}\x{a7}
|
|
|
cb7dd8 |
+ 0: \x{a1}\x{a7}
|
|
|
cb7dd8 |
+ \x{37e}
|
|
|
cb7dd8 |
+ 0: \x{37e}
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ abcde
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/^[[:^graph:]]+$/8W
|
|
|
cb7dd8 |
+ \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
|
|
|
cb7dd8 |
+ 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}\x{180e}
|
|
|
cb7dd8 |
+ \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
|
|
|
cb7dd8 |
+ 0: \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
|
|
|
cb7dd8 |
+ \x{3000}\x{e0002}\x{e001f}\x{e0080}
|
|
|
cb7dd8 |
+ 0: \x{3000}\x{e0002}\x{e001f}\x{e0080}
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Letter:ABC
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Mark:\x{300}\x{1d172}\x{1d17b}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Number:9\x{660}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Punctuation:\x{66a},;
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Symbol:\x{6de}<>\x{fffc}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{feff}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{fff9}\x{fffa}\x{fffb}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{110bd}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{e0001}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/^[[:^print:]]+$/8W
|
|
|
cb7dd8 |
+ \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
|
|
|
cb7dd8 |
+ 0: \x{09}\x{1d}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
|
|
|
cb7dd8 |
+ \x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080}
|
|
|
cb7dd8 |
+ 0: \x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080}
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Space: \x{a0}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2006}\x{2007}\x{2008}\x{2009}\x{200a}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{202f}\x{205f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{3000}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Letter:ABC
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Mark:\x{300}\x{1d172}\x{1d17b}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Number:9\x{660}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Punctuation:\x{66a},;
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Symbol:\x{6de}<>\x{fffc}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{180e}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{202f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{2060}\x{2061}\x{2062}\x{2063}\x{2064}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{feff}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{fff9}\x{fffa}\x{fffb}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{110bd}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{e0001}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/^[[:^punct:]]+$/8W
|
|
|
cb7dd8 |
+ abcde
|
|
|
cb7dd8 |
+ 0: abcde
|
|
|
cb7dd8 |
+ ** Failers
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \$+<=>^`|~
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ !\"#%&'()*,-./:;?@[\\]_{}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{a1}\x{a7}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+ \x{37e}
|
|
|
cb7dd8 |
+No match
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
/-- End of testinput6 --/
|
|
|
cb7dd8 |
diff --git a/testdata/testoutput7 b/testdata/testoutput7
|
|
|
cb7dd8 |
index 5f0f546..e3f607c 100644
|
|
|
cb7dd8 |
--- a/testdata/testoutput7
|
|
|
cb7dd8 |
+++ b/testdata/testoutput7
|
|
|
cb7dd8 |
@@ -820,7 +820,7 @@ No match
|
|
|
cb7dd8 |
/[[:graph:]]/WBZ
|
|
|
cb7dd8 |
------------------------------------------------------------------
|
|
|
cb7dd8 |
Bra
|
|
|
cb7dd8 |
- [!-~]
|
|
|
cb7dd8 |
+ [[:graph:]]
|
|
|
cb7dd8 |
Ket
|
|
|
cb7dd8 |
End
|
|
|
cb7dd8 |
------------------------------------------------------------------
|
|
|
cb7dd8 |
@@ -828,7 +828,7 @@ No match
|
|
|
cb7dd8 |
/[[:print:]]/WBZ
|
|
|
cb7dd8 |
------------------------------------------------------------------
|
|
|
cb7dd8 |
Bra
|
|
|
cb7dd8 |
- [ -~]
|
|
|
cb7dd8 |
+ [[:print:]]
|
|
|
cb7dd8 |
Ket
|
|
|
cb7dd8 |
End
|
|
|
cb7dd8 |
------------------------------------------------------------------
|
|
|
cb7dd8 |
@@ -836,7 +836,7 @@ No match
|
|
|
cb7dd8 |
/[[:punct:]]/WBZ
|
|
|
cb7dd8 |
------------------------------------------------------------------
|
|
|
cb7dd8 |
Bra
|
|
|
cb7dd8 |
- [!-/:-@[-`{-~]
|
|
|
cb7dd8 |
+ [[:punct:]]
|
|
|
cb7dd8 |
Ket
|
|
|
cb7dd8 |
End
|
|
|
cb7dd8 |
------------------------------------------------------------------
|
|
|
cb7dd8 |
@@ -1478,4 +1478,115 @@ Need char = 'c' (caseless)
|
|
|
cb7dd8 |
scat
|
|
|
cb7dd8 |
0: sc
|
|
|
cb7dd8 |
|
|
|
cb7dd8 |
+/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/BZx
|
|
|
cb7dd8 |
+------------------------------------------------------------------
|
|
|
cb7dd8 |
+ Bra
|
|
|
cb7dd8 |
+ \D+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \d+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \S+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \s+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \W+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \w+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ AllAny+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \R+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \H+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \h+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \V+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \v+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ a+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ \x0a+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ Any+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ Ket
|
|
|
cb7dd8 |
+ End
|
|
|
cb7dd8 |
+------------------------------------------------------------------
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/.+\X/BZxs
|
|
|
cb7dd8 |
+------------------------------------------------------------------
|
|
|
cb7dd8 |
+ Bra
|
|
|
cb7dd8 |
+ AllAny+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ Ket
|
|
|
cb7dd8 |
+ End
|
|
|
cb7dd8 |
+------------------------------------------------------------------
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/\X+$/BZxm
|
|
|
cb7dd8 |
+------------------------------------------------------------------
|
|
|
cb7dd8 |
+ Bra
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ /m $
|
|
|
cb7dd8 |
+ Ket
|
|
|
cb7dd8 |
+ End
|
|
|
cb7dd8 |
+------------------------------------------------------------------
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/BZx
|
|
|
cb7dd8 |
+------------------------------------------------------------------
|
|
|
cb7dd8 |
+ Bra
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \D
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \d
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \S
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \s
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \W
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \w
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ Any
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ AllAny
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \R
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \H
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \h
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \V
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \v
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ extuni
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \Z
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ \z
|
|
|
cb7dd8 |
+ extuni+
|
|
|
cb7dd8 |
+ $
|
|
|
cb7dd8 |
+ Ket
|
|
|
cb7dd8 |
+ End
|
|
|
cb7dd8 |
+------------------------------------------------------------------
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
+/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ
|
|
|
cb7dd8 |
+------------------------------------------------------------------
|
|
|
cb7dd8 |
+ Bra
|
|
|
cb7dd8 |
+ prop Nd +
|
|
|
cb7dd8 |
+ prop Xsp {0,5}
|
|
|
cb7dd8 |
+ =
|
|
|
cb7dd8 |
+ prop Xsp *
|
|
|
cb7dd8 |
+ notprop Xsp ?
|
|
|
cb7dd8 |
+ =
|
|
|
cb7dd8 |
+ prop Xwd {0,4}
|
|
|
cb7dd8 |
+ notprop Xwd *
|
|
|
cb7dd8 |
+ Ket
|
|
|
cb7dd8 |
+ End
|
|
|
cb7dd8 |
+------------------------------------------------------------------
|
|
|
cb7dd8 |
+
|
|
|
cb7dd8 |
/-- End of testinput7 --/
|
|
|
cb7dd8 |
--
|
|
|
cb7dd8 |
2.7.4
|
|
|
cb7dd8 |
|