|
|
ae2451 |
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
|
|
|
ae2451 |
index 7cda2b6..15d2324 100644
|
|
|
ae2451 |
--- a/Lib/sre_compile.py
|
|
|
ae2451 |
+++ b/Lib/sre_compile.py
|
|
|
ae2451 |
@@ -355,8 +355,6 @@ def _optimize_unicode(charset, fixup):
|
|
|
ae2451 |
def _simple(av):
|
|
|
ae2451 |
# check if av is a "simple" operator
|
|
|
ae2451 |
lo, hi = av[2].getwidth()
|
|
|
ae2451 |
- if lo == 0 and hi == MAXREPEAT:
|
|
|
ae2451 |
- raise error, "nothing to repeat"
|
|
|
ae2451 |
return lo == hi == 1 and av[2][0][0] != SUBPATTERN
|
|
|
ae2451 |
|
|
|
ae2451 |
def _compile_info(code, pattern, flags):
|
|
|
ae2451 |
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
|
|
|
ae2451 |
index 75f8c96..644441d 100644
|
|
|
ae2451 |
--- a/Lib/sre_parse.py
|
|
|
ae2451 |
+++ b/Lib/sre_parse.py
|
|
|
ae2451 |
@@ -147,7 +147,7 @@ class SubPattern:
|
|
|
ae2451 |
REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
|
|
|
ae2451 |
for op, av in self.data:
|
|
|
ae2451 |
if op is BRANCH:
|
|
|
ae2451 |
- i = sys.maxint
|
|
|
ae2451 |
+ i = MAXREPEAT - 1
|
|
|
ae2451 |
j = 0
|
|
|
ae2451 |
for av in av[1]:
|
|
|
ae2451 |
l, h = av.getwidth()
|
|
|
ae2451 |
@@ -165,14 +165,14 @@ class SubPattern:
|
|
|
ae2451 |
hi = hi + j
|
|
|
ae2451 |
elif op in REPEATCODES:
|
|
|
ae2451 |
i, j = av[2].getwidth()
|
|
|
ae2451 |
- lo = lo + long(i) * av[0]
|
|
|
ae2451 |
- hi = hi + long(j) * av[1]
|
|
|
ae2451 |
+ lo = lo + i * av[0]
|
|
|
ae2451 |
+ hi = hi + j * av[1]
|
|
|
ae2451 |
elif op in UNITCODES:
|
|
|
ae2451 |
lo = lo + 1
|
|
|
ae2451 |
hi = hi + 1
|
|
|
ae2451 |
elif op == SUCCESS:
|
|
|
ae2451 |
break
|
|
|
ae2451 |
- self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
|
|
|
ae2451 |
+ self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT)
|
|
|
ae2451 |
return self.width
|
|
|
ae2451 |
|
|
|
ae2451 |
class Tokenizer:
|
|
|
ae2451 |
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
|
|
|
ae2451 |
index 18a81a2..f0827d8 100644
|
|
|
ae2451 |
--- a/Lib/test/test_re.py
|
|
|
ae2451 |
+++ b/Lib/test/test_re.py
|
|
|
ae2451 |
@@ -897,6 +897,17 @@ class ReTests(unittest.TestCase):
|
|
|
ae2451 |
with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
|
|
|
ae2451 |
re.compile('(?P)')
|
|
|
ae2451 |
|
|
|
ae2451 |
+ def test_bug_2537(self):
|
|
|
ae2451 |
+ # issue 2537: empty submatches
|
|
|
ae2451 |
+ for outer_op in ('{0,}', '*', '+', '{1,187}'):
|
|
|
ae2451 |
+ for inner_op in ('{0,}', '*', '?'):
|
|
|
ae2451 |
+ r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
|
|
|
ae2451 |
+ m = r.match("xyyzy")
|
|
|
ae2451 |
+ self.assertEqual(m.group(0), "xyy")
|
|
|
ae2451 |
+ self.assertEqual(m.group(1), "")
|
|
|
ae2451 |
+ self.assertEqual(m.group(2), "y")
|
|
|
ae2451 |
+
|
|
|
ae2451 |
+
|
|
|
ae2451 |
|
|
|
ae2451 |
def run_re_tests():
|
|
|
ae2451 |
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
|
|
|
ae2451 |
diff --git a/Lib/doctest.py b/Lib/doctest.py
|
|
|
ae2451 |
index 90bcca1..0ee40a2 100644
|
|
|
ae2451 |
--- a/Lib/doctest.py
|
|
|
ae2451 |
+++ b/Lib/doctest.py
|
|
|
ae2451 |
@@ -564,7 +564,7 @@ class DocTestParser:
|
|
|
ae2451 |
# Want consists of any non-blank lines that do not start with PS1.
|
|
|
ae2451 |
(?P<want> (?:(?![ ]*$) # Not a blank line
|
|
|
ae2451 |
(?![ ]*>>>) # Not a line starting with PS1
|
|
|
ae2451 |
- .*$\n? # But any other line
|
|
|
ae2451 |
+ .+$\n? # But any other line
|
|
|
ae2451 |
)*)
|
|
|
ae2451 |
''', re.MULTILINE | re.VERBOSE)
|
|
|
ae2451 |
|
|
|
ae2451 |
|