An interpreted, interactive, object-oriented programming language
CentOS Sources
2017-08-01 71084d584ff953f5463757ec6536406320560b4d
commit | author | age
f63228 1
CS 2 # HG changeset patch
3 # User Serhiy Storchaka <storchaka@gmail.com>
4 # Date 1382204269 -10800
5 # Node ID 214c0aac7540947d88a38ff0061734547ef86710
6 # Parent  c207ac413457a1b834e4b7dcf1a6836cd6e036e3
7 Issue #19279: UTF-7 decoder no longer produces illegal Unicode strings.
8
9 diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
10 --- a/Lib/test/test_codecs.py
11 +++ b/Lib/test/test_codecs.py
12 @@ -611,6 +611,35 @@ class UTF7Test(ReadTest):
13              ]
14          )
15  
16 +    def test_errors(self):
17 +        tests = [
18 +            ('a\xffb', u'a\ufffdb'),
19 +            ('a+IK', u'a\ufffd'),
20 +            ('a+IK-b', u'a\ufffdb'),
21 +            ('a+IK,b', u'a\ufffdb'),
22 +            ('a+IKx', u'a\u20ac\ufffd'),
23 +            ('a+IKx-b', u'a\u20ac\ufffdb'),
24 +            ('a+IKwgr', u'a\u20ac\ufffd'),
25 +            ('a+IKwgr-b', u'a\u20ac\ufffdb'),
26 +            ('a+IKwgr,', u'a\u20ac\ufffd'),
27 +            ('a+IKwgr,-b', u'a\u20ac\ufffd-b'),
28 +            ('a+IKwgrB', u'a\u20ac\u20ac\ufffd'),
29 +            ('a+IKwgrB-b', u'a\u20ac\u20ac\ufffdb'),
30 +            ('a+/,+IKw-b', u'a\ufffd\u20acb'),
31 +            ('a+//,+IKw-b', u'a\ufffd\u20acb'),
32 +            ('a+///,+IKw-b', u'a\uffff\ufffd\u20acb'),
33 +            ('a+////,+IKw-b', u'a\uffff\ufffd\u20acb'),
34 +        ]
35 +        for raw, expected in tests:
36 +            self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode,
37 +                              raw, 'strict', True)
38 +            self.assertEqual(raw.decode('utf-7', 'replace'), expected)
39 +
40 +    def test_nonbmp(self):
41 +        self.assertEqual(u'\U000104A0'.encode(self.encoding), '+2AHcoA-')
42 +        self.assertEqual(u'\ud801\udca0'.encode(self.encoding), '+2AHcoA-')
43 +        self.assertEqual('+2AHcoA-'.decode(self.encoding), u'\U000104A0')
44 +
45  class UTF16ExTest(unittest.TestCase):
46  
47      def test_errors(self):
48 diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
49 --- a/Objects/unicodeobject.c
50 +++ b/Objects/unicodeobject.c
51 @@ -1671,6 +1671,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(c
52                                         (base64buffer >> (base64bits-16));
53                      base64bits -= 16;
54                      base64buffer &= (1 << base64bits) - 1; /* clear high bits */
55 +                    assert(outCh <= 0xffff);
56                      if (surrogate) {
57                          /* expecting a second surrogate */
58                          if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
59 @@ -1737,6 +1738,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(c
60                  inShift = 1;
61                  shiftOutStart = p;
62                  base64bits = 0;
63 +                base64buffer = 0;
64              }
65          }
66          else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
67