An interpreted, interactive, object-oriented programming language
CentOS Sources
2017-08-01 71084d584ff953f5463757ec6536406320560b4d
commit | author | age
f63228 1
CS 2 # HG changeset patch
3 # User Victor Stinner <victor.stinner@gmail.com>
4 # Date 1406673545 -7200
5 # Node ID 263701e0b77e3160bc6a835087f838bd6b24092a
6 # Parent  6c47c6d2033e20e9b35f1d22e0e797961d6e680f
7 Issue #22023: Fix %S, %R and %V formats of PyUnicode_FromFormat().
8
9 diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
10 --- a/Objects/unicodeobject.c
11 +++ b/Objects/unicodeobject.c
12 @@ -690,7 +690,12 @@ makefmt(char *fmt, int longflag, int siz
13      *fmt = '\0';
14  }
15  
16 -#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
17 +#define appendstring(string) \
18 +    do { \
19 +        for (copy = string;*copy; copy++) { \
20 +            *s++ = (unsigned char)*copy; \
21 +        } \
22 +    } while (0)
23  
24  PyObject *
25  PyUnicode_FromFormatV(const char *format, va_list vargs)
26 @@ -845,7 +850,7 @@ PyUnicode_FromFormatV(const char *format
27                  str = PyObject_Str(obj);
28                  if (!str)
29                      goto fail;
30 -                n += PyUnicode_GET_SIZE(str);
31 +                n += PyString_GET_SIZE(str);
32                  /* Remember the str and switch to the next slot */
33                  *callresult++ = str;
34                  break;
35 @@ -1006,15 +1011,10 @@ PyUnicode_FromFormatV(const char *format
36              case 'S':
37              case 'R':
38              {
39 -                Py_UNICODE *ucopy;
40 -                Py_ssize_t usize;
41 -                Py_ssize_t upos;
42 +                const char *str = PyString_AS_STRING(*callresult);
43                  /* unused, since we already have the result */
44                  (void) va_arg(vargs, PyObject *);
45 -                ucopy = PyUnicode_AS_UNICODE(*callresult);
46 -                usize = PyUnicode_GET_SIZE(*callresult);
47 -                for (upos = 0; upos<usize;)
48 -                    *s++ = ucopy[upos++];
49 +                appendstring(str);
50                  /* We're done with the unicode()/repr() => forget it */
51                  Py_DECREF(*callresult);
52                  /* switch to next unicode()/repr() result */
53
54 diff -up Python-2.7.5/Lib/test/test_unicode.py.uni Python-2.7.5/Lib/test/test_unicode.py
55 --- Python-2.7.5/Lib/test/test_unicode.py.uni    2015-02-24 13:37:01.704739438 +0100
56 +++ Python-2.7.5/Lib/test/test_unicode.py    2015-02-24 13:38:38.439482167 +0100
57 @@ -1633,6 +1633,119 @@ class UnicodeTest(
58          self.assertEqual("%s" % u, u'__unicode__ overridden')
59          self.assertEqual("{}".format(u), '__unicode__ overridden')
60  
61 +    # Test PyUnicode_FromFormat()
62 +    def test_from_format(self):
63 +        test_support.import_module('ctypes')
64 +        from ctypes import (
65 +            pythonapi, py_object, sizeof,
66 +            c_int, c_long, c_longlong, c_ssize_t,
67 +            c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
68 +        if sys.maxunicode == 0xffff:
69 +            name = "PyUnicodeUCS2_FromFormat"
70 +        else:
71 +            name = "PyUnicodeUCS4_FromFormat"
72 +        _PyUnicode_FromFormat = getattr(pythonapi, name)
73 +        _PyUnicode_FromFormat.restype = py_object
74 +
75 +        def PyUnicode_FromFormat(format, *args):
76 +            cargs = tuple(
77 +                py_object(arg) if isinstance(arg, unicode) else arg
78 +                for arg in args)
79 +            return _PyUnicode_FromFormat(format, *cargs)
80 +
81 +        def check_format(expected, format, *args):
82 +            text = PyUnicode_FromFormat(format, *args)
83 +            self.assertEqual(expected, text)
84 +
85 +        # ascii format, non-ascii argument
86 +        check_format(u'ascii\x7f=unicode\xe9',
87 +                     b'ascii\x7f=%U', u'unicode\xe9')
88 +
89 +        # non-ascii format, ascii argument: PyUnicode_FromFormatV() should
90 +        # raise an error (this check is currently disabled, see below)
91 +        #self.assertRaisesRegex(ValueError,
92 +        #    '^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
93 +        #    'string, got a non-ASCII byte: 0xe9$',
94 +        #    PyUnicode_FromFormat, b'unicode\xe9=%s', u'ascii')
95 +
96 +        # test "%c"
97 +        check_format(u'\uabcd',
98 +                     b'%c', c_int(0xabcd))
99 +        if sys.maxunicode > 0xffff:
100 +            check_format(u'\U0010ffff',
101 +                         b'%c', c_int(0x10ffff))
102 +        with self.assertRaises(OverflowError):
103 +            PyUnicode_FromFormat(b'%c', c_int(0x110000))
104 +        # Issue #18183
105 +        if sys.maxunicode > 0xffff:
106 +            check_format(u'\U00010000\U00100000',
107 +                         b'%c%c', c_int(0x10000), c_int(0x100000))
108 +
109 +        # test "%"
110 +        check_format(u'%',
111 +                     b'%')
112 +        check_format(u'%',
113 +                     b'%%')
114 +        check_format(u'%s',
115 +                     b'%%s')
116 +        check_format(u'[%]',
117 +                     b'[%%]')
118 +        check_format(u'%abc',
119 +                     b'%%%s', b'abc')
120 +
121 +        # test %S
122 +        check_format(u"repr=abc",
123 +                     b'repr=%S', u'abc')
124 +
125 +        # test %R
126 +        check_format(u"repr=u'abc'",
127 +                     b'repr=%R', u'abc')
128 +
129 +        # test integer formats (%i, %d, %u)
130 +        check_format(u'010',
131 +                     b'%03i', c_int(10))
132 +        check_format(u'0010',
133 +                     b'%0.4i', c_int(10))
134 +        check_format(u'-123',
135 +                     b'%i', c_int(-123))
136 +
137 +        check_format(u'-123',
138 +                     b'%d', c_int(-123))
139 +        check_format(u'-123',
140 +                     b'%ld', c_long(-123))
141 +        check_format(u'-123',
142 +                     b'%zd', c_ssize_t(-123))
143 +
144 +        check_format(u'123',
145 +                     b'%u', c_uint(123))
146 +        check_format(u'123',
147 +                     b'%lu', c_ulong(123))
148 +        check_format(u'123',
149 +                     b'%zu', c_size_t(123))
150 +
151 +        # test long output
152 +        PyUnicode_FromFormat(b'%p', c_void_p(-1))
153 +
154 +        # test %V
155 +        check_format(u'repr=abc',
156 +                     b'repr=%V', u'abc', b'xyz')
157 +        check_format(u'repr=\xe4\xba\xba\xe6\xb0\x91',
158 +                     b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
159 +        check_format(u'repr=abc\xff',
160 +                     b'repr=%V', None, b'abc\xff')
161 +
162 +        # Not supported: copy the raw format string. These tests are just here
163 +        # to check for crashes and should not be considered as specifications
164 +        check_format(u'%s',
165 +                     b'%1%s', b'abc')
166 +        check_format(u'%1abc',
167 +                     b'%1abc')
168 +        check_format(u'%+i',
169 +                     b'%+i', c_int(10))
170 +        check_format(u'%s',
171 +                     b'%.%s', b'abc')
172 +
173 +
174      def test_encode_decimal(self):
175          from _testcapi import unicode_encodedecimal
176          self.assertEqual(unicode_encodedecimal(u'123'),