From b0e29fbdf31bb94b11cb8a7cc830b4a76467afa3 Mon Sep 17 00:00:00 2001 From: William S Fulton Date: Mon, 4 Dec 2017 18:41:55 +0000 Subject: [PATCH] Add missing checks for failures in calls to PyUnicode_AsUTF8String. Previously a seg fault could occur when passing invalid UTF8 strings (low surrogates), eg passing u"\udcff" to the C layer (Python 3). --- CHANGES.current | 8 ++++++- Doc/Manual/Python.html | 22 ++++++++++++++++--- Doc/Manual/Varargs.html | 5 ++++- Examples/python/multimap/example.i | 12 +++++++++- .../python/unicode_strings_runme.py | 10 +++++++++ Examples/test-suite/python_varargs_typemap.i | 5 ++++- Examples/test-suite/unicode_strings.i | 2 ++ Lib/python/pyerrors.swg | 11 ++++++---- Lib/python/pyhead.swg | 16 ++++++++------ Lib/python/pyinit.swg | 4 ++-- Lib/python/pyrun.swg | 10 ++++++--- Lib/python/pystrings.swg | 12 ++++++++-- 12 files changed, 92 insertions(+), 25 deletions(-) #diff --git a/CHANGES.current b/CHANGES.current #index 5cab80172..06b958f18 100644 #--- a/CHANGES.current #+++ b/CHANGES.current #@@ -6,8 +6,14 @@ the issue number to the end of the URL: https://github.com/swig/swig/issues/ # # Version 4.0.0 (in progress) # =========================== #+ #+2017-12-04: wsfulton #+ [Python] Add missing checks for failures in calls to PyUnicode_AsUTF8String. Previously a #+ seg fault could occur when passing invalid UTF8 strings (low surrogates), eg passing #+ u"\udcff" to the C layer (Python 3). #+ # 2017-11-24: joequant #- Fix github #1124 and return R_NilValue for null pointers #+ Fix #1124 and return R_NilValue for null pointers # # 2017-11-29: wsfulton # [Java] director exception handling improvements. #diff --git a/Doc/Manual/Python.html b/Doc/Manual/Python.html #index 0c0023dea..27ce084bd 100644 #--- a/Doc/Manual/Python.html #+++ b/Doc/Manual/Python.html #@@ -6521,14 +6521,16 @@ string that cannot be completely decoded as UTF-8: #
# %module example
# 
#-%include <std_string.i>
#-
# %inline %{
# 
#-const char* non_utf8_c_str(void) {
#+const char * non_utf8_c_str(void) {
#   return "h\xe9llo w\xc3\xb6rld";
# }
# 
#+void instring(const char *s) {
#+  ...
#+}
#+
# %}
# 
# #@@ -6590,6 +6592,20 @@ For more details about the surrogateescape error handler, please see # PEP 383. #

# #+

#+When Python 3 strings are passed to the C/C++ layer, they are expected to be valid UTF8 Unicode strings too. #+For example, when the instring method above is wrapped and called, any invalid UTF8 Unicode strings #+will result in a TypeError because the attempted conversion fails: #+

#+ #+
#+>>> example.instring('h\xe9llo')
#+>>> example.instring('h\udce9llo')
#+Traceback (most recent call last):
#+  File "<stdin>", line 1, in <module>
#+TypeError: in method 'instring', argument 1 of type 'char const *'
#+
#+ #

# In some cases, users may wish to instead handle all byte strings as bytes # objects in Python 3. This can be accomplished by adding #diff --git a/Doc/Manual/Varargs.html b/Doc/Manual/Varargs.html #index eba816382..014a38cae 100644 #--- a/Doc/Manual/Varargs.html #+++ b/Doc/Manual/Varargs.html #@@ -529,8 +529,11 @@ like this: # SWIG_fail; # } # pystr = PyUnicode_AsUTF8String(pyobj); #+ if (!pystr) { #+ SWIG_fail; #+ } # str = strdup(PyBytes_AsString(pystr)); #- Py_XDECREF(pystr); #+ Py_DECREF(pystr); # %#else # if (!PyString_Check(pyobj)) { # PyErr_SetString(PyExc_ValueError, "Expected a string"); diff --git a/Examples/python/multimap/example.i b/Examples/python/multimap/example.i index 66c0f74c6..3ff5d52c0 100644 --- a/Examples/python/multimap/example.i +++ b/Examples/python/multimap/example.i @@ -39,7 +39,11 @@ extern int gcd(int x, int y); %#if PY_VERSION_HEX >= 0x03000000 { PyObject *utf8str = PyUnicode_AsUTF8String(s); - const char *cstr = PyBytes_AsString(utf8str); + const char *cstr; + if (!utf8str) { + SWIG_fail; + } + cstr = PyBytes_AsString(utf8str); $2[i] = strdup(cstr); Py_DECREF(utf8str); } @@ -72,6 +76,9 @@ extern int gcdmain(int argc, char *argv[]); SWIG_fail; } utf8str = PyUnicode_AsUTF8String($input); + if (!utf8str) { + SWIG_fail; + } PyBytes_AsStringAndSize(utf8str, &cstr, &len); $1 = strncpy((char *)malloc(len+1), cstr, (size_t)len); $2 = (int)len; @@ -105,6 +112,9 @@ extern int count(char *bytes, int len, char c); char *cstr; Py_ssize_t len; PyObject *utf8str = PyUnicode_AsUTF8String($input); + if (!utf8str) { + SWIG_fail; + } PyBytes_AsStringAndSize(utf8str, &cstr, &len); $1 = strncpy((char *)malloc(len+1), cstr, (size_t)len); $2 = (int)len; diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py index fa9c51437..39e93b0fc 100644 --- a/Examples/test-suite/python/unicode_strings_runme.py +++ b/Examples/test-suite/python/unicode_strings_runme.py @@ -25,3 +25,13 @@ if 
sys.version_info[0:2] < (3, 0): check(unicode_strings.charstring(unicode("hello4")), "hello4") unicode_strings.charstring(u"hell\xb05") unicode_strings.charstring(u"hell\u00f66") + +low_surrogate_string = u"\udcff" +try: + unicode_strings.instring(low_surrogate_string) + # Will succeed with Python 2 +except TypeError, e: + # Python 3 will fail the PyUnicode_AsUTF8String conversion resulting in a TypeError. + # The real error is actually: + # UnicodeEncodeError: 'utf-8' codec can't encode character '\udcff' in position 0: surrogates not allowed + pass diff --git a/Examples/test-suite/python_varargs_typemap.i b/Examples/test-suite/python_varargs_typemap.i index f05fb98eb..d809bf1fa 100644 --- a/Examples/test-suite/python_varargs_typemap.i +++ b/Examples/test-suite/python_varargs_typemap.i @@ -23,8 +23,11 @@ SWIG_fail; } pystr = PyUnicode_AsUTF8String(pyobj); + if (!pystr) { + SWIG_fail; + } str = strdup(PyBytes_AsString(pystr)); - Py_XDECREF(pystr); + Py_DECREF(pystr); %#else if (!PyString_Check(pyobj)) { PyErr_SetString(PyExc_ValueError, "Expected a string"); diff --git a/Examples/test-suite/unicode_strings.i b/Examples/test-suite/unicode_strings.i index 9be3748e6..e7266266e 100644 --- a/Examples/test-suite/unicode_strings.i +++ b/Examples/test-suite/unicode_strings.i @@ -20,4 +20,6 @@ char *charstring(char *s) { return s; } +void instring(const char *s) { +} %} diff --git a/Lib/python/pyerrors.swg b/Lib/python/pyerrors.swg index fe7313554..463afae15 100644 --- a/Lib/python/pyerrors.swg +++ b/Lib/python/pyerrors.swg @@ -53,14 +53,17 @@ SWIG_Python_AddErrorMsg(const char* mesg) PyObject *value = 0; PyObject *traceback = 0; - if (PyErr_Occurred()) PyErr_Fetch(&type, &value, &traceback); + if (PyErr_Occurred()) + PyErr_Fetch(&type, &value, &traceback); if (value) { - char *tmp; PyObject *old_str = PyObject_Str(value); + const char *tmp = SWIG_Python_str_AsChar(old_str); PyErr_Clear(); Py_XINCREF(type); - - PyErr_Format(type, "%s %s", tmp = 
SWIG_Python_str_AsChar(old_str), mesg); + if (tmp) + PyErr_Format(type, "%s %s", tmp, mesg); + else + PyErr_Format(type, "%s", mesg); SWIG_Python_str_DelForPy3(tmp); Py_DECREF(old_str); Py_DECREF(value); diff --git a/Lib/python/pyhead.swg b/Lib/python/pyhead.swg index 55eb95a6d..2fa8b5b4c 100644 --- a/Lib/python/pyhead.swg +++ b/Lib/python/pyhead.swg @@ -38,14 +38,16 @@ SWIGINTERN char* SWIG_Python_str_AsChar(PyObject *str) { #if PY_VERSION_HEX >= 0x03000000 - char *cstr; - char *newstr; - Py_ssize_t len; + char *newstr = 0; str = PyUnicode_AsUTF8String(str); - PyBytes_AsStringAndSize(str, &cstr, &len); - newstr = (char *) malloc(len+1); - memcpy(newstr, cstr, len+1); - Py_XDECREF(str); + if (str) { + char *cstr; + Py_ssize_t len; + PyBytes_AsStringAndSize(str, &cstr, &len); + newstr = (char *) malloc(len+1); + memcpy(newstr, cstr, len+1); + Py_XDECREF(str); + } return newstr; #else return PyString_AsString(str); diff --git a/Lib/python/pyinit.swg b/Lib/python/pyinit.swg index fe45ac941..826f8411b 100644 --- a/Lib/python/pyinit.swg +++ b/Lib/python/pyinit.swg @@ -84,10 +84,10 @@ swig_varlink_str(swig_varlinkobject *v) { SWIGINTERN int swig_varlink_print(swig_varlinkobject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) { - char *tmp; PyObject *str = swig_varlink_str(v); + const char *tmp = SWIG_Python_str_AsChar(str); fprintf(fp,"Swig global variables "); - fprintf(fp,"%s\n", tmp = SWIG_Python_str_AsChar(str)); + fprintf(fp,"%s\n", tmp ? 
tmp : "Invalid global variable"); SWIG_Python_str_DelForPy3(tmp); Py_DECREF(str); return 0; diff --git a/Lib/python/pyrun.swg b/Lib/python/pyrun.swg index efc476613..430d3af18 100644 --- a/Lib/python/pyrun.swg +++ b/Lib/python/pyrun.swg @@ -1672,14 +1672,16 @@ SWIG_Python_AddErrMesg(const char* mesg, int infront) PyObject *traceback = 0; PyErr_Fetch(&type, &value, &traceback); if (value) { - char *tmp; PyObject *old_str = PyObject_Str(value); + const char *tmp = SWIG_Python_str_AsChar(old_str); + if (!tmp) + tmp = "Invalid error message"; Py_XINCREF(type); PyErr_Clear(); if (infront) { - PyErr_Format(type, "%s %s", mesg, tmp = SWIG_Python_str_AsChar(old_str)); + PyErr_Format(type, "%s %s", mesg, tmp); } else { - PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg); + PyErr_Format(type, "%s %s", tmp, mesg); } SWIG_Python_str_DelForPy3(tmp); Py_DECREF(old_str); @@ -1805,6 +1807,8 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) { Py_INCREF(name); } else { encoded_name = PyUnicode_AsUTF8String(name); + if (!encoded_name) + return -1; } PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AsString(encoded_name)); Py_DECREF(encoded_name); diff --git a/Lib/python/pystrings.swg b/Lib/python/pystrings.swg index fd37855eb..301e0f3e1 100644 --- a/Lib/python/pystrings.swg +++ b/Lib/python/pystrings.swg @@ -16,6 +16,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc) %#endif { char *cstr; Py_ssize_t len; + int ret = SWIG_OK; %#if PY_VERSION_HEX>=0x03000000 %#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR) if (!alloc && cptr) { @@ -26,7 +27,10 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc) return SWIG_RuntimeError; } obj = PyUnicode_AsUTF8String(obj); - if(alloc) *alloc = SWIG_NEWOBJ; + if (!obj) + return SWIG_TypeError; + if (alloc) + *alloc = SWIG_NEWOBJ; %#endif PyBytes_AsStringAndSize(obj, &cstr, &len); %#else @@ -64,6 
+68,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc) %#endif %#else *cptr = SWIG_Python_str_AsChar(obj); + if (!*cptr) + ret = SWIG_TypeError; %#endif } } @@ -71,7 +77,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc) %#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR) Py_XDECREF(obj); %#endif - return SWIG_OK; + return ret; } else { %#if defined(SWIG_PYTHON_2_UNICODE) %#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR) @@ -84,6 +90,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc) return SWIG_RuntimeError; } obj = PyUnicode_AsUTF8String(obj); + if (!obj) + return SWIG_TypeError; if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) { if (cptr) { if (alloc) *alloc = SWIG_NEWOBJ; -- 2.21.1