819d24
From b0e29fbdf31bb94b11cb8a7cc830b4a76467afa3 Mon Sep 17 00:00:00 2001
819d24
From: William S Fulton <wsf@fultondesigns.co.uk>
819d24
Date: Mon, 4 Dec 2017 18:41:55 +0000
819d24
Subject: [PATCH] Add missing checks for failures in calls to
819d24
 PyUnicode_AsUTF8String.
819d24
819d24
Previously a seg fault could occur when passing invalid UTF8 strings (low
819d24
surrogates), eg passing u"\udcff" to the C layer (Python 3).
819d24
---
819d24
 CHANGES.current                               |  8 ++++++-
819d24
 Doc/Manual/Python.html                        | 22 ++++++++++++++++---
819d24
 Doc/Manual/Varargs.html                       |  5 ++++-
819d24
 Examples/python/multimap/example.i            | 12 +++++++++-
819d24
 .../python/unicode_strings_runme.py           | 10 +++++++++
819d24
 Examples/test-suite/python_varargs_typemap.i  |  5 ++++-
819d24
 Examples/test-suite/unicode_strings.i         |  2 ++
819d24
 Lib/python/pyerrors.swg                       | 11 ++++++----
819d24
 Lib/python/pyhead.swg                         | 16 ++++++++------
819d24
 Lib/python/pyinit.swg                         |  4 ++--
819d24
 Lib/python/pyrun.swg                          | 10 ++++++---
819d24
 Lib/python/pystrings.swg                      | 12 ++++++++--
819d24
 12 files changed, 92 insertions(+), 25 deletions(-)
819d24
819d24
#diff --git a/CHANGES.current b/CHANGES.current
819d24
#index 5cab80172..06b958f18 100644
819d24
#--- a/CHANGES.current
819d24
#+++ b/CHANGES.current
819d24
#@@ -6,8 +6,14 @@ the issue number to the end of the URL: https://github.com/swig/swig/issues/
819d24
# 
819d24
# Version 4.0.0 (in progress)
819d24
# ===========================
819d24
#+
819d24
#+2017-12-04: wsfulton
819d24
#+            [Python] Add missing checks for failures in calls to PyUnicode_AsUTF8String. Previously a 
819d24
#+            seg fault could occur when passing invalid UTF8 strings (low surrogates), eg passing
819d24
#+            u"\udcff" to the C layer (Python 3).
819d24
#+
819d24
# 2017-11-24: joequant
819d24
#-	    Fix github #1124 and return R_NilValue for null pointers
819d24
#+	    Fix #1124 and return R_NilValue for null pointers
819d24
# 
819d24
# 2017-11-29: wsfulton
819d24
#             [Java] director exception handling improvements.
819d24
#diff --git a/Doc/Manual/Python.html b/Doc/Manual/Python.html
819d24
#index 0c0023dea..27ce084bd 100644
819d24
#--- a/Doc/Manual/Python.html
819d24
#+++ b/Doc/Manual/Python.html
819d24
#@@ -6521,14 +6521,16 @@ string that cannot be completely decoded as UTF-8:
819d24
# 
819d24
# %module example
819d24
# 
819d24
#-%include <std_string.i>
819d24
#-
819d24
# %inline %{
819d24
# 
819d24
#-const char* non_utf8_c_str(void) {
819d24
#+const char * non_utf8_c_str(void) {
819d24
#   return "h\xe9llo w\xc3\xb6rld";
819d24
# }
819d24
# 
819d24
#+void instring(const char *s) {
819d24
#+  ...
819d24
#+}
819d24
#+
819d24
# %}
819d24
# 
819d24
# 
819d24
#@@ -6590,6 +6592,20 @@ For more details about the <tt>surrogateescape</tt> error handler, please see
819d24
# PEP 383.
819d24
# 

819d24
# 
819d24
#+

819d24
#+When Python 3 strings are passed to the C/C++ layer, they are expected to be valid UTF8 Unicode strings too.
819d24
#+For example, when the <tt>instring</tt> method above is wrapped and called, any invalid UTF8 Unicode code strings
819d24
#+will result in a TypeError because the attempted conversion fails:
819d24
#+

819d24
#+
819d24
#+
819d24
#+>>> example.instring('h\xe9llo')
819d24
#+>>> example.instring('h\udce9llo')
819d24
#+Traceback (most recent call last):
819d24
#+  File "<stdin>", line 1, in <module>
819d24
#+TypeError: in method 'instring', argument 1 of type 'char const *'
819d24
#+
819d24
#+
819d24
# 

819d24
# In some cases, users may wish to instead handle all byte strings as bytes
819d24
# objects in Python 3. This can be accomplished by adding
819d24
#diff --git a/Doc/Manual/Varargs.html b/Doc/Manual/Varargs.html
819d24
#index eba816382..014a38cae 100644
819d24
#--- a/Doc/Manual/Varargs.html
819d24
#+++ b/Doc/Manual/Varargs.html
819d24
#@@ -529,8 +529,11 @@ like this:
819d24
#       SWIG_fail;
819d24
#     }
819d24
#     pystr = PyUnicode_AsUTF8String(pyobj);
819d24
#+    if (!pystr) {
819d24
#+      SWIG_fail;
819d24
#+    }
819d24
#     str = strdup(PyBytes_AsString(pystr));
819d24
#-    Py_XDECREF(pystr);
819d24
#+    Py_DECREF(pystr);
819d24
# %#else  
819d24
#     if (!PyString_Check(pyobj)) {
819d24
#       PyErr_SetString(PyExc_ValueError, "Expected a string");
819d24
diff --git a/Examples/python/multimap/example.i b/Examples/python/multimap/example.i
819d24
index 66c0f74c6..3ff5d52c0 100644
819d24
--- a/Examples/python/multimap/example.i
819d24
+++ b/Examples/python/multimap/example.i
819d24
@@ -39,7 +39,11 @@ extern int    gcd(int x, int y);
819d24
 %#if PY_VERSION_HEX >= 0x03000000
819d24
     {
819d24
       PyObject *utf8str = PyUnicode_AsUTF8String(s);
819d24
-      const char *cstr = PyBytes_AsString(utf8str);
819d24
+      const char *cstr;
819d24
+      if (!utf8str) {
819d24
+        SWIG_fail;
819d24
+      }
819d24
+      cstr = PyBytes_AsString(utf8str);
819d24
       $2[i] = strdup(cstr);
819d24
       Py_DECREF(utf8str);
819d24
     }
819d24
@@ -72,6 +76,9 @@ extern int gcdmain(int argc, char *argv[]);
819d24
     SWIG_fail;
819d24
   }
819d24
   utf8str = PyUnicode_AsUTF8String($input);
819d24
+  if (!utf8str) {
819d24
+    SWIG_fail;
819d24
+  }
819d24
   PyBytes_AsStringAndSize(utf8str, &cstr, &len);
819d24
   $1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
819d24
   $2 = (int)len;
819d24
@@ -105,6 +112,9 @@ extern int count(char *bytes, int len, char c);
819d24
   char *cstr;
819d24
   Py_ssize_t len;
819d24
   PyObject *utf8str = PyUnicode_AsUTF8String($input);
819d24
+  if (!utf8str) {
819d24
+    SWIG_fail;
819d24
+  }
819d24
   PyBytes_AsStringAndSize(utf8str, &cstr, &len);
819d24
   $1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
819d24
   $2 = (int)len;
819d24
diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py
819d24
index fa9c51437..39e93b0fc 100644
819d24
--- a/Examples/test-suite/python/unicode_strings_runme.py
819d24
+++ b/Examples/test-suite/python/unicode_strings_runme.py
819d24
@@ -25,3 +25,13 @@ if sys.version_info[0:2] < (3, 0):
819d24
     check(unicode_strings.charstring(unicode("hello4")), "hello4")
819d24
     unicode_strings.charstring(u"hell\xb05")
819d24
     unicode_strings.charstring(u"hell\u00f66")
819d24
+
819d24
+low_surrogate_string = u"\udcff"
819d24
+try:
819d24
+    unicode_strings.instring(low_surrogate_string)
819d24
+    # Will succeed with Python 2
819d24
+except TypeError, e:
819d24
+    # Python 3 will fail the PyUnicode_AsUTF8String conversion resulting in a TypeError.
819d24
+    # The real error is actually:
819d24
+    # UnicodeEncodeError: 'utf-8' codec can't encode character '\udcff' in position 0: surrogates not allowed
819d24
+    pass
819d24
diff --git a/Examples/test-suite/python_varargs_typemap.i b/Examples/test-suite/python_varargs_typemap.i
819d24
index f05fb98eb..d809bf1fa 100644
819d24
--- a/Examples/test-suite/python_varargs_typemap.i
819d24
+++ b/Examples/test-suite/python_varargs_typemap.i
819d24
@@ -23,8 +23,11 @@
819d24
        SWIG_fail;
819d24
     }
819d24
     pystr = PyUnicode_AsUTF8String(pyobj);
819d24
+    if (!pystr) {
819d24
+      SWIG_fail;
819d24
+    }
819d24
     str = strdup(PyBytes_AsString(pystr));
819d24
-    Py_XDECREF(pystr);
819d24
+    Py_DECREF(pystr);
819d24
 %#else  
819d24
     if (!PyString_Check(pyobj)) {
819d24
        PyErr_SetString(PyExc_ValueError, "Expected a string");
819d24
diff --git a/Examples/test-suite/unicode_strings.i b/Examples/test-suite/unicode_strings.i
819d24
index 9be3748e6..e7266266e 100644
819d24
--- a/Examples/test-suite/unicode_strings.i
819d24
+++ b/Examples/test-suite/unicode_strings.i
819d24
@@ -20,4 +20,6 @@ char *charstring(char *s) {
819d24
   return s;
819d24
 }
819d24
 
819d24
+void instring(const char *s) {
819d24
+}
819d24
 %}
819d24
diff --git a/Lib/python/pyerrors.swg b/Lib/python/pyerrors.swg
819d24
index fe7313554..463afae15 100644
819d24
--- a/Lib/python/pyerrors.swg
819d24
+++ b/Lib/python/pyerrors.swg
819d24
@@ -53,14 +53,17 @@ SWIG_Python_AddErrorMsg(const char* mesg)
819d24
   PyObject *value = 0;
819d24
   PyObject *traceback = 0;
819d24
 
819d24
-  if (PyErr_Occurred()) PyErr_Fetch(&type, &value, &traceback);
819d24
+  if (PyErr_Occurred())
819d24
+    PyErr_Fetch(&type, &value, &traceback);
819d24
   if (value) {
819d24
-    char *tmp;
819d24
     PyObject *old_str = PyObject_Str(value);
819d24
+    const char *tmp = SWIG_Python_str_AsChar(old_str);
819d24
     PyErr_Clear();
819d24
     Py_XINCREF(type);
819d24
-
819d24
-    PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
819d24
+    if (tmp)
819d24
+      PyErr_Format(type, "%s %s", tmp, mesg);
819d24
+    else
819d24
+      PyErr_Format(type, "%s", mesg);
819d24
     SWIG_Python_str_DelForPy3(tmp);
819d24
     Py_DECREF(old_str);
819d24
     Py_DECREF(value);
819d24
diff --git a/Lib/python/pyhead.swg b/Lib/python/pyhead.swg
819d24
index 55eb95a6d..2fa8b5b4c 100644
819d24
--- a/Lib/python/pyhead.swg
819d24
+++ b/Lib/python/pyhead.swg
819d24
@@ -38,14 +38,16 @@ SWIGINTERN char*
819d24
 SWIG_Python_str_AsChar(PyObject *str)
819d24
 {
819d24
 #if PY_VERSION_HEX >= 0x03000000
819d24
-  char *cstr;
819d24
-  char *newstr;
819d24
-  Py_ssize_t len;
819d24
+  char *newstr = 0;
819d24
   str = PyUnicode_AsUTF8String(str);
819d24
-  PyBytes_AsStringAndSize(str, &cstr, &len);
819d24
-  newstr = (char *) malloc(len+1);
819d24
-  memcpy(newstr, cstr, len+1);
819d24
-  Py_XDECREF(str);
819d24
+  if (str) {
819d24
+    char *cstr;
819d24
+    Py_ssize_t len;
819d24
+    PyBytes_AsStringAndSize(str, &cstr, &len);
819d24
+    newstr = (char *) malloc(len+1);
819d24
+    memcpy(newstr, cstr, len+1);
819d24
+    Py_XDECREF(str);
819d24
+  }
819d24
   return newstr;
819d24
 #else
819d24
   return PyString_AsString(str);
819d24
diff --git a/Lib/python/pyinit.swg b/Lib/python/pyinit.swg
819d24
index fe45ac941..826f8411b 100644
819d24
--- a/Lib/python/pyinit.swg
819d24
+++ b/Lib/python/pyinit.swg
819d24
@@ -84,10 +84,10 @@ swig_varlink_str(swig_varlinkobject *v) {
819d24
 
819d24
 SWIGINTERN int
819d24
 swig_varlink_print(swig_varlinkobject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) {
819d24
-  char *tmp;
819d24
   PyObject *str = swig_varlink_str(v);
819d24
+  const char *tmp = SWIG_Python_str_AsChar(str);
819d24
   fprintf(fp,"Swig global variables ");
819d24
-  fprintf(fp,"%s\n", tmp = SWIG_Python_str_AsChar(str));
819d24
+  fprintf(fp,"%s\n", tmp ? tmp : "Invalid global variable");
819d24
   SWIG_Python_str_DelForPy3(tmp);
819d24
   Py_DECREF(str);
819d24
   return 0;
819d24
diff --git a/Lib/python/pyrun.swg b/Lib/python/pyrun.swg
819d24
index efc476613..430d3af18 100644
819d24
--- a/Lib/python/pyrun.swg
819d24
+++ b/Lib/python/pyrun.swg
819d24
@@ -1672,14 +1672,16 @@ SWIG_Python_AddErrMesg(const char* mesg, int infront)
819d24
     PyObject *traceback = 0;
819d24
     PyErr_Fetch(&type, &value, &traceback);
819d24
     if (value) {
819d24
-      char *tmp;
819d24
       PyObject *old_str = PyObject_Str(value);
819d24
+      const char *tmp = SWIG_Python_str_AsChar(old_str);
819d24
+      if (!tmp)
819d24
+        tmp = "Invalid error message";
819d24
       Py_XINCREF(type);
819d24
       PyErr_Clear();
819d24
       if (infront) {
819d24
-	PyErr_Format(type, "%s %s", mesg, tmp = SWIG_Python_str_AsChar(old_str));
819d24
+	PyErr_Format(type, "%s %s", mesg, tmp);
819d24
       } else {
819d24
-	PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
819d24
+	PyErr_Format(type, "%s %s", tmp, mesg);
819d24
       }
819d24
       SWIG_Python_str_DelForPy3(tmp);
819d24
       Py_DECREF(old_str);
819d24
@@ -1805,6 +1807,8 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) {
819d24
       Py_INCREF(name);
819d24
     } else {
819d24
       encoded_name = PyUnicode_AsUTF8String(name);
819d24
+      if (!encoded_name)
819d24
+        return -1;
819d24
     }
819d24
     PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AsString(encoded_name));
819d24
     Py_DECREF(encoded_name);
819d24
diff --git a/Lib/python/pystrings.swg b/Lib/python/pystrings.swg
819d24
index fd37855eb..301e0f3e1 100644
819d24
--- a/Lib/python/pystrings.swg
819d24
+++ b/Lib/python/pystrings.swg
819d24
@@ -16,6 +16,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
819d24
 %#endif
819d24
   {
819d24
     char *cstr; Py_ssize_t len;
819d24
+    int ret = SWIG_OK;
819d24
 %#if PY_VERSION_HEX>=0x03000000
819d24
 %#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
819d24
     if (!alloc && cptr) {
819d24
@@ -26,7 +27,10 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
819d24
         return SWIG_RuntimeError;
819d24
     }
819d24
     obj = PyUnicode_AsUTF8String(obj);
819d24
-    if(alloc) *alloc = SWIG_NEWOBJ;
819d24
+    if (!obj)
819d24
+      return SWIG_TypeError;
819d24
+    if (alloc)
819d24
+      *alloc = SWIG_NEWOBJ;
819d24
 %#endif
819d24
     PyBytes_AsStringAndSize(obj, &cstr, &len);
819d24
 %#else
819d24
@@ -64,6 +68,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
819d24
 %#endif
819d24
 %#else
819d24
 	*cptr = SWIG_Python_str_AsChar(obj);
819d24
+        if (!*cptr)
819d24
+          ret = SWIG_TypeError;
819d24
 %#endif
819d24
       }
819d24
     }
819d24
@@ -71,7 +77,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
819d24
 %#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
819d24
     Py_XDECREF(obj);
819d24
 %#endif
819d24
-    return SWIG_OK;
819d24
+    return ret;
819d24
   } else {
819d24
 %#if defined(SWIG_PYTHON_2_UNICODE)
819d24
 %#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
819d24
@@ -84,6 +90,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
819d24
         return SWIG_RuntimeError;
819d24
       }
819d24
       obj = PyUnicode_AsUTF8String(obj);
819d24
+      if (!obj)
819d24
+        return SWIG_TypeError;
819d24
       if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) {
819d24
         if (cptr) {
819d24
           if (alloc) *alloc = SWIG_NEWOBJ;
819d24
-- 
819d24
2.21.1
819d24