f02e80
From b0e29fbdf31bb94b11cb8a7cc830b4a76467afa3 Mon Sep 17 00:00:00 2001
f02e80
From: William S Fulton <wsf@fultondesigns.co.uk>
f02e80
Date: Mon, 4 Dec 2017 18:41:55 +0000
f02e80
Subject: [PATCH] Add missing checks for failures in calls to
f02e80
 PyUnicode_AsUTF8String.
f02e80
f02e80
Previously a seg fault could occur when passing invalid UTF8 strings (low
f02e80
surrogates), eg passing u"\udcff" to the C layer (Python 3).
f02e80
---
f02e80
 CHANGES.current                               |  8 ++++++-
f02e80
 Doc/Manual/Python.html                        | 22 ++++++++++++++++---
f02e80
 Doc/Manual/Varargs.html                       |  5 ++++-
f02e80
 Examples/python/multimap/example.i            | 12 +++++++++-
f02e80
 .../python/unicode_strings_runme.py           | 10 +++++++++
f02e80
 Examples/test-suite/python_varargs_typemap.i  |  5 ++++-
f02e80
 Examples/test-suite/unicode_strings.i         |  2 ++
f02e80
 Lib/python/pyerrors.swg                       | 11 ++++++----
f02e80
 Lib/python/pyhead.swg                         | 16 ++++++++------
f02e80
 Lib/python/pyinit.swg                         |  4 ++--
f02e80
 Lib/python/pyrun.swg                          | 10 ++++++---
f02e80
 Lib/python/pystrings.swg                      | 12 ++++++++--
f02e80
 12 files changed, 92 insertions(+), 25 deletions(-)
f02e80
f02e80
#diff --git a/CHANGES.current b/CHANGES.current
f02e80
#index 5cab80172..06b958f18 100644
f02e80
#--- a/CHANGES.current
f02e80
#+++ b/CHANGES.current
f02e80
#@@ -6,8 +6,14 @@ the issue number to the end of the URL: https://github.com/swig/swig/issues/
f02e80
# 
f02e80
# Version 4.0.0 (in progress)
f02e80
# ===========================
f02e80
#+
f02e80
#+2017-12-04: wsfulton
f02e80
#+            [Python] Add missing checks for failures in calls to PyUnicode_AsUTF8String. Previously a 
f02e80
#+            seg fault could occur when passing invalid UTF8 strings (low surrogates), eg passing
f02e80
#+            u"\udcff" to the C layer (Python 3).
f02e80
#+
f02e80
# 2017-11-24: joequant
f02e80
#-	    Fix github #1124 and return R_NilValue for null pointers
f02e80
#+	    Fix #1124 and return R_NilValue for null pointers
f02e80
# 
f02e80
# 2017-11-29: wsfulton
f02e80
#             [Java] director exception handling improvements.
f02e80
#diff --git a/Doc/Manual/Python.html b/Doc/Manual/Python.html
f02e80
#index 0c0023dea..27ce084bd 100644
f02e80
#--- a/Doc/Manual/Python.html
f02e80
#+++ b/Doc/Manual/Python.html
f02e80
#@@ -6521,14 +6521,16 @@ string that cannot be completely decoded as UTF-8:
f02e80
# 
f02e80
# %module example
f02e80
# 
f02e80
#-%include <std_string.i>
f02e80
#-
f02e80
# %inline %{
f02e80
# 
f02e80
#-const char* non_utf8_c_str(void) {
f02e80
#+const char * non_utf8_c_str(void) {
f02e80
#   return "h\xe9llo w\xc3\xb6rld";
f02e80
# }
f02e80
# 
f02e80
#+void instring(const char *s) {
f02e80
#+  ...
f02e80
#+}
f02e80
#+
f02e80
# %}
f02e80
# 
f02e80
# 
f02e80
#@@ -6590,6 +6592,20 @@ For more details about the <tt>surrogateescape</tt> error handler, please see
f02e80
# PEP 383.
f02e80
# 

f02e80
# 
f02e80
#+

f02e80
#+When Python 3 strings are passed to the C/C++ layer, they are expected to be valid UTF8 Unicode strings too.
f02e80
#+For example, when the <tt>instring</tt> method above is wrapped and called, any invalid UTF8 Unicode code strings
f02e80
#+will result in a TypeError because the attempted conversion fails:
f02e80
#+

f02e80
#+
f02e80
#+
f02e80
#+>>> example.instring('h\xe9llo')
f02e80
#+>>> example.instring('h\udce9llo')
f02e80
#+Traceback (most recent call last):
f02e80
#+  File "<stdin>", line 1, in <module>
f02e80
#+TypeError: in method 'instring', argument 1 of type 'char const *'
f02e80
#+
f02e80
#+
f02e80
# 

f02e80
# In some cases, users may wish to instead handle all byte strings as bytes
f02e80
# objects in Python 3. This can be accomplished by adding
f02e80
#diff --git a/Doc/Manual/Varargs.html b/Doc/Manual/Varargs.html
f02e80
#index eba816382..014a38cae 100644
f02e80
#--- a/Doc/Manual/Varargs.html
f02e80
#+++ b/Doc/Manual/Varargs.html
f02e80
#@@ -529,8 +529,11 @@ like this:
f02e80
#       SWIG_fail;
f02e80
#     }
f02e80
#     pystr = PyUnicode_AsUTF8String(pyobj);
f02e80
#+    if (!pystr) {
f02e80
#+      SWIG_fail;
f02e80
#+    }
f02e80
#     str = strdup(PyBytes_AsString(pystr));
f02e80
#-    Py_XDECREF(pystr);
f02e80
#+    Py_DECREF(pystr);
f02e80
# %#else  
f02e80
#     if (!PyString_Check(pyobj)) {
f02e80
#       PyErr_SetString(PyExc_ValueError, "Expected a string");
f02e80
diff --git a/Examples/python/multimap/example.i b/Examples/python/multimap/example.i
f02e80
index 66c0f74c6..3ff5d52c0 100644
f02e80
--- a/Examples/python/multimap/example.i
f02e80
+++ b/Examples/python/multimap/example.i
f02e80
@@ -39,7 +39,11 @@ extern int    gcd(int x, int y);
f02e80
 %#if PY_VERSION_HEX >= 0x03000000
f02e80
     {
f02e80
       PyObject *utf8str = PyUnicode_AsUTF8String(s);
f02e80
-      const char *cstr = PyBytes_AsString(utf8str);
f02e80
+      const char *cstr;
f02e80
+      if (!utf8str) {
f02e80
+        SWIG_fail;
f02e80
+      }
f02e80
+      cstr = PyBytes_AsString(utf8str);
f02e80
       $2[i] = strdup(cstr);
f02e80
       Py_DECREF(utf8str);
f02e80
     }
f02e80
@@ -72,6 +76,9 @@ extern int gcdmain(int argc, char *argv[]);
f02e80
     SWIG_fail;
f02e80
   }
f02e80
   utf8str = PyUnicode_AsUTF8String($input);
f02e80
+  if (!utf8str) {
f02e80
+    SWIG_fail;
f02e80
+  }
f02e80
   PyBytes_AsStringAndSize(utf8str, &cstr, &len);
f02e80
   $1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
f02e80
   $2 = (int)len;
f02e80
@@ -105,6 +112,9 @@ extern int count(char *bytes, int len, char c);
f02e80
   char *cstr;
f02e80
   Py_ssize_t len;
f02e80
   PyObject *utf8str = PyUnicode_AsUTF8String($input);
f02e80
+  if (!utf8str) {
f02e80
+    SWIG_fail;
f02e80
+  }
f02e80
   PyBytes_AsStringAndSize(utf8str, &cstr, &len);
f02e80
   $1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
f02e80
   $2 = (int)len;
f02e80
diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py
f02e80
index fa9c51437..39e93b0fc 100644
f02e80
--- a/Examples/test-suite/python/unicode_strings_runme.py
f02e80
+++ b/Examples/test-suite/python/unicode_strings_runme.py
f02e80
@@ -25,3 +25,13 @@ if sys.version_info[0:2] < (3, 0):
f02e80
     check(unicode_strings.charstring(unicode("hello4")), "hello4")
f02e80
     unicode_strings.charstring(u"hell\xb05")
f02e80
     unicode_strings.charstring(u"hell\u00f66")
f02e80
+
f02e80
+low_surrogate_string = u"\udcff"
f02e80
+try:
f02e80
+    unicode_strings.instring(low_surrogate_string)
f02e80
+    # Will succeed with Python 2
f02e80
+except TypeError, e:
f02e80
+    # Python 3 will fail the PyUnicode_AsUTF8String conversion resulting in a TypeError.
f02e80
+    # The real error is actually:
f02e80
+    # UnicodeEncodeError: 'utf-8' codec can't encode character '\udcff' in position 0: surrogates not allowed
f02e80
+    pass
f02e80
diff --git a/Examples/test-suite/python_varargs_typemap.i b/Examples/test-suite/python_varargs_typemap.i
f02e80
index f05fb98eb..d809bf1fa 100644
f02e80
--- a/Examples/test-suite/python_varargs_typemap.i
f02e80
+++ b/Examples/test-suite/python_varargs_typemap.i
f02e80
@@ -23,8 +23,11 @@
f02e80
        SWIG_fail;
f02e80
     }
f02e80
     pystr = PyUnicode_AsUTF8String(pyobj);
f02e80
+    if (!pystr) {
f02e80
+      SWIG_fail;
f02e80
+    }
f02e80
     str = strdup(PyBytes_AsString(pystr));
f02e80
-    Py_XDECREF(pystr);
f02e80
+    Py_DECREF(pystr);
f02e80
 %#else  
f02e80
     if (!PyString_Check(pyobj)) {
f02e80
        PyErr_SetString(PyExc_ValueError, "Expected a string");
f02e80
diff --git a/Examples/test-suite/unicode_strings.i b/Examples/test-suite/unicode_strings.i
f02e80
index 9be3748e6..e7266266e 100644
f02e80
--- a/Examples/test-suite/unicode_strings.i
f02e80
+++ b/Examples/test-suite/unicode_strings.i
f02e80
@@ -20,4 +20,6 @@ char *charstring(char *s) {
f02e80
   return s;
f02e80
 }
f02e80
 
f02e80
+void instring(const char *s) {
f02e80
+}
f02e80
 %}
f02e80
diff --git a/Lib/python/pyerrors.swg b/Lib/python/pyerrors.swg
f02e80
index fe7313554..463afae15 100644
f02e80
--- a/Lib/python/pyerrors.swg
f02e80
+++ b/Lib/python/pyerrors.swg
f02e80
@@ -53,14 +53,17 @@ SWIG_Python_AddErrorMsg(const char* mesg)
f02e80
   PyObject *value = 0;
f02e80
   PyObject *traceback = 0;
f02e80
 
f02e80
-  if (PyErr_Occurred()) PyErr_Fetch(&type, &value, &traceback);
f02e80
+  if (PyErr_Occurred())
f02e80
+    PyErr_Fetch(&type, &value, &traceback);
f02e80
   if (value) {
f02e80
-    char *tmp;
f02e80
     PyObject *old_str = PyObject_Str(value);
f02e80
+    const char *tmp = SWIG_Python_str_AsChar(old_str);
f02e80
     PyErr_Clear();
f02e80
     Py_XINCREF(type);
f02e80
-
f02e80
-    PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
f02e80
+    if (tmp)
f02e80
+      PyErr_Format(type, "%s %s", tmp, mesg);
f02e80
+    else
f02e80
+      PyErr_Format(type, "%s", mesg);
f02e80
     SWIG_Python_str_DelForPy3(tmp);
f02e80
     Py_DECREF(old_str);
f02e80
     Py_DECREF(value);
f02e80
diff --git a/Lib/python/pyhead.swg b/Lib/python/pyhead.swg
f02e80
index 55eb95a6d..2fa8b5b4c 100644
f02e80
--- a/Lib/python/pyhead.swg
f02e80
+++ b/Lib/python/pyhead.swg
f02e80
@@ -38,14 +38,16 @@ SWIGINTERN char*
f02e80
 SWIG_Python_str_AsChar(PyObject *str)
f02e80
 {
f02e80
 #if PY_VERSION_HEX >= 0x03000000
f02e80
-  char *cstr;
f02e80
-  char *newstr;
f02e80
-  Py_ssize_t len;
f02e80
+  char *newstr = 0;
f02e80
   str = PyUnicode_AsUTF8String(str);
f02e80
-  PyBytes_AsStringAndSize(str, &cstr, &len);
f02e80
-  newstr = (char *) malloc(len+1);
f02e80
-  memcpy(newstr, cstr, len+1);
f02e80
-  Py_XDECREF(str);
f02e80
+  if (str) {
f02e80
+    char *cstr;
f02e80
+    Py_ssize_t len;
f02e80
+    PyBytes_AsStringAndSize(str, &cstr, &len);
f02e80
+    newstr = (char *) malloc(len+1);
f02e80
+    memcpy(newstr, cstr, len+1);
f02e80
+    Py_XDECREF(str);
f02e80
+  }
f02e80
   return newstr;
f02e80
 #else
f02e80
   return PyString_AsString(str);
f02e80
diff --git a/Lib/python/pyinit.swg b/Lib/python/pyinit.swg
f02e80
index fe45ac941..826f8411b 100644
f02e80
--- a/Lib/python/pyinit.swg
f02e80
+++ b/Lib/python/pyinit.swg
f02e80
@@ -84,10 +84,10 @@ swig_varlink_str(swig_varlinkobject *v) {
f02e80
 
f02e80
 SWIGINTERN int
f02e80
 swig_varlink_print(swig_varlinkobject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) {
f02e80
-  char *tmp;
f02e80
   PyObject *str = swig_varlink_str(v);
f02e80
+  const char *tmp = SWIG_Python_str_AsChar(str);
f02e80
   fprintf(fp,"Swig global variables ");
f02e80
-  fprintf(fp,"%s\n", tmp = SWIG_Python_str_AsChar(str));
f02e80
+  fprintf(fp,"%s\n", tmp ? tmp : "Invalid global variable");
f02e80
   SWIG_Python_str_DelForPy3(tmp);
f02e80
   Py_DECREF(str);
f02e80
   return 0;
f02e80
diff --git a/Lib/python/pyrun.swg b/Lib/python/pyrun.swg
f02e80
index efc476613..430d3af18 100644
f02e80
--- a/Lib/python/pyrun.swg
f02e80
+++ b/Lib/python/pyrun.swg
f02e80
@@ -1672,14 +1672,16 @@ SWIG_Python_AddErrMesg(const char* mesg, int infront)
f02e80
     PyObject *traceback = 0;
f02e80
     PyErr_Fetch(&type, &value, &traceback);
f02e80
     if (value) {
f02e80
-      char *tmp;
f02e80
       PyObject *old_str = PyObject_Str(value);
f02e80
+      const char *tmp = SWIG_Python_str_AsChar(old_str);
f02e80
+      if (!tmp)
f02e80
+        tmp = "Invalid error message";
f02e80
       Py_XINCREF(type);
f02e80
       PyErr_Clear();
f02e80
       if (infront) {
f02e80
-	PyErr_Format(type, "%s %s", mesg, tmp = SWIG_Python_str_AsChar(old_str));
f02e80
+	PyErr_Format(type, "%s %s", mesg, tmp);
f02e80
       } else {
f02e80
-	PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
f02e80
+	PyErr_Format(type, "%s %s", tmp, mesg);
f02e80
       }
f02e80
       SWIG_Python_str_DelForPy3(tmp);
f02e80
       Py_DECREF(old_str);
f02e80
@@ -1805,6 +1807,8 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) {
f02e80
       Py_INCREF(name);
f02e80
     } else {
f02e80
       encoded_name = PyUnicode_AsUTF8String(name);
f02e80
+      if (!encoded_name)
f02e80
+        return -1;
f02e80
     }
f02e80
     PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AsString(encoded_name));
f02e80
     Py_DECREF(encoded_name);
f02e80
diff --git a/Lib/python/pystrings.swg b/Lib/python/pystrings.swg
f02e80
index fd37855eb..301e0f3e1 100644
f02e80
--- a/Lib/python/pystrings.swg
f02e80
+++ b/Lib/python/pystrings.swg
f02e80
@@ -16,6 +16,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
f02e80
 %#endif
f02e80
   {
f02e80
     char *cstr; Py_ssize_t len;
f02e80
+    int ret = SWIG_OK;
f02e80
 %#if PY_VERSION_HEX>=0x03000000
f02e80
 %#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
f02e80
     if (!alloc && cptr) {
f02e80
@@ -26,7 +27,10 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
f02e80
         return SWIG_RuntimeError;
f02e80
     }
f02e80
     obj = PyUnicode_AsUTF8String(obj);
f02e80
-    if(alloc) *alloc = SWIG_NEWOBJ;
f02e80
+    if (!obj)
f02e80
+      return SWIG_TypeError;
f02e80
+    if (alloc)
f02e80
+      *alloc = SWIG_NEWOBJ;
f02e80
 %#endif
f02e80
     PyBytes_AsStringAndSize(obj, &cstr, &len);
f02e80
 %#else
f02e80
@@ -64,6 +68,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
f02e80
 %#endif
f02e80
 %#else
f02e80
 	*cptr = SWIG_Python_str_AsChar(obj);
f02e80
+        if (!*cptr)
f02e80
+          ret = SWIG_TypeError;
f02e80
 %#endif
f02e80
       }
f02e80
     }
f02e80
@@ -71,7 +77,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
f02e80
 %#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
f02e80
     Py_XDECREF(obj);
f02e80
 %#endif
f02e80
-    return SWIG_OK;
f02e80
+    return ret;
f02e80
   } else {
f02e80
 %#if defined(SWIG_PYTHON_2_UNICODE)
f02e80
 %#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
f02e80
@@ -84,6 +90,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
f02e80
         return SWIG_RuntimeError;
f02e80
       }
f02e80
       obj = PyUnicode_AsUTF8String(obj);
f02e80
+      if (!obj)
f02e80
+        return SWIG_TypeError;
f02e80
       if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) {
f02e80
         if (cptr) {
f02e80
           if (alloc) *alloc = SWIG_NEWOBJ;
f02e80
-- 
f02e80
2.21.1
f02e80