From d4f7fd8c0419ade46738327c5278fe631a367f64 Mon Sep 17 00:00:00 2001 From: Panu Matilainen Date: Apr 10 2019 08:52:40 +0000 Subject: Make the Python 3 API actually usable (#1693751) All string data from rpm is now returned as surrogate-escaped utf-8 string objects. 99.999% of that data IS utf-8 encoded to begin with, returning bytes is plain broken. As a temporary crutch, monkey-patch a .decode() method to returned strings to give users time to migrate from the long-standing broken behavior. --- diff --git a/0001-In-Python-3-return-all-our-string-data-as-surrogate-.patch b/0001-In-Python-3-return-all-our-string-data-as-surrogate-.patch new file mode 100644 index 0000000..35f12c2 --- /dev/null +++ b/0001-In-Python-3-return-all-our-string-data-as-surrogate-.patch @@ -0,0 +1,656 @@ +From 84920f898315d09a57a3f1067433eaeb7de5e830 Mon Sep 17 00:00:00 2001 +Message-Id: <84920f898315d09a57a3f1067433eaeb7de5e830.1554884444.git.pmatilai@redhat.com> +From: Panu Matilainen +Date: Fri, 22 Feb 2019 19:44:16 +0200 +Subject: [PATCH] In Python 3, return all our string data as surrogate-escaped + utf-8 strings + +In the almost ten years of rpm sort of supporting Python 3 bindings, quite +obviously nobody has actually tried to use them. There's a major mismatch +between what the header API outputs (bytes) and what all the other APIs +accept (strings), resulting in hysterical TypeErrors all over the place, +including but not limited to labelCompare() (RhBug:1631292). Also a huge +number of other places have been returning strings and silently assuming +utf-8 through use of Py_BuildValue("s", ...), which will just irrevocably +fail when non-utf8 data is encountered. + +The politically Python 3-correct solution would be declaring all our data +as bytes with unspecified encoding - that's exactly what it historically is. +However doing so would by definition break every single rpm script people +have developed on Python 2. And when 99% of the rpm content in the world +actually is utf-8 encoded even if it doesn't say so (and in recent times +packages even advertise themselves as utf-8 encoded), the bytes-only route +seems a wee bit too draconian, even to this grumpy old fella. + +Instead, route all our string returns through a single helper macro +which on Python 2 just does what we always did, but in Python 3 converts +the data to surrogate-escaped utf-8 strings. This makes stuff "just work" +out of the box pretty much everywhere even with Python 3 (including +our own test-suite!), while still allowing to handle the non-utf8 case. +Handling the non-utf8 case is a bit more uglier but still possible, +which is exactly how you want corner-cases to be. There might be some +uses for retrieving raw byte data from the header, but worrying about +such an API is a case for some other rainy day, for now we mostly only +care that stuff works again. + +Also add test-cases for mixed data source labelCompare() and +non-utf8 insert to + retrieve from header. +--- + python/header-py.c | 2 +- + python/rpmds-py.c | 8 ++++---- + python/rpmfd-py.c | 6 +++--- + python/rpmfi-py.c | 24 ++++++++++++------------ + python/rpmfiles-py.c | 26 +++++++++++++------------- + python/rpmkeyring-py.c | 2 +- + python/rpmmacro-py.c | 2 +- + python/rpmmodule.c | 2 +- + python/rpmps-py.c | 8 ++++---- + python/rpmstrpool-py.c | 2 +- + python/rpmsystem-py.h | 7 +++++++ + python/rpmtd-py.c | 2 +- + python/rpmte-py.c | 16 ++++++++-------- + python/rpmts-py.c | 11 ++++++----- + python/spec-py.c | 8 ++++---- + tests/local.at | 1 + + tests/rpmpython.at | 34 ++++++++++++++++++++++++++++++++++ + 17 files changed, 102 insertions(+), 59 deletions(-) + +diff --git a/python/header-py.c b/python/header-py.c +index c9d54e869..93c241cb7 100644 +--- a/python/header-py.c ++++ b/python/header-py.c +@@ -231,7 +231,7 @@ static PyObject * hdrFormat(hdrObject * s, PyObject * args, PyObject * kwds) + return NULL; + } + +- result = Py_BuildValue("s", r); ++ result = utf8FromString(r); + free(r); + + return result; +diff --git a/python/rpmds-py.c b/python/rpmds-py.c +index 39b26628e..ecc9af9d5 100644 +--- a/python/rpmds-py.c ++++ b/python/rpmds-py.c +@@ -31,19 +31,19 @@ rpmds_Ix(rpmdsObject * s) + static PyObject * + rpmds_DNEVR(rpmdsObject * s) + { +- return Py_BuildValue("s", rpmdsDNEVR(s->ds)); ++ return utf8FromString(rpmdsDNEVR(s->ds)); + } + + static PyObject * + rpmds_N(rpmdsObject * s) + { +- return Py_BuildValue("s", rpmdsN(s->ds)); ++ return utf8FromString(rpmdsN(s->ds)); + } + + static PyObject * + rpmds_EVR(rpmdsObject * s) + { +- return Py_BuildValue("s", rpmdsEVR(s->ds)); ++ return utf8FromString(rpmdsEVR(s->ds)); + } + + static PyObject * +@@ -261,7 +261,7 @@ rpmds_subscript(rpmdsObject * s, PyObject * key) + + ix = (int) PyInt_AsLong(key); + rpmdsSetIx(s->ds, ix); +- return Py_BuildValue("s", rpmdsDNEVR(s->ds)); ++ return utf8FromString(rpmdsDNEVR(s->ds)); + } + + static PyMappingMethods rpmds_as_mapping = { +diff --git a/python/rpmfd-py.c b/python/rpmfd-py.c +index 85fb0cd24..4b05cce5f 100644 +--- a/python/rpmfd-py.c ++++ b/python/rpmfd-py.c +@@ -327,17 +327,17 @@ static PyObject *rpmfd_get_closed(rpmfdObject *s) + static PyObject *rpmfd_get_name(rpmfdObject *s) + { + /* XXX: rpm returns non-paths with [mumble], python files use */ +- return Py_BuildValue("s", Fdescr(s->fd)); ++ return utf8FromString(Fdescr(s->fd)); + } + + static PyObject *rpmfd_get_mode(rpmfdObject *s) + { +- return Py_BuildValue("s", s->mode); ++ return utf8FromString(s->mode); + } + + static PyObject *rpmfd_get_flags(rpmfdObject *s) + { +- return Py_BuildValue("s", s->flags); ++ return utf8FromString(s->flags); + } + + static PyGetSetDef rpmfd_getseters[] = { +diff --git a/python/rpmfi-py.c b/python/rpmfi-py.c +index 8d2f926d0..db405c231 100644 +--- a/python/rpmfi-py.c ++++ b/python/rpmfi-py.c +@@ -41,19 +41,19 @@ rpmfi_DX(rpmfiObject * s, PyObject * unused) + static PyObject * + rpmfi_BN(rpmfiObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmfiBN(s->fi)); ++ return utf8FromString(rpmfiBN(s->fi)); + } + + static PyObject * + rpmfi_DN(rpmfiObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmfiDN(s->fi)); ++ return utf8FromString(rpmfiDN(s->fi)); + } + + static PyObject * + rpmfi_FN(rpmfiObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmfiFN(s->fi)); ++ return utf8FromString(rpmfiFN(s->fi)); + } + + static PyObject * +@@ -98,7 +98,7 @@ rpmfi_Digest(rpmfiObject * s, PyObject * unused) + { + char *digest = rpmfiFDigestHex(s->fi, NULL); + if (digest) { +- PyObject *dig = Py_BuildValue("s", digest); ++ PyObject *dig = utf8FromString(digest); + free(digest); + return dig; + } else { +@@ -109,7 +109,7 @@ rpmfi_Digest(rpmfiObject * s, PyObject * unused) + static PyObject * + rpmfi_FLink(rpmfiObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmfiFLink(s->fi)); ++ return utf8FromString(rpmfiFLink(s->fi)); + } + + static PyObject * +@@ -133,13 +133,13 @@ rpmfi_FMtime(rpmfiObject * s, PyObject * unused) + static PyObject * + rpmfi_FUser(rpmfiObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmfiFUser(s->fi)); ++ return utf8FromString(rpmfiFUser(s->fi)); + } + + static PyObject * + rpmfi_FGroup(rpmfiObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmfiFGroup(s->fi)); ++ return utf8FromString(rpmfiFGroup(s->fi)); + } + + static PyObject * +@@ -155,7 +155,7 @@ rpmfi_FClass(rpmfiObject * s, PyObject * unused) + + if ((FClass = rpmfiFClass(s->fi)) == NULL) + FClass = ""; +- return Py_BuildValue("s", FClass); ++ return utf8FromString(FClass); + } + + static PyObject * +@@ -208,7 +208,7 @@ rpmfi_iternext(rpmfiObject * s) + Py_INCREF(Py_None); + PyTuple_SET_ITEM(result, 0, Py_None); + } else +- PyTuple_SET_ITEM(result, 0, Py_BuildValue("s", FN)); ++ PyTuple_SET_ITEM(result, 0, utf8FromString(FN)); + PyTuple_SET_ITEM(result, 1, PyLong_FromLongLong(FSize)); + PyTuple_SET_ITEM(result, 2, PyInt_FromLong(FMode)); + PyTuple_SET_ITEM(result, 3, PyInt_FromLong(FMtime)); +@@ -222,12 +222,12 @@ rpmfi_iternext(rpmfiObject * s) + Py_INCREF(Py_None); + PyTuple_SET_ITEM(result, 10, Py_None); + } else +- PyTuple_SET_ITEM(result, 10, Py_BuildValue("s", FUser)); ++ PyTuple_SET_ITEM(result, 10, utf8FromString(FUser)); + if (FGroup == NULL) { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(result, 11, Py_None); + } else +- PyTuple_SET_ITEM(result, 11, Py_BuildValue("s", FGroup)); ++ PyTuple_SET_ITEM(result, 11, utf8FromString(FGroup)); + PyTuple_SET_ITEM(result, 12, rpmfi_Digest(s, NULL)); + + } else +@@ -313,7 +313,7 @@ rpmfi_subscript(rpmfiObject * s, PyObject * key) + + ix = (int) PyInt_AsLong(key); + rpmfiSetFX(s->fi, ix); +- return Py_BuildValue("s", rpmfiFN(s->fi)); ++ return utf8FromString(rpmfiFN(s->fi)); + } + + static PyMappingMethods rpmfi_as_mapping = { +diff --git a/python/rpmfiles-py.c b/python/rpmfiles-py.c +index bc07dbeaf..557246cae 100644 +--- a/python/rpmfiles-py.c ++++ b/python/rpmfiles-py.c +@@ -41,37 +41,37 @@ static PyObject *rpmfile_dx(rpmfileObject *s) + static PyObject *rpmfile_name(rpmfileObject *s) + { + char * fn = rpmfilesFN(s->files, s->ix); +- PyObject *o = Py_BuildValue("s", fn); ++ PyObject *o = utf8FromString(fn); + free(fn); + return o; + } + + static PyObject *rpmfile_basename(rpmfileObject *s) + { +- return Py_BuildValue("s", rpmfilesBN(s->files, s->ix)); ++ return utf8FromString(rpmfilesBN(s->files, s->ix)); + } + + static PyObject *rpmfile_dirname(rpmfileObject *s) + { +- return Py_BuildValue("s", rpmfilesDN(s->files, rpmfilesDI(s->files, s->ix))); ++ return utf8FromString(rpmfilesDN(s->files, rpmfilesDI(s->files, s->ix))); + } + + static PyObject *rpmfile_orig_name(rpmfileObject *s) + { + char * fn = rpmfilesOFN(s->files, s->ix); +- PyObject *o = Py_BuildValue("s", fn); ++ PyObject *o = utf8FromString(fn); + free(fn); + return o; + } + + static PyObject *rpmfile_orig_basename(rpmfileObject *s) + { +- return Py_BuildValue("s", rpmfilesOBN(s->files, s->ix)); ++ return utf8FromString(rpmfilesOBN(s->files, s->ix)); + } + + static PyObject *rpmfile_orig_dirname(rpmfileObject *s) + { +- return Py_BuildValue("s", rpmfilesODN(s->files, rpmfilesODI(s->files, s->ix))); ++ return utf8FromString(rpmfilesODN(s->files, rpmfilesODI(s->files, s->ix))); + } + static PyObject *rpmfile_mode(rpmfileObject *s) + { +@@ -105,17 +105,17 @@ static PyObject *rpmfile_nlink(rpmfileObject *s) + + static PyObject *rpmfile_linkto(rpmfileObject *s) + { +- return Py_BuildValue("s", rpmfilesFLink(s->files, s->ix)); ++ return utf8FromString(rpmfilesFLink(s->files, s->ix)); + } + + static PyObject *rpmfile_user(rpmfileObject *s) + { +- return Py_BuildValue("s", rpmfilesFUser(s->files, s->ix)); ++ return utf8FromString(rpmfilesFUser(s->files, s->ix)); + } + + static PyObject *rpmfile_group(rpmfileObject *s) + { +- return Py_BuildValue("s", rpmfilesFGroup(s->files, s->ix)); ++ return utf8FromString(rpmfilesFGroup(s->files, s->ix)); + } + + static PyObject *rpmfile_fflags(rpmfileObject *s) +@@ -145,7 +145,7 @@ static PyObject *rpmfile_digest(rpmfileObject *s) + NULL, &diglen); + if (digest) { + char * hex = pgpHexStr(digest, diglen); +- PyObject *o = Py_BuildValue("s", hex); ++ PyObject *o = utf8FromString(hex); + free(hex); + return o; + } +@@ -154,17 +154,17 @@ static PyObject *rpmfile_digest(rpmfileObject *s) + + static PyObject *rpmfile_class(rpmfileObject *s) + { +- return Py_BuildValue("s", rpmfilesFClass(s->files, s->ix)); ++ return utf8FromString(rpmfilesFClass(s->files, s->ix)); + } + + static PyObject *rpmfile_caps(rpmfileObject *s) + { +- return Py_BuildValue("s", rpmfilesFCaps(s->files, s->ix)); ++ return utf8FromString(rpmfilesFCaps(s->files, s->ix)); + } + + static PyObject *rpmfile_langs(rpmfileObject *s) + { +- return Py_BuildValue("s", rpmfilesFLangs(s->files, s->ix)); ++ return utf8FromString(rpmfilesFLangs(s->files, s->ix)); + } + + static PyObject *rpmfile_links(rpmfileObject *s) +diff --git a/python/rpmkeyring-py.c b/python/rpmkeyring-py.c +index d5f131e42..8968e0513 100644 +--- a/python/rpmkeyring-py.c ++++ b/python/rpmkeyring-py.c +@@ -38,7 +38,7 @@ static PyObject *rpmPubkey_new(PyTypeObject *subtype, + static PyObject * rpmPubkey_Base64(rpmPubkeyObject *s) + { + char *b64 = rpmPubkeyBase64(s->pubkey); +- PyObject *res = Py_BuildValue("s", b64); ++ PyObject *res = utf8FromString(b64); + free(b64); + return res; + } +diff --git a/python/rpmmacro-py.c b/python/rpmmacro-py.c +index 3cb1a51f5..d8a365547 100644 +--- a/python/rpmmacro-py.c ++++ b/python/rpmmacro-py.c +@@ -52,7 +52,7 @@ rpmmacro_ExpandMacro(PyObject * self, PyObject * args, PyObject * kwds) + if (rpmExpandMacros(NULL, macro, &str, 0) < 0) + PyErr_SetString(pyrpmError, "error expanding macro"); + else +- res = Py_BuildValue("s", str); ++ res = utf8FromString(str); + free(str); + } + return res; +diff --git a/python/rpmmodule.c b/python/rpmmodule.c +index 3faad23c7..05032edc7 100644 +--- a/python/rpmmodule.c ++++ b/python/rpmmodule.c +@@ -237,7 +237,7 @@ static void addRpmTags(PyObject *module) + + PyModule_AddIntConstant(module, tagname, tagval); + pyval = PyInt_FromLong(tagval); +- pyname = Py_BuildValue("s", shortname); ++ pyname = utf8FromString(shortname); + PyDict_SetItem(dict, pyval, pyname); + Py_DECREF(pyval); + Py_DECREF(pyname); +diff --git a/python/rpmps-py.c b/python/rpmps-py.c +index bdc899a60..902b2ae63 100644 +--- a/python/rpmps-py.c ++++ b/python/rpmps-py.c +@@ -18,12 +18,12 @@ static PyObject *rpmprob_get_type(rpmProblemObject *s, void *closure) + + static PyObject *rpmprob_get_pkgnevr(rpmProblemObject *s, void *closure) + { +- return Py_BuildValue("s", rpmProblemGetPkgNEVR(s->prob)); ++ return utf8FromString(rpmProblemGetPkgNEVR(s->prob)); + } + + static PyObject *rpmprob_get_altnevr(rpmProblemObject *s, void *closure) + { +- return Py_BuildValue("s", rpmProblemGetAltNEVR(s->prob)); ++ return utf8FromString(rpmProblemGetAltNEVR(s->prob)); + } + + static PyObject *rpmprob_get_key(rpmProblemObject *s, void *closure) +@@ -38,7 +38,7 @@ static PyObject *rpmprob_get_key(rpmProblemObject *s, void *closure) + + static PyObject *rpmprob_get_str(rpmProblemObject *s, void *closure) + { +- return Py_BuildValue("s", rpmProblemGetStr(s->prob)); ++ return utf8FromString(rpmProblemGetStr(s->prob)); + } + + static PyObject *rpmprob_get_num(rpmProblemObject *s, void *closure) +@@ -59,7 +59,7 @@ static PyGetSetDef rpmprob_getseters[] = { + static PyObject *rpmprob_str(rpmProblemObject *s) + { + char *str = rpmProblemString(s->prob); +- PyObject *res = Py_BuildValue("s", str); ++ PyObject *res = utf8FromString(str); + free(str); + return res; + } +diff --git a/python/rpmstrpool-py.c b/python/rpmstrpool-py.c +index 356bd1de5..a56e2b540 100644 +--- a/python/rpmstrpool-py.c ++++ b/python/rpmstrpool-py.c +@@ -44,7 +44,7 @@ static PyObject *strpool_id2str(rpmstrPoolObject *s, PyObject *item) + const char *str = rpmstrPoolStr(s->pool, id); + + if (str) +- ret = PyBytes_FromString(str); ++ ret = utf8FromString(str); + else + PyErr_SetObject(PyExc_KeyError, item); + } +diff --git a/python/rpmsystem-py.h b/python/rpmsystem-py.h +index 955d60cd3..87c750571 100644 +--- a/python/rpmsystem-py.h ++++ b/python/rpmsystem-py.h +@@ -19,4 +19,11 @@ + #define PyInt_AsSsize_t PyLong_AsSsize_t + #endif + ++/* In Python 3, we return all strings as surrogate-escaped utf-8 */ ++#if PY_MAJOR_VERSION >= 3 ++#define utf8FromString(_s) PyUnicode_DecodeUTF8(_s, strlen(_s), "surrogateescape") ++#else ++#define utf8FromString(_s) PyBytes_FromString(_s) ++#endif ++ + #endif /* H_SYSTEM_PYTHON */ +diff --git a/python/rpmtd-py.c b/python/rpmtd-py.c +index 247c7502a..23ca10517 100644 +--- a/python/rpmtd-py.c ++++ b/python/rpmtd-py.c +@@ -17,7 +17,7 @@ PyObject * rpmtd_ItemAsPyobj(rpmtd td, rpmTagClass tclass) + + switch (tclass) { + case RPM_STRING_CLASS: +- res = PyBytes_FromString(rpmtdGetString(td)); ++ res = utf8FromString(rpmtdGetString(td)); + break; + case RPM_NUMERIC_CLASS: + res = PyLong_FromLongLong(rpmtdGetNumber(td)); +diff --git a/python/rpmte-py.c b/python/rpmte-py.c +index 99ff2f496..2b3745754 100644 +--- a/python/rpmte-py.c ++++ b/python/rpmte-py.c +@@ -54,49 +54,49 @@ rpmte_TEType(rpmteObject * s, PyObject * unused) + static PyObject * + rpmte_N(rpmteObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmteN(s->te)); ++ return utf8FromString(rpmteN(s->te)); + } + + static PyObject * + rpmte_E(rpmteObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmteE(s->te)); ++ return utf8FromString(rpmteE(s->te)); + } + + static PyObject * + rpmte_V(rpmteObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmteV(s->te)); ++ return utf8FromString(rpmteV(s->te)); + } + + static PyObject * + rpmte_R(rpmteObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmteR(s->te)); ++ return utf8FromString(rpmteR(s->te)); + } + + static PyObject * + rpmte_A(rpmteObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmteA(s->te)); ++ return utf8FromString(rpmteA(s->te)); + } + + static PyObject * + rpmte_O(rpmteObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmteO(s->te)); ++ return utf8FromString(rpmteO(s->te)); + } + + static PyObject * + rpmte_NEVR(rpmteObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmteNEVR(s->te)); ++ return utf8FromString(rpmteNEVR(s->te)); + } + + static PyObject * + rpmte_NEVRA(rpmteObject * s, PyObject * unused) + { +- return Py_BuildValue("s", rpmteNEVRA(s->te)); ++ return utf8FromString(rpmteNEVRA(s->te)); + } + + static PyObject * +diff --git a/python/rpmts-py.c b/python/rpmts-py.c +index 1ddfc9a1e..96e3bb28e 100644 +--- a/python/rpmts-py.c ++++ b/python/rpmts-py.c +@@ -230,8 +230,9 @@ rpmts_SolveCallback(rpmts ts, rpmds ds, const void * data) + + PyEval_RestoreThread(cbInfo->_save); + +- args = Py_BuildValue("(Oissi)", cbInfo->tso, +- rpmdsTagN(ds), rpmdsN(ds), rpmdsEVR(ds), rpmdsFlags(ds)); ++ args = Py_BuildValue("(OiNNi)", cbInfo->tso, ++ rpmdsTagN(ds), utf8FromString(rpmdsN(ds)), ++ utf8FromString(rpmdsEVR(ds)), rpmdsFlags(ds)); + result = PyEval_CallObject(cbInfo->cb, args); + Py_DECREF(args); + +@@ -409,7 +410,7 @@ rpmts_HdrCheck(rpmtsObject * s, PyObject *obj) + rpmrc = headerCheck(s->ts, uh, uc, &msg); + Py_END_ALLOW_THREADS; + +- return Py_BuildValue("(is)", rpmrc, msg); ++ return Py_BuildValue("(iN)", rpmrc, utf8FromString(msg)); + } + + static PyObject * +@@ -500,7 +501,7 @@ rpmtsCallback(const void * hd, const rpmCallbackType what, + /* Synthesize a python object for callback (if necessary). */ + if (pkgObj == NULL) { + if (h) { +- pkgObj = Py_BuildValue("s", headerGetString(h, RPMTAG_NAME)); ++ pkgObj = utf8FromString(headerGetString(h, RPMTAG_NAME)); + } else { + pkgObj = Py_None; + Py_INCREF(pkgObj); +@@ -845,7 +846,7 @@ static PyObject *rpmts_get_tid(rpmtsObject *s, void *closure) + + static PyObject *rpmts_get_rootDir(rpmtsObject *s, void *closure) + { +- return Py_BuildValue("s", rpmtsRootDir(s->ts)); ++ return utf8FromString(rpmtsRootDir(s->ts)); + } + + static int rpmts_set_scriptFd(rpmtsObject *s, PyObject *value, void *closure) +diff --git a/python/spec-py.c b/python/spec-py.c +index 4efdbf4bf..70b796531 100644 +--- a/python/spec-py.c ++++ b/python/spec-py.c +@@ -57,7 +57,7 @@ static PyObject *pkgGetSection(rpmSpecPkg pkg, int section) + { + char *sect = rpmSpecPkgGetSection(pkg, section); + if (sect != NULL) { +- PyObject *ps = PyBytes_FromString(sect); ++ PyObject *ps = utf8FromString(sect); + free(sect); + if (ps != NULL) + return ps; +@@ -158,7 +158,7 @@ static PyObject * getSection(rpmSpec spec, int section) + { + const char *sect = rpmSpecGetSection(spec, section); + if (sect) { +- return Py_BuildValue("s", sect); ++ return utf8FromString(sect); + } + Py_RETURN_NONE; + } +@@ -208,8 +208,8 @@ static PyObject * spec_get_sources(specObject *s, void *closure) + + rpmSpecSrcIter iter = rpmSpecSrcIterInit(s->spec); + while ((source = rpmSpecSrcIterNext(iter)) != NULL) { +- PyObject *srcUrl = Py_BuildValue("(sii)", +- rpmSpecSrcFilename(source, 1), ++ PyObject *srcUrl = Py_BuildValue("(Nii)", ++ utf8FromString(rpmSpecSrcFilename(source, 1)), + rpmSpecSrcNum(source), + rpmSpecSrcFlags(source)); + if (!srcUrl) { +diff --git a/tests/local.at b/tests/local.at +index 02ead66c9..42eef1c75 100644 +--- a/tests/local.at ++++ b/tests/local.at +@@ -10,6 +10,7 @@ rm -rf "${abs_builddir}"/testing`rpm --eval '%_dbpath'`/* + + m4_define([RPMPY_RUN],[[ + cat << EOF > test.py ++# coding=utf-8 + import rpm, sys + dbpath=rpm.expandMacro('%_dbpath') + rpm.addMacro('_dbpath', '${abs_builddir}/testing%s' % dbpath) +diff --git a/tests/rpmpython.at b/tests/rpmpython.at +index ff77f868c..58f3e84a6 100644 +--- a/tests/rpmpython.at ++++ b/tests/rpmpython.at +@@ -106,6 +106,25 @@ None + 'rpm.hdr' object has no attribute '__foo__'] + ) + ++RPMPY_TEST([non-utf8 data in header],[ ++str = u'älämölö' ++enc = 'iso-8859-1' ++b = str.encode(enc) ++h = rpm.hdr() ++h['group'] = b ++d = h['group'] ++try: ++ # python 3 ++ t = bytes(d, 'utf-8', 'surrogateescape') ++except TypeError: ++ # python 2 ++ t = bytes(d) ++res = t.decode(enc) ++myprint(str == res) ++], ++[True] ++) ++ + RPMPY_TEST([invalid header data],[ + h1 = rpm.hdr() + h1['basenames'] = ['bing', 'bang', 'bong'] +@@ -125,6 +144,21 @@ for h in [h1, h2]: + /opt/bing,/opt/bang,/flopt/bong] + ) + ++RPMPY_TEST([labelCompare],[ ++v = '1.0' ++r = '1' ++e = 3 ++h = rpm.hdr() ++h['name'] = 'testpkg' ++h['version'] = v ++h['release'] = r ++h['epoch'] = e ++myprint(rpm.labelCompare((str(h['epoch']), h['version'], h['release']), ++ (str(e), v, r))) ++], ++[0] ++) ++ + RPMPY_TEST([vfyflags API],[ + ts = rpm.ts() + dlv = ts.getVfyFlags() +-- +2.20.1 + diff --git a/0001-Monkey-patch-.decode-method-to-our-strings-as-a-temp.patch b/0001-Monkey-patch-.decode-method-to-our-strings-as-a-temp.patch new file mode 100644 index 0000000..e655848 --- /dev/null +++ b/0001-Monkey-patch-.decode-method-to-our-strings-as-a-temp.patch @@ -0,0 +1,90 @@ +From 13b0ebee7cdb1e4d200b3c40d0ec9440f198a1d4 Mon Sep 17 00:00:00 2001 +Message-Id: <13b0ebee7cdb1e4d200b3c40d0ec9440f198a1d4.1554886141.git.pmatilai@redhat.com> +From: Panu Matilainen +Date: Wed, 10 Apr 2019 11:24:44 +0300 +Subject: [PATCH] Monkey-patch .decode() method to our strings as a temporary + compat crutch + +As a temporary crutch to support faster deployment of the sane +string behavior on python3, monkey-patch a decode method into all +strings we return. This seems to be enough to fix practically all +API users who have already adapted to the long-standing broken API +on Python 3. API users compatible with both Python 2 and 3 never needed +this anyway. Issue a warning with pointer to the relevant bug when the +fake decode() method is used to alert users to the issue. + +This is certainly an evil thing to do and will be removed as soon as +the critical users have been fixed to work with the new, corrected +behavior. +--- + python/rpm/__init__.py | 3 +++ + python/rpmmodule.c | 1 + + python/rpmsystem-py.h | 22 ++++++++++++++++++++-- + 3 files changed, 24 insertions(+), 2 deletions(-) + +diff --git a/python/rpm/__init__.py b/python/rpm/__init__.py +index 54728bbd4..6d69eda7b 100644 +--- a/python/rpm/__init__.py ++++ b/python/rpm/__init__.py +@@ -61,6 +61,9 @@ except ImportError: + # backwards compatibility + give the same class both ways + ts = TransactionSet + ++def _fakedecode(self, encoding='utf-8', errors='strict'): ++ warnings.warn("decode() called on unicode string, see https://bugzilla.redhat.com/show_bug.cgi?id=1693751", UnicodeWarning, stacklevel=2) ++ return self + + def headerLoad(*args, **kwds): + """DEPRECATED! Use rpm.hdr() instead.""" +diff --git a/python/rpmmodule.c b/python/rpmmodule.c +index 05032edc7..2a76cfbd0 100644 +--- a/python/rpmmodule.c ++++ b/python/rpmmodule.c +@@ -28,6 +28,7 @@ + */ + + PyObject * pyrpmError; ++PyObject * fakedecode = NULL; + + static PyObject * archScore(PyObject * self, PyObject * arg) + { +diff --git a/python/rpmsystem-py.h b/python/rpmsystem-py.h +index 25938464a..803da0fc1 100644 +--- a/python/rpmsystem-py.h ++++ b/python/rpmsystem-py.h +@@ -19,12 +19,30 @@ + #define PyInt_AsSsize_t PyLong_AsSsize_t + #endif + ++PyObject * fakedecode; ++ + static inline PyObject * utf8FromString(const char *s) + { + /* In Python 3, we return all strings as surrogate-escaped utf-8 */ + #if PY_MAJOR_VERSION >= 3 +- if (s != NULL) +- return PyUnicode_DecodeUTF8(s, strlen(s), "surrogateescape"); ++ if (s != NULL) { ++ PyObject *o = PyUnicode_DecodeUTF8(s, strlen(s), "surrogateescape"); ++ /* fish the fake decode function from python side if not done yet */ ++ if (fakedecode == NULL) { ++ PyObject *n = PyUnicode_FromString("rpm"); ++ PyObject *m = PyImport_Import(n); ++ PyObject *md = PyModule_GetDict(m); ++ fakedecode = PyDict_GetItemString(md, "_fakedecode"); ++ Py_DECREF(m); ++ Py_DECREF(n); ++ } ++ if (fakedecode && o) { ++ Py_INCREF(fakedecode); ++ /* monkey-patch it into the string object as "decode" */ ++ PyDict_SetItemString(Py_TYPE(o)->tp_dict, "decode", fakedecode); ++ } ++ return o; ++ } + #else + if (s != NULL) + return PyBytes_FromString(s); +-- +2.20.1 + diff --git a/0001-Return-NULL-string-as-None-from-utf8FromString.patch b/0001-Return-NULL-string-as-None-from-utf8FromString.patch new file mode 100644 index 0000000..e91db6f --- /dev/null +++ b/0001-Return-NULL-string-as-None-from-utf8FromString.patch @@ -0,0 +1,41 @@ +From aea53a4aead8bd71f519df35fcffd9eec76fbc01 Mon Sep 17 00:00:00 2001 +Message-Id: +From: Panu Matilainen +Date: Tue, 26 Feb 2019 11:27:51 +0200 +Subject: [PATCH] Return NULL string as None from utf8FromString() + +Commit 84920f898315d09a57a3f1067433eaeb7de5e830 regressed dnf install +to segfault at the end due to some NULL string passed to strlen(). +Check for NULL and return it as None, make it an inline function +to make this saner. +--- + python/rpmsystem-py.h | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/python/rpmsystem-py.h b/python/rpmsystem-py.h +index 87c750571..25938464a 100644 +--- a/python/rpmsystem-py.h ++++ b/python/rpmsystem-py.h +@@ -19,11 +19,17 @@ + #define PyInt_AsSsize_t PyLong_AsSsize_t + #endif + ++static inline PyObject * utf8FromString(const char *s) ++{ + /* In Python 3, we return all strings as surrogate-escaped utf-8 */ + #if PY_MAJOR_VERSION >= 3 +-#define utf8FromString(_s) PyUnicode_DecodeUTF8(_s, strlen(_s), "surrogateescape") ++ if (s != NULL) ++ return PyUnicode_DecodeUTF8(s, strlen(s), "surrogateescape"); + #else +-#define utf8FromString(_s) PyBytes_FromString(_s) ++ if (s != NULL) ++ return PyBytes_FromString(s); + #endif ++ Py_RETURN_NONE; ++} + + #endif /* H_SYSTEM_PYTHON */ +-- +2.20.1 + diff --git a/rpm.spec b/rpm.spec index 954665c..11c05da 100644 --- a/rpm.spec +++ b/rpm.spec @@ -23,7 +23,7 @@ %global rpmver 4.14.2.1 #global snapver rc2 -%global rel 5 +%global rel 6 %global srcver %{version}%{?snapver:-%{snapver}} %global srcdir %{?snapver:testing}%{!?snapver:%{name}-%(echo %{version} | cut -d'.' -f1-2).x} @@ -36,7 +36,7 @@ Summary: The RPM package management system Name: rpm Version: %{rpmver} -Release: %{?snapver:0.%{snapver}.}%{rel}%{?dist}.1 +Release: %{?snapver:0.%{snapver}.}%{rel}%{?dist} Url: http://www.rpm.org/ Source0: http://ftp.rpm.org/releases/%{srcdir}/%{name}-%{srcver}.tar.bz2 %if %{with int_bdb} @@ -64,6 +64,12 @@ Patch105: rpm-4.14.2-RPMTAG_MODULARITYLABEL.patch Patch106: 0001-find-debuginfo.sh-Handle-position-independent-execut.patch Patch107: 0001-Add-flag-to-use-strip-g-instead-of-full-strip-on-DSO.patch +# Python 3 string API sanity +Patch150: 0001-In-Python-3-return-all-our-string-data-as-surrogate-.patch +Patch151: 0001-Return-NULL-string-as-None-from-utf8FromString.patch +# Temporary compat crutch, not upstream +Patch152: 0001-Monkey-patch-.decode-method-to-our-strings-as-a-temp.patch + # These are not yet upstream Patch906: rpm-4.7.1-geode-i686.patch # Probably to be upstreamed in slightly different form @@ -570,6 +576,12 @@ make check || (cat tests/rpmtests.log; exit 1) %doc doc/librpm/html/* %changelog +* Wed Apr 10 2019 Panu Matilainen - 4.14.2.1-6 +- Unbreak Python 3 API by returning string data as surrogate-escaped utf-8 + string objects instead of bytes (#1693751) +- As a temporary crutch, monkey-patch a .decode() method to returned strings + to give users time to migrate from the long-standing broken behavior + * Wed Apr 10 2019 Panu Matilainen - 4.14.2.1-5 - Generate minidebug for PIE executables on file >= 5.33 too - Backport find-debuginfo --g-libs option for glibc's benefit (#1661512)