0b2921
From 84920f898315d09a57a3f1067433eaeb7de5e830 Mon Sep 17 00:00:00 2001
0b2921
Message-Id: <84920f898315d09a57a3f1067433eaeb7de5e830.1554884444.git.pmatilai@redhat.com>
0b2921
From: Panu Matilainen <pmatilai@redhat.com>
0b2921
Date: Fri, 22 Feb 2019 19:44:16 +0200
0b2921
Subject: [PATCH] In Python 3, return all our string data as surrogate-escaped
0b2921
 utf-8 strings
0b2921
0b2921
In the almost ten years of rpm sort of supporting Python 3 bindings, quite
0b2921
obviously nobody has actually tried to use them. There's a major mismatch
0b2921
between what the header API outputs (bytes) and what all the other APIs
0b2921
accept (strings), resulting in hysterical TypeErrors all over the place,
0b2921
including but not limited to labelCompare() (RhBug:1631292). Also a huge
0b2921
number of other places have been returning strings and silently assuming
0b2921
utf-8 through use of Py_BuildValue("s", ...), which will just irrevocably
0b2921
fail when non-utf8 data is encountered.
0b2921
0b2921
The politically Python 3-correct solution would be declaring all our data
0b2921
as bytes with unspecified encoding - that's exactly what it historically is.
0b2921
However doing so would by definition break every single rpm script people
0b2921
have developed on Python 2. And when 99% of the rpm content in the world
0b2921
actually is utf-8 encoded even if it doesn't say so (and in recent times
0b2921
packages even advertise themselves as utf-8 encoded), the bytes-only route
0b2921
seems a wee bit too draconian, even to this grumpy old fella.
0b2921
0b2921
Instead, route all our string returns through a single helper macro
0b2921
which on Python 2 just does what we always did, but in Python 3 converts
0b2921
the data to surrogate-escaped utf-8 strings. This makes stuff "just work"
0b2921
out of the box pretty much everywhere even with Python 3 (including
0b2921
our own test-suite!), while still allowing the non-utf8 case to be handled.
0b2921
Handling the non-utf8 case is a bit uglier but still possible,
0b2921
which is exactly how you want corner-cases to be. There might be some
0b2921
uses for retrieving raw byte data from the header, but worrying about
0b2921
such an API is a case for some other rainy day, for now we mostly only
0b2921
care that stuff works again.
0b2921
0b2921
Also add test-cases for mixed data source labelCompare() and
0b2921
non-utf8 insert to + retrieve from header.
0b2921
---
0b2921
 python/header-py.c     |  2 +-
0b2921
 python/rpmds-py.c      |  8 ++++----
0b2921
 python/rpmfd-py.c      |  6 +++---
0b2921
 python/rpmfi-py.c      | 24 ++++++++++++------------
0b2921
 python/rpmfiles-py.c   | 26 +++++++++++++-------------
0b2921
 python/rpmkeyring-py.c |  2 +-
0b2921
 python/rpmmacro-py.c   |  2 +-
0b2921
 python/rpmmodule.c     |  2 +-
0b2921
 python/rpmps-py.c      |  8 ++++----
0b2921
 python/rpmstrpool-py.c |  2 +-
0b2921
 python/rpmsystem-py.h  |  7 +++++++
0b2921
 python/rpmtd-py.c      |  2 +-
0b2921
 python/rpmte-py.c      | 16 ++++++++--------
0b2921
 python/rpmts-py.c      | 11 ++++++-----
0b2921
 python/spec-py.c       |  8 ++++----
0b2921
 tests/local.at         |  1 +
0b2921
 tests/rpmpython.at     | 34 ++++++++++++++++++++++++++++++++++
0b2921
 17 files changed, 102 insertions(+), 59 deletions(-)
0b2921
0b2921
diff --git a/python/header-py.c b/python/header-py.c
0b2921
index c9d54e869..93c241cb7 100644
0b2921
--- a/python/header-py.c
0b2921
+++ b/python/header-py.c
0b2921
@@ -231,7 +231,7 @@ static PyObject * hdrFormat(hdrObject * s, PyObject * args, PyObject * kwds)
0b2921
 	return NULL;
0b2921
     }
0b2921
 
0b2921
-    result = Py_BuildValue("s", r);
0b2921
+    result = utf8FromString(r);
0b2921
     free(r);
0b2921
 
0b2921
     return result;
0b2921
diff --git a/python/rpmds-py.c b/python/rpmds-py.c
0b2921
index 39b26628e..ecc9af9d5 100644
0b2921
--- a/python/rpmds-py.c
0b2921
+++ b/python/rpmds-py.c
0b2921
@@ -31,19 +31,19 @@ rpmds_Ix(rpmdsObject * s)
0b2921
 static PyObject *
0b2921
 rpmds_DNEVR(rpmdsObject * s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmdsDNEVR(s->ds));
0b2921
+    return utf8FromString(rpmdsDNEVR(s->ds));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmds_N(rpmdsObject * s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmdsN(s->ds));
0b2921
+    return utf8FromString(rpmdsN(s->ds));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmds_EVR(rpmdsObject * s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmdsEVR(s->ds));
0b2921
+    return utf8FromString(rpmdsEVR(s->ds));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
@@ -261,7 +261,7 @@ rpmds_subscript(rpmdsObject * s, PyObject * key)
0b2921
 
0b2921
     ix = (int) PyInt_AsLong(key);
0b2921
     rpmdsSetIx(s->ds, ix);
0b2921
-    return Py_BuildValue("s", rpmdsDNEVR(s->ds));
0b2921
+    return utf8FromString(rpmdsDNEVR(s->ds));
0b2921
 }
0b2921
 
0b2921
 static PyMappingMethods rpmds_as_mapping = {
0b2921
diff --git a/python/rpmfd-py.c b/python/rpmfd-py.c
0b2921
index 85fb0cd24..4b05cce5f 100644
0b2921
--- a/python/rpmfd-py.c
0b2921
+++ b/python/rpmfd-py.c
0b2921
@@ -327,17 +327,17 @@ static PyObject *rpmfd_get_closed(rpmfdObject *s)
0b2921
 static PyObject *rpmfd_get_name(rpmfdObject *s)
0b2921
 {
0b2921
     /* XXX: rpm returns non-paths with [mumble], python files use <mumble> */
0b2921
-    return Py_BuildValue("s", Fdescr(s->fd));
0b2921
+    return utf8FromString(Fdescr(s->fd));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfd_get_mode(rpmfdObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", s->mode);
0b2921
+    return utf8FromString(s->mode);
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfd_get_flags(rpmfdObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", s->flags);
0b2921
+    return utf8FromString(s->flags);
0b2921
 }
0b2921
 
0b2921
 static PyGetSetDef rpmfd_getseters[] = {
0b2921
diff --git a/python/rpmfi-py.c b/python/rpmfi-py.c
0b2921
index 8d2f926d0..db405c231 100644
0b2921
--- a/python/rpmfi-py.c
0b2921
+++ b/python/rpmfi-py.c
0b2921
@@ -41,19 +41,19 @@ rpmfi_DX(rpmfiObject * s, PyObject * unused)
0b2921
 static PyObject *
0b2921
 rpmfi_BN(rpmfiObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfiBN(s->fi));
0b2921
+    return utf8FromString(rpmfiBN(s->fi));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmfi_DN(rpmfiObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfiDN(s->fi));
0b2921
+    return utf8FromString(rpmfiDN(s->fi));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmfi_FN(rpmfiObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfiFN(s->fi));
0b2921
+    return utf8FromString(rpmfiFN(s->fi));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
@@ -98,7 +98,7 @@ rpmfi_Digest(rpmfiObject * s, PyObject * unused)
0b2921
 {
0b2921
     char *digest = rpmfiFDigestHex(s->fi, NULL);
0b2921
     if (digest) {
0b2921
-	PyObject *dig = Py_BuildValue("s", digest);
0b2921
+	PyObject *dig = utf8FromString(digest);
0b2921
 	free(digest);
0b2921
 	return dig;
0b2921
     } else {
0b2921
@@ -109,7 +109,7 @@ rpmfi_Digest(rpmfiObject * s, PyObject * unused)
0b2921
 static PyObject *
0b2921
 rpmfi_FLink(rpmfiObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfiFLink(s->fi));
0b2921
+    return utf8FromString(rpmfiFLink(s->fi));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
@@ -133,13 +133,13 @@ rpmfi_FMtime(rpmfiObject * s, PyObject * unused)
0b2921
 static PyObject *
0b2921
 rpmfi_FUser(rpmfiObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfiFUser(s->fi));
0b2921
+    return utf8FromString(rpmfiFUser(s->fi));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmfi_FGroup(rpmfiObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfiFGroup(s->fi));
0b2921
+    return utf8FromString(rpmfiFGroup(s->fi));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
@@ -155,7 +155,7 @@ rpmfi_FClass(rpmfiObject * s, PyObject * unused)
0b2921
 
0b2921
     if ((FClass = rpmfiFClass(s->fi)) == NULL)
0b2921
 	FClass = "";
0b2921
-    return Py_BuildValue("s", FClass);
0b2921
+    return utf8FromString(FClass);
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
@@ -208,7 +208,7 @@ rpmfi_iternext(rpmfiObject * s)
0b2921
 	    Py_INCREF(Py_None);
0b2921
 	    PyTuple_SET_ITEM(result, 0, Py_None);
0b2921
 	} else
0b2921
-	    PyTuple_SET_ITEM(result,  0, Py_BuildValue("s", FN));
0b2921
+	    PyTuple_SET_ITEM(result,  0, utf8FromString(FN));
0b2921
 	PyTuple_SET_ITEM(result,  1, PyLong_FromLongLong(FSize));
0b2921
 	PyTuple_SET_ITEM(result,  2, PyInt_FromLong(FMode));
0b2921
 	PyTuple_SET_ITEM(result,  3, PyInt_FromLong(FMtime));
0b2921
@@ -222,12 +222,12 @@ rpmfi_iternext(rpmfiObject * s)
0b2921
 	    Py_INCREF(Py_None);
0b2921
 	    PyTuple_SET_ITEM(result, 10, Py_None);
0b2921
 	} else
0b2921
-	    PyTuple_SET_ITEM(result, 10, Py_BuildValue("s", FUser));
0b2921
+	    PyTuple_SET_ITEM(result, 10, utf8FromString(FUser));
0b2921
 	if (FGroup == NULL) {
0b2921
 	    Py_INCREF(Py_None);
0b2921
 	    PyTuple_SET_ITEM(result, 11, Py_None);
0b2921
 	} else
0b2921
-	    PyTuple_SET_ITEM(result, 11, Py_BuildValue("s", FGroup));
0b2921
+	    PyTuple_SET_ITEM(result, 11, utf8FromString(FGroup));
0b2921
 	PyTuple_SET_ITEM(result, 12, rpmfi_Digest(s, NULL));
0b2921
 
0b2921
     } else
0b2921
@@ -313,7 +313,7 @@ rpmfi_subscript(rpmfiObject * s, PyObject * key)
0b2921
 
0b2921
     ix = (int) PyInt_AsLong(key);
0b2921
     rpmfiSetFX(s->fi, ix);
0b2921
-    return Py_BuildValue("s", rpmfiFN(s->fi));
0b2921
+    return utf8FromString(rpmfiFN(s->fi));
0b2921
 }
0b2921
 
0b2921
 static PyMappingMethods rpmfi_as_mapping = {
0b2921
diff --git a/python/rpmfiles-py.c b/python/rpmfiles-py.c
0b2921
index bc07dbeaf..557246cae 100644
0b2921
--- a/python/rpmfiles-py.c
0b2921
+++ b/python/rpmfiles-py.c
0b2921
@@ -41,37 +41,37 @@ static PyObject *rpmfile_dx(rpmfileObject *s)
0b2921
 static PyObject *rpmfile_name(rpmfileObject *s)
0b2921
 {
0b2921
     char * fn = rpmfilesFN(s->files, s->ix);
0b2921
-    PyObject *o = Py_BuildValue("s", fn);
0b2921
+    PyObject *o = utf8FromString(fn);
0b2921
     free(fn);
0b2921
     return o;
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_basename(rpmfileObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfilesBN(s->files, s->ix));
0b2921
+    return utf8FromString(rpmfilesBN(s->files, s->ix));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_dirname(rpmfileObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfilesDN(s->files, rpmfilesDI(s->files, s->ix)));
0b2921
+    return utf8FromString(rpmfilesDN(s->files, rpmfilesDI(s->files, s->ix)));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_orig_name(rpmfileObject *s)
0b2921
 {
0b2921
     char * fn = rpmfilesOFN(s->files, s->ix);
0b2921
-    PyObject *o = Py_BuildValue("s", fn);
0b2921
+    PyObject *o = utf8FromString(fn);
0b2921
     free(fn);
0b2921
     return o;
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_orig_basename(rpmfileObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfilesOBN(s->files, s->ix));
0b2921
+    return utf8FromString(rpmfilesOBN(s->files, s->ix));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_orig_dirname(rpmfileObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfilesODN(s->files, rpmfilesODI(s->files, s->ix)));
0b2921
+    return utf8FromString(rpmfilesODN(s->files, rpmfilesODI(s->files, s->ix)));
0b2921
 }
0b2921
 static PyObject *rpmfile_mode(rpmfileObject *s)
0b2921
 {
0b2921
@@ -105,17 +105,17 @@ static PyObject *rpmfile_nlink(rpmfileObject *s)
0b2921
 
0b2921
 static PyObject *rpmfile_linkto(rpmfileObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfilesFLink(s->files, s->ix));
0b2921
+    return utf8FromString(rpmfilesFLink(s->files, s->ix));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_user(rpmfileObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfilesFUser(s->files, s->ix));
0b2921
+    return utf8FromString(rpmfilesFUser(s->files, s->ix));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_group(rpmfileObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfilesFGroup(s->files, s->ix));
0b2921
+    return utf8FromString(rpmfilesFGroup(s->files, s->ix));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_fflags(rpmfileObject *s)
0b2921
@@ -145,7 +145,7 @@ static PyObject *rpmfile_digest(rpmfileObject *s)
0b2921
 						  NULL, &diglen);
0b2921
     if (digest) {
0b2921
 	char * hex = pgpHexStr(digest, diglen);
0b2921
-	PyObject *o = Py_BuildValue("s", hex);
0b2921
+	PyObject *o = utf8FromString(hex);
0b2921
 	free(hex);
0b2921
 	return o;
0b2921
     }
0b2921
@@ -154,17 +154,17 @@ static PyObject *rpmfile_digest(rpmfileObject *s)
0b2921
 
0b2921
 static PyObject *rpmfile_class(rpmfileObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfilesFClass(s->files, s->ix));
0b2921
+    return utf8FromString(rpmfilesFClass(s->files, s->ix));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_caps(rpmfileObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfilesFCaps(s->files, s->ix));
0b2921
+    return utf8FromString(rpmfilesFCaps(s->files, s->ix));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_langs(rpmfileObject *s)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmfilesFLangs(s->files, s->ix));
0b2921
+    return utf8FromString(rpmfilesFLangs(s->files, s->ix));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmfile_links(rpmfileObject *s)
0b2921
diff --git a/python/rpmkeyring-py.c b/python/rpmkeyring-py.c
0b2921
index d5f131e42..8968e0513 100644
0b2921
--- a/python/rpmkeyring-py.c
0b2921
+++ b/python/rpmkeyring-py.c
0b2921
@@ -38,7 +38,7 @@ static PyObject *rpmPubkey_new(PyTypeObject *subtype,
0b2921
 static PyObject * rpmPubkey_Base64(rpmPubkeyObject *s)
0b2921
 {
0b2921
     char *b64 = rpmPubkeyBase64(s->pubkey);
0b2921
-    PyObject *res = Py_BuildValue("s", b64);
0b2921
+    PyObject *res = utf8FromString(b64);
0b2921
     free(b64);
0b2921
     return res;
0b2921
 }
0b2921
diff --git a/python/rpmmacro-py.c b/python/rpmmacro-py.c
0b2921
index 3cb1a51f5..d8a365547 100644
0b2921
--- a/python/rpmmacro-py.c
0b2921
+++ b/python/rpmmacro-py.c
0b2921
@@ -52,7 +52,7 @@ rpmmacro_ExpandMacro(PyObject * self, PyObject * args, PyObject * kwds)
0b2921
 	if (rpmExpandMacros(NULL, macro, &str, 0) < 0)
0b2921
 	    PyErr_SetString(pyrpmError, "error expanding macro");
0b2921
 	else
0b2921
-	    res = Py_BuildValue("s", str);
0b2921
+	    res = utf8FromString(str);
0b2921
 	free(str);
0b2921
     }
0b2921
     return res;
0b2921
diff --git a/python/rpmmodule.c b/python/rpmmodule.c
0b2921
index 3faad23c7..05032edc7 100644
0b2921
--- a/python/rpmmodule.c
0b2921
+++ b/python/rpmmodule.c
0b2921
@@ -237,7 +237,7 @@ static void addRpmTags(PyObject *module)
0b2921
 
0b2921
 	PyModule_AddIntConstant(module, tagname, tagval);
0b2921
 	pyval = PyInt_FromLong(tagval);
0b2921
-	pyname = Py_BuildValue("s", shortname);
0b2921
+	pyname = utf8FromString(shortname);
0b2921
 	PyDict_SetItem(dict, pyval, pyname);
0b2921
 	Py_DECREF(pyval);
0b2921
 	Py_DECREF(pyname);
0b2921
diff --git a/python/rpmps-py.c b/python/rpmps-py.c
0b2921
index bdc899a60..902b2ae63 100644
0b2921
--- a/python/rpmps-py.c
0b2921
+++ b/python/rpmps-py.c
0b2921
@@ -18,12 +18,12 @@ static PyObject *rpmprob_get_type(rpmProblemObject *s, void *closure)
0b2921
 
0b2921
 static PyObject *rpmprob_get_pkgnevr(rpmProblemObject *s, void *closure)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmProblemGetPkgNEVR(s->prob));
0b2921
+    return utf8FromString(rpmProblemGetPkgNEVR(s->prob));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmprob_get_altnevr(rpmProblemObject *s, void *closure)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmProblemGetAltNEVR(s->prob));
0b2921
+    return utf8FromString(rpmProblemGetAltNEVR(s->prob));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmprob_get_key(rpmProblemObject *s, void *closure)
0b2921
@@ -38,7 +38,7 @@ static PyObject *rpmprob_get_key(rpmProblemObject *s, void *closure)
0b2921
 
0b2921
 static PyObject *rpmprob_get_str(rpmProblemObject *s, void *closure)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmProblemGetStr(s->prob));
0b2921
+    return utf8FromString(rpmProblemGetStr(s->prob));
0b2921
 }
0b2921
 
0b2921
 static PyObject *rpmprob_get_num(rpmProblemObject *s, void *closure)
0b2921
@@ -59,7 +59,7 @@ static PyGetSetDef rpmprob_getseters[] = {
0b2921
 static PyObject *rpmprob_str(rpmProblemObject *s)
0b2921
 {
0b2921
     char *str = rpmProblemString(s->prob);
0b2921
-    PyObject *res = Py_BuildValue("s", str);
0b2921
+    PyObject *res = utf8FromString(str);
0b2921
     free(str);
0b2921
     return res;
0b2921
 }
0b2921
diff --git a/python/rpmstrpool-py.c b/python/rpmstrpool-py.c
0b2921
index 356bd1de5..a56e2b540 100644
0b2921
--- a/python/rpmstrpool-py.c
0b2921
+++ b/python/rpmstrpool-py.c
0b2921
@@ -44,7 +44,7 @@ static PyObject *strpool_id2str(rpmstrPoolObject *s, PyObject *item)
0b2921
 	const char *str = rpmstrPoolStr(s->pool, id);
0b2921
 
0b2921
 	if (str)
0b2921
-	    ret = PyBytes_FromString(str);
0b2921
+	    ret = utf8FromString(str);
0b2921
 	else 
0b2921
 	    PyErr_SetObject(PyExc_KeyError, item);
0b2921
     }
0b2921
diff --git a/python/rpmsystem-py.h b/python/rpmsystem-py.h
0b2921
index 955d60cd3..87c750571 100644
0b2921
--- a/python/rpmsystem-py.h
0b2921
+++ b/python/rpmsystem-py.h
0b2921
@@ -19,4 +19,11 @@
0b2921
 #define PyInt_AsSsize_t PyLong_AsSsize_t
0b2921
 #endif
0b2921
 
0b2921
+/* In Python 3, we return all strings as surrogate-escaped utf-8 */
0b2921
+#if PY_MAJOR_VERSION >= 3
0b2921
+#define utf8FromString(_s) PyUnicode_DecodeUTF8(_s, strlen(_s), "surrogateescape")
0b2921
+#else
0b2921
+#define utf8FromString(_s) PyBytes_FromString(_s)
0b2921
+#endif
0b2921
+
0b2921
 #endif	/* H_SYSTEM_PYTHON */
0b2921
diff --git a/python/rpmtd-py.c b/python/rpmtd-py.c
0b2921
index 247c7502a..23ca10517 100644
0b2921
--- a/python/rpmtd-py.c
0b2921
+++ b/python/rpmtd-py.c
0b2921
@@ -17,7 +17,7 @@ PyObject * rpmtd_ItemAsPyobj(rpmtd td, rpmTagClass tclass)
0b2921
 
0b2921
     switch (tclass) {
0b2921
     case RPM_STRING_CLASS:
0b2921
-	res = PyBytes_FromString(rpmtdGetString(td));
0b2921
+	res = utf8FromString(rpmtdGetString(td));
0b2921
 	break;
0b2921
     case RPM_NUMERIC_CLASS:
0b2921
 	res = PyLong_FromLongLong(rpmtdGetNumber(td));
0b2921
diff --git a/python/rpmte-py.c b/python/rpmte-py.c
0b2921
index 99ff2f496..2b3745754 100644
0b2921
--- a/python/rpmte-py.c
0b2921
+++ b/python/rpmte-py.c
0b2921
@@ -54,49 +54,49 @@ rpmte_TEType(rpmteObject * s, PyObject * unused)
0b2921
 static PyObject *
0b2921
 rpmte_N(rpmteObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmteN(s->te));
0b2921
+    return utf8FromString(rpmteN(s->te));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmte_E(rpmteObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmteE(s->te));
0b2921
+    return utf8FromString(rpmteE(s->te));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmte_V(rpmteObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmteV(s->te));
0b2921
+    return utf8FromString(rpmteV(s->te));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmte_R(rpmteObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmteR(s->te));
0b2921
+    return utf8FromString(rpmteR(s->te));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmte_A(rpmteObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmteA(s->te));
0b2921
+    return utf8FromString(rpmteA(s->te));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmte_O(rpmteObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmteO(s->te));
0b2921
+    return utf8FromString(rpmteO(s->te));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmte_NEVR(rpmteObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmteNEVR(s->te));
0b2921
+    return utf8FromString(rpmteNEVR(s->te));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
 rpmte_NEVRA(rpmteObject * s, PyObject * unused)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmteNEVRA(s->te));
0b2921
+    return utf8FromString(rpmteNEVRA(s->te));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
diff --git a/python/rpmts-py.c b/python/rpmts-py.c
0b2921
index 1ddfc9a1e..96e3bb28e 100644
0b2921
--- a/python/rpmts-py.c
0b2921
+++ b/python/rpmts-py.c
0b2921
@@ -230,8 +230,9 @@ rpmts_SolveCallback(rpmts ts, rpmds ds, const void * data)
0b2921
 
0b2921
     PyEval_RestoreThread(cbInfo->_save);
0b2921
 
0b2921
-    args = Py_BuildValue("(Oissi)", cbInfo->tso,
0b2921
-		rpmdsTagN(ds), rpmdsN(ds), rpmdsEVR(ds), rpmdsFlags(ds));
0b2921
+    args = Py_BuildValue("(OiNNi)", cbInfo->tso,
0b2921
+		rpmdsTagN(ds), utf8FromString(rpmdsN(ds)),
0b2921
+		utf8FromString(rpmdsEVR(ds)), rpmdsFlags(ds));
0b2921
     result = PyEval_CallObject(cbInfo->cb, args);
0b2921
     Py_DECREF(args);
0b2921
 
0b2921
@@ -409,7 +410,7 @@ rpmts_HdrCheck(rpmtsObject * s, PyObject *obj)
0b2921
     rpmrc = headerCheck(s->ts, uh, uc, &msg);
0b2921
     Py_END_ALLOW_THREADS;
0b2921
 
0b2921
-    return Py_BuildValue("(is)", rpmrc, msg);
0b2921
+    return Py_BuildValue("(iN)", rpmrc, utf8FromString(msg));
0b2921
 }
0b2921
 
0b2921
 static PyObject *
0b2921
@@ -500,7 +501,7 @@ rpmtsCallback(const void * hd, const rpmCallbackType what,
0b2921
     /* Synthesize a python object for callback (if necessary). */
0b2921
     if (pkgObj == NULL) {
0b2921
 	if (h) {
0b2921
-	    pkgObj = Py_BuildValue("s", headerGetString(h, RPMTAG_NAME));
0b2921
+	    pkgObj = utf8FromString(headerGetString(h, RPMTAG_NAME));
0b2921
 	} else {
0b2921
 	    pkgObj = Py_None;
0b2921
 	    Py_INCREF(pkgObj);
0b2921
@@ -845,7 +846,7 @@ static PyObject *rpmts_get_tid(rpmtsObject *s, void *closure)
0b2921
 
0b2921
 static PyObject *rpmts_get_rootDir(rpmtsObject *s, void *closure)
0b2921
 {
0b2921
-    return Py_BuildValue("s", rpmtsRootDir(s->ts));
0b2921
+    return utf8FromString(rpmtsRootDir(s->ts));
0b2921
 }
0b2921
 
0b2921
 static int rpmts_set_scriptFd(rpmtsObject *s, PyObject *value, void *closure)
0b2921
diff --git a/python/spec-py.c b/python/spec-py.c
0b2921
index 4efdbf4bf..70b796531 100644
0b2921
--- a/python/spec-py.c
0b2921
+++ b/python/spec-py.c
0b2921
@@ -57,7 +57,7 @@ static PyObject *pkgGetSection(rpmSpecPkg pkg, int section)
0b2921
 {
0b2921
     char *sect = rpmSpecPkgGetSection(pkg, section);
0b2921
     if (sect != NULL) {
0b2921
-        PyObject *ps = PyBytes_FromString(sect);
0b2921
+        PyObject *ps = utf8FromString(sect);
0b2921
         free(sect);
0b2921
         if (ps != NULL)
0b2921
             return ps;
0b2921
@@ -158,7 +158,7 @@ static PyObject * getSection(rpmSpec spec, int section)
0b2921
 {
0b2921
     const char *sect = rpmSpecGetSection(spec, section);
0b2921
     if (sect) {
0b2921
-	return Py_BuildValue("s", sect);
0b2921
+	return utf8FromString(sect);
0b2921
     }
0b2921
     Py_RETURN_NONE;
0b2921
 }
0b2921
@@ -208,8 +208,8 @@ static PyObject * spec_get_sources(specObject *s, void *closure)
0b2921
 
0b2921
     rpmSpecSrcIter iter = rpmSpecSrcIterInit(s->spec);
0b2921
     while ((source = rpmSpecSrcIterNext(iter)) != NULL) {
0b2921
-	PyObject *srcUrl = Py_BuildValue("(sii)",
0b2921
-				rpmSpecSrcFilename(source, 1),
0b2921
+	PyObject *srcUrl = Py_BuildValue("(Nii)",
0b2921
+				utf8FromString(rpmSpecSrcFilename(source, 1)),
0b2921
 				rpmSpecSrcNum(source),
0b2921
 				rpmSpecSrcFlags(source)); 
0b2921
         if (!srcUrl) {
0b2921
diff --git a/tests/local.at b/tests/local.at
0b2921
index 02ead66c9..42eef1c75 100644
0b2921
--- a/tests/local.at
0b2921
+++ b/tests/local.at
0b2921
@@ -10,6 +10,7 @@ rm -rf "${abs_builddir}"/testing`rpm --eval '%_dbpath'`/*
0b2921
 
0b2921
 m4_define([RPMPY_RUN],[[
0b2921
 cat << EOF > test.py
0b2921
+# coding=utf-8
0b2921
 import rpm, sys
0b2921
 dbpath=rpm.expandMacro('%_dbpath')
0b2921
 rpm.addMacro('_dbpath', '${abs_builddir}/testing%s' % dbpath)
0b2921
diff --git a/tests/rpmpython.at b/tests/rpmpython.at
0b2921
index ff77f868c..58f3e84a6 100644
0b2921
--- a/tests/rpmpython.at
0b2921
+++ b/tests/rpmpython.at
0b2921
@@ -106,6 +106,25 @@ None
0b2921
 'rpm.hdr' object has no attribute '__foo__']
0b2921
 )
0b2921
 
0b2921
+RPMPY_TEST([non-utf8 data in header],[
0b2921
+str = u'älämölö'
0b2921
+enc = 'iso-8859-1'
0b2921
+b = str.encode(enc)
0b2921
+h = rpm.hdr()
0b2921
+h['group'] = b
0b2921
+d = h['group']
0b2921
+try:
0b2921
+    # python 3
0b2921
+    t = bytes(d, 'utf-8', 'surrogateescape')
0b2921
+except TypeError:
0b2921
+    # python 2
0b2921
+    t = bytes(d)
0b2921
+res = t.decode(enc)
0b2921
+myprint(str == res)
0b2921
+],
0b2921
+[True]
0b2921
+)
0b2921
+
0b2921
 RPMPY_TEST([invalid header data],[
0b2921
 h1 = rpm.hdr()
0b2921
 h1['basenames'] = ['bing', 'bang', 'bong']
0b2921
@@ -125,6 +144,21 @@ for h in [h1, h2]:
0b2921
 /opt/bing,/opt/bang,/flopt/bong]
0b2921
 )
0b2921
 
0b2921
+RPMPY_TEST([labelCompare],[
0b2921
+v = '1.0'
0b2921
+r = '1'
0b2921
+e = 3
0b2921
+h = rpm.hdr()
0b2921
+h['name'] = 'testpkg'
0b2921
+h['version'] = v
0b2921
+h['release'] = r
0b2921
+h['epoch'] = e
0b2921
+myprint(rpm.labelCompare((str(h['epoch']), h['version'], h['release']),
0b2921
+			 (str(e), v, r)))
0b2921
+],
0b2921
+[0]
0b2921
+)
0b2921
+
0b2921
 RPMPY_TEST([vfyflags API],[
0b2921
 ts = rpm.ts()
0b2921
 dlv = ts.getVfyFlags()
0b2921
-- 
0b2921
2.20.1
0b2921