Blob Blame History Raw
From 3303a8a621467dd7be67cec211fe417e9c81946f Mon Sep 17 00:00:00 2001
From: Simon McVittie <smcv@debian.org>
Date: Fri, 27 Apr 2018 11:09:07 +0100
Subject: [PATCH] avahi-python: Encode unicode strings as UTF-8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, we would effectively encode anything representable in
Latin-1 as Latin-1, and crash on anything not representable in Latin-1:

>>> import avahi
>>> avahi.string_to_byte_array(u'©')
[dbus.Byte(169)]
>>> avahi.string_to_byte_array(u'\ufeff')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.7/dist-packages/avahi/__init__.py", line 94, in string_to_byte_array
    r.append(dbus.Byte(ord(c)))
ValueError: Integer outside range 0-255

This is particularly important for Python 3, where the str type
is a Unicode string.

The b'' syntax for bytestrings is supported since at least Python 2.7.

These functions now accept either Unicode strings (Python 2 unicode,
Python 3 str), which are encoded in UTF-8, or bytestrings
(Python 2 str, Python 3 bytes) which are taken as-is.

Signed-off-by: Simon McVittie <smcv@debian.org>
(cherry picked from commit 169e85dbc13dcaae8a699618883e512614f540b7)

Related: #1561019
---
 avahi-python/avahi/__init__.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/avahi-python/avahi/__init__.py b/avahi-python/avahi/__init__.py
index 7b45029..02305b0 100644
--- a/avahi-python/avahi/__init__.py
+++ b/avahi-python/avahi/__init__.py
@@ -17,6 +17,8 @@
 
 # Some definitions matching those in avahi-common/defs.h
 
+import sys
+
 import dbus
 
 SERVER_INVALID, SERVER_REGISTERING, SERVER_RUNNING, SERVER_COLLISION, SERVER_FAILURE = range(0, 5)
@@ -66,6 +68,9 @@ DBUS_INTERFACE_HOST_NAME_RESOLVER = DBUS_NAME + ".HostNameResolver"
 DBUS_INTERFACE_SERVICE_RESOLVER = DBUS_NAME + ".ServiceResolver"
 DBUS_INTERFACE_RECORD_BROWSER = DBUS_NAME + ".RecordBrowser"
 
+if sys.version_info[0] >= 3:
+    unicode = str
+
 def byte_array_to_string(s):
     r = ""
     
@@ -86,12 +91,19 @@ def txt_array_to_string_array(t):
 
     return l
 
-
 def string_to_byte_array(s):
+    if isinstance(s, unicode):
+        s = s.encode('utf-8')
+
     r = []
 
     for c in s:
-        r.append(dbus.Byte(ord(c)))
+        if isinstance(c, int):
+            # Python 3: iterating over bytes yields ints
+            r.append(dbus.Byte(c))
+        else:
+            # Python 2: iterating over str yields str
+            r.append(dbus.Byte(ord(c)))
 
     return r
 
@@ -107,6 +119,12 @@ def dict_to_txt_array(txt_dict):
     l = []
 
     for k,v in txt_dict.items():
-        l.append(string_to_byte_array("%s=%s" % (k,v)))
+        if isinstance(k, unicode):
+            k = k.encode('utf-8')
+
+        if isinstance(v, unicode):
+            v = v.encode('utf-8')
+
+        l.append(string_to_byte_array(b"%s=%s" % (k,v)))
 
     return l
-- 
2.14.3