738010
From 2428320b2157a0fcc0f35bea12584286ebd02aab Mon Sep 17 00:00:00 2001
738010
From: Eduardo Otubo <otubo@redhat.com>
738010
Date: Wed, 15 May 2019 12:15:25 +0200
738010
Subject: [PATCH 1/5] Azure: Ensure platform random_seed is always serializable
738010
 as JSON.
738010
738010
RH-Author: Eduardo Otubo <otubo@redhat.com>
738010
Message-id: <20190515121529.11191-2-otubo@redhat.com>
738010
Patchwork-id: 87881
738010
O-Subject: [rhel-7 cloud-init PATCHv2 1/5] Azure: Ensure platform random_seed is always serializable as JSON.
738010
Bugzilla: 1687565
738010
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
738010
RH-Acked-by: Mohammed Gamal <mgamal@redhat.com>
738010
738010
From: "Jason Zions (MSFT)" <jasonzio@microsoft.com>
738010
738010
BZ: 1687565
738010
BRANCH: rhel7/master-18.5
738010
UPSTREAM: 0dc3a77f
738010
BREW: 21696239
738010
738010
commit 0dc3a77f41f4544e4cb5a41637af7693410d4cdf
738010
Author: Jason Zions (MSFT) <jasonzio@microsoft.com>
738010
Date:   Tue Mar 26 18:53:50 2019 +0000
738010
738010
    Azure: Ensure platform random_seed is always serializable as JSON.
738010
738010
    The Azure platform surfaces random bytes into /sys via Hyper-V.
738010
    Python 2.7 json.dump() raises an exception if asked to convert
738010
    a str with non-character content, and Python 3 json.dump()
738010
    won't serialize a "bytes" value. As a result, cloud-init instance
738010
    data is often not written by Azure, making reboots slower (cloud-init
738010
    has to repeat work).
738010
738010
    The random data is base64-encoded and then decoded into a string
738010
    (str or unicode depending on the version of Python in use). The
738010
    base64 string has just as many bits of entropy, so we're not
738010
    throwing away useful "information", but we can be certain
738010
    json.dump() will correctly serialize the bits.
738010
738010
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
738010
738010
Conflicts:
738010
    tests/unittests/test_datasource/test_azure.py
738010
    Skipped the commit edf052c as it removes support for python-2.6
738010
738010
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
738010
---
738010
 cloudinit/sources/DataSourceAzure.py          | 24 +++++++++++++++++++-----
738010
 tests/data/azure/non_unicode_random_string    |  1 +
738010
 tests/unittests/test_datasource/test_azure.py | 24 ++++++++++++++++++++++--
738010
 3 files changed, 42 insertions(+), 7 deletions(-)
738010
 create mode 100644 tests/data/azure/non_unicode_random_string
738010
738010
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
738010
index 2062ca5..a768b2c 100644
738010
--- a/cloudinit/sources/DataSourceAzure.py
738010
+++ b/cloudinit/sources/DataSourceAzure.py
738010
@@ -54,6 +54,7 @@ REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds"
738010
 REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready"
738010
 AGENT_SEED_DIR = '/var/lib/waagent'
738010
 IMDS_URL = "http://169.254.169.254/metadata/"
738010
+PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0"
738010
 
738010
 # List of static scripts and network config artifacts created by
738010
 # stock ubuntu suported images.
738010
@@ -195,6 +196,8 @@ if util.is_FreeBSD():
738010
         RESOURCE_DISK_PATH = "/dev/" + res_disk
738010
     else:
738010
         LOG.debug("resource disk is None")
738010
+    # TODO Find where platform entropy data is surfaced
738010
+    PLATFORM_ENTROPY_SOURCE = None
738010
 
738010
 BUILTIN_DS_CONFIG = {
738010
     'agent_command': AGENT_START_BUILTIN,
738010
@@ -1100,16 +1103,27 @@ def _check_freebsd_cdrom(cdrom_dev):
738010
     return False
738010
 
738010
 
738010
-def _get_random_seed():
738010
+def _get_random_seed(source=PLATFORM_ENTROPY_SOURCE):
738010
     """Return content random seed file if available, otherwise,
738010
        return None."""
738010
     # azure / hyper-v provides random data here
738010
-    # TODO. find the seed on FreeBSD platform
738010
     # now update ds_cfg to reflect contents pass in config
738010
-    if util.is_FreeBSD():
738010
+    if source is None:
738010
         return None
738010
-    return util.load_file("/sys/firmware/acpi/tables/OEM0",
738010
-                          quiet=True, decode=False)
738010
+    seed = util.load_file(source, quiet=True, decode=False)
738010
+
738010
+    # The seed generally contains non-Unicode characters. load_file puts
738010
+    # them into a str (in python 2) or bytes (in python 3). In python 2,
738010
+    # bad octets in a str cause util.json_dumps() to throw an exception. In
738010
+    # python 3, bytes is a non-serializable type, and the handler load_file
738010
+    # uses applies b64 encoding *again* to handle it. The simplest solution
738010
+    # is to just b64encode the data and then decode it to a serializable
738010
+    # string. Same number of bits of entropy, just with 25% more zeroes.
738010
+    # There's no need to undo this base64-encoding when the random seed is
738010
+    # actually used in cc_seed_random.py.
738010
+    seed = base64.b64encode(seed).decode()
738010
+
738010
+    return seed
738010
 
738010
 
738010
 def list_possible_azure_ds_devs():
738010
diff --git a/tests/data/azure/non_unicode_random_string b/tests/data/azure/non_unicode_random_string
738010
new file mode 100644
738010
index 0000000..b9ecefb
738010
--- /dev/null
738010
+++ b/tests/data/azure/non_unicode_random_string
738010
@@ -0,0 +1 @@
738010
+OEM0d\x00\x00\x00\x01\x80VRTUALMICROSFT\x02\x17\x00\x06MSFT\x97\x00\x00\x00C\xb4{V\xf4X%\x061x\x90\x1c\xfen\x86\xbf~\xf5\x8c\x94&\x88\xed\x84\xf9B\xbd\xd3\xf1\xdb\xee:\xd9\x0fc\x0e\x83(\xbd\xe3'\xfc\x85,\xdf\xf4\x13\x99N\xc5\xf3Y\x1e\xe3\x0b\xa4H\x08J\xb9\xdcdb$
738010
\ No newline at end of file
738010
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
738010
index 417d86a..eacf225 100644
738010
--- a/tests/unittests/test_datasource/test_azure.py
738010
+++ b/tests/unittests/test_datasource/test_azure.py
738010
@@ -7,11 +7,11 @@ from cloudinit.sources import (
738010
     UNSET, DataSourceAzure as dsaz, InvalidMetaDataException)
738010
 from cloudinit.util import (b64e, decode_binary, load_file, write_file,
738010
                             find_freebsd_part, get_path_dev_freebsd,
738010
-                            MountFailedError)
738010
+                            MountFailedError, json_dumps, load_json)
738010
 from cloudinit.version import version_string as vs
738010
 from cloudinit.tests.helpers import (
738010
     HttprettyTestCase, CiTestCase, populate_dir, mock, wrap_and_call,
738010
-    ExitStack, PY26, SkipTest)
738010
+    ExitStack, PY26, SkipTest, resourceLocation)
738010
 
738010
 import crypt
738010
 import httpretty
738010
@@ -1924,4 +1924,24 @@ class TestWBIsPlatformViable(CiTestCase):
738010
             self.logs.getvalue())
738010
 
738010
 
738010
+class TestRandomSeed(CiTestCase):
738010
+    """Test proper handling of random_seed"""
738010
+
738010
+    def test_non_ascii_seed_is_serializable(self):
738010
+        """Pass if a random string from the Azure infrastructure which
738010
+        contains at least one non-Unicode character can be converted to/from
738010
+        JSON without alteration and without throwing an exception.
738010
+        """
738010
+        path = resourceLocation("azure/non_unicode_random_string")
738010
+        result = dsaz._get_random_seed(path)
738010
+
738010
+        obj = {'seed': result}
738010
+        try:
738010
+            serialized = json_dumps(obj)
738010
+            deserialized = load_json(serialized)
738010
+        except UnicodeDecodeError:
738010
+            self.fail("Non-serializable random seed returned")
738010
+
738010
+        self.assertEqual(deserialized['seed'], result)
738010
+
738010
 # vi: ts=4 expandtab
738010
-- 
738010
1.8.3.1
738010