c914e7
From ae9b545cef4a68dfb9f9356dd27e43ff71ec26aa Mon Sep 17 00:00:00 2001
0a07cd
From: Eduardo Otubo <otubo@redhat.com>
0a07cd
Date: Wed, 29 May 2019 13:41:45 +0200
0a07cd
Subject: [PATCH 1/5] Azure: Ensure platform random_seed is always serializable
0a07cd
 as JSON.
0a07cd
0a07cd
RH-Author: Eduardo Otubo <otubo@redhat.com>
0a07cd
Message-id: <20190529134149.842-2-otubo@redhat.com>
0a07cd
Patchwork-id: 88272
0a07cd
O-Subject: [RHEL-8.0.1/RHEL-8.1.0 cloud-init PATCHv2 1/5] Azure: Ensure platform random_seed is always serializable as JSON.
c914e7
Bugzilla: 1691986
0a07cd
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
0a07cd
RH-Acked-by: Cathy Avery <cavery@redhat.com>
0a07cd
0a07cd
From: "Jason Zions (MSFT)" <jasonzio@microsoft.com>
0a07cd
commit 0dc3a77f41f4544e4cb5a41637af7693410d4cdf
0a07cd
Author: Jason Zions (MSFT) <jasonzio@microsoft.com>
0a07cd
Date:   Tue Mar 26 18:53:50 2019 +0000
0a07cd
0a07cd
    Azure: Ensure platform random_seed is always serializable as JSON.
0a07cd
0a07cd
    The Azure platform surfaces random bytes into /sys via Hyper-V.
0a07cd
    Python 2.7 json.dump() raises an exception if asked to convert
0a07cd
    a str with non-character content, and Python 3 json.dump()
0a07cd
    won't serialize a "bytes" value. As a result, c-i instance
0a07cd
    data is often not written on Azure, making reboots slower (c-i
0a07cd
    has to repeat work).
0a07cd
0a07cd
    The random data is base64-encoded and then decoded into a string
0a07cd
    (str or unicode depending on the version of Python in use). The
0a07cd
    base64 string has just as many bits of entropy, so we're not
0a07cd
    throwing away useful "information", but we can be certain
0a07cd
    json.dump() will correctly serialize the bits.
0a07cd
0a07cd
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
0a07cd
0a07cd
Conflicts:
0a07cd
    tests/unittests/test_datasource/test_azure.py
0a07cd
    Skipped the commit edf052c as it removes support for python-2.6
0a07cd
0a07cd
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
0a07cd
---
0a07cd
 cloudinit/sources/DataSourceAzure.py          | 24 +++++++++++++++++++-----
0a07cd
 tests/data/azure/non_unicode_random_string    |  1 +
0a07cd
 tests/unittests/test_datasource/test_azure.py | 24 ++++++++++++++++++++++--
0a07cd
 3 files changed, 42 insertions(+), 7 deletions(-)
0a07cd
 create mode 100644 tests/data/azure/non_unicode_random_string
0a07cd
0a07cd
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
0a07cd
index 2062ca5..a768b2c 100644
0a07cd
--- a/cloudinit/sources/DataSourceAzure.py
0a07cd
+++ b/cloudinit/sources/DataSourceAzure.py
0a07cd
@@ -54,6 +54,7 @@ REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds"
0a07cd
 REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready"
0a07cd
 AGENT_SEED_DIR = '/var/lib/waagent'
0a07cd
 IMDS_URL = "http://169.254.169.254/metadata/"
0a07cd
+PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0"
0a07cd
 
0a07cd
 # List of static scripts and network config artifacts created by
0a07cd
 # stock ubuntu suported images.
0a07cd
@@ -195,6 +196,8 @@ if util.is_FreeBSD():
0a07cd
         RESOURCE_DISK_PATH = "/dev/" + res_disk
0a07cd
     else:
0a07cd
         LOG.debug("resource disk is None")
0a07cd
+    # TODO Find where platform entropy data is surfaced
0a07cd
+    PLATFORM_ENTROPY_SOURCE = None
0a07cd
 
0a07cd
 BUILTIN_DS_CONFIG = {
0a07cd
     'agent_command': AGENT_START_BUILTIN,
0a07cd
@@ -1100,16 +1103,27 @@ def _check_freebsd_cdrom(cdrom_dev):
0a07cd
     return False
0a07cd
 
0a07cd
 
0a07cd
-def _get_random_seed():
0a07cd
+def _get_random_seed(source=PLATFORM_ENTROPY_SOURCE):
0a07cd
     """Return content random seed file if available, otherwise,
0a07cd
        return None."""
0a07cd
     # azure / hyper-v provides random data here
0a07cd
-    # TODO. find the seed on FreeBSD platform
0a07cd
     # now update ds_cfg to reflect contents pass in config
0a07cd
-    if util.is_FreeBSD():
0a07cd
+    if source is None:
0a07cd
         return None
0a07cd
-    return util.load_file("/sys/firmware/acpi/tables/OEM0",
0a07cd
-                          quiet=True, decode=False)
0a07cd
+    seed = util.load_file(source, quiet=True, decode=False)
0a07cd
+
0a07cd
+    # The seed generally contains non-Unicode characters. load_file puts
0a07cd
+    # them into a str (in python 2) or bytes (in python 3). In python 2,
0a07cd
+    # bad octets in a str cause util.json_dumps() to throw an exception. In
0a07cd
+    # python 3, bytes is a non-serializable type, and the handler load_file
0a07cd
+    # uses applies b64 encoding *again* to handle it. The simplest solution
0a07cd
+    # is to just b64encode the data and then decode it to a serializable
0a07cd
+    # string. Same number of bits of entropy, just with 25% more zeroes.
0a07cd
+    # There's no need to undo this base64-encoding when the random seed is
0a07cd
+    # actually used in cc_seed_random.py.
0a07cd
+    seed = base64.b64encode(seed).decode()
0a07cd
+
0a07cd
+    return seed
0a07cd
 
0a07cd
 
0a07cd
 def list_possible_azure_ds_devs():
0a07cd
diff --git a/tests/data/azure/non_unicode_random_string b/tests/data/azure/non_unicode_random_string
0a07cd
new file mode 100644
0a07cd
index 0000000..b9ecefb
0a07cd
--- /dev/null
0a07cd
+++ b/tests/data/azure/non_unicode_random_string
0a07cd
@@ -0,0 +1 @@
0a07cd
+OEM0d\x00\x00\x00\x01\x80VRTUALMICROSFT\x02\x17\x00\x06MSFT\x97\x00\x00\x00C\xb4{V\xf4X%\x061x\x90\x1c\xfen\x86\xbf~\xf5\x8c\x94&\x88\xed\x84\xf9B\xbd\xd3\xf1\xdb\xee:\xd9\x0fc\x0e\x83(\xbd\xe3'\xfc\x85,\xdf\xf4\x13\x99N\xc5\xf3Y\x1e\xe3\x0b\xa4H\x08J\xb9\xdcdb$
0a07cd
\ No newline at end of file
0a07cd
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
0a07cd
index 417d86a..eacf225 100644
0a07cd
--- a/tests/unittests/test_datasource/test_azure.py
0a07cd
+++ b/tests/unittests/test_datasource/test_azure.py
0a07cd
@@ -7,11 +7,11 @@ from cloudinit.sources import (
0a07cd
     UNSET, DataSourceAzure as dsaz, InvalidMetaDataException)
0a07cd
 from cloudinit.util import (b64e, decode_binary, load_file, write_file,
0a07cd
                             find_freebsd_part, get_path_dev_freebsd,
0a07cd
-                            MountFailedError)
0a07cd
+                            MountFailedError, json_dumps, load_json)
0a07cd
 from cloudinit.version import version_string as vs
0a07cd
 from cloudinit.tests.helpers import (
0a07cd
     HttprettyTestCase, CiTestCase, populate_dir, mock, wrap_and_call,
0a07cd
-    ExitStack, PY26, SkipTest)
0a07cd
+    ExitStack, PY26, SkipTest, resourceLocation)
0a07cd
 
0a07cd
 import crypt
0a07cd
 import httpretty
0a07cd
@@ -1924,4 +1924,24 @@ class TestWBIsPlatformViable(CiTestCase):
0a07cd
             self.logs.getvalue())
0a07cd
 
0a07cd
 
0a07cd
+class TestRandomSeed(CiTestCase):
0a07cd
+    """Test proper handling of random_seed"""
0a07cd
+
0a07cd
+    def test_non_ascii_seed_is_serializable(self):
0a07cd
+        """Pass if a random string from the Azure infrastructure which
0a07cd
+        contains at least one non-Unicode character can be converted to/from
0a07cd
+        JSON without alteration and without throwing an exception.
0a07cd
+        """
0a07cd
+        path = resourceLocation("azure/non_unicode_random_string")
0a07cd
+        result = dsaz._get_random_seed(path)
0a07cd
+
0a07cd
+        obj = {'seed': result}
0a07cd
+        try:
0a07cd
+            serialized = json_dumps(obj)
0a07cd
+            deserialized = load_json(serialized)
0a07cd
+        except UnicodeDecodeError:
0a07cd
+            self.fail("Non-serializable random seed returned")
0a07cd
+
0a07cd
+        self.assertEqual(deserialized['seed'], result)
0a07cd
+
0a07cd
 # vi: ts=4 expandtab
0a07cd
-- 
0a07cd
1.8.3.1
0a07cd