Blob Blame History Raw
From 6e79106a09a0d142915da1fb48640575bb4bfe08 Mon Sep 17 00:00:00 2001
From: Anh Vo <anhvo@microsoft.com>
Date: Tue, 13 Apr 2021 17:39:39 -0400
Subject: [PATCH 3/7] azure: Removing ability to invoke walinuxagent (#799)

RH-Author: Eduardo Otubo <otubo@redhat.com>
RH-MergeRequest: 45: Add support for userdata on Azure from IMDS
RH-Commit: [3/7] f5e98665bf2093edeeccfcd95b47df2e44a40536
RH-Bugzilla: 2023940
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Mohamed Gamal Morsy <mmorsy@redhat.com>

Invoking walinuxagent from within cloud-init is no longer
supported/necessary
---
 cloudinit/sources/DataSourceAzure.py          | 137 ++++--------------
 doc/rtd/topics/datasources/azure.rst          |  62 ++------
 tests/unittests/test_datasource/test_azure.py |  97 -------------
 3 files changed, 35 insertions(+), 261 deletions(-)

diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index de1452ce..020b7006 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -381,53 +381,6 @@ class DataSourceAzure(sources.DataSource):
                     util.logexc(LOG, "handling set_hostname failed")
         return False
 
-    @azure_ds_telemetry_reporter
-    def get_metadata_from_agent(self):
-        temp_hostname = self.metadata.get('local-hostname')
-        agent_cmd = self.ds_cfg['agent_command']
-        LOG.debug("Getting metadata via agent.  hostname=%s cmd=%s",
-                  temp_hostname, agent_cmd)
-
-        self.bounce_network_with_azure_hostname()
-
-        try:
-            invoke_agent(agent_cmd)
-        except subp.ProcessExecutionError:
-            # claim the datasource even if the command failed
-            util.logexc(LOG, "agent command '%s' failed.",
-                        self.ds_cfg['agent_command'])
-
-        ddir = self.ds_cfg['data_dir']
-
-        fp_files = []
-        key_value = None
-        for pk in self.cfg.get('_pubkeys', []):
-            if pk.get('value', None):
-                key_value = pk['value']
-                LOG.debug("SSH authentication: using value from fabric")
-            else:
-                bname = str(pk['fingerprint'] + ".crt")
-                fp_files += [os.path.join(ddir, bname)]
-                LOG.debug("SSH authentication: "
-                          "using fingerprint from fabric")
-
-        with events.ReportEventStack(
-                name="waiting-for-ssh-public-key",
-                description="wait for agents to retrieve SSH keys",
-                parent=azure_ds_reporter):
-            # wait very long for public SSH keys to arrive
-            # https://bugs.launchpad.net/cloud-init/+bug/1717611
-            missing = util.log_time(logfunc=LOG.debug,
-                                    msg="waiting for SSH public key files",
-                                    func=util.wait_for_files,
-                                    args=(fp_files, 900))
-            if len(missing):
-                LOG.warning("Did not find files, but going on: %s", missing)
-
-        metadata = {}
-        metadata['public-keys'] = key_value or pubkeys_from_crt_files(fp_files)
-        return metadata
-
     def _get_subplatform(self):
         """Return the subplatform metadata source details."""
         if self.seed.startswith('/dev'):
@@ -1354,35 +1307,32 @@ class DataSourceAzure(sources.DataSource):
            On failure, returns False.
         """
 
-        if self.ds_cfg['agent_command'] == AGENT_START_BUILTIN:
-            self.bounce_network_with_azure_hostname()
+        self.bounce_network_with_azure_hostname()
 
-            pubkey_info = None
-            try:
-                raise KeyError(
-                    "Not using public SSH keys from IMDS"
-                )
-                # pylint:disable=unreachable
-                public_keys = self.metadata['imds']['compute']['publicKeys']
-                LOG.debug(
-                    'Successfully retrieved %s key(s) from IMDS',
-                    len(public_keys)
-                    if public_keys is not None
-                    else 0
-                )
-            except KeyError:
-                LOG.debug(
-                    'Unable to retrieve SSH keys from IMDS during '
-                    'negotiation, falling back to OVF'
-                )
-                pubkey_info = self.cfg.get('_pubkeys', None)
-
-            metadata_func = partial(get_metadata_from_fabric,
-                                    fallback_lease_file=self.
-                                    dhclient_lease_file,
-                                    pubkey_info=pubkey_info)
-        else:
-            metadata_func = self.get_metadata_from_agent
+        pubkey_info = None
+        try:
+            raise KeyError(
+                "Not using public SSH keys from IMDS"
+            )
+            # pylint:disable=unreachable
+            public_keys = self.metadata['imds']['compute']['publicKeys']
+            LOG.debug(
+                'Successfully retrieved %s key(s) from IMDS',
+                len(public_keys)
+                if public_keys is not None
+                else 0
+            )
+        except KeyError:
+            LOG.debug(
+                'Unable to retrieve SSH keys from IMDS during '
+                'negotiation, falling back to OVF'
+            )
+            pubkey_info = self.cfg.get('_pubkeys', None)
+
+        metadata_func = partial(get_metadata_from_fabric,
+                                fallback_lease_file=self.
+                                dhclient_lease_file,
+                                pubkey_info=pubkey_info)
 
         LOG.debug("negotiating with fabric via agent command %s",
                   self.ds_cfg['agent_command'])
@@ -1617,33 +1567,6 @@ def perform_hostname_bounce(hostname, cfg, prev_hostname):
     return True
 
 
-@azure_ds_telemetry_reporter
-def crtfile_to_pubkey(fname, data=None):
-    pipeline = ('openssl x509 -noout -pubkey < "$0" |'
-                'ssh-keygen -i -m PKCS8 -f /dev/stdin')
-    (out, _err) = subp.subp(['sh', '-c', pipeline, fname],
-                            capture=True, data=data)
-    return out.rstrip()
-
-
-@azure_ds_telemetry_reporter
-def pubkeys_from_crt_files(flist):
-    pubkeys = []
-    errors = []
-    for fname in flist:
-        try:
-            pubkeys.append(crtfile_to_pubkey(fname))
-        except subp.ProcessExecutionError:
-            errors.append(fname)
-
-    if errors:
-        report_diagnostic_event(
-            "failed to convert the crt files to pubkey: %s" % errors,
-            logger_func=LOG.warning)
-
-    return pubkeys
-
-
 @azure_ds_telemetry_reporter
 def write_files(datadir, files, dirmode=None):
 
@@ -1672,16 +1595,6 @@ def write_files(datadir, files, dirmode=None):
         util.write_file(filename=fname, content=content, mode=0o600)
 
 
-@azure_ds_telemetry_reporter
-def invoke_agent(cmd):
-    # this is a function itself to simplify patching it for test
-    if cmd:
-        LOG.debug("invoking agent: %s", cmd)
-        subp.subp(cmd, shell=(not isinstance(cmd, list)))
-    else:
-        LOG.debug("not invoking agent")
-
-
 def find_child(node, filter_func):
     ret = []
     if not node.hasChildNodes():
diff --git a/doc/rtd/topics/datasources/azure.rst b/doc/rtd/topics/datasources/azure.rst
index e04c3a33..ad9f2236 100644
--- a/doc/rtd/topics/datasources/azure.rst
+++ b/doc/rtd/topics/datasources/azure.rst
@@ -5,28 +5,6 @@ Azure
 
 This datasource finds metadata and user-data from the Azure cloud platform.
 
-walinuxagent
-------------
-walinuxagent has several functions within images.  For cloud-init
-specifically, the relevant functionality it performs is to register the
-instance with the Azure cloud platform at boot so networking will be
-permitted.  For more information about the other functionality of
-walinuxagent, see `Azure's documentation
-<https://github.com/Azure/WALinuxAgent#introduction>`_ for more details.
-(Note, however, that only one of walinuxagent's provisioning and cloud-init
-should be used to perform instance customisation.)
-
-If you are configuring walinuxagent yourself, you will want to ensure that you
-have `Provisioning.UseCloudInit
-<https://github.com/Azure/WALinuxAgent#provisioningusecloudinit>`_ set to
-``y``.
-
-
-Builtin Agent
--------------
-An alternative to using walinuxagent to register to the Azure cloud platform
-is to use the ``__builtin__`` agent command.  This section contains more
-background on what that code path does, and how to enable it.
 
 The Azure cloud platform provides initial data to an instance via an attached
 CD formatted in UDF.  That CD contains a 'ovf-env.xml' file that provides some
@@ -41,16 +19,6 @@ by calling a script in /etc/dhcp/dhclient-exit-hooks or a file in
 'dhclient_hook' of cloud-init itself. This sub-command will write the client
 information in json format to /run/cloud-init/dhclient.hook/<interface>.json.
 
-In order for cloud-init to leverage this method to find the endpoint, the
-cloud.cfg file must contain:
-
-.. sourcecode:: yaml
-
-  datasource:
-    Azure:
-      set_hostname: False
-      agent_command: __builtin__
-
 If those files are not available, the fallback is to check the leases file
 for the endpoint server (again option 245).
 
@@ -83,9 +51,6 @@ configuration (in ``/etc/cloud/cloud.cfg`` or ``/etc/cloud/cloud.cfg.d/``).
 
 The settings that may be configured are:
 
- * **agent_command**: Either __builtin__ (default) or a command to run to getcw
-   metadata. If __builtin__, get metadata from walinuxagent. Otherwise run the
-   provided command to obtain metadata.
  * **apply_network_config**: Boolean set to True to use network configuration
    described by Azure's IMDS endpoint instead of fallback network config of
    dhcp on eth0. Default is True. For Ubuntu 16.04 or earlier, default is
@@ -121,7 +86,6 @@ An example configuration with the default values is provided below:
 
   datasource:
     Azure:
-      agent_command: __builtin__
       apply_network_config: true
       data_dir: /var/lib/waagent
       dhclient_lease_file: /var/lib/dhcp/dhclient.eth0.leases
@@ -144,9 +108,7 @@ child of the ``LinuxProvisioningConfigurationSet`` (a sibling to ``UserName``)
 If both ``UserData`` and ``CustomData`` are provided behavior is undefined on
 which will be selected.
 
-In the example below, user-data provided is 'this is my userdata', and the
-datasource config provided is ``{"agent_command": ["start", "walinuxagent"]}``.
-That agent command will take affect as if it were specified in system config.
+In the example below, user-data provided is 'this is my userdata'
 
 Example:
 
@@ -184,20 +146,16 @@ The hostname is provided to the instance in the ovf-env.xml file as
 Whatever value the instance provides in its dhcp request will resolve in the
 domain returned in the 'search' request.
 
-The interesting issue is that a generic image will already have a hostname
-configured.  The ubuntu cloud images have 'ubuntu' as the hostname of the
-system, and the initial dhcp request on eth0 is not guaranteed to occur after
-the datasource code has been run.  So, on first boot, that initial value will
-be sent in the dhcp request and *that* value will resolve.
-
-In order to make the ``HostName`` provided in the ovf-env.xml resolve, a
-dhcp request must be made with the new value.  Walinuxagent (in its current
-version) handles this by polling the state of hostname and bouncing ('``ifdown
-eth0; ifup eth0``' the network interface if it sees that a change has been
-made.
+A generic image will already have a hostname configured.  The ubuntu
+cloud images have 'ubuntu' as the hostname of the system, and the
+initial dhcp request on eth0 is not guaranteed to occur after the
+datasource code has been run.  So, on first boot, that initial value
+will be sent in the dhcp request and *that* value will resolve.
 
-cloud-init handles this by setting the hostname in the DataSource's 'get_data'
-method via '``hostname $HostName``', and then bouncing the interface.  This
+In order to make the ``HostName`` provided in the ovf-env.xml resolve,
+a dhcp request must be made with the new value. cloud-init handles
+this by setting the hostname in the DataSource's 'get_data' method via
+'``hostname $HostName``', and then bouncing the interface.  This
 behavior can be configured or disabled in the datasource config.  See
 'Configuration' above.
 
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
index dedebeb1..320fa857 100644
--- a/tests/unittests/test_datasource/test_azure.py
+++ b/tests/unittests/test_datasource/test_azure.py
@@ -638,17 +638,10 @@ scbus-1 on xpt0 bus 0
         def dsdevs():
             return data.get('dsdevs', [])
 
-        def _invoke_agent(cmd):
-            data['agent_invoked'] = cmd
-
         def _wait_for_files(flist, _maxwait=None, _naplen=None):
             data['waited'] = flist
             return []
 
-        def _pubkeys_from_crt_files(flist):
-            data['pubkey_files'] = flist
-            return ["pubkey_from: %s" % f for f in flist]
-
         if data.get('ovfcontent') is not None:
             populate_dir(os.path.join(self.paths.seed_dir, "azure"),
                          {'ovf-env.xml': data['ovfcontent']})
@@ -675,8 +668,6 @@ scbus-1 on xpt0 bus 0
 
         self.apply_patches([
             (dsaz, 'list_possible_azure_ds_devs', dsdevs),
-            (dsaz, 'invoke_agent', _invoke_agent),
-            (dsaz, 'pubkeys_from_crt_files', _pubkeys_from_crt_files),
             (dsaz, 'perform_hostname_bounce', mock.MagicMock()),
             (dsaz, 'get_hostname', mock.MagicMock()),
             (dsaz, 'set_hostname', mock.MagicMock()),
@@ -765,7 +756,6 @@ scbus-1 on xpt0 bus 0
             ret = dsrc.get_data()
             self.m_is_platform_viable.assert_called_with(dsrc.seed_dir)
             self.assertFalse(ret)
-            self.assertNotIn('agent_invoked', data)
             # Assert that for non viable platforms,
             # there is no communication with the Azure datasource.
             self.assertEqual(
@@ -789,7 +779,6 @@ scbus-1 on xpt0 bus 0
             ret = dsrc.get_data()
             self.m_is_platform_viable.assert_called_with(dsrc.seed_dir)
             self.assertFalse(ret)
-            self.assertNotIn('agent_invoked', data)
             self.assertEqual(
                 1,
                 m_report_failure.call_count)
@@ -806,7 +795,6 @@ scbus-1 on xpt0 bus 0
                 1,
                 m_crawl_metadata.call_count)
             self.assertFalse(ret)
-            self.assertNotIn('agent_invoked', data)
 
     def test_crawl_metadata_exception_should_report_failure_with_msg(self):
         data = {}
@@ -1086,21 +1074,6 @@ scbus-1 on xpt0 bus 0
         self.assertTrue(os.path.isdir(self.waagent_d))
         self.assertEqual(stat.S_IMODE(os.stat(self.waagent_d).st_mode), 0o700)
 
-    def test_user_cfg_set_agent_command_plain(self):
-        # set dscfg in via plaintext
-        # we must have friendly-to-xml formatted plaintext in yaml_cfg
-        # not all plaintext is expected to work.
-        yaml_cfg = "{agent_command: my_command}\n"
-        cfg = yaml.safe_load(yaml_cfg)
-        odata = {'HostName': "myhost", 'UserName': "myuser",
-                 'dscfg': {'text': yaml_cfg, 'encoding': 'plain'}}
-        data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
-
-        dsrc = self._get_ds(data)
-        ret = self._get_and_setup(dsrc)
-        self.assertTrue(ret)
-        self.assertEqual(data['agent_invoked'], cfg['agent_command'])
-
     @mock.patch('cloudinit.sources.DataSourceAzure.device_driver',
                 return_value=None)
     def test_network_config_set_from_imds(self, m_driver):
@@ -1205,29 +1178,6 @@ scbus-1 on xpt0 bus 0
         dsrc.get_data()
         self.assertEqual('eastus2', dsrc.region)
 
-    def test_user_cfg_set_agent_command(self):
-        # set dscfg in via base64 encoded yaml
-        cfg = {'agent_command': "my_command"}
-        odata = {'HostName': "myhost", 'UserName': "myuser",
-                 'dscfg': {'text': b64e(yaml.dump(cfg)),
-                           'encoding': 'base64'}}
-        data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
-
-        dsrc = self._get_ds(data)
-        ret = self._get_and_setup(dsrc)
-        self.assertTrue(ret)
-        self.assertEqual(data['agent_invoked'], cfg['agent_command'])
-
-    def test_sys_cfg_set_agent_command(self):
-        sys_cfg = {'datasource': {'Azure': {'agent_command': '_COMMAND'}}}
-        data = {'ovfcontent': construct_valid_ovf_env(data={}),
-                'sys_cfg': sys_cfg}
-
-        dsrc = self._get_ds(data)
-        ret = self._get_and_setup(dsrc)
-        self.assertTrue(ret)
-        self.assertEqual(data['agent_invoked'], '_COMMAND')
-
     def test_sys_cfg_set_never_destroy_ntfs(self):
         sys_cfg = {'datasource': {'Azure': {
             'never_destroy_ntfs': 'user-supplied-value'}}}
@@ -1311,51 +1261,6 @@ scbus-1 on xpt0 bus 0
         self.assertTrue(ret)
         self.assertEqual(dsrc.userdata_raw, mydata.encode('utf-8'))
 
-    def test_cfg_has_pubkeys_fingerprint(self):
-        odata = {'HostName': "myhost", 'UserName': "myuser"}
-        mypklist = [{'fingerprint': 'fp1', 'path': 'path1', 'value': ''}]
-        pubkeys = [(x['fingerprint'], x['path'], x['value']) for x in mypklist]
-        data = {'ovfcontent': construct_valid_ovf_env(data=odata,
-                                                      pubkeys=pubkeys)}
-
-        dsrc = self._get_ds(data, agent_command=['not', '__builtin__'])
-        ret = self._get_and_setup(dsrc)
-        self.assertTrue(ret)
-        for mypk in mypklist:
-            self.assertIn(mypk, dsrc.cfg['_pubkeys'])
-            self.assertIn('pubkey_from', dsrc.metadata['public-keys'][-1])
-
-    def test_cfg_has_pubkeys_value(self):
-        # make sure that provided key is used over fingerprint
-        odata = {'HostName': "myhost", 'UserName': "myuser"}
-        mypklist = [{'fingerprint': 'fp1', 'path': 'path1', 'value': 'value1'}]
-        pubkeys = [(x['fingerprint'], x['path'], x['value']) for x in mypklist]
-        data = {'ovfcontent': construct_valid_ovf_env(data=odata,
-                                                      pubkeys=pubkeys)}
-
-        dsrc = self._get_ds(data, agent_command=['not', '__builtin__'])
-        ret = self._get_and_setup(dsrc)
-        self.assertTrue(ret)
-
-        for mypk in mypklist:
-            self.assertIn(mypk, dsrc.cfg['_pubkeys'])
-            self.assertIn(mypk['value'], dsrc.metadata['public-keys'])
-
-    def test_cfg_has_no_fingerprint_has_value(self):
-        # test value is used when fingerprint not provided
-        odata = {'HostName': "myhost", 'UserName': "myuser"}
-        mypklist = [{'fingerprint': None, 'path': 'path1', 'value': 'value1'}]
-        pubkeys = [(x['fingerprint'], x['path'], x['value']) for x in mypklist]
-        data = {'ovfcontent': construct_valid_ovf_env(data=odata,
-                                                      pubkeys=pubkeys)}
-
-        dsrc = self._get_ds(data, agent_command=['not', '__builtin__'])
-        ret = self._get_and_setup(dsrc)
-        self.assertTrue(ret)
-
-        for mypk in mypklist:
-            self.assertIn(mypk['value'], dsrc.metadata['public-keys'])
-
     def test_default_ephemeral_configs_ephemeral_exists(self):
         # make sure the ephemeral configs are correct if disk present
         odata = {}
@@ -1919,8 +1824,6 @@ class TestAzureBounce(CiTestCase):
     with_logs = True
 
     def mock_out_azure_moving_parts(self):
-        self.patches.enter_context(
-            mock.patch.object(dsaz, 'invoke_agent'))
         self.patches.enter_context(
             mock.patch.object(dsaz.util, 'wait_for_files'))
         self.patches.enter_context(
-- 
2.27.0