sailesh1993 / rpms / cloud-init

Forked from rpms/cloud-init a year ago
Clone
c1c26e
From 2e070086275341dfceb6d5b1e12f06f22e7bbfcd Mon Sep 17 00:00:00 2001
c1c26e
From: Eduardo Otubo <otubo@redhat.com>
c1c26e
Date: Wed, 23 Jan 2019 12:30:21 +0100
c1c26e
Subject: net: Wait for dhclient to daemonize before reading lease file
c1c26e
c1c26e
RH-Author: Eduardo Otubo <otubo@redhat.com>
c1c26e
Message-id: <20190123123021.32708-1-otubo@redhat.com>
c1c26e
Patchwork-id: 84095
c1c26e
O-Subject: [RHEL-7.7 cloud-init PATCH] net: Wait for dhclient to daemonize before reading lease file
c1c26e
Bugzilla: 1632967
c1c26e
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
c1c26e
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
c1c26e
c1c26e
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1632967
c1c26e
Brew: https://bugzilla.redhat.com/show_bug.cgi?id=1632967
c1c26e
Tested: Me and upstream
c1c26e
c1c26e
commit fdadcb5fae51f4e6799314ab98e3aec56c79b17c
c1c26e
Author: Jason Zions <jasonzio@microsoft.com>
c1c26e
Date:   Tue Jan 15 21:37:17 2019 +0000
c1c26e
c1c26e
    net: Wait for dhclient to daemonize before reading lease file
c1c26e
c1c26e
    cloud-init uses dhclient to fetch the DHCP lease so it can extract
c1c26e
    DHCP options.  dhclient creates the leasefile, then writes to it;
c1c26e
    simply waiting for the leasefile to appear creates a race between
c1c26e
    dhclient and cloud-init. Instead, wait for dhclient to be parented by
c1c26e
    init. At that point, we know it has written to the leasefile, so it's
c1c26e
    safe to copy the file and kill the process.
c1c26e
c1c26e
    cloud-init creates a temporary directory in which to execute dhclient,
c1c26e
    and deletes that directory after it has killed the process. If
c1c26e
    cloud-init abandons waiting for dhclient to daemonize, it will still
c1c26e
    attempt to delete the temporary directory, but will not report an
c1c26e
    exception should that attempt fail.
c1c26e
c1c26e
    LP: #1794399
c1c26e
c1c26e
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
c1c26e
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
c1c26e
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
c1c26e
---
c1c26e
 cloudinit/net/dhcp.py              | 44 +++++++++++++++++++++---------
c1c26e
 cloudinit/net/tests/test_dhcp.py   | 15 ++++++++--
c1c26e
 cloudinit/temp_utils.py            |  4 +--
c1c26e
 cloudinit/tests/test_temp_utils.py | 18 +++++++++++-
c1c26e
 cloudinit/util.py                  | 16 ++++++++++-
c1c26e
 tests/unittests/test_util.py       |  6 ++++
c1c26e
 6 files changed, 83 insertions(+), 20 deletions(-)
c1c26e
c1c26e
diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py
c1c26e
index 0db991db..c98a97cd 100644
c1c26e
--- a/cloudinit/net/dhcp.py
c1c26e
+++ b/cloudinit/net/dhcp.py
c1c26e
@@ -9,6 +9,7 @@ import logging
c1c26e
 import os
c1c26e
 import re
c1c26e
 import signal
c1c26e
+import time
c1c26e
 
c1c26e
 from cloudinit.net import (
c1c26e
     EphemeralIPv4Network, find_fallback_nic, get_devicelist,
c1c26e
@@ -127,7 +128,9 @@ def maybe_perform_dhcp_discovery(nic=None):
c1c26e
     if not dhclient_path:
c1c26e
         LOG.debug('Skip dhclient configuration: No dhclient command found.')
c1c26e
         return []
c1c26e
-    with temp_utils.tempdir(prefix='cloud-init-dhcp-', needs_exe=True) as tdir:
c1c26e
+    with temp_utils.tempdir(rmtree_ignore_errors=True,
c1c26e
+                            prefix='cloud-init-dhcp-',
c1c26e
+                            needs_exe=True) as tdir:
c1c26e
         # Use /var/tmp because /run/cloud-init/tmp is mounted noexec
c1c26e
         return dhcp_discovery(dhclient_path, nic, tdir)
c1c26e
 
c1c26e
@@ -195,24 +198,39 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir):
c1c26e
            '-pf', pid_file, interface, '-sf', '/bin/true']
c1c26e
     util.subp(cmd, capture=True)
c1c26e
 
c1c26e
-    # dhclient doesn't write a pid file until after it forks when it gets a
c1c26e
-    # proper lease response. Since cleandir is a temp directory that gets
c1c26e
-    # removed, we need to wait for that pidfile creation before the
c1c26e
-    # cleandir is removed, otherwise we get FileNotFound errors.
c1c26e
+    # Wait for pid file and lease file to appear, and for the process
c1c26e
+    # named by the pid file to daemonize (have pid 1 as its parent). If we
c1c26e
+    # try to read the lease file before daemonization happens, we might try
c1c26e
+    # to read it before the dhclient has actually written it. We also have
c1c26e
+    # to wait until the dhclient has become a daemon so we can be sure to
c1c26e
+    # kill the correct process, thus freeing cleandir to be deleted back
c1c26e
+    # up the callstack.
c1c26e
     missing = util.wait_for_files(
c1c26e
         [pid_file, lease_file], maxwait=5, naplen=0.01)
c1c26e
     if missing:
c1c26e
         LOG.warning("dhclient did not produce expected files: %s",
c1c26e
                     ', '.join(os.path.basename(f) for f in missing))
c1c26e
         return []
c1c26e
-    pid_content = util.load_file(pid_file).strip()
c1c26e
-    try:
c1c26e
-        pid = int(pid_content)
c1c26e
-    except ValueError:
c1c26e
-        LOG.debug(
c1c26e
-            "pid file contains non-integer content '%s'", pid_content)
c1c26e
-    else:
c1c26e
-        os.kill(pid, signal.SIGKILL)
c1c26e
+
c1c26e
+    ppid = 'unknown'
c1c26e
+    for _ in range(0, 1000):
c1c26e
+        pid_content = util.load_file(pid_file).strip()
c1c26e
+        try:
c1c26e
+            pid = int(pid_content)
c1c26e
+        except ValueError:
c1c26e
+            pass
c1c26e
+        else:
c1c26e
+            ppid = util.get_proc_ppid(pid)
c1c26e
+            if ppid == 1:
c1c26e
+                LOG.debug('killing dhclient with pid=%s', pid)
c1c26e
+                os.kill(pid, signal.SIGKILL)
c1c26e
+                return parse_dhcp_lease_file(lease_file)
c1c26e
+        time.sleep(0.01)
c1c26e
+
c1c26e
+    LOG.error(
c1c26e
+        'dhclient(pid=%s, parentpid=%s) failed to daemonize after %s seconds',
c1c26e
+        pid_content, ppid, 0.01 * 1000
c1c26e
+    )
c1c26e
     return parse_dhcp_lease_file(lease_file)
c1c26e
 
c1c26e
 
c1c26e
diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py
c1c26e
index cd3e7328..79e8842f 100644
c1c26e
--- a/cloudinit/net/tests/test_dhcp.py
c1c26e
+++ b/cloudinit/net/tests/test_dhcp.py
c1c26e
@@ -145,16 +145,20 @@ class TestDHCPDiscoveryClean(CiTestCase):
c1c26e
               'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}],
c1c26e
             dhcp_discovery(dhclient_script, 'eth9', tmpdir))
c1c26e
         self.assertIn(
c1c26e
-            "pid file contains non-integer content ''", self.logs.getvalue())
c1c26e
+            "dhclient(pid=, parentpid=unknown) failed "
c1c26e
+            "to daemonize after 10.0 seconds",
c1c26e
+            self.logs.getvalue())
c1c26e
         m_kill.assert_not_called()
c1c26e
 
c1c26e
+    @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid')
c1c26e
     @mock.patch('cloudinit.net.dhcp.os.kill')
c1c26e
     @mock.patch('cloudinit.net.dhcp.util.wait_for_files')
c1c26e
     @mock.patch('cloudinit.net.dhcp.util.subp')
c1c26e
     def test_dhcp_discovery_run_in_sandbox_waits_on_lease_and_pid(self,
c1c26e
                                                                   m_subp,
c1c26e
                                                                   m_wait,
c1c26e
-                                                                  m_kill):
c1c26e
+                                                                  m_kill,
c1c26e
+                                                                  m_getppid):
c1c26e
         """dhcp_discovery waits for the presence of pidfile and dhcp.leases."""
c1c26e
         tmpdir = self.tmp_dir()
c1c26e
         dhclient_script = os.path.join(tmpdir, 'dhclient.orig')
c1c26e
@@ -164,6 +168,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
c1c26e
         pidfile = self.tmp_path('dhclient.pid', tmpdir)
c1c26e
         leasefile = self.tmp_path('dhcp.leases', tmpdir)
c1c26e
         m_wait.return_value = [pidfile]  # Return the missing pidfile wait for
c1c26e
+        m_getppid.return_value = 1  # Indicate that dhclient has daemonized
c1c26e
         self.assertEqual([], dhcp_discovery(dhclient_script, 'eth9', tmpdir))
c1c26e
         self.assertEqual(
c1c26e
             mock.call([pidfile, leasefile], maxwait=5, naplen=0.01),
c1c26e
@@ -173,9 +178,10 @@ class TestDHCPDiscoveryClean(CiTestCase):
c1c26e
             self.logs.getvalue())
c1c26e
         m_kill.assert_not_called()
c1c26e
 
c1c26e
+    @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid')
c1c26e
     @mock.patch('cloudinit.net.dhcp.os.kill')
c1c26e
     @mock.patch('cloudinit.net.dhcp.util.subp')
c1c26e
-    def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill):
c1c26e
+    def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill, m_getppid):
c1c26e
         """dhcp_discovery brings up the interface and runs dhclient.
c1c26e
 
c1c26e
         It also returns the parsed dhcp.leases file generated in the sandbox.
c1c26e
@@ -197,6 +203,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
c1c26e
         pid_file = os.path.join(tmpdir, 'dhclient.pid')
c1c26e
         my_pid = 1
c1c26e
         write_file(pid_file, "%d\n" % my_pid)
c1c26e
+        m_getppid.return_value = 1  # Indicate that dhclient has daemonized
c1c26e
 
c1c26e
         self.assertItemsEqual(
c1c26e
             [{'interface': 'eth9', 'fixed-address': '192.168.2.74',
c1c26e
@@ -355,3 +362,5 @@ class TestEphemeralDhcpNoNetworkSetup(HttprettyTestCase):
c1c26e
             self.assertEqual(fake_lease, lease)
c1c26e
         # Ensure that dhcp discovery occurs
c1c26e
         m_dhcp.called_once_with()
c1c26e
+
c1c26e
+# vi: ts=4 expandtab
c1c26e
diff --git a/cloudinit/temp_utils.py b/cloudinit/temp_utils.py
c1c26e
index c98a1b53..346276ec 100644
c1c26e
--- a/cloudinit/temp_utils.py
c1c26e
+++ b/cloudinit/temp_utils.py
c1c26e
@@ -81,7 +81,7 @@ def ExtendedTemporaryFile(**kwargs):
c1c26e
 
c1c26e
 
c1c26e
 @contextlib.contextmanager
c1c26e
-def tempdir(**kwargs):
c1c26e
+def tempdir(rmtree_ignore_errors=False, **kwargs):
c1c26e
     # This seems like it was only added in python 3.2
c1c26e
     # Make it since its useful...
c1c26e
     # See: http://bugs.python.org/file12970/tempdir.patch
c1c26e
@@ -89,7 +89,7 @@ def tempdir(**kwargs):
c1c26e
     try:
c1c26e
         yield tdir
c1c26e
     finally:
c1c26e
-        shutil.rmtree(tdir)
c1c26e
+        shutil.rmtree(tdir, ignore_errors=rmtree_ignore_errors)
c1c26e
 
c1c26e
 
c1c26e
 def mkdtemp(**kwargs):
c1c26e
diff --git a/cloudinit/tests/test_temp_utils.py b/cloudinit/tests/test_temp_utils.py
c1c26e
index ffbb92cd..4a52ef89 100644
c1c26e
--- a/cloudinit/tests/test_temp_utils.py
c1c26e
+++ b/cloudinit/tests/test_temp_utils.py
c1c26e
@@ -2,8 +2,9 @@
c1c26e
 
c1c26e
 """Tests for cloudinit.temp_utils"""
c1c26e
 
c1c26e
-from cloudinit.temp_utils import mkdtemp, mkstemp
c1c26e
+from cloudinit.temp_utils import mkdtemp, mkstemp, tempdir
c1c26e
 from cloudinit.tests.helpers import CiTestCase, wrap_and_call
c1c26e
+import os
c1c26e
 
c1c26e
 
c1c26e
 class TestTempUtils(CiTestCase):
c1c26e
@@ -98,4 +99,19 @@ class TestTempUtils(CiTestCase):
c1c26e
         self.assertEqual('/fake/return/path', retval)
c1c26e
         self.assertEqual([{'dir': '/run/cloud-init/tmp'}], calls)
c1c26e
 
c1c26e
+    def test_tempdir_error_suppression(self):
c1c26e
+        """test tempdir suppresses errors during directory removal."""
c1c26e
+
c1c26e
+        with self.assertRaises(OSError):
c1c26e
+            with tempdir(prefix='cloud-init-dhcp-') as tdir:
c1c26e
+                os.rmdir(tdir)
c1c26e
+                # As a result, the directory is already gone,
c1c26e
+                # so shutil.rmtree should raise OSError
c1c26e
+
c1c26e
+        with tempdir(rmtree_ignore_errors=True,
c1c26e
+                     prefix='cloud-init-dhcp-') as tdir:
c1c26e
+            os.rmdir(tdir)
c1c26e
+            # Since the directory is already gone, shutil.rmtree would raise
c1c26e
+            # OSError, but we suppress that
c1c26e
+
c1c26e
 # vi: ts=4 expandtab
c1c26e
diff --git a/cloudinit/util.py b/cloudinit/util.py
c1c26e
index 7800f7bc..a84112a9 100644
c1c26e
--- a/cloudinit/util.py
c1c26e
+++ b/cloudinit/util.py
c1c26e
@@ -2861,7 +2861,6 @@ def mount_is_read_write(mount_point):
c1c26e
     mount_opts = result[-1].split(',')
c1c26e
     return mount_opts[0] == 'rw'
c1c26e
 
c1c26e
-
c1c26e
 def udevadm_settle(exists=None, timeout=None):
c1c26e
     """Invoke udevadm settle with optional exists and timeout parameters"""
c1c26e
     settle_cmd = ["udevadm", "settle"]
c1c26e
@@ -2875,5 +2874,20 @@ def udevadm_settle(exists=None, timeout=None):
c1c26e
 
c1c26e
     return subp(settle_cmd)
c1c26e
 
c1c26e
+def get_proc_ppid(pid):
c1c26e
+    """
c1c26e
+    Return the parent pid of a process.
c1c26e
+    """
c1c26e
+    ppid = 0
c1c26e
+    try:
c1c26e
+        contents = load_file("/proc/%s/stat" % pid, quiet=True)
c1c26e
+    except IOError as e:
c1c26e
+        LOG.warning('Failed to load /proc/%s/stat. %s', pid, e)
c1c26e
+    if contents:
c1c26e
+        parts = contents.split(" ", 4)
c1c26e
+        # man proc says
c1c26e
+        #  ppid %d     (4) The PID of the parent.
c1c26e
+        ppid = int(parts[3])
c1c26e
+    return ppid
c1c26e
 
c1c26e
 # vi: ts=4 expandtab
c1c26e
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
c1c26e
index 5a14479a..8aebcd62 100644
c1c26e
--- a/tests/unittests/test_util.py
c1c26e
+++ b/tests/unittests/test_util.py
c1c26e
@@ -1114,6 +1114,12 @@ class TestLoadShellContent(helpers.TestCase):
c1c26e
                 'key3="val3 #tricky"',
c1c26e
                 ''])))
c1c26e
 
c1c26e
+    def test_get_proc_ppid(self):
c1c26e
+        """get_proc_ppid returns correct parent pid value."""
c1c26e
+        my_pid = os.getpid()
c1c26e
+        my_ppid = os.getppid()
c1c26e
+        self.assertEqual(my_ppid, util.get_proc_ppid(my_pid))
c1c26e
+
c1c26e
 
c1c26e
 class TestGetProcEnv(helpers.TestCase):
c1c26e
     """test get_proc_env."""
c1c26e
-- 
c1c26e
2.20.1
c1c26e