sailesh1993 / rpms / cloud-init

Forked from rpms/cloud-init 10 months ago
Clone
0a07cd
From 2e070086275341dfceb6d5b1e12f06f22e7bbfcd Mon Sep 17 00:00:00 2001
0a07cd
From: Eduardo Otubo <otubo@redhat.com>
0a07cd
Date: Wed, 23 Jan 2019 12:30:21 +0100
0a07cd
Subject: net: Wait for dhclient to daemonize before reading lease file
0a07cd
0a07cd
RH-Author: Eduardo Otubo <otubo@redhat.com>
0a07cd
Message-id: <20190123123021.32708-1-otubo@redhat.com>
0a07cd
Patchwork-id: 84095
0a07cd
O-Subject: [RHEL-7.7 cloud-init PATCH] net: Wait for dhclient to daemonize before reading lease file
0a07cd
Bugzilla: 1632967
0a07cd
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
0a07cd
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
0a07cd
0a07cd
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1632967
0a07cd
Brew: https://bugzilla.redhat.com/show_bug.cgi?id=1632967
0a07cd
Tested: Me and upstream
0a07cd
0a07cd
commit fdadcb5fae51f4e6799314ab98e3aec56c79b17c
0a07cd
Author: Jason Zions <jasonzio@microsoft.com>
0a07cd
Date:   Tue Jan 15 21:37:17 2019 +0000
0a07cd
0a07cd
    net: Wait for dhclient to daemonize before reading lease file
0a07cd
0a07cd
    cloud-init uses dhclient to fetch the DHCP lease so it can extract
0a07cd
    DHCP options.  dhclient creates the leasefile, then writes to it;
0a07cd
    simply waiting for the leasefile to appear creates a race between
0a07cd
    dhclient and cloud-init. Instead, wait for dhclient to be parented by
0a07cd
    init. At that point, we know it has written to the leasefile, so it's
0a07cd
    safe to copy the file and kill the process.
0a07cd
0a07cd
    cloud-init creates a temporary directory in which to execute dhclient,
0a07cd
    and deletes that directory after it has killed the process. If
0a07cd
    cloud-init abandons waiting for dhclient to daemonize, it will still
0a07cd
    attempt to delete the temporary directory, but will not report an
0a07cd
    exception should that attempt fail.
0a07cd
0a07cd
    LP: #1794399
0a07cd
0a07cd
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
0a07cd
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
0a07cd
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
0a07cd
---
0a07cd
 cloudinit/net/dhcp.py              | 44 +++++++++++++++++++++---------
0a07cd
 cloudinit/net/tests/test_dhcp.py   | 15 ++++++++--
0a07cd
 cloudinit/temp_utils.py            |  4 +--
0a07cd
 cloudinit/tests/test_temp_utils.py | 18 +++++++++++-
0a07cd
 cloudinit/util.py                  | 16 ++++++++++-
0a07cd
 tests/unittests/test_util.py       |  6 ++++
0a07cd
 6 files changed, 83 insertions(+), 20 deletions(-)
0a07cd
0a07cd
diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py
0a07cd
index 0db991db..c98a97cd 100644
0a07cd
--- a/cloudinit/net/dhcp.py
0a07cd
+++ b/cloudinit/net/dhcp.py
0a07cd
@@ -9,6 +9,7 @@ import logging
0a07cd
 import os
0a07cd
 import re
0a07cd
 import signal
0a07cd
+import time
0a07cd
 
0a07cd
 from cloudinit.net import (
0a07cd
     EphemeralIPv4Network, find_fallback_nic, get_devicelist,
0a07cd
@@ -127,7 +128,9 @@ def maybe_perform_dhcp_discovery(nic=None):
0a07cd
     if not dhclient_path:
0a07cd
         LOG.debug('Skip dhclient configuration: No dhclient command found.')
0a07cd
         return []
0a07cd
-    with temp_utils.tempdir(prefix='cloud-init-dhcp-', needs_exe=True) as tdir:
0a07cd
+    with temp_utils.tempdir(rmtree_ignore_errors=True,
0a07cd
+                            prefix='cloud-init-dhcp-',
0a07cd
+                            needs_exe=True) as tdir:
0a07cd
         # Use /var/tmp because /run/cloud-init/tmp is mounted noexec
0a07cd
         return dhcp_discovery(dhclient_path, nic, tdir)
0a07cd
 
0a07cd
@@ -195,24 +198,39 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir):
0a07cd
            '-pf', pid_file, interface, '-sf', '/bin/true']
0a07cd
     util.subp(cmd, capture=True)
0a07cd
 
0a07cd
-    # dhclient doesn't write a pid file until after it forks when it gets a
0a07cd
-    # proper lease response. Since cleandir is a temp directory that gets
0a07cd
-    # removed, we need to wait for that pidfile creation before the
0a07cd
-    # cleandir is removed, otherwise we get FileNotFound errors.
0a07cd
+    # Wait for pid file and lease file to appear, and for the process
0a07cd
+    # named by the pid file to daemonize (have pid 1 as its parent). If we
0a07cd
+    # try to read the lease file before daemonization happens, we might try
0a07cd
+    # to read it before the dhclient has actually written it. We also have
0a07cd
+    # to wait until the dhclient has become a daemon so we can be sure to
0a07cd
+    # kill the correct process, thus freeing cleandir to be deleted back
0a07cd
+    # up the callstack.
0a07cd
     missing = util.wait_for_files(
0a07cd
         [pid_file, lease_file], maxwait=5, naplen=0.01)
0a07cd
     if missing:
0a07cd
         LOG.warning("dhclient did not produce expected files: %s",
0a07cd
                     ', '.join(os.path.basename(f) for f in missing))
0a07cd
         return []
0a07cd
-    pid_content = util.load_file(pid_file).strip()
0a07cd
-    try:
0a07cd
-        pid = int(pid_content)
0a07cd
-    except ValueError:
0a07cd
-        LOG.debug(
0a07cd
-            "pid file contains non-integer content '%s'", pid_content)
0a07cd
-    else:
0a07cd
-        os.kill(pid, signal.SIGKILL)
0a07cd
+
0a07cd
+    ppid = 'unknown'
0a07cd
+    for _ in range(0, 1000):
0a07cd
+        pid_content = util.load_file(pid_file).strip()
0a07cd
+        try:
0a07cd
+            pid = int(pid_content)
0a07cd
+        except ValueError:
0a07cd
+            pass
0a07cd
+        else:
0a07cd
+            ppid = util.get_proc_ppid(pid)
0a07cd
+            if ppid == 1:
0a07cd
+                LOG.debug('killing dhclient with pid=%s', pid)
0a07cd
+                os.kill(pid, signal.SIGKILL)
0a07cd
+                return parse_dhcp_lease_file(lease_file)
0a07cd
+        time.sleep(0.01)
0a07cd
+
0a07cd
+    LOG.error(
0a07cd
+        'dhclient(pid=%s, parentpid=%s) failed to daemonize after %s seconds',
0a07cd
+        pid_content, ppid, 0.01 * 1000
0a07cd
+    )
0a07cd
     return parse_dhcp_lease_file(lease_file)
0a07cd
 
0a07cd
 
0a07cd
diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py
0a07cd
index cd3e7328..79e8842f 100644
0a07cd
--- a/cloudinit/net/tests/test_dhcp.py
0a07cd
+++ b/cloudinit/net/tests/test_dhcp.py
0a07cd
@@ -145,16 +145,20 @@ class TestDHCPDiscoveryClean(CiTestCase):
0a07cd
               'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}],
0a07cd
             dhcp_discovery(dhclient_script, 'eth9', tmpdir))
0a07cd
         self.assertIn(
0a07cd
-            "pid file contains non-integer content ''", self.logs.getvalue())
0a07cd
+            "dhclient(pid=, parentpid=unknown) failed "
0a07cd
+            "to daemonize after 10.0 seconds",
0a07cd
+            self.logs.getvalue())
0a07cd
         m_kill.assert_not_called()
0a07cd
 
0a07cd
+    @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid')
0a07cd
     @mock.patch('cloudinit.net.dhcp.os.kill')
0a07cd
     @mock.patch('cloudinit.net.dhcp.util.wait_for_files')
0a07cd
     @mock.patch('cloudinit.net.dhcp.util.subp')
0a07cd
     def test_dhcp_discovery_run_in_sandbox_waits_on_lease_and_pid(self,
0a07cd
                                                                   m_subp,
0a07cd
                                                                   m_wait,
0a07cd
-                                                                  m_kill):
0a07cd
+                                                                  m_kill,
0a07cd
+                                                                  m_getppid):
0a07cd
         """dhcp_discovery waits for the presence of pidfile and dhcp.leases."""
0a07cd
         tmpdir = self.tmp_dir()
0a07cd
         dhclient_script = os.path.join(tmpdir, 'dhclient.orig')
0a07cd
@@ -164,6 +168,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
0a07cd
         pidfile = self.tmp_path('dhclient.pid', tmpdir)
0a07cd
         leasefile = self.tmp_path('dhcp.leases', tmpdir)
0a07cd
         m_wait.return_value = [pidfile]  # Return the missing pidfile wait for
0a07cd
+        m_getppid.return_value = 1  # Indicate that dhclient has daemonized
0a07cd
         self.assertEqual([], dhcp_discovery(dhclient_script, 'eth9', tmpdir))
0a07cd
         self.assertEqual(
0a07cd
             mock.call([pidfile, leasefile], maxwait=5, naplen=0.01),
0a07cd
@@ -173,9 +178,10 @@ class TestDHCPDiscoveryClean(CiTestCase):
0a07cd
             self.logs.getvalue())
0a07cd
         m_kill.assert_not_called()
0a07cd
 
0a07cd
+    @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid')
0a07cd
     @mock.patch('cloudinit.net.dhcp.os.kill')
0a07cd
     @mock.patch('cloudinit.net.dhcp.util.subp')
0a07cd
-    def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill):
0a07cd
+    def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill, m_getppid):
0a07cd
         """dhcp_discovery brings up the interface and runs dhclient.
0a07cd
 
0a07cd
         It also returns the parsed dhcp.leases file generated in the sandbox.
0a07cd
@@ -197,6 +203,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
0a07cd
         pid_file = os.path.join(tmpdir, 'dhclient.pid')
0a07cd
         my_pid = 1
0a07cd
         write_file(pid_file, "%d\n" % my_pid)
0a07cd
+        m_getppid.return_value = 1  # Indicate that dhclient has daemonized
0a07cd
 
0a07cd
         self.assertItemsEqual(
0a07cd
             [{'interface': 'eth9', 'fixed-address': '192.168.2.74',
0a07cd
@@ -355,3 +362,5 @@ class TestEphemeralDhcpNoNetworkSetup(HttprettyTestCase):
0a07cd
             self.assertEqual(fake_lease, lease)
0a07cd
         # Ensure that dhcp discovery occurs
0a07cd
         m_dhcp.called_once_with()
0a07cd
+
0a07cd
+# vi: ts=4 expandtab
0a07cd
diff --git a/cloudinit/temp_utils.py b/cloudinit/temp_utils.py
0a07cd
index c98a1b53..346276ec 100644
0a07cd
--- a/cloudinit/temp_utils.py
0a07cd
+++ b/cloudinit/temp_utils.py
0a07cd
@@ -81,7 +81,7 @@ def ExtendedTemporaryFile(**kwargs):
0a07cd
 
0a07cd
 
0a07cd
 @contextlib.contextmanager
0a07cd
-def tempdir(**kwargs):
0a07cd
+def tempdir(rmtree_ignore_errors=False, **kwargs):
0a07cd
     # This seems like it was only added in python 3.2
0a07cd
     # Make it since its useful...
0a07cd
     # See: http://bugs.python.org/file12970/tempdir.patch
0a07cd
@@ -89,7 +89,7 @@ def tempdir(**kwargs):
0a07cd
     try:
0a07cd
         yield tdir
0a07cd
     finally:
0a07cd
-        shutil.rmtree(tdir)
0a07cd
+        shutil.rmtree(tdir, ignore_errors=rmtree_ignore_errors)
0a07cd
 
0a07cd
 
0a07cd
 def mkdtemp(**kwargs):
0a07cd
diff --git a/cloudinit/tests/test_temp_utils.py b/cloudinit/tests/test_temp_utils.py
0a07cd
index ffbb92cd..4a52ef89 100644
0a07cd
--- a/cloudinit/tests/test_temp_utils.py
0a07cd
+++ b/cloudinit/tests/test_temp_utils.py
0a07cd
@@ -2,8 +2,9 @@
0a07cd
 
0a07cd
 """Tests for cloudinit.temp_utils"""
0a07cd
 
0a07cd
-from cloudinit.temp_utils import mkdtemp, mkstemp
0a07cd
+from cloudinit.temp_utils import mkdtemp, mkstemp, tempdir
0a07cd
 from cloudinit.tests.helpers import CiTestCase, wrap_and_call
0a07cd
+import os
0a07cd
 
0a07cd
 
0a07cd
 class TestTempUtils(CiTestCase):
0a07cd
@@ -98,4 +99,19 @@ class TestTempUtils(CiTestCase):
0a07cd
         self.assertEqual('/fake/return/path', retval)
0a07cd
         self.assertEqual([{'dir': '/run/cloud-init/tmp'}], calls)
0a07cd
 
0a07cd
+    def test_tempdir_error_suppression(self):
0a07cd
+        """test tempdir suppresses errors during directory removal."""
0a07cd
+
0a07cd
+        with self.assertRaises(OSError):
0a07cd
+            with tempdir(prefix='cloud-init-dhcp-') as tdir:
0a07cd
+                os.rmdir(tdir)
0a07cd
+                # As a result, the directory is already gone,
0a07cd
+                # so shutil.rmtree should raise OSError
0a07cd
+
0a07cd
+        with tempdir(rmtree_ignore_errors=True,
0a07cd
+                     prefix='cloud-init-dhcp-') as tdir:
0a07cd
+            os.rmdir(tdir)
0a07cd
+            # Since the directory is already gone, shutil.rmtree would raise
0a07cd
+            # OSError, but we suppress that
0a07cd
+
0a07cd
 # vi: ts=4 expandtab
0a07cd
diff --git a/cloudinit/util.py b/cloudinit/util.py
0a07cd
index 7800f7bc..a84112a9 100644
0a07cd
--- a/cloudinit/util.py
0a07cd
+++ b/cloudinit/util.py
0a07cd
@@ -2861,7 +2861,6 @@ def mount_is_read_write(mount_point):
0a07cd
     mount_opts = result[-1].split(',')
0a07cd
     return mount_opts[0] == 'rw'
0a07cd
 
0a07cd
-
0a07cd
 def udevadm_settle(exists=None, timeout=None):
0a07cd
     """Invoke udevadm settle with optional exists and timeout parameters"""
0a07cd
     settle_cmd = ["udevadm", "settle"]
0a07cd
@@ -2875,5 +2874,20 @@ def udevadm_settle(exists=None, timeout=None):
0a07cd
 
0a07cd
     return subp(settle_cmd)
0a07cd
 
0a07cd
+def get_proc_ppid(pid):
0a07cd
+    """
0a07cd
+    Return the parent pid of a process.
0a07cd
+    """
0a07cd
+    ppid = 0
0a07cd
+    try:
0a07cd
+        contents = load_file("/proc/%s/stat" % pid, quiet=True)
0a07cd
+    except IOError as e:
0a07cd
+        LOG.warning('Failed to load /proc/%s/stat. %s', pid, e)
0a07cd
+    if contents:
0a07cd
+        parts = contents.split(" ", 4)
0a07cd
+        # man proc says
0a07cd
+        #  ppid %d     (4) The PID of the parent.
0a07cd
+        ppid = int(parts[3])
0a07cd
+    return ppid
0a07cd
 
0a07cd
 # vi: ts=4 expandtab
0a07cd
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
0a07cd
index 5a14479a..8aebcd62 100644
0a07cd
--- a/tests/unittests/test_util.py
0a07cd
+++ b/tests/unittests/test_util.py
0a07cd
@@ -1114,6 +1114,12 @@ class TestLoadShellContent(helpers.TestCase):
0a07cd
                 'key3="val3 #tricky"',
0a07cd
                 ''])))
0a07cd
 
0a07cd
+    def test_get_proc_ppid(self):
0a07cd
+        """get_proc_ppid returns correct parent pid value."""
0a07cd
+        my_pid = os.getpid()
0a07cd
+        my_ppid = os.getppid()
0a07cd
+        self.assertEqual(my_ppid, util.get_proc_ppid(my_pid))
0a07cd
+
0a07cd
 
0a07cd
 class TestGetProcEnv(helpers.TestCase):
0a07cd
     """test get_proc_env."""
0a07cd
-- 
0a07cd
2.20.1
0a07cd