|
|
17dafa |
From c1c6d0b586e5556a37b9b813afbb4e6a24921adf Mon Sep 17 00:00:00 2001
|
|
|
17dafa |
From: Eduardo Otubo <otubo@redhat.com>
|
|
|
17dafa |
Date: Wed, 23 Jan 2019 12:30:21 +0100
|
|
|
17dafa |
Subject: net: Wait for dhclient to daemonize before reading lease file
|
|
|
17dafa |
|
|
|
17dafa |
RH-Author: Eduardo Otubo <otubo@redhat.com>
|
|
|
17dafa |
Message-id: <20190123123021.32708-1-otubo@redhat.com>
|
|
|
17dafa |
Patchwork-id: 84095
|
|
|
17dafa |
O-Subject: [RHEL-7.7 cloud-init PATCH] net: Wait for dhclient to daemonize before reading lease file
|
|
|
17dafa |
Bugzilla: 1632967
|
|
|
17dafa |
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
|
|
17dafa |
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
17dafa |
|
|
|
17dafa |
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1632967
|
|
|
17dafa |
Brew: https://bugzilla.redhat.com/show_bug.cgi?id=1632967
|
|
|
17dafa |
Tested: Me and upstream
|
|
|
17dafa |
|
|
|
17dafa |
commit fdadcb5fae51f4e6799314ab98e3aec56c79b17c
|
|
|
17dafa |
Author: Jason Zions <jasonzio@microsoft.com>
|
|
|
17dafa |
Date: Tue Jan 15 21:37:17 2019 +0000
|
|
|
17dafa |
|
|
|
17dafa |
net: Wait for dhclient to daemonize before reading lease file
|
|
|
17dafa |
|
|
|
17dafa |
cloud-init uses dhclient to fetch the DHCP lease so it can extract
|
|
|
17dafa |
DHCP options. dhclient creates the leasefile, then writes to it;
|
|
|
17dafa |
simply waiting for the leasefile to appear creates a race between
|
|
|
17dafa |
dhclient and cloud-init. Instead, wait for dhclient to be parented by
|
|
|
17dafa |
init. At that point, we know it has written to the leasefile, so it's
|
|
|
17dafa |
safe to copy the file and kill the process.
|
|
|
17dafa |
|
|
|
17dafa |
cloud-init creates a temporary directory in which to execute dhclient,
|
|
|
17dafa |
and deletes that directory after it has killed the process. If
|
|
|
17dafa |
cloud-init abandons waiting for dhclient to daemonize, it will still
|
|
|
17dafa |
attempt to delete the temporary directory, but will not report an
|
|
|
17dafa |
exception should that attempt fail.
|
|
|
17dafa |
|
|
|
17dafa |
LP: #1794399
|
|
|
17dafa |
|
|
|
17dafa |
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
|
|
|
17dafa |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
17dafa |
---
|
|
|
17dafa |
cloudinit/net/dhcp.py | 44 +++++++++++++++++++++++++++-----------
|
|
|
17dafa |
cloudinit/net/tests/test_dhcp.py | 15 ++++++++++---
|
|
|
17dafa |
cloudinit/temp_utils.py | 4 ++--
|
|
|
17dafa |
cloudinit/tests/test_temp_utils.py | 18 +++++++++++++++-
|
|
|
17dafa |
cloudinit/util.py | 16 +++++++++++++-
|
|
|
17dafa |
tests/unittests/test_util.py | 6 ++++++
|
|
|
17dafa |
6 files changed, 83 insertions(+), 20 deletions(-)
|
|
|
17dafa |
|
|
|
17dafa |
diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py
|
|
|
17dafa |
index 0db991d..c98a97c 100644
|
|
|
17dafa |
--- a/cloudinit/net/dhcp.py
|
|
|
17dafa |
+++ b/cloudinit/net/dhcp.py
|
|
|
17dafa |
@@ -9,6 +9,7 @@ import logging
|
|
|
17dafa |
import os
|
|
|
17dafa |
import re
|
|
|
17dafa |
import signal
|
|
|
17dafa |
+import time
|
|
|
17dafa |
|
|
|
17dafa |
from cloudinit.net import (
|
|
|
17dafa |
EphemeralIPv4Network, find_fallback_nic, get_devicelist,
|
|
|
17dafa |
@@ -127,7 +128,9 @@ def maybe_perform_dhcp_discovery(nic=None):
|
|
|
17dafa |
if not dhclient_path:
|
|
|
17dafa |
LOG.debug('Skip dhclient configuration: No dhclient command found.')
|
|
|
17dafa |
return []
|
|
|
17dafa |
- with temp_utils.tempdir(prefix='cloud-init-dhcp-', needs_exe=True) as tdir:
|
|
|
17dafa |
+ with temp_utils.tempdir(rmtree_ignore_errors=True,
|
|
|
17dafa |
+ prefix='cloud-init-dhcp-',
|
|
|
17dafa |
+ needs_exe=True) as tdir:
|
|
|
17dafa |
# Use /var/tmp because /run/cloud-init/tmp is mounted noexec
|
|
|
17dafa |
return dhcp_discovery(dhclient_path, nic, tdir)
|
|
|
17dafa |
|
|
|
17dafa |
@@ -195,24 +198,39 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir):
|
|
|
17dafa |
'-pf', pid_file, interface, '-sf', '/bin/true']
|
|
|
17dafa |
util.subp(cmd, capture=True)
|
|
|
17dafa |
|
|
|
17dafa |
- # dhclient doesn't write a pid file until after it forks when it gets a
|
|
|
17dafa |
- # proper lease response. Since cleandir is a temp directory that gets
|
|
|
17dafa |
- # removed, we need to wait for that pidfile creation before the
|
|
|
17dafa |
- # cleandir is removed, otherwise we get FileNotFound errors.
|
|
|
17dafa |
+ # Wait for pid file and lease file to appear, and for the process
|
|
|
17dafa |
+ # named by the pid file to daemonize (have pid 1 as its parent). If we
|
|
|
17dafa |
+ # try to read the lease file before daemonization happens, we might try
|
|
|
17dafa |
+ # to read it before the dhclient has actually written it. We also have
|
|
|
17dafa |
+ # to wait until the dhclient has become a daemon so we can be sure to
|
|
|
17dafa |
+ # kill the correct process, thus freeing cleandir to be deleted back
|
|
|
17dafa |
+ # up the callstack.
|
|
|
17dafa |
missing = util.wait_for_files(
|
|
|
17dafa |
[pid_file, lease_file], maxwait=5, naplen=0.01)
|
|
|
17dafa |
if missing:
|
|
|
17dafa |
LOG.warning("dhclient did not produce expected files: %s",
|
|
|
17dafa |
', '.join(os.path.basename(f) for f in missing))
|
|
|
17dafa |
return []
|
|
|
17dafa |
- pid_content = util.load_file(pid_file).strip()
|
|
|
17dafa |
- try:
|
|
|
17dafa |
- pid = int(pid_content)
|
|
|
17dafa |
- except ValueError:
|
|
|
17dafa |
- LOG.debug(
|
|
|
17dafa |
- "pid file contains non-integer content '%s'", pid_content)
|
|
|
17dafa |
- else:
|
|
|
17dafa |
- os.kill(pid, signal.SIGKILL)
|
|
|
17dafa |
+
|
|
|
17dafa |
+ ppid = 'unknown'
|
|
|
17dafa |
+ for _ in range(0, 1000):
|
|
|
17dafa |
+ pid_content = util.load_file(pid_file).strip()
|
|
|
17dafa |
+ try:
|
|
|
17dafa |
+ pid = int(pid_content)
|
|
|
17dafa |
+ except ValueError:
|
|
|
17dafa |
+ pass
|
|
|
17dafa |
+ else:
|
|
|
17dafa |
+ ppid = util.get_proc_ppid(pid)
|
|
|
17dafa |
+ if ppid == 1:
|
|
|
17dafa |
+ LOG.debug('killing dhclient with pid=%s', pid)
|
|
|
17dafa |
+ os.kill(pid, signal.SIGKILL)
|
|
|
17dafa |
+ return parse_dhcp_lease_file(lease_file)
|
|
|
17dafa |
+ time.sleep(0.01)
|
|
|
17dafa |
+
|
|
|
17dafa |
+ LOG.error(
|
|
|
17dafa |
+ 'dhclient(pid=%s, parentpid=%s) failed to daemonize after %s seconds',
|
|
|
17dafa |
+ pid_content, ppid, 0.01 * 1000
|
|
|
17dafa |
+ )
|
|
|
17dafa |
return parse_dhcp_lease_file(lease_file)
|
|
|
17dafa |
|
|
|
17dafa |
|
|
|
17dafa |
diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py
|
|
|
17dafa |
index cd3e732..79e8842 100644
|
|
|
17dafa |
--- a/cloudinit/net/tests/test_dhcp.py
|
|
|
17dafa |
+++ b/cloudinit/net/tests/test_dhcp.py
|
|
|
17dafa |
@@ -145,16 +145,20 @@ class TestDHCPDiscoveryClean(CiTestCase):
|
|
|
17dafa |
'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}],
|
|
|
17dafa |
dhcp_discovery(dhclient_script, 'eth9', tmpdir))
|
|
|
17dafa |
self.assertIn(
|
|
|
17dafa |
- "pid file contains non-integer content ''", self.logs.getvalue())
|
|
|
17dafa |
+ "dhclient(pid=, parentpid=unknown) failed "
|
|
|
17dafa |
+ "to daemonize after 10.0 seconds",
|
|
|
17dafa |
+ self.logs.getvalue())
|
|
|
17dafa |
m_kill.assert_not_called()
|
|
|
17dafa |
|
|
|
17dafa |
+ @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid')
|
|
|
17dafa |
@mock.patch('cloudinit.net.dhcp.os.kill')
|
|
|
17dafa |
@mock.patch('cloudinit.net.dhcp.util.wait_for_files')
|
|
|
17dafa |
@mock.patch('cloudinit.net.dhcp.util.subp')
|
|
|
17dafa |
def test_dhcp_discovery_run_in_sandbox_waits_on_lease_and_pid(self,
|
|
|
17dafa |
m_subp,
|
|
|
17dafa |
m_wait,
|
|
|
17dafa |
- m_kill):
|
|
|
17dafa |
+ m_kill,
|
|
|
17dafa |
+ m_getppid):
|
|
|
17dafa |
"""dhcp_discovery waits for the presence of pidfile and dhcp.leases."""
|
|
|
17dafa |
tmpdir = self.tmp_dir()
|
|
|
17dafa |
dhclient_script = os.path.join(tmpdir, 'dhclient.orig')
|
|
|
17dafa |
@@ -164,6 +168,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
|
|
|
17dafa |
pidfile = self.tmp_path('dhclient.pid', tmpdir)
|
|
|
17dafa |
leasefile = self.tmp_path('dhcp.leases', tmpdir)
|
|
|
17dafa |
m_wait.return_value = [pidfile] # Return the missing pidfile wait for
|
|
|
17dafa |
+ m_getppid.return_value = 1 # Indicate that dhclient has daemonized
|
|
|
17dafa |
self.assertEqual([], dhcp_discovery(dhclient_script, 'eth9', tmpdir))
|
|
|
17dafa |
self.assertEqual(
|
|
|
17dafa |
mock.call([pidfile, leasefile], maxwait=5, naplen=0.01),
|
|
|
17dafa |
@@ -173,9 +178,10 @@ class TestDHCPDiscoveryClean(CiTestCase):
|
|
|
17dafa |
self.logs.getvalue())
|
|
|
17dafa |
m_kill.assert_not_called()
|
|
|
17dafa |
|
|
|
17dafa |
+ @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid')
|
|
|
17dafa |
@mock.patch('cloudinit.net.dhcp.os.kill')
|
|
|
17dafa |
@mock.patch('cloudinit.net.dhcp.util.subp')
|
|
|
17dafa |
- def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill):
|
|
|
17dafa |
+ def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill, m_getppid):
|
|
|
17dafa |
"""dhcp_discovery brings up the interface and runs dhclient.
|
|
|
17dafa |
|
|
|
17dafa |
It also returns the parsed dhcp.leases file generated in the sandbox.
|
|
|
17dafa |
@@ -197,6 +203,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
|
|
|
17dafa |
pid_file = os.path.join(tmpdir, 'dhclient.pid')
|
|
|
17dafa |
my_pid = 1
|
|
|
17dafa |
write_file(pid_file, "%d\n" % my_pid)
|
|
|
17dafa |
+ m_getppid.return_value = 1 # Indicate that dhclient has daemonized
|
|
|
17dafa |
|
|
|
17dafa |
self.assertItemsEqual(
|
|
|
17dafa |
[{'interface': 'eth9', 'fixed-address': '192.168.2.74',
|
|
|
17dafa |
@@ -355,3 +362,5 @@ class TestEphemeralDhcpNoNetworkSetup(HttprettyTestCase):
|
|
|
17dafa |
self.assertEqual(fake_lease, lease)
|
|
|
17dafa |
# Ensure that dhcp discovery occurs
|
|
|
17dafa |
m_dhcp.called_once_with()
|
|
|
17dafa |
+
|
|
|
17dafa |
+# vi: ts=4 expandtab
|
|
|
17dafa |
diff --git a/cloudinit/temp_utils.py b/cloudinit/temp_utils.py
|
|
|
17dafa |
index c98a1b5..346276e 100644
|
|
|
17dafa |
--- a/cloudinit/temp_utils.py
|
|
|
17dafa |
+++ b/cloudinit/temp_utils.py
|
|
|
17dafa |
@@ -81,7 +81,7 @@ def ExtendedTemporaryFile(**kwargs):
|
|
|
17dafa |
|
|
|
17dafa |
|
|
|
17dafa |
@contextlib.contextmanager
|
|
|
17dafa |
-def tempdir(**kwargs):
|
|
|
17dafa |
+def tempdir(rmtree_ignore_errors=False, **kwargs):
|
|
|
17dafa |
# This seems like it was only added in python 3.2
|
|
|
17dafa |
# Make it since its useful...
|
|
|
17dafa |
# See: http://bugs.python.org/file12970/tempdir.patch
|
|
|
17dafa |
@@ -89,7 +89,7 @@ def tempdir(**kwargs):
|
|
|
17dafa |
try:
|
|
|
17dafa |
yield tdir
|
|
|
17dafa |
finally:
|
|
|
17dafa |
- shutil.rmtree(tdir)
|
|
|
17dafa |
+ shutil.rmtree(tdir, ignore_errors=rmtree_ignore_errors)
|
|
|
17dafa |
|
|
|
17dafa |
|
|
|
17dafa |
def mkdtemp(**kwargs):
|
|
|
17dafa |
diff --git a/cloudinit/tests/test_temp_utils.py b/cloudinit/tests/test_temp_utils.py
|
|
|
17dafa |
index ffbb92c..4a52ef8 100644
|
|
|
17dafa |
--- a/cloudinit/tests/test_temp_utils.py
|
|
|
17dafa |
+++ b/cloudinit/tests/test_temp_utils.py
|
|
|
17dafa |
@@ -2,8 +2,9 @@
|
|
|
17dafa |
|
|
|
17dafa |
"""Tests for cloudinit.temp_utils"""
|
|
|
17dafa |
|
|
|
17dafa |
-from cloudinit.temp_utils import mkdtemp, mkstemp
|
|
|
17dafa |
+from cloudinit.temp_utils import mkdtemp, mkstemp, tempdir
|
|
|
17dafa |
from cloudinit.tests.helpers import CiTestCase, wrap_and_call
|
|
|
17dafa |
+import os
|
|
|
17dafa |
|
|
|
17dafa |
|
|
|
17dafa |
class TestTempUtils(CiTestCase):
|
|
|
17dafa |
@@ -98,4 +99,19 @@ class TestTempUtils(CiTestCase):
|
|
|
17dafa |
self.assertEqual('/fake/return/path', retval)
|
|
|
17dafa |
self.assertEqual([{'dir': '/run/cloud-init/tmp'}], calls)
|
|
|
17dafa |
|
|
|
17dafa |
+ def test_tempdir_error_suppression(self):
|
|
|
17dafa |
+ """test tempdir suppresses errors during directory removal."""
|
|
|
17dafa |
+
|
|
|
17dafa |
+ with self.assertRaises(OSError):
|
|
|
17dafa |
+ with tempdir(prefix='cloud-init-dhcp-') as tdir:
|
|
|
17dafa |
+ os.rmdir(tdir)
|
|
|
17dafa |
+ # As a result, the directory is already gone,
|
|
|
17dafa |
+ # so shutil.rmtree should raise OSError
|
|
|
17dafa |
+
|
|
|
17dafa |
+ with tempdir(rmtree_ignore_errors=True,
|
|
|
17dafa |
+ prefix='cloud-init-dhcp-') as tdir:
|
|
|
17dafa |
+ os.rmdir(tdir)
|
|
|
17dafa |
+ # Since the directory is already gone, shutil.rmtree would raise
|
|
|
17dafa |
+ # OSError, but we suppress that
|
|
|
17dafa |
+
|
|
|
17dafa |
# vi: ts=4 expandtab
|
|
|
17dafa |
diff --git a/cloudinit/util.py b/cloudinit/util.py
|
|
|
17dafa |
index 7800f7b..a84112a 100644
|
|
|
17dafa |
--- a/cloudinit/util.py
|
|
|
17dafa |
+++ b/cloudinit/util.py
|
|
|
17dafa |
@@ -2861,7 +2861,6 @@ def mount_is_read_write(mount_point):
|
|
|
17dafa |
mount_opts = result[-1].split(',')
|
|
|
17dafa |
return mount_opts[0] == 'rw'
|
|
|
17dafa |
|
|
|
17dafa |
-
|
|
|
17dafa |
def udevadm_settle(exists=None, timeout=None):
|
|
|
17dafa |
"""Invoke udevadm settle with optional exists and timeout parameters"""
|
|
|
17dafa |
settle_cmd = ["udevadm", "settle"]
|
|
|
17dafa |
@@ -2875,5 +2874,20 @@ def udevadm_settle(exists=None, timeout=None):
|
|
|
17dafa |
|
|
|
17dafa |
return subp(settle_cmd)
|
|
|
17dafa |
|
|
|
17dafa |
+def get_proc_ppid(pid):
|
|
|
17dafa |
+ """
|
|
|
17dafa |
+ Return the parent pid of a process.
|
|
|
17dafa |
+ """
|
|
|
17dafa |
+ ppid = 0
|
|
|
17dafa |
+ try:
|
|
|
17dafa |
+ contents = load_file("/proc/%s/stat" % pid, quiet=True)
|
|
|
17dafa |
+ except IOError as e:
|
|
|
17dafa |
+ LOG.warning('Failed to load /proc/%s/stat. %s', pid, e)
|
|
|
17dafa |
+ if contents:
|
|
|
17dafa |
+ parts = contents.split(" ", 4)
|
|
|
17dafa |
+ # man proc says
|
|
|
17dafa |
+ # ppid %d (4) The PID of the parent.
|
|
|
17dafa |
+ ppid = int(parts[3])
|
|
|
17dafa |
+ return ppid
|
|
|
17dafa |
|
|
|
17dafa |
# vi: ts=4 expandtab
|
|
|
17dafa |
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
|
|
|
17dafa |
index 5a14479..8aebcd6 100644
|
|
|
17dafa |
--- a/tests/unittests/test_util.py
|
|
|
17dafa |
+++ b/tests/unittests/test_util.py
|
|
|
17dafa |
@@ -1114,6 +1114,12 @@ class TestLoadShellContent(helpers.TestCase):
|
|
|
17dafa |
'key3="val3 #tricky"',
|
|
|
17dafa |
''])))
|
|
|
17dafa |
|
|
|
17dafa |
+ def test_get_proc_ppid(self):
|
|
|
17dafa |
+ """get_proc_ppid returns correct parent pid value."""
|
|
|
17dafa |
+ my_pid = os.getpid()
|
|
|
17dafa |
+ my_ppid = os.getppid()
|
|
|
17dafa |
+ self.assertEqual(my_ppid, util.get_proc_ppid(my_pid))
|
|
|
17dafa |
+
|
|
|
17dafa |
|
|
|
17dafa |
class TestGetProcEnv(helpers.TestCase):
|
|
|
17dafa |
"""test get_proc_env."""
|
|
|
17dafa |
--
|
|
|
17dafa |
1.8.3.1
|
|
|
17dafa |
|