Blame SOURCES/BZ_1964439-ovs-Fix-is_ovs_running-in-container-environment.patch

1797a9
From 48c7645ce8849ac31298e6c2b1d5661d0f581279 Mon Sep 17 00:00:00 2001
1797a9
From: Gris Ge <fge@redhat.com>
1797a9
Date: Mon, 17 May 2021 16:09:52 +0800
1797a9
Subject: [PATCH 1/2] ovs: Fix `is_ovs_running()` in container environment.
1797a9
1797a9
In k8s container environment, the OVS database socket
1797a9
/var/run/openvswitch/db.sock is mounted from host, so NM can manage it
1797a9
without the ovs daemon running in container.
1797a9
1797a9
To support that, this patch removed the top level checking on
1797a9
`is_ovs_running()` and trusts the plugin to raise the proper error on failure.
1797a9
1797a9
Patched the NM plugin to check the error
1797a9
`NM.DeviceStateReason.OVSDB_FAILED` on activation failure, raise
1797a9
`NmstateDependencyError` if connecting to the OVS DB failed.
1797a9
1797a9
NM will not raise any error when creating OVS internal interface with
1797a9
OVSDB mounted to /dev/null; instead, NM keeps showing the OVS interface as
1797a9
ACTIVATING. Changed the fallback checker to give only 30 seconds for OVS
1797a9
interface to exit `NM.DeviceState.PREPARE`; if it does not, treat it as OVS
1797a9
daemon malfunctioning.
1797a9
1797a9
Updated integration test case to mask(mount /dev/null) the OVS DB socket
1797a9
file for simulating the stopped OVS daemon.
1797a9
1797a9
Signed-off-by: Gris Ge <fge@redhat.com>
1797a9
Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
1797a9
---
1797a9
 libnmstate/ifaces/ovs.py           | 15 ----------
1797a9
 libnmstate/nm/active_connection.py | 47 ++++++++++++++++++++++++++----
1797a9
 libnmstate/nm/plugin.py            |  3 +-
1797a9
 libnmstate/validator.py            | 16 +++-------
1797a9
 tests/integration/ovs_test.py      | 41 +++++++++++---------------
1797a9
 5 files changed, 64 insertions(+), 58 deletions(-)
1797a9
1797a9
diff --git a/libnmstate/ifaces/ovs.py b/libnmstate/ifaces/ovs.py
1797a9
index 24d4aba..28892ad 100644
1797a9
--- a/libnmstate/ifaces/ovs.py
1797a9
+++ b/libnmstate/ifaces/ovs.py
1797a9
@@ -19,7 +19,6 @@
1797a9
 
1797a9
 from copy import deepcopy
1797a9
 from operator import itemgetter
1797a9
-import subprocess
1797a9
 import warnings
1797a9
 
1797a9
 from libnmstate.error import NmstateValueError
1797a9
@@ -252,20 +251,6 @@ class OvsInternalIface(BaseIface):
1797a9
                 self._info.pop(Interface.MAC, None)
1797a9
 
1797a9
 
1797a9
-def is_ovs_running():
1797a9
-    try:
1797a9
-        subprocess.run(
1797a9
-            ("systemctl", "status", "openvswitch"),
1797a9
-            stdout=subprocess.DEVNULL,
1797a9
-            stderr=subprocess.DEVNULL,
1797a9
-            check=True,
1797a9
-            timeout=SYSTEMCTL_TIMEOUT_SECONDS,
1797a9
-        )
1797a9
-        return True
1797a9
-    except Exception:
1797a9
-        return False
1797a9
-
1797a9
-
1797a9
 def is_ovs_lag_port(port_state):
1797a9
     return port_state.get(OVSBridge.Port.LINK_AGGREGATION_SUBTREE) is not None
1797a9
 
1797a9
diff --git a/libnmstate/nm/active_connection.py b/libnmstate/nm/active_connection.py
1797a9
index ddf93a7..150256f 100644
1797a9
--- a/libnmstate/nm/active_connection.py
1797a9
+++ b/libnmstate/nm/active_connection.py
1797a9
@@ -20,6 +20,7 @@
1797a9
 import logging
1797a9
 
1797a9
 from libnmstate.error import NmstateLibnmError
1797a9
+from libnmstate.error import NmstateDependencyError
1797a9
 from libnmstate.error import NmstateInternalError
1797a9
 
1797a9
 from .common import GLib
1797a9
@@ -33,6 +34,7 @@ from .ipv6 import is_dynamic as is_ipv6_dynamic
1797a9
 
1797a9
 NM_AC_STATE_CHANGED_SIGNAL = "state-changed"
1797a9
 FALLBACK_CHECKER_INTERNAL = 15
1797a9
+MAX_OVS_IFACE_PREPARE_TIME = FALLBACK_CHECKER_INTERNAL * 2
1797a9
 GIO_ERROR_DOMAIN = "g-io-error-quark"
1797a9
 
1797a9
 
1797a9
@@ -92,6 +94,7 @@ class ProfileActivation:
1797a9
         self._dev_handlers = set()
1797a9
         self._action = None
1797a9
         self._fallback_checker = None
1797a9
+        self._fallback_checker_counter = 0
1797a9
 
1797a9
     def run(self):
1797a9
         specific_object = None
1797a9
@@ -336,19 +339,53 @@ class ProfileActivation:
1797a9
             self._activation_clean_up()
1797a9
             self._ctx.finish_async(self._action)
1797a9
         elif not is_activating(self._nm_ac, self._nm_dev):
1797a9
-            reason = f"{self._nm_ac.get_state_reason()}"
1797a9
+            nm_ac_reason = f"{self._nm_ac.get_state_reason()}"
1797a9
+            nm_dev_reason = None
1797a9
             if self._nm_dev:
1797a9
-                reason += f" {self._nm_dev.get_state_reason()}"
1797a9
+                nm_dev_reason = self._nm_dev.get_state_reason()
1797a9
+
1797a9
+            if nm_dev_reason == NM.DeviceStateReason.OVSDB_FAILED:
1797a9
+                error = NmstateDependencyError(
1797a9
+                    f"{self._action} failed: failed to communicating with "
1797a9
+                    f"Open vSwitch database, {nm_dev_reason}"
1797a9
+                )
1797a9
+            else:
1797a9
+                reason = nm_ac_reason + (
1797a9
+                    str(nm_dev_reason) if nm_dev_reason else ""
1797a9
+                )
1797a9
+                error = NmstateLibnmError(
1797a9
+                    f"{self._action} failed: reason={reason}"
1797a9
+                )
1797a9
             self._activation_clean_up()
1797a9
-            self._ctx.fail(
1797a9
-                NmstateLibnmError(f"{self._action} failed: reason={reason}")
1797a9
-            )
1797a9
+            self._ctx.fail(error)
1797a9
 
1797a9
     def _fallback_checker_callback(self, _user_data):
1797a9
+        self._fallback_checker_counter += 1
1797a9
         nm_dev = get_nm_dev(self._ctx, self._iface_name, self._iface_type)
1797a9
         if nm_dev:
1797a9
             self._nm_dev = nm_dev
1797a9
             self._activation_progress_check()
1797a9
+            # When the OVSDB connection is invalid (such as being mounted as
1797a9
+            # /dev/null), NM will hang on the activation of the OVS internal
1797a9
+            # interface in state ACTIVATING with reason UNKNOWN forever, with
1797a9
+            # no state change signal. The fallback check only finds it as
1797a9
+            # activating, which leads us to hang until killed by idle timeout.
1797a9
+            # To prevent that, when we find an OVS interface still in
1797a9
+            # `NM.DeviceState.PREPARE` on the second call of the fallback
1797a9
+            # checker, we fail the action with NmstateDependencyError.
1797a9
+            if (
1797a9
+                self._fallback_checker_counter
1797a9
+                >= MAX_OVS_IFACE_PREPARE_TIME / FALLBACK_CHECKER_INTERNAL
1797a9
+                and nm_dev.get_device_type() == NM.DeviceType.OVS_INTERFACE
1797a9
+                and nm_dev.get_state() == NM.DeviceState.PREPARE
1797a9
+            ):
1797a9
+                self._ctx.fail(
1797a9
+                    NmstateDependencyError(
1797a9
+                        f"{self._action} failed: timeout on creating OVS "
1797a9
+                        "interface, please check Open vSwitch daemon"
1797a9
+                    )
1797a9
+                )
1797a9
+
1797a9
         return GLib.SOURCE_CONTINUE
1797a9
 
1797a9
 
1797a9
diff --git a/libnmstate/nm/plugin.py b/libnmstate/nm/plugin.py
1797a9
index 335d93c..da933b3 100644
1797a9
--- a/libnmstate/nm/plugin.py
1797a9
+++ b/libnmstate/nm/plugin.py
1797a9
@@ -23,7 +23,6 @@ from operator import itemgetter
1797a9
 from libnmstate.error import NmstateDependencyError
1797a9
 from libnmstate.error import NmstateNotSupportedError
1797a9
 from libnmstate.error import NmstateValueError
1797a9
-from libnmstate.ifaces.ovs import is_ovs_running
1797a9
 from libnmstate.schema import DNS
1797a9
 from libnmstate.schema import Interface
1797a9
 from libnmstate.schema import InterfaceType
1797a9
@@ -103,7 +102,7 @@ class NetworkManagerPlugin(NmstatePlugin):
1797a9
     @property
1797a9
     def capabilities(self):
1797a9
         capabilities = []
1797a9
-        if has_ovs_capability(self.client) and is_ovs_running():
1797a9
+        if has_ovs_capability(self.client):
1797a9
             capabilities.append(NmstatePlugin.OVS_CAPABILITY)
1797a9
         if has_team_capability(self.client):
1797a9
             capabilities.append(NmstatePlugin.TEAM_CAPABILITY)
1797a9
diff --git a/libnmstate/validator.py b/libnmstate/validator.py
1797a9
index 02890b4..cd3b540 100644
1797a9
--- a/libnmstate/validator.py
1797a9
+++ b/libnmstate/validator.py
1797a9
@@ -22,7 +22,6 @@ import logging
1797a9
 
1797a9
 import jsonschema as js
1797a9
 
1797a9
-from libnmstate.ifaces.ovs import is_ovs_running
1797a9
 from libnmstate.schema import Interface
1797a9
 from libnmstate.schema import InterfaceType
1797a9
 from libnmstate.error import NmstateDependencyError
1797a9
@@ -50,7 +49,6 @@ def validate_interface_capabilities(ifaces_state, capabilities):
1797a9
     ifaces_types = {iface_state.get("type") for iface_state in ifaces_state}
1797a9
     has_ovs_capability = NmstatePlugin.OVS_CAPABILITY in capabilities
1797a9
     has_team_capability = NmstatePlugin.TEAM_CAPABILITY in capabilities
1797a9
-    ovs_is_running = is_ovs_running()
1797a9
     for iface_type in ifaces_types:
1797a9
         is_ovs_type = iface_type in (
1797a9
             InterfaceType.OVS_BRIDGE,
1797a9
@@ -58,18 +56,12 @@ def validate_interface_capabilities(ifaces_state, capabilities):
1797a9
             InterfaceType.OVS_PORT,
1797a9
         )
1797a9
         if is_ovs_type and not has_ovs_capability:
1797a9
-            if not ovs_is_running:
1797a9
-                raise NmstateDependencyError(
1797a9
-                    "openvswitch service is not started."
1797a9
-                )
1797a9
-            else:
1797a9
-                raise NmstateDependencyError(
1797a9
-                    "Open vSwitch NetworkManager support not installed "
1797a9
-                    "and started"
1797a9
-                )
1797a9
+            raise NmstateDependencyError(
1797a9
+                "Open vSwitch support not properly installed or started"
1797a9
+            )
1797a9
         elif iface_type == InterfaceType.TEAM and not has_team_capability:
1797a9
             raise NmstateDependencyError(
1797a9
-                "NetworkManager-team plugin not installed and started"
1797a9
+                "Team support not properly installed or started"
1797a9
             )
1797a9
 
1797a9
 
1797a9
-- 
1797a9
2.31.1
1797a9