Blob Blame History Raw
From 48c7645ce8849ac31298e6c2b1d5661d0f581279 Mon Sep 17 00:00:00 2001
From: Gris Ge <fge@redhat.com>
Date: Mon, 17 May 2021 16:09:52 +0800
Subject: [PATCH 1/2] ovs: Fix `is_ovs_running()` in container environment.

In k8s container environment, the OVS database socket
/var/run/openvswitch/db.sock is mounted from host, so NM can managed it
without the ovs daemon running in container.

To support that, this patch removed the top level checking on
`is_ovs_running()` and trust plugin raise the proper error on failure.

Patched the NM plugin to check the error
`NM.DeviceStateReason.OVSDB_FAILED` on activation failure, raise
`NmstateDependencyError` if OVS DB failed to connected.

NM will not raise any error when creating OVS internal interface with
OVSDB mounted to /dev/null, NM will keep showing the OVS interface as
ACTIVATING, changed the fallback checker to give only 30 seconds for OVS
interface to exit `NM.DeviceState.PREPARE`, if not treat it as OVS
daemon malfunctioning.

Updated integration test case to mask(mount /dev/null) the OVS DB socket
file for simulating the stopped OVS daemon.

Signed-off-by: Gris Ge <fge@redhat.com>
Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
---
 libnmstate/ifaces/ovs.py           | 15 ----------
 libnmstate/nm/active_connection.py | 47 ++++++++++++++++++++++++++----
 libnmstate/nm/plugin.py            |  3 +-
 libnmstate/validator.py            | 16 +++-------
 tests/integration/ovs_test.py      | 41 +++++++++++---------------
 5 files changed, 64 insertions(+), 58 deletions(-)

diff --git a/libnmstate/ifaces/ovs.py b/libnmstate/ifaces/ovs.py
index 24d4aba..28892ad 100644
--- a/libnmstate/ifaces/ovs.py
+++ b/libnmstate/ifaces/ovs.py
@@ -19,7 +19,6 @@
 
 from copy import deepcopy
 from operator import itemgetter
-import subprocess
 import warnings
 
 from libnmstate.error import NmstateValueError
@@ -252,20 +251,6 @@ class OvsInternalIface(BaseIface):
                 self._info.pop(Interface.MAC, None)
 
 
-def is_ovs_running():
-    try:
-        subprocess.run(
-            ("systemctl", "status", "openvswitch"),
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
-            check=True,
-            timeout=SYSTEMCTL_TIMEOUT_SECONDS,
-        )
-        return True
-    except Exception:
-        return False
-
-
 def is_ovs_lag_port(port_state):
     return port_state.get(OVSBridge.Port.LINK_AGGREGATION_SUBTREE) is not None
 
diff --git a/libnmstate/nm/active_connection.py b/libnmstate/nm/active_connection.py
index ddf93a7..150256f 100644
--- a/libnmstate/nm/active_connection.py
+++ b/libnmstate/nm/active_connection.py
@@ -20,6 +20,7 @@
 import logging
 
 from libnmstate.error import NmstateLibnmError
+from libnmstate.error import NmstateDependencyError
 from libnmstate.error import NmstateInternalError
 
 from .common import GLib
@@ -33,6 +34,7 @@ from .ipv6 import is_dynamic as is_ipv6_dynamic
 
 NM_AC_STATE_CHANGED_SIGNAL = "state-changed"
 FALLBACK_CHECKER_INTERNAL = 15
+MAX_OVS_IFACE_PREPARE_TIME = FALLBACK_CHECKER_INTERNAL * 2
 GIO_ERROR_DOMAIN = "g-io-error-quark"
 
 
@@ -92,6 +94,7 @@ class ProfileActivation:
         self._dev_handlers = set()
         self._action = None
         self._fallback_checker = None
+        self._fallback_checker_counter = 0
 
     def run(self):
         specific_object = None
@@ -336,19 +339,53 @@ class ProfileActivation:
             self._activation_clean_up()
             self._ctx.finish_async(self._action)
         elif not is_activating(self._nm_ac, self._nm_dev):
-            reason = f"{self._nm_ac.get_state_reason()}"
+            nm_ac_reason = f"{self._nm_ac.get_state_reason()}"
+            nm_dev_reason = None
             if self._nm_dev:
-                reason += f" {self._nm_dev.get_state_reason()}"
+                nm_dev_reason = self._nm_dev.get_state_reason()
+
+            if nm_dev_reason == NM.DeviceStateReason.OVSDB_FAILED:
+                error = NmstateDependencyError(
+                    f"{self._action} failed: failed to communicating with "
+                    f"Open vSwitch database, {nm_dev_reason}"
+                )
+            else:
+                reason = nm_ac_reason + (
+                    str(nm_dev_reason) if nm_dev_reason else ""
+                )
+                error = NmstateLibnmError(
+                    f"{self._action} failed: reason={reason}"
+                )
             self._activation_clean_up()
-            self._ctx.fail(
-                NmstateLibnmError(f"{self._action} failed: reason={reason}")
-            )
+            self._ctx.fail(error)
 
     def _fallback_checker_callback(self, _user_data):
+        self._fallback_checker_counter += 1
         nm_dev = get_nm_dev(self._ctx, self._iface_name, self._iface_type)
         if nm_dev:
             self._nm_dev = nm_dev
             self._activation_progress_check()
+            # When OVSDB connection is invalid(such as been mounted as
+            # /dev/null), NM will hang on the activation of ovs internal
+            # interface with state ACITVATING with reason UNKNOWN forever with
+            # no state change signal. The fallback check only found it
+            # as activating which lead us hang till killed by idle timeout.
+            # To prevent that, when we found OVS interface interface in
+            # `NM.DeviceState.PREPARE` on in second call of fallbacker,
+            # we fail the action as NmstateDependencyError.
+            if (
+                self._fallback_checker_counter
+                >= MAX_OVS_IFACE_PREPARE_TIME / FALLBACK_CHECKER_INTERNAL
+                and nm_dev.get_device_type() == NM.DeviceType.OVS_INTERFACE
+                and nm_dev.get_state() == NM.DeviceState.PREPARE
+            ):
+                self._ctx.fail(
+                    NmstateDependencyError(
+                        f"{self._action} failed: timeout on creating OVS "
+                        "interface, please check Open vSwitch daemon"
+                    )
+                )
+
         return GLib.SOURCE_CONTINUE
 
 
diff --git a/libnmstate/nm/plugin.py b/libnmstate/nm/plugin.py
index 335d93c..da933b3 100644
--- a/libnmstate/nm/plugin.py
+++ b/libnmstate/nm/plugin.py
@@ -23,7 +23,6 @@ from operator import itemgetter
 from libnmstate.error import NmstateDependencyError
 from libnmstate.error import NmstateNotSupportedError
 from libnmstate.error import NmstateValueError
-from libnmstate.ifaces.ovs import is_ovs_running
 from libnmstate.schema import DNS
 from libnmstate.schema import Interface
 from libnmstate.schema import InterfaceType
@@ -103,7 +102,7 @@ class NetworkManagerPlugin(NmstatePlugin):
     @property
     def capabilities(self):
         capabilities = []
-        if has_ovs_capability(self.client) and is_ovs_running():
+        if has_ovs_capability(self.client):
             capabilities.append(NmstatePlugin.OVS_CAPABILITY)
         if has_team_capability(self.client):
             capabilities.append(NmstatePlugin.TEAM_CAPABILITY)
diff --git a/libnmstate/validator.py b/libnmstate/validator.py
index 02890b4..cd3b540 100644
--- a/libnmstate/validator.py
+++ b/libnmstate/validator.py
@@ -22,7 +22,6 @@ import logging
 
 import jsonschema as js
 
-from libnmstate.ifaces.ovs import is_ovs_running
 from libnmstate.schema import Interface
 from libnmstate.schema import InterfaceType
 from libnmstate.error import NmstateDependencyError
@@ -50,7 +49,6 @@ def validate_interface_capabilities(ifaces_state, capabilities):
     ifaces_types = {iface_state.get("type") for iface_state in ifaces_state}
     has_ovs_capability = NmstatePlugin.OVS_CAPABILITY in capabilities
     has_team_capability = NmstatePlugin.TEAM_CAPABILITY in capabilities
-    ovs_is_running = is_ovs_running()
     for iface_type in ifaces_types:
         is_ovs_type = iface_type in (
             InterfaceType.OVS_BRIDGE,
@@ -58,18 +56,12 @@ def validate_interface_capabilities(ifaces_state, capabilities):
             InterfaceType.OVS_PORT,
         )
         if is_ovs_type and not has_ovs_capability:
-            if not ovs_is_running:
-                raise NmstateDependencyError(
-                    "openvswitch service is not started."
-                )
-            else:
-                raise NmstateDependencyError(
-                    "Open vSwitch NetworkManager support not installed "
-                    "and started"
-                )
+            raise NmstateDependencyError(
+                "Open vSwitch support not properly installed or started"
+            )
         elif iface_type == InterfaceType.TEAM and not has_team_capability:
             raise NmstateDependencyError(
-                "NetworkManager-team plugin not installed and started"
+                "Team support not properly installed or started"
             )
 
 
-- 
2.31.1