From 48c7645ce8849ac31298e6c2b1d5661d0f581279 Mon Sep 17 00:00:00 2001
From: Gris Ge <fge@redhat.com>
Date: Mon, 17 May 2021 16:09:52 +0800
Subject: [PATCH 1/2] ovs: Fix `is_ovs_running()` in container environment.
In k8s container environment, the OVS database socket
/var/run/openvswitch/db.sock is mounted from host, so NM can managed it
without the ovs daemon running in container.
To support that, this patch removed the top level checking on
`is_ovs_running()` and trust plugin raise the proper error on failure.
Patched the NM plugin to check the error
`NM.DeviceStateReason.OVSDB_FAILED` on activation failure, raise
`NmstateDependencyError` if OVS DB failed to connected.
NM will not raise any error when creating OVS internal interface with
OVSDB mounted to /dev/null, NM will keep showing the OVS interface as
ACTIVATING, changed the fallback checker to give only 30 seconds for OVS
interface to exit `NM.DeviceState.PREPARE`, if not treat it as OVS
daemon malfunctioning.
Updated integration test case to mask(mount /dev/null) the OVS DB socket
file for simulating the stopped OVS daemon.
Signed-off-by: Gris Ge <fge@redhat.com>
Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
---
libnmstate/ifaces/ovs.py | 15 ----------
libnmstate/nm/active_connection.py | 47 ++++++++++++++++++++++++++----
libnmstate/nm/plugin.py | 3 +-
libnmstate/validator.py | 16 +++-------
tests/integration/ovs_test.py | 41 +++++++++++---------------
5 files changed, 64 insertions(+), 58 deletions(-)
diff --git a/libnmstate/ifaces/ovs.py b/libnmstate/ifaces/ovs.py
index 24d4aba..28892ad 100644
--- a/libnmstate/ifaces/ovs.py
+++ b/libnmstate/ifaces/ovs.py
@@ -19,7 +19,6 @@
from copy import deepcopy
from operator import itemgetter
-import subprocess
import warnings
from libnmstate.error import NmstateValueError
@@ -252,20 +251,6 @@ class OvsInternalIface(BaseIface):
self._info.pop(Interface.MAC, None)
-def is_ovs_running():
- try:
- subprocess.run(
- ("systemctl", "status", "openvswitch"),
- stdout=subprocess.DEVNULL,
- stderr=subprocess.DEVNULL,
- check=True,
- timeout=SYSTEMCTL_TIMEOUT_SECONDS,
- )
- return True
- except Exception:
- return False
-
-
def is_ovs_lag_port(port_state):
return port_state.get(OVSBridge.Port.LINK_AGGREGATION_SUBTREE) is not None
diff --git a/libnmstate/nm/active_connection.py b/libnmstate/nm/active_connection.py
index ddf93a7..150256f 100644
--- a/libnmstate/nm/active_connection.py
+++ b/libnmstate/nm/active_connection.py
@@ -20,6 +20,7 @@
import logging
from libnmstate.error import NmstateLibnmError
+from libnmstate.error import NmstateDependencyError
from libnmstate.error import NmstateInternalError
from .common import GLib
@@ -33,6 +34,7 @@ from .ipv6 import is_dynamic as is_ipv6_dynamic
NM_AC_STATE_CHANGED_SIGNAL = "state-changed"
FALLBACK_CHECKER_INTERNAL = 15
+MAX_OVS_IFACE_PREPARE_TIME = FALLBACK_CHECKER_INTERNAL * 2
GIO_ERROR_DOMAIN = "g-io-error-quark"
@@ -92,6 +94,7 @@ class ProfileActivation:
self._dev_handlers = set()
self._action = None
self._fallback_checker = None
+ self._fallback_checker_counter = 0
def run(self):
specific_object = None
@@ -336,19 +339,53 @@ class ProfileActivation:
self._activation_clean_up()
self._ctx.finish_async(self._action)
elif not is_activating(self._nm_ac, self._nm_dev):
- reason = f"{self._nm_ac.get_state_reason()}"
+ nm_ac_reason = f"{self._nm_ac.get_state_reason()}"
+ nm_dev_reason = None
if self._nm_dev:
- reason += f" {self._nm_dev.get_state_reason()}"
+ nm_dev_reason = self._nm_dev.get_state_reason()
+
+ if nm_dev_reason == NM.DeviceStateReason.OVSDB_FAILED:
+ error = NmstateDependencyError(
+ f"{self._action} failed: failed to communicating with "
+ f"Open vSwitch database, {nm_dev_reason}"
+ )
+ else:
+ reason = nm_ac_reason + (
+ str(nm_dev_reason) if nm_dev_reason else ""
+ )
+ error = NmstateLibnmError(
+ f"{self._action} failed: reason={reason}"
+ )
self._activation_clean_up()
- self._ctx.fail(
- NmstateLibnmError(f"{self._action} failed: reason={reason}")
- )
+ self._ctx.fail(error)
def _fallback_checker_callback(self, _user_data):
+ self._fallback_checker_counter += 1
nm_dev = get_nm_dev(self._ctx, self._iface_name, self._iface_type)
if nm_dev:
self._nm_dev = nm_dev
self._activation_progress_check()
+ # When OVSDB connection is invalid(such as been mounted as
+ # /dev/null), NM will hang on the activation of ovs internal
+ # interface with state ACITVATING with reason UNKNOWN forever with
+ # no state change signal. The fallback check only found it
+ # as activating which lead us hang till killed by idle timeout.
+ # To prevent that, when we found OVS interface interface in
+ # `NM.DeviceState.PREPARE` on in second call of fallbacker,
+ # we fail the action as NmstateDependencyError.
+ if (
+ self._fallback_checker_counter
+ >= MAX_OVS_IFACE_PREPARE_TIME / FALLBACK_CHECKER_INTERNAL
+ and nm_dev.get_device_type() == NM.DeviceType.OVS_INTERFACE
+ and nm_dev.get_state() == NM.DeviceState.PREPARE
+ ):
+ self._ctx.fail(
+ NmstateDependencyError(
+ f"{self._action} failed: timeout on creating OVS "
+ "interface, please check Open vSwitch daemon"
+ )
+ )
+
return GLib.SOURCE_CONTINUE
diff --git a/libnmstate/nm/plugin.py b/libnmstate/nm/plugin.py
index 335d93c..da933b3 100644
--- a/libnmstate/nm/plugin.py
+++ b/libnmstate/nm/plugin.py
@@ -23,7 +23,6 @@ from operator import itemgetter
from libnmstate.error import NmstateDependencyError
from libnmstate.error import NmstateNotSupportedError
from libnmstate.error import NmstateValueError
-from libnmstate.ifaces.ovs import is_ovs_running
from libnmstate.schema import DNS
from libnmstate.schema import Interface
from libnmstate.schema import InterfaceType
@@ -103,7 +102,7 @@ class NetworkManagerPlugin(NmstatePlugin):
@property
def capabilities(self):
capabilities = []
- if has_ovs_capability(self.client) and is_ovs_running():
+ if has_ovs_capability(self.client):
capabilities.append(NmstatePlugin.OVS_CAPABILITY)
if has_team_capability(self.client):
capabilities.append(NmstatePlugin.TEAM_CAPABILITY)
diff --git a/libnmstate/validator.py b/libnmstate/validator.py
index 02890b4..cd3b540 100644
--- a/libnmstate/validator.py
+++ b/libnmstate/validator.py
@@ -22,7 +22,6 @@ import logging
import jsonschema as js
-from libnmstate.ifaces.ovs import is_ovs_running
from libnmstate.schema import Interface
from libnmstate.schema import InterfaceType
from libnmstate.error import NmstateDependencyError
@@ -50,7 +49,6 @@ def validate_interface_capabilities(ifaces_state, capabilities):
ifaces_types = {iface_state.get("type") for iface_state in ifaces_state}
has_ovs_capability = NmstatePlugin.OVS_CAPABILITY in capabilities
has_team_capability = NmstatePlugin.TEAM_CAPABILITY in capabilities
- ovs_is_running = is_ovs_running()
for iface_type in ifaces_types:
is_ovs_type = iface_type in (
InterfaceType.OVS_BRIDGE,
@@ -58,18 +56,12 @@ def validate_interface_capabilities(ifaces_state, capabilities):
InterfaceType.OVS_PORT,
)
if is_ovs_type and not has_ovs_capability:
- if not ovs_is_running:
- raise NmstateDependencyError(
- "openvswitch service is not started."
- )
- else:
- raise NmstateDependencyError(
- "Open vSwitch NetworkManager support not installed "
- "and started"
- )
+ raise NmstateDependencyError(
+ "Open vSwitch support not properly installed or started"
+ )
elif iface_type == InterfaceType.TEAM and not has_team_capability:
raise NmstateDependencyError(
- "NetworkManager-team plugin not installed and started"
+ "Team support not properly installed or started"
)
--
2.31.1