Blame SOURCES/0001-sync-with-1.1.6.patch

8b0ac9
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
8b0ac9
index 93604e8..4e1b6c5 100644
8b0ac9
--- a/.github/workflows/pylint.yml
8b0ac9
+++ b/.github/workflows/pylint.yml
8b0ac9
@@ -1,9 +1,19 @@
8b0ac9
-name: Pylint
8b0ac9
+name: Linters
8b0ac9
 
8b0ac9
 on: [push]
8b0ac9
 
8b0ac9
 jobs:
8b0ac9
-  build:
8b0ac9
+
8b0ac9
+  docker-lint:
8b0ac9
+    runs-on: ubuntu-latest
8b0ac9
+    steps:
8b0ac9
+      - uses: actions/checkout@v3
8b0ac9
+      - uses: hadolint/hadolint-action@v2.1.0
8b0ac9
+        with:
8b0ac9
+          recursive: true
8b0ac9
+          ignore: DL3041
8b0ac9
+
8b0ac9
+  python-lint:
8b0ac9
     runs-on: ubuntu-latest
8b0ac9
 
8b0ac9
     strategy:
8b0ac9
diff --git a/Dockerfile b/Dockerfile
8b0ac9
index ad6742e..0ab5138 100644
8b0ac9
--- a/Dockerfile
8b0ac9
+++ b/Dockerfile
8b0ac9
@@ -2,12 +2,12 @@ FROM fedora:36
8b0ac9
 
8b0ac9
 WORKDIR /root
8b0ac9
 
8b0ac9
-# for nvme-stas
8b0ac9
-RUN dnf install -y python3-dasbus python3-pyudev python3-systemd python3-gobject meson
8b0ac9
-# for libnvme
8b0ac9
-RUN dnf install -y git gcc g++ cmake openssl-devel libuuid-devel json-c-devel swig python-devel meson
8b0ac9
+# first line for nvme-stas
8b0ac9
+# second line for libnvme
8b0ac9
+RUN dnf install -y python3-dasbus python3-pyudev python3-systemd python3-gobject meson \
8b0ac9
+                   git gcc g++ cmake openssl-devel libuuid-devel json-c-devel swig python-devel meson && dnf clean all
8b0ac9
 
8b0ac9
 COPY . .
8b0ac9
-RUN meson .build && ninja -C .build && cd .build && meson install
8b0ac9
+RUN meson .build && ninja -C .build && meson install -C .build
8b0ac9
 
8b0ac9
 ENTRYPOINT ["python3"]
8b0ac9
diff --git a/NEWS.md b/NEWS.md
8b0ac9
index d1515cd..f56a7c9 100644
8b0ac9
--- a/NEWS.md
8b0ac9
+++ b/NEWS.md
8b0ac9
@@ -5,6 +5,7 @@
8b0ac9
 - Fix issues with I/O controller connection audits
8b0ac9
   - Eliminate pcie devices from list of I/O controller connections to audit
8b0ac9
   - Add soaking timer to workaround race condition between kernel and user-space applications on "add" uevents. When the kernel adds a new nvme device (e.g. `/dev/nvme7`) and sends a "add" uevent to notify user-space applications, the attributes associated with that device (e.g. `/sys/class/nvme/nvme7/cntrltype`) may not be fully initialized which can lead `stacd` to dismiss a device that should get audited. 
8b0ac9
+- Make `sticky-connections=enabled` the default (see `stacd.conf`) 
8b0ac9
 
8b0ac9
 ## Changes with release 1.1.5
8b0ac9
 
8b0ac9
@@ -32,7 +33,7 @@ stacd: Bug fix. Check that self._cfg_soak_tmr is not None before dereferencing i
8b0ac9
 
8b0ac9
 ## Changes with release 1.1.1
8b0ac9
 
8b0ac9
-Make `sticky-connections-disabled` by default
8b0ac9
+Make `sticky-connections=disabled` the default (see `stacd.conf`) 
8b0ac9
 
8b0ac9
 ## Changes with release 1.1
8b0ac9
 
8b0ac9
diff --git a/coverage.sh.in b/coverage.sh.in
8b0ac9
index 96b8c53..5ba2ebe 100755
8b0ac9
--- a/coverage.sh.in
8b0ac9
+++ b/coverage.sh.in
8b0ac9
@@ -38,14 +38,24 @@ PRIMARY_GRP=$( id -ng )
8b0ac9
 PRIMARY_USR=$( id -nu )
8b0ac9
 PYTHON_PATH=.:./subprojects/libnvme
8b0ac9
 
8b0ac9
+log() {
8b0ac9
+    msg="$1"
8b0ac9
+    printf "%b[1;36m%s%b[0m\n" "\0033" "${msg}" "\0033"
8b0ac9
+    sudo logger -i "@@@@@  COVERAGE -" -p 4 "${msg}"
8b0ac9
+}
8b0ac9
+
8b0ac9
 sd_stop() {
8b0ac9
-    unit="$1"-cov.service
8b0ac9
+    app="$1"
8b0ac9
+    unit="${app}"-cov.service
8b0ac9
+    log "Stop ${app}"
8b0ac9
     sudo systemctl stop "${unit}" >/dev/null 2>&1
8b0ac9
     sudo systemctl reset-failed "${unit}" >/dev/null 2>&1
8b0ac9
 }
8b0ac9
 
8b0ac9
 sd_restart() {
8b0ac9
-    unit="$1"-cov.service
8b0ac9
+    app="$1"
8b0ac9
+    unit="${app}"-cov.service
8b0ac9
+    log "Restart ${app}"
8b0ac9
     sudo systemctl restart "${unit}" >/dev/null 2>&1
8b0ac9
 }
8b0ac9
 
8b0ac9
@@ -61,7 +71,7 @@ sd_start() {
8b0ac9
         cmd="${app} --syslog -f ${conf}"
8b0ac9
     fi
8b0ac9
 
8b0ac9
-    printf "\n%b[1;36m%s%b[0m\n" "\0033" "Start ${app}" "\0033"
8b0ac9
+    log "Start ${app}"
8b0ac9
 
8b0ac9
     RUNTIME_DIRECTORY=/tmp/${app}
8b0ac9
     rm -rf ${RUNTIME_DIRECTORY}
8b0ac9
@@ -75,7 +85,7 @@ reload_cfg() {
8b0ac9
     app="$1"
8b0ac9
     unit="${app}"-cov.service
8b0ac9
     pid=$( systemctl show --property MainPID --value "${unit}" )
8b0ac9
-    printf "%b[1;36m%s%b[0m\n" "\0033" "Reload config ${app}" "\0033"
8b0ac9
+    log "Reload config ${app}"
8b0ac9
     sudo kill -HUP "${pid}"
8b0ac9
 }
8b0ac9
 
8b0ac9
@@ -83,15 +93,24 @@ if [ ! -d coverage ]; then
8b0ac9
     mkdir coverage
8b0ac9
 fi
8b0ac9
 
8b0ac9
+
8b0ac9
+log "START-START-START-START-START-START-START-START-START-START-START-START"
8b0ac9
+
8b0ac9
+
8b0ac9
+
8b0ac9
 ################################################################################
8b0ac9
 # Load nvme kernel module
8b0ac9
+log "modprobe nvme-tcp"
8b0ac9
 sudo /usr/sbin/modprobe nvme-tcp
8b0ac9
 
8b0ac9
+log "nvme disconnect-all"
8b0ac9
 sudo nvme disconnect-all
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Create a dummy config file for @STAFD_PROCNAME@
8b0ac9
-stafd_conf_fname=$(mktemp /tmp/@STAFD_PROCNAME@.conf.XXXXXX)
8b0ac9
+file=/tmp/@STAFD_PROCNAME@.conf.XXXXXX
8b0ac9
+log "Create dummy config file $file"
8b0ac9
+stafd_conf_fname=$(mktemp $file)
8b0ac9
 cat > "${stafd_conf_fname}" <<'EOF'
8b0ac9
 [Global]
8b0ac9
 tron=true
8b0ac9
@@ -102,7 +121,9 @@ EOF
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Create a dummy config file for @STACD_PROCNAME@
8b0ac9
-stacd_conf_fname=$(mktemp /tmp/@STACD_PROCNAME@.conf.XXXXXX)
8b0ac9
+file=/tmp/@STACD_PROCNAME@.conf.XXXXXX
8b0ac9
+log "Create dummy config file $file"
8b0ac9
+stacd_conf_fname=$(mktemp $file)
8b0ac9
 cat > "${stacd_conf_fname}" <<'EOF'
8b0ac9
 [Global]
8b0ac9
 tron=true
8b0ac9
@@ -111,6 +132,7 @@ udev-rule=disabled
8b0ac9
 sticky-connections=enabled
8b0ac9
 EOF
8b0ac9
 
8b0ac9
+log "Stop & Mask Avahi daemon"
8b0ac9
 sudo systemctl stop avahi-daemon.service
8b0ac9
 sudo systemctl stop avahi-daemon.socket
8b0ac9
 sudo systemctl mask avahi-daemon.service
8b0ac9
@@ -118,11 +140,11 @@ sudo systemctl mask avahi-daemon.socket
8b0ac9
 sleep 1
8b0ac9
 
8b0ac9
 
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ status while @STAFD_PROCNAME@ is not running" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ status while @STAFD_PROCNAME@ is not running"
8b0ac9
 coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ ls >/dev/null 2>&1
8b0ac9
 coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ invalid-command >/dev/null 2>&1
8b0ac9
 
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ status while @STACD_PROCNAME@ is not running" "\0033"
8b0ac9
+log "Invoking @STACD_CTLNAME@ status while @STACD_PROCNAME@ is not running"
8b0ac9
 coverage run --rcfile=.coveragerc @STACD_CTLNAME@ ls >/dev/null 2>&1
8b0ac9
 coverage run --rcfile=.coveragerc @STACD_CTLNAME@ invalid-command >/dev/null 2>&1
8b0ac9
 
8b0ac9
@@ -132,30 +154,33 @@ sd_start "@STAFD_PROCNAME@" "@STAFD_DBUS_NAME@" "${stafd_conf_fname}"
8b0ac9
 sd_start "@STACD_PROCNAME@" "@STACD_DBUS_NAME@" "${stacd_conf_fname}"
8b0ac9
 sleep 3
8b0ac9
 
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ status" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ status"
8b0ac9
 coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ status  >/dev/null 2>&1
8b0ac9
 
8b0ac9
 reload_cfg "@STAFD_PROCNAME@"
8b0ac9
 sleep 1
8b0ac9
 
8b0ac9
+log "Restart Avahi daemon"
8b0ac9
 sudo systemctl unmask avahi-daemon.socket
8b0ac9
 sudo systemctl unmask avahi-daemon.service
8b0ac9
 sudo systemctl start avahi-daemon.socket
8b0ac9
 sudo systemctl start avahi-daemon.service
8b0ac9
 sleep 2
8b0ac9
 
8b0ac9
+log "Change stafd config: tron=true, persistent-connections=false, zeroconf=enable"
8b0ac9
 cat > "${stafd_conf_fname}" <<'EOF'
8b0ac9
 [Global]
8b0ac9
 tron=true
8b0ac9
 persistent-connections=false
8b0ac9
 
8b0ac9
 [Service Discovery]
8b0ac9
-zeroconf=disabled
8b0ac9
+zeroconf=enabled
8b0ac9
 EOF
8b0ac9
 reload_cfg "@STAFD_PROCNAME@"
8b0ac9
 
8b0ac9
 sleep 1
8b0ac9
 
8b0ac9
+log "Change stafd config: ip-family=ipv4, kato=10, adding multiple controllers"
8b0ac9
 cat > "${stafd_conf_fname}" <<'EOF'
8b0ac9
 [Global]
8b0ac9
 tron=true
8b0ac9
@@ -172,11 +197,15 @@ controller=transport=tcp;traddr=abracadabra
8b0ac9
 controller=
8b0ac9
 controller=trsvcid
8b0ac9
 controller=transport=rdma;traddr=!@#$
8b0ac9
+controller=transport=fc;traddr=21:00:00:00:00:00:00:00;host-traddr=20:00:00:00:00:00:00:00
8b0ac9
+controller=transport=XM;traddr=2.2.2.2
8b0ac9
 blacklist=transport=tcp;traddr=1.1.1.1
8b0ac9
 blacklist=transport=tcp;traddr=1000.1000.1000.1000
8b0ac9
 EOF
8b0ac9
 reload_cfg "@STAFD_PROCNAME@"
8b0ac9
 
8b0ac9
+
8b0ac9
+log "Change stacd config: tron=true, udev-rule=disabled, sticky-connections=disabled"
8b0ac9
 cat > "${stacd_conf_fname}" <<'EOF'
8b0ac9
 [Global]
8b0ac9
 tron=true
8b0ac9
@@ -186,12 +215,12 @@ EOF
8b0ac9
 reload_cfg "@STACD_PROCNAME@"
8b0ac9
 sleep 3
8b0ac9
 
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ status" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ status"
8b0ac9
 coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ status  >/dev/null 2>&1
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Fake mDNS packets from a CDC
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Start Avahi publisher" "\0033"
8b0ac9
+log "Start Avahi publisher"
8b0ac9
 AVAHI_PUBLISHER=mdns_publisher.service
8b0ac9
 sudo systemctl stop ${AVAHI_PUBLISHER} >/dev/null 2>&1
8b0ac9
 sudo systemctl reset-failed ${AVAHI_PUBLISHER} >/dev/null 2>&1
8b0ac9
@@ -200,7 +229,7 @@ sleep 1
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Start nvme target simulator
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Start nvmet" "\0033"
8b0ac9
+log "Start nvmet"
8b0ac9
 sudo ../utils/nvmet/nvmet.py clean
8b0ac9
 sudo ../utils/nvmet/nvmet.py create -f ../utils/nvmet/nvmet.conf
8b0ac9
 sleep 2
8b0ac9
@@ -210,76 +239,76 @@ reload_cfg "@STACD_PROCNAME@"
8b0ac9
 sleep 3
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_PROCNAME@ --version" "\0033"
8b0ac9
+log "Invoking @STAFD_PROCNAME@ --version"
8b0ac9
 coverage run --rcfile=.coveragerc @STAFD_PROCNAME@ --version
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_PROCNAME@ --idl" "\0033"
8b0ac9
+log "Invoking @STAFD_PROCNAME@ --idl"
8b0ac9
 coverage run --rcfile=.coveragerc @STAFD_PROCNAME@ --idl /tmp/@STAFD_PROCNAME@.idl
8b0ac9
 
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_PROCNAME@ --version" "\0033"
8b0ac9
+log "Invoking @STACD_PROCNAME@ --version"
8b0ac9
 coverage run --rcfile=.coveragerc @STACD_PROCNAME@ --version
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_PROCNAME@ --idl" "\0033"
8b0ac9
+log "Invoking @STACD_PROCNAME@ --idl"
8b0ac9
 coverage run --rcfile=.coveragerc @STACD_PROCNAME@ --idl /tmp/@STACD_PROCNAME@.idl
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Stimulate D-Bus activity
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ --version" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ --version"
8b0ac9
 sudo coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ --version
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ with a bad command" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ with a bad command"
8b0ac9
 sudo coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ blah
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ troff" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ troff"
8b0ac9
 sudo coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ troff
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ status" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ status"
8b0ac9
 coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ status  >/dev/null 2>&1
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ tron" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ tron"
8b0ac9
 sudo coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ tron
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ ls" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ ls"
8b0ac9
 coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ ls -d   >/dev/null 2>&1
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ adlp" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ adlp"
8b0ac9
 coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ adlp -d >/dev/null 2>&1
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ dlp" "\0033"
8b0ac9
+log "Invoking @STAFD_CTLNAME@ dlp"
8b0ac9
 coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ dlp -t tcp -a ::1 -s 8009 >/dev/null 2>&1
8b0ac9
 
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ --version" "\0033"
8b0ac9
+log "Invoking @STACD_CTLNAME@ --version"
8b0ac9
 sudo coverage run --rcfile=.coveragerc @STACD_CTLNAME@ --version
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ with a bad command" "\0033"
8b0ac9
+log "Invoking @STACD_CTLNAME@ with a bad command"
8b0ac9
 sudo coverage run --rcfile=.coveragerc @STACD_CTLNAME@ blah
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ troff" "\0033"
8b0ac9
+log "Invoking @STACD_CTLNAME@ troff"
8b0ac9
 sudo coverage run --rcfile=.coveragerc @STACD_CTLNAME@ troff
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ status" "\0033"
8b0ac9
+log "Invoking @STACD_CTLNAME@ status"
8b0ac9
 coverage run --rcfile=.coveragerc @STACD_CTLNAME@ status >/dev/null 2>&1
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ tron" "\0033"
8b0ac9
+log "Invoking @STACD_CTLNAME@ tron"
8b0ac9
 sudo coverage run --rcfile=.coveragerc @STACD_CTLNAME@ tron
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ ls" "\0033"
8b0ac9
+log "Invoking @STACD_CTLNAME@ ls"
8b0ac9
 coverage run --rcfile=.coveragerc @STACD_CTLNAME@ ls -d >/dev/null 2>&1
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Stimulate AENs activity by removing/restoring namespaces
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Remove namespace: klingons" "\0033"
8b0ac9
+log "Remove namespace: klingons"
8b0ac9
 sudo ../utils/nvmet/nvmet.py unlink -p 1 -s klingons
8b0ac9
 sleep 2
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ ls" "\0033"
8b0ac9
+log "Invoking @STACD_CTLNAME@ ls"
8b0ac9
 coverage run --rcfile=.coveragerc @STACD_CTLNAME@ ls -d >/dev/null 2>&1
8b0ac9
 
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Restore namespace: klingons" "\0033"
8b0ac9
+log "Restore namespace: klingons"
8b0ac9
 sudo ../utils/nvmet/nvmet.py link -p 1 -s klingons
8b0ac9
 sleep 2
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ ls" "\0033"
8b0ac9
+log "Invoking @STACD_CTLNAME@ ls"
8b0ac9
 coverage run --rcfile=.coveragerc @STACD_CTLNAME@ ls -d >/dev/null 2>&1
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Stop Avahi Publisher
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Stop Avahi publisher" "\0033"
8b0ac9
+log "Stop Avahi publisher"
8b0ac9
 sudo systemctl stop ${AVAHI_PUBLISHER}
8b0ac9
 sleep 1
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Restart Avahi publisher" "\0033"
8b0ac9
+log "Restart Avahi publisher"
8b0ac9
 sudo systemd-run --unit=${AVAHI_PUBLISHER} --working-directory=. avahi-publish -s SFSS _nvme-disc._tcp 8009 "p=tcp"
8b0ac9
 sleep 2
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Make config changes for @STAFD_PROCNAME@
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Empty configuration and disable zeroconf for @STAFD_PROCNAME@" "\0033"
8b0ac9
+log "Empty configuration and disable zeroconf for @STAFD_PROCNAME@"
8b0ac9
 cat > "${stafd_conf_fname}" <<'EOF'
8b0ac9
 [Global]
8b0ac9
 tron=true
8b0ac9
@@ -293,7 +322,7 @@ sleep 1
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Make more config changes for @STAFD_PROCNAME@
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Add single controller (::1) and re-enable zeroconf for @STAFD_PROCNAME@" "\0033"
8b0ac9
+log "Add single controller (::1) and re-enable zeroconf for @STAFD_PROCNAME@"
8b0ac9
 cat > "${stafd_conf_fname}" <<'EOF'
8b0ac9
 [Global]
8b0ac9
 tron=true
8b0ac9
@@ -307,24 +336,23 @@ sleep 2
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Stop Avahi Publisher
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Stop Avahi publisher" "\0033"
8b0ac9
+log "Stop Avahi publisher"
8b0ac9
 sudo systemctl stop ${AVAHI_PUBLISHER}
8b0ac9
 sleep 2
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Remove one of the NVMe device's
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Remove (disconnect) nvme1" "\0033"
8b0ac9
+log "Remove (disconnect) nvme1"
8b0ac9
 sudo nvme disconnect -d nvme1
8b0ac9
 sleep 2
8b0ac9
 
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Restart @STAFD_PROCNAME@ and @STACD_PROCNAME@" "\0033"
8b0ac9
 sd_restart "@STAFD_PROCNAME@"
8b0ac9
 sd_restart "@STACD_PROCNAME@"
8b0ac9
 sleep 1
8b0ac9
 
8b0ac9
-printf "%b[1;36m%s%b[0m\n" "\0033" "Create invalid conditions for saving/loading @STAFD_PROCNAME@'s last known config" "\0033"
8b0ac9
+log "Create invalid conditions for saving/loading @STAFD_PROCNAME@'s last known config"
8b0ac9
 rm -rf "/tmp/@STAFD_PROCNAME@"
8b0ac9
 sd_stop "@STAFD_PROCNAME@"
8b0ac9
 sd_restart "@STACD_PROCNAME@"
8b0ac9
@@ -334,7 +362,7 @@ sleep 2
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Stop everything and collect coverage stats
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Stop @STAFD_PROCNAME@ and @STACD_PROCNAME@" "\0033"
8b0ac9
+log "Stop @STAFD_PROCNAME@ and @STACD_PROCNAME@"
8b0ac9
 sd_stop "@STAFD_PROCNAME@"
8b0ac9
 sd_stop "@STACD_PROCNAME@"
8b0ac9
 sleep 1
8b0ac9
@@ -345,33 +373,49 @@ sudo chown -R "${PRIMARY_USR}":"${PRIMARY_GRP}" coverage  >/dev/null 2>&1
8b0ac9
 sudo chown -R "${PRIMARY_USR}":"${PRIMARY_GRP}" staslib/__pycache__  >/dev/null 2>&1
8b0ac9
 sudo chown -R "${PRIMARY_USR}":"${PRIMARY_GRP}" subprojects/libnvme/libnvme/__pycache__  >/dev/null 2>&1
8b0ac9
 
8b0ac9
+log "nvme disconnect-all"
8b0ac9
 sudo nvme disconnect-all
8b0ac9
 
8b0ac9
+log "Remove ${stafd_conf_fname} and ${stacd_conf_fname}"
8b0ac9
 rm "${stafd_conf_fname}"
8b0ac9
 rm "${stacd_conf_fname}"
8b0ac9
 
8b0ac9
+log "Run unit test: test-udev"
8b0ac9
 PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-udev.py
8b0ac9
+log "Run unit test: test-avahi"
8b0ac9
 PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-avahi.py
8b0ac9
+log "Run unit test: test-gtimer"
8b0ac9
 PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-gtimer.py
8b0ac9
+log "Run unit test: test-version"
8b0ac9
 PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-version.py
8b0ac9
+log "Run unit test: test-transport_id"
8b0ac9
 PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-transport_id.py
8b0ac9
+log "Run unit test: test-config"
8b0ac9
 PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-config.py
8b0ac9
+log "Run unit test: test-controller"
8b0ac9
 PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-controller.py
8b0ac9
+log "Run unit test: test-service"
8b0ac9
 PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-service.py
8b0ac9
+log "Run unit test: test-log"
8b0ac9
 PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-log.py
8b0ac9
+log "Run unit test: test-nvme_options"
8b0ac9
 sudo PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-nvme_options.py
8b0ac9
 
8b0ac9
 ################################################################################
8b0ac9
 # Stop nvme target simulator
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Stop nvmet" "\0033"
8b0ac9
+log "Stop nvmet"
8b0ac9
 sudo ../utils/nvmet/nvmet.py clean
8b0ac9
 
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Collect all coverage data" "\0033"
8b0ac9
+log "Collect all coverage data"
8b0ac9
 coverage combine --rcfile=.coveragerc
8b0ac9
 
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Generating coverage report" "\0033"
8b0ac9
+log "Generating coverage report"
8b0ac9
 coverage report -i --rcfile=.coveragerc
8b0ac9
 
8b0ac9
-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Generating coverage report (HTML)" "\0033"
8b0ac9
+log "Generating coverage report (HTML)"
8b0ac9
 coverage html -i --rcfile=.coveragerc
8b0ac9
 
8b0ac9
+
8b0ac9
+log "All done!!!"
8b0ac9
+
8b0ac9
+log "FINISHED-FINISHED-FINISHED-FINISHED-FINISHED-FINISHED-FINISHED-FINISHED"
8b0ac9
diff --git a/doc/man/stacd.conf.xml b/doc/man/stacd.conf.xml
8b0ac9
index 60622f6..65ee71a 100644
8b0ac9
--- a/doc/man/stacd.conf.xml
8b0ac9
+++ b/doc/man/stacd.conf.xml
8b0ac9
@@ -378,7 +378,7 @@
8b0ac9
                             entries in <filename>stacd.conf</filename> have been removed.
8b0ac9
                         </para>
8b0ac9
 
8b0ac9
-                        <formalpara><title>With sticky-connections=disabled (default)</title>
8b0ac9
+                        <formalpara><title>With sticky-connections=disabled</title>
8b0ac9
                             <para>
8b0ac9
                                 stacd immediately disconnects from
8b0ac9
                                 a previously connected IOC if the response to a
8b0ac9
@@ -411,7 +411,7 @@
8b0ac9
                             </formalpara>
8b0ac9
                         </formalpara>
8b0ac9
 
8b0ac9
-                        <formalpara><title>With sticky-connections=enabled</title>
8b0ac9
+                        <formalpara><title>With sticky-connections=enabled (default)</title>
8b0ac9
                             <para>
8b0ac9
                                 stacd does not disconnect from IOCs
8b0ac9
                                 when a DPLE is removed or a <literal>controller=</literal>
8b0ac9
diff --git a/etc/stas/stacd.conf b/etc/stas/stacd.conf
8b0ac9
index 02e7b3e..0434671 100644
8b0ac9
--- a/etc/stas/stacd.conf
8b0ac9
+++ b/etc/stas/stacd.conf
8b0ac9
@@ -202,8 +202,8 @@
8b0ac9
 #
8b0ac9
 #                     Type:    String
8b0ac9
 #                     Range:   [disabled, enabled]
8b0ac9
-#                     Default: disabled
8b0ac9
-#sticky-connections=disabled
8b0ac9
+#                     Default: enabled
8b0ac9
+#sticky-connections=enabled
8b0ac9
 
8b0ac9
 [Controllers]
8b0ac9
 # controller: I/O Controllers (IOC) are specified with this keyword.
8b0ac9
diff --git a/stacd.py b/stacd.py
8b0ac9
index 708e372..28cefac 100755
8b0ac9
--- a/stacd.py
8b0ac9
+++ b/stacd.py
8b0ac9
@@ -10,14 +10,12 @@
8b0ac9
 ''' STorage Appliance Connector Daemon
8b0ac9
 '''
8b0ac9
 import sys
8b0ac9
-import logging
8b0ac9
 from argparse import ArgumentParser
8b0ac9
 from staslib import defs
8b0ac9
 
8b0ac9
-# pylint: disable=consider-using-f-string
8b0ac9
-DBUS_IDL = '''
8b0ac9
+DBUS_IDL = f'''
8b0ac9
 <node>
8b0ac9
-    <interface name="%s.debug">
8b0ac9
+    <interface name="{defs.STACD_DBUS_NAME}.debug">
8b0ac9
         <property name="tron" type="b" access="readwrite"/>
8b0ac9
         <property name="log_level" type="s" access="read"/>
8b0ac9
         <method name="process_info">
8b0ac9
@@ -34,19 +32,16 @@ DBUS_IDL = '''
8b0ac9
         </method>
8b0ac9
     </interface>
8b0ac9
 
8b0ac9
-    <interface name="%s">
8b0ac9
+    <interface name="{defs.STACD_DBUS_NAME}">
8b0ac9
         <method name="list_controllers">
8b0ac9
             <arg direction="in" type="b" name="detailed"/>
8b0ac9
-            <arg direction="out" type="aa{ss}" name="controller_list"/>
8b0ac9
+            <arg direction="out" type="aa{{ss}}" name="controller_list"/>
8b0ac9
         </method>
8b0ac9
     </interface>
8b0ac9
 </node>
8b0ac9
-''' % (
8b0ac9
-    defs.STACD_DBUS_NAME,
8b0ac9
-    defs.STACD_DBUS_NAME,
8b0ac9
-)
8b0ac9
-
8b0ac9
+'''
8b0ac9
 
8b0ac9
+# ******************************************************************************
8b0ac9
 def parse_args(conf_file: str):  # pylint: disable=missing-function-docstring
8b0ac9
     parser = ArgumentParser(
8b0ac9
         description=f'{defs.STAC_DESCRIPTION} ({defs.STAC_ACRONYM}). Must be root to run this program.'
8b0ac9
@@ -77,6 +72,12 @@ ARGS = parse_args(defs.STACD_CONFIG_FILE)
8b0ac9
 
8b0ac9
 if ARGS.version:
8b0ac9
     print(f'{defs.PROJECT_NAME} {defs.VERSION}')
8b0ac9
+    try:
8b0ac9
+        import libnvme
8b0ac9
+
8b0ac9
+        print(f'libnvme {libnvme.__version__}')
8b0ac9
+    except (AttributeError, ModuleNotFoundError):
8b0ac9
+        pass
8b0ac9
     sys.exit(0)
8b0ac9
 
8b0ac9
 if ARGS.idl:
8b0ac9
@@ -85,78 +86,14 @@ if ARGS.idl:
8b0ac9
     sys.exit(0)
8b0ac9
 
8b0ac9
 
8b0ac9
-# There is a reason for having this import here and not at the top of the file.
8b0ac9
-# We want to allow running stafd with the --version and --idl options and exit
8b0ac9
-# without having to import stas.
8b0ac9
-from staslib import stas  # pylint: disable=wrong-import-position
8b0ac9
-
8b0ac9
-# Before going any further, make sure the script is allowed to run.
8b0ac9
-stas.check_if_allowed_to_continue()
8b0ac9
-
8b0ac9
-
8b0ac9
-################################################################################
8b0ac9
-# Preliminary checks have passed. Let her rip!
8b0ac9
-# pylint: disable=wrong-import-position
8b0ac9
-# pylint: disable=wrong-import-order
8b0ac9
-import json
8b0ac9
-import pathlib
8b0ac9
-import systemd.daemon
8b0ac9
-import dasbus.error
8b0ac9
-import dasbus.client.observer
8b0ac9
-import dasbus.client.proxy
8b0ac9
-from gi.repository import GLib
8b0ac9
-from staslib import conf, log, gutil, trid, udev, ctrl, service  # pylint: disable=ungrouped-imports
8b0ac9
-
8b0ac9
-log.init(ARGS.syslog)
8b0ac9
-
8b0ac9
-UDEV_RULE_SUPPRESS = pathlib.Path('/run/udev/rules.d', '70-nvmf-autoconnect.rules')
8b0ac9
-
8b0ac9
-
8b0ac9
-def udev_rule_ctrl(enable):
8b0ac9
-    '''@brief We add an empty udev rule to /run/udev/rules.d to suppress
8b0ac9
-    nvme-cli's udev rule that is used to tell udevd to automatically
8b0ac9
-    connect to I/O controller. This is to avoid race conditions between
8b0ac9
-    stacd and udevd. This is configurable. See "udev-rule" in stacd.conf
8b0ac9
-    for details.
8b0ac9
-    '''
8b0ac9
-    if enable:
8b0ac9
-        try:
8b0ac9
-            UDEV_RULE_SUPPRESS.unlink()
8b0ac9
-        except FileNotFoundError:
8b0ac9
-            pass
8b0ac9
-    else:
8b0ac9
-        if not UDEV_RULE_SUPPRESS.exists():
8b0ac9
-            pathlib.Path('/run/udev/rules.d').mkdir(parents=True, exist_ok=True)
8b0ac9
-            UDEV_RULE_SUPPRESS.symlink_to('/dev/null')
8b0ac9
-
8b0ac9
-
8b0ac9
 # ******************************************************************************
8b0ac9
-class Ioc(ctrl.Controller):
8b0ac9
-    '''@brief This object establishes a connection to one I/O Controller.'''
8b0ac9
-
8b0ac9
-    def __init__(self, root, host, tid: trid.TID):
8b0ac9
-        super().__init__(root, host, tid)
8b0ac9
-
8b0ac9
-    def _on_udev_remove(self, udev_obj):
8b0ac9
-        '''Called when the associated nvme device (/dev/nvmeX) is removed
8b0ac9
-        from the system.
8b0ac9
-        '''
8b0ac9
-        super()._on_udev_remove(udev_obj)
8b0ac9
-
8b0ac9
-        # Defer removal of this object to the next main loop's idle period.
8b0ac9
-        GLib.idle_add(STAC.remove_controller, self)
8b0ac9
-
8b0ac9
-    def _find_existing_connection(self):
8b0ac9
-        return self._udev.find_nvme_ioc_device(self.tid)
8b0ac9
-
8b0ac9
-
8b0ac9
-# ******************************************************************************
8b0ac9
-class Stac(service.Service):
8b0ac9
-    '''STorage Appliance Connector (STAC)'''
8b0ac9
+if __name__ == '__main__':
8b0ac9
+    import json
8b0ac9
+    import logging
8b0ac9
+    from staslib import log, service, stas, udev  # pylint: disable=ungrouped-imports
8b0ac9
 
8b0ac9
-    CONF_STABILITY_SOAK_TIME_SEC = 1.5
8b0ac9
-    CONF_STABILITY_LONG_SOAK_TIME_SEC = 10  # pylint: disable=invalid-name
8b0ac9
-    ADD_EVENT_SOAK_TIME_SEC = 1
8b0ac9
+    # Before going any further, make sure the script is allowed to run.
8b0ac9
+    stas.check_if_allowed_to_continue()
8b0ac9
 
8b0ac9
     class Dbus:
8b0ac9
         '''This is the DBus interface that external programs can use to
8b0ac9
@@ -205,229 +142,8 @@ class Stac(service.Service):
8b0ac9
                 for controller in STAC.get_controllers()
8b0ac9
             ]
8b0ac9
 
8b0ac9
-    # ==========================================================================
8b0ac9
-    def __init__(self, args):
8b0ac9
-        super().__init__(args, self._reload_hdlr)
8b0ac9
-
8b0ac9
-        # We don't want to apply configuration changes to nvme-cli right away.
8b0ac9
-        # Often, multiple changes will occur in a short amount of time (sub-second).
8b0ac9
-        # We want to wait until there are no more changes before applying them
8b0ac9
-        # to the system. The following timer acts as a "soak period". Changes
8b0ac9
-        # will be applied by calling self._on_config_ctrls() at the end of
8b0ac9
-        # the soak period.
8b0ac9
-        self._cfg_soak_tmr = gutil.GTimer(Stac.CONF_STABILITY_SOAK_TIME_SEC, self._on_config_ctrls)
8b0ac9
-        self._cfg_soak_tmr.start()
8b0ac9
-
8b0ac9
-        self._add_event_soak_tmr = gutil.GTimer(Stac.ADD_EVENT_SOAK_TIME_SEC, self._on_add_event_soaked)
8b0ac9
-
8b0ac9
-        self._config_connections_audit()
8b0ac9
-
8b0ac9
-        # Create the D-Bus instance.
8b0ac9
-        self._config_dbus(Stac.Dbus(), defs.STACD_DBUS_NAME, defs.STACD_DBUS_PATH)
8b0ac9
-
8b0ac9
-        # Connect to STAF D-Bus interface
8b0ac9
-        self._staf = None
8b0ac9
-        self._staf_watcher = dasbus.client.observer.DBusObserver(self._sysbus, defs.STAFD_DBUS_NAME)
8b0ac9
-        self._staf_watcher.service_available.connect(self._connect_to_staf)
8b0ac9
-        self._staf_watcher.service_unavailable.connect(self._disconnect_from_staf)
8b0ac9
-        self._staf_watcher.connect_once_available()
8b0ac9
-
8b0ac9
-        # Suppress udev rule to auto-connect when AEN is received.
8b0ac9
-        udev_rule_ctrl(conf.SvcConf().udev_rule_enabled)
8b0ac9
-
8b0ac9
-    def _release_resources(self):
8b0ac9
-        logging.debug('Stac._release_resources()')
8b0ac9
-
8b0ac9
-        if self._add_event_soak_tmr:
8b0ac9
-            self._add_event_soak_tmr.kill()
8b0ac9
-
8b0ac9
-        udev_rule_ctrl(True)
8b0ac9
-
8b0ac9
-        if self._udev:
8b0ac9
-            self._udev.unregister_for_action_events('add')
8b0ac9
-
8b0ac9
-        self._destroy_staf_comlink(self._staf_watcher)
8b0ac9
-        if self._staf_watcher is not None:
8b0ac9
-            self._staf_watcher.disconnect()
8b0ac9
-
8b0ac9
-        super()._release_resources()
8b0ac9
-
8b0ac9
-        self._staf = None
8b0ac9
-        self._staf_watcher = None
8b0ac9
-        self._add_event_soak_tmr = None
8b0ac9
-
8b0ac9
-    def _audit_connections(self, tids):
8b0ac9
-        '''A host should only connect to I/O controllers that have been zoned
8b0ac9
-        for that host or a manual "controller" entry exists in stcd.conf.
8b0ac9
-        A host should disconnect from an I/O controller when that I/O controller
8b0ac9
-        is removed from the zone or a manual "controller" entry is removed from
8b0ac9
-        stacd.conf. stacd will audit connections if "sticky-connections=disabled".
8b0ac9
-        stacd will delete any connection that is not supposed to exist.
8b0ac9
-        '''
8b0ac9
-        logging.debug('Stac._audit_connections()          - tids = %s', tids)
8b0ac9
-        num_controllers = len(self._controllers)
8b0ac9
-        for tid in tids:
8b0ac9
-            if tid not in self._controllers:
8b0ac9
-                self._controllers[tid] = Ioc(self._root, self._host, tid)
8b0ac9
-
8b0ac9
-        if num_controllers != len(self._controllers):
8b0ac9
-            self._cfg_soak_tmr.start(Stac.CONF_STABILITY_SOAK_TIME_SEC)
8b0ac9
-
8b0ac9
-    def _on_add_event(self, udev_obj):  # pylint: disable=unused-argument
8b0ac9
-        '''@brief This function is called when a "add" event is received from
8b0ac9
-        the kernel for an NVMe device. This is used to trigger an audit and make
8b0ac9
-        sure that the connection to an I/O controller is allowed.
8b0ac9
-
8b0ac9
-        WARNING: There is a race condition with the "add" event from the kernel.
8b0ac9
-        The kernel sends the "add" event a bit early and the sysfs attributes
8b0ac9
-        associated with the nvme object are not always fully initialized.
8b0ac9
-        To workaround this problem we use a soaking timer to give time for the
8b0ac9
-        sysfs attributes to stabilize.
8b0ac9
-        '''
8b0ac9
-        self._add_event_soak_tmr.start()
8b0ac9
-
8b0ac9
-    def _on_add_event_soaked(self):
8b0ac9
-        '''@brief After the add event has been soaking for ADD_EVENT_SOAK_TIME_SEC
8b0ac9
-        seconds, we can audit the connections.
8b0ac9
-        '''
8b0ac9
-        if not conf.SvcConf().sticky_connections:
8b0ac9
-            self._audit_connections(self._udev.get_nvme_ioc_tids())
8b0ac9
-        return GLib.SOURCE_REMOVE
8b0ac9
-
8b0ac9
-    def _config_connections_audit(self):
8b0ac9
-        '''This function checks the "sticky_connections" parameter to determine
8b0ac9
-        whether audits should be performed. Audits are enabled when
8b0ac9
-        "sticky_connections" is disabled.
8b0ac9
-        '''
8b0ac9
-        if not conf.SvcConf().sticky_connections:
8b0ac9
-            if self._udev.get_registered_action_cback('add') is None:
8b0ac9
-                self._udev.register_for_action_events('add', self._on_add_event)
8b0ac9
-                self._audit_connections(self._udev.get_nvme_ioc_tids())
8b0ac9
-        else:
8b0ac9
-            self._udev.unregister_for_action_events('add')
8b0ac9
-
8b0ac9
-    def _keep_connections_on_exit(self):
8b0ac9
-        '''@brief Determine whether connections should remain when the
8b0ac9
-        process exits.
8b0ac9
-        '''
8b0ac9
-        return True
8b0ac9
-
8b0ac9
-    def _reload_hdlr(self):
8b0ac9
-        '''@brief Reload configuration file. This is triggered by the SIGHUP
8b0ac9
-        signal, which can be sent with "systemctl reload stacd".
8b0ac9
-        '''
8b0ac9
-        systemd.daemon.notify('RELOADING=1')
8b0ac9
-        service_cnf = conf.SvcConf()
8b0ac9
-        service_cnf.reload()
8b0ac9
-        self.tron = service_cnf.tron
8b0ac9
-        self._config_connections_audit()
8b0ac9
-        self._cfg_soak_tmr.start(Stac.CONF_STABILITY_SOAK_TIME_SEC)
8b0ac9
-        udev_rule_ctrl(service_cnf.udev_rule_enabled)
8b0ac9
-        systemd.daemon.notify('READY=1')
8b0ac9
-        return GLib.SOURCE_CONTINUE
8b0ac9
-
8b0ac9
-    def _get_log_pages_from_stafd(self):
8b0ac9
-        if self._staf:
8b0ac9
-            try:
8b0ac9
-                return json.loads(self._staf.get_all_log_pages(True))
8b0ac9
-            except dasbus.error.DBusError:
8b0ac9
-                pass
8b0ac9
-
8b0ac9
-        return list()
8b0ac9
-
8b0ac9
-    def _config_ctrls_finish(self, configured_ctrl_list):
8b0ac9
-        configured_ctrl_list = [
8b0ac9
-            ctrl_dict for ctrl_dict in configured_ctrl_list if 'traddr' in ctrl_dict and 'subsysnqn' in ctrl_dict
8b0ac9
-        ]
8b0ac9
-        logging.debug('Stac._config_ctrls_finish()        - configured_ctrl_list = %s', configured_ctrl_list)
8b0ac9
-
8b0ac9
-        discovered_ctrl_list = list()
8b0ac9
-        for staf_data in self._get_log_pages_from_stafd():
8b0ac9
-            host_traddr = staf_data['discovery-controller']['host-traddr']
8b0ac9
-            host_iface = staf_data['discovery-controller']['host-iface']
8b0ac9
-            for dlpe in staf_data['log-pages']:
8b0ac9
-                if dlpe.get('subtype') == 'nvme':  # eliminate discovery controllers
8b0ac9
-                    discovered_ctrl_list.append(stas.cid_from_dlpe(dlpe, host_traddr, host_iface))
8b0ac9
-
8b0ac9
-        logging.debug('Stac._config_ctrls_finish()        - discovered_ctrl_list = %s', discovered_ctrl_list)
8b0ac9
-
8b0ac9
-        controllers = stas.remove_blacklisted(configured_ctrl_list + discovered_ctrl_list)
8b0ac9
-        controllers = stas.remove_invalid_addresses(controllers)
8b0ac9
-
8b0ac9
-        new_controller_ids = {trid.TID(controller) for controller in controllers}
8b0ac9
-        cur_controller_ids = set(self._controllers.keys())
8b0ac9
-        controllers_to_add = new_controller_ids - cur_controller_ids
8b0ac9
-        controllers_to_del = cur_controller_ids - new_controller_ids
8b0ac9
-
8b0ac9
-        logging.debug('Stac._config_ctrls_finish()        - controllers_to_add   = %s', list(controllers_to_add))
8b0ac9
-        logging.debug('Stac._config_ctrls_finish()        - controllers_to_del   = %s', list(controllers_to_del))
8b0ac9
-
8b0ac9
-        for tid in controllers_to_del:
8b0ac9
-            controller = self._controllers.pop(tid, None)
8b0ac9
-            if controller is not None:
8b0ac9
-                controller.disconnect(self.remove_controller, conf.SvcConf().sticky_connections)
8b0ac9
-
8b0ac9
-        for tid in controllers_to_add:
8b0ac9
-            self._controllers[tid] = Ioc(self._root, self._host, tid)
8b0ac9
-
8b0ac9
-    def _connect_to_staf(self, _):
8b0ac9
-        '''@brief Hook up DBus signal handlers for signals from stafd.'''
8b0ac9
-        try:
8b0ac9
-            self._staf = self._sysbus.get_proxy(defs.STAFD_DBUS_NAME, defs.STAFD_DBUS_PATH)
8b0ac9
-            self._staf.log_pages_changed.connect(self._log_pages_changed)
8b0ac9
-            self._cfg_soak_tmr.start()
8b0ac9
-
8b0ac9
-            # Make sure timer is set back to its normal value.
8b0ac9
-            self._cfg_soak_tmr.set_timeout(Stac.CONF_STABILITY_SOAK_TIME_SEC)
8b0ac9
-            logging.debug('Stac._connect_to_staf()            - Connected to staf')
8b0ac9
-        except dasbus.error.DBusError:
8b0ac9
-            logging.error('Failed to connect to staf')
8b0ac9
-
8b0ac9
-    def _destroy_staf_comlink(self, watcher):  # pylint: disable=unused-argument
8b0ac9
-        if self._staf:
8b0ac9
-            self._staf.log_pages_changed.disconnect(self._log_pages_changed)
8b0ac9
-            dasbus.client.proxy.disconnect_proxy(self._staf)
8b0ac9
-            self._staf = None
8b0ac9
-
8b0ac9
-    def _disconnect_from_staf(self, watcher):
8b0ac9
-        self._destroy_staf_comlink(watcher)
8b0ac9
-
8b0ac9
-        # When we lose connectivity with stafd, the most logical explanation
8b0ac9
-        # is that stafd restarted. In that case, it may take some time for stafd
8b0ac9
-        # to re-populate its log pages cache. So let's give stafd plenty of time
8b0ac9
-        # to update its log pages cache and send log pages change notifications
8b0ac9
-        # before triggering a stacd re-config. We do this by momentarily
8b0ac9
-        # increasing the config soak timer to a longer period.
8b0ac9
-        if self._cfg_soak_tmr:
8b0ac9
-            self._cfg_soak_tmr.set_timeout(Stac.CONF_STABILITY_LONG_SOAK_TIME_SEC)
8b0ac9
-
8b0ac9
-        logging.debug('Stac._disconnect_from_staf()       - Disconnected from staf')
8b0ac9
-
8b0ac9
-    def _log_pages_changed(  # pylint: disable=too-many-arguments
8b0ac9
-        self, transport, traddr, trsvcid, host_traddr, host_iface, subsysnqn, device
8b0ac9
-    ):
8b0ac9
-        logging.debug(
8b0ac9
-            'Stac._log_pages_changed()          - transport=%s, traddr=%s, trsvcid=%s, host_traddr=%s, host_iface=%s, subsysnqn=%s, device=%s',
8b0ac9
-            transport,
8b0ac9
-            traddr,
8b0ac9
-            trsvcid,
8b0ac9
-            host_traddr,
8b0ac9
-            host_iface,
8b0ac9
-            subsysnqn,
8b0ac9
-            device,
8b0ac9
-        )
8b0ac9
-        self._cfg_soak_tmr.start(Stac.CONF_STABILITY_SOAK_TIME_SEC)
8b0ac9
-
8b0ac9
-    def _load_last_known_config(self):
8b0ac9
-        return dict()
8b0ac9
-
8b0ac9
-    def _dump_last_known_config(self, controllers):
8b0ac9
-        pass
8b0ac9
-
8b0ac9
-
8b0ac9
-# ******************************************************************************
8b0ac9
-if __name__ == '__main__':
8b0ac9
-    STAC = Stac(ARGS)
8b0ac9
+    log.init(ARGS.syslog)
8b0ac9
+    STAC = service.Stac(ARGS, Dbus())
8b0ac9
     STAC.run()
8b0ac9
 
8b0ac9
     STAC = None
8b0ac9
diff --git a/stafd.py b/stafd.py
8b0ac9
index aff64fd..8a77c51 100755
8b0ac9
--- a/stafd.py
8b0ac9
+++ b/stafd.py
8b0ac9
@@ -10,14 +10,12 @@
8b0ac9
 ''' STorage Appliance Finder Daemon
8b0ac9
 '''
8b0ac9
 import sys
8b0ac9
-import logging
8b0ac9
 from argparse import ArgumentParser
8b0ac9
 from staslib import defs
8b0ac9
 
8b0ac9
-# pylint: disable=consider-using-f-string
8b0ac9
-DBUS_IDL = '''
8b0ac9
+DBUS_IDL = f'''
8b0ac9
 <node>
8b0ac9
-    <interface name="%s.debug">
8b0ac9
+    <interface name="{defs.STAFD_DBUS_NAME}.debug">
8b0ac9
         <property name="tron" type="b" access="readwrite"/>
8b0ac9
         <property name="log_level" type="s" access="read"/>
8b0ac9
         <method name="process_info">
8b0ac9
@@ -34,10 +32,10 @@ DBUS_IDL = '''
8b0ac9
         </method>
8b0ac9
     </interface>
8b0ac9
 
8b0ac9
-    <interface name="%s">
8b0ac9
+    <interface name="{defs.STAFD_DBUS_NAME}">
8b0ac9
         <method name="list_controllers">
8b0ac9
             <arg direction="in" type="b" name="detailed"/>
8b0ac9
-            <arg direction="out" type="aa{ss}" name="controller_list"/>
8b0ac9
+            <arg direction="out" type="aa{{ss}}" name="controller_list"/>
8b0ac9
         </method>
8b0ac9
         <method name="get_log_pages">
8b0ac9
             <arg direction="in" type="s" name="transport"/>
8b0ac9
@@ -46,7 +44,7 @@ DBUS_IDL = '''
8b0ac9
             <arg direction="in" type="s" name="host_traddr"/>
8b0ac9
             <arg direction="in" type="s" name="host_iface"/>
8b0ac9
             <arg direction="in" type="s" name="subsysnqn"/>
8b0ac9
-            <arg direction="out" type="aa{ss}" name="log_pages"/>
8b0ac9
+            <arg direction="out" type="aa{{ss}}" name="log_pages"/>
8b0ac9
         </method>
8b0ac9
         <method name="get_all_log_pages">
8b0ac9
             <arg direction="in" type="b" name="detailed"/>
8b0ac9
@@ -63,12 +61,10 @@ DBUS_IDL = '''
8b0ac9
         </signal>
8b0ac9
     </interface>
8b0ac9
 </node>
8b0ac9
-''' % (
8b0ac9
-    defs.STAFD_DBUS_NAME,
8b0ac9
-    defs.STAFD_DBUS_NAME,
8b0ac9
-)
8b0ac9
+'''
8b0ac9
 
8b0ac9
 
8b0ac9
+# ******************************************************************************
8b0ac9
 def parse_args(conf_file: str):  # pylint: disable=missing-function-docstring
8b0ac9
     parser = ArgumentParser(
8b0ac9
         description=f'{defs.STAF_DESCRIPTION} ({defs.STAF_ACRONYM}). Must be root to run this program.'
8b0ac9
@@ -99,6 +95,12 @@ ARGS = parse_args(defs.STAFD_CONFIG_FILE)
8b0ac9
 
8b0ac9
 if ARGS.version:
8b0ac9
     print(f'{defs.PROJECT_NAME} {defs.VERSION}')
8b0ac9
+    try:
8b0ac9
+        import libnvme
8b0ac9
+
8b0ac9
+        print(f'libnvme {libnvme.__version__}')
8b0ac9
+    except (AttributeError, ModuleNotFoundError):
8b0ac9
+        pass
8b0ac9
     sys.exit(0)
8b0ac9
 
8b0ac9
 if ARGS.idl:
8b0ac9
@@ -107,250 +109,15 @@ if ARGS.idl:
8b0ac9
     sys.exit(0)
8b0ac9
 
8b0ac9
 
8b0ac9
-# There is a reason for having this import here and not at the top of the file.
8b0ac9
-# We want to allow running stafd with the --version and --idl options and exit
8b0ac9
-# without having to import stas and avahi.
8b0ac9
-from staslib import stas, avahi  # pylint: disable=wrong-import-position
8b0ac9
-
8b0ac9
-# Before going any further, make sure the script is allowed to run.
8b0ac9
-stas.check_if_allowed_to_continue()
8b0ac9
-
8b0ac9
-
8b0ac9
-################################################################################
8b0ac9
-# Preliminary checks have passed. Let her rip!
8b0ac9
-# pylint: disable=wrong-import-position
8b0ac9
-# pylint: disable=wrong-import-order
8b0ac9
-import json
8b0ac9
-import pickle
8b0ac9
-import dasbus.server.interface
8b0ac9
-import systemd.daemon
8b0ac9
-from libnvme import nvme
8b0ac9
-from gi.repository import GLib
8b0ac9
-from staslib import conf, log, gutil, trid, udev, ctrl, service  # pylint: disable=ungrouped-imports
8b0ac9
-
8b0ac9
-log.init(ARGS.syslog)
8b0ac9
-
8b0ac9
-DLP_CHANGED = (
8b0ac9
-    (nvme.NVME_LOG_LID_DISCOVER << 16) | (nvme.NVME_AER_NOTICE_DISC_CHANGED << 8) | nvme.NVME_AER_NOTICE
8b0ac9
-)  # 0x70f002
8b0ac9
-
8b0ac9
-
8b0ac9
 # ******************************************************************************
8b0ac9
-class Dc(ctrl.Controller):
8b0ac9
-    '''@brief This object establishes a connection to one Discover Controller (DC).
8b0ac9
-    It retrieves the discovery log pages and caches them.
8b0ac9
-    It also monitors udev events associated with that DC and updates
8b0ac9
-    the cached discovery log pages accordingly.
8b0ac9
-    '''
8b0ac9
-
8b0ac9
-    GET_LOG_PAGE_RETRY_RERIOD_SEC = 20
8b0ac9
-    REGISTRATION_RETRY_RERIOD_SEC = 10
8b0ac9
-
8b0ac9
-    def __init__(self, root, host, tid: trid.TID, log_pages=None):
8b0ac9
-        super().__init__(root, host, tid, discovery_ctrl=True)
8b0ac9
-        self._register_op = None
8b0ac9
-        self._get_log_op = None
8b0ac9
-        self._log_pages = log_pages if log_pages else list()  # Log pages cache
8b0ac9
-
8b0ac9
-    def _release_resources(self):
8b0ac9
-        logging.debug('Dc._release_resources()            - %s | %s', self.id, self.device)
8b0ac9
-        super()._release_resources()
8b0ac9
-        self._log_pages = list()
8b0ac9
-
8b0ac9
-    def _kill_ops(self):
8b0ac9
-        super()._kill_ops()
8b0ac9
-        if self._get_log_op:
8b0ac9
-            self._get_log_op.kill()
8b0ac9
-            self._get_log_op = None
8b0ac9
-        if self._register_op:
8b0ac9
-            self._register_op.kill()
8b0ac9
-            self._register_op = None
8b0ac9
-
8b0ac9
-    def info(self) -> dict:
8b0ac9
-        '''@brief Get the controller info for this object'''
8b0ac9
-        info = super().info()
8b0ac9
-        if self._get_log_op:
8b0ac9
-            info['get log page operation'] = self._get_log_op.as_dict()
8b0ac9
-        if self._register_op:
8b0ac9
-            info['register operation'] = self._register_op.as_dict()
8b0ac9
-        return info
8b0ac9
-
8b0ac9
-    def cancel(self):
8b0ac9
-        '''@brief Used to cancel pending operations.'''
8b0ac9
-        super().cancel()
8b0ac9
-        if self._get_log_op:
8b0ac9
-            self._get_log_op.cancel()
8b0ac9
-        if self._register_op:
8b0ac9
-            self._register_op.cancel()
8b0ac9
-
8b0ac9
-    def log_pages(self) -> list:
8b0ac9
-        '''@brief Get the cached log pages for this object'''
8b0ac9
-        return self._log_pages
8b0ac9
-
8b0ac9
-    def referrals(self) -> list:
8b0ac9
-        '''@brief Return the list of referrals'''
8b0ac9
-        return [page for page in self._log_pages if page['subtype'] == 'referral']
8b0ac9
-
8b0ac9
-    def _on_aen(self, aen: int):
8b0ac9
-        super()._on_aen(aen)
8b0ac9
-        if aen == DLP_CHANGED and self._get_log_op:
8b0ac9
-            self._get_log_op.run_async()
8b0ac9
-
8b0ac9
-    def _on_nvme_event(self, nvme_event: str):
8b0ac9
-        super()._on_nvme_event(nvme_event)
8b0ac9
-        if nvme_event == 'connected' and self._register_op:
8b0ac9
-            self._register_op.run_async()
8b0ac9
-
8b0ac9
-    def _on_udev_remove(self, udev_obj):
8b0ac9
-        super()._on_udev_remove(udev_obj)
8b0ac9
-        if self._try_to_connect_deferred:
8b0ac9
-            self._try_to_connect_deferred.schedule()
8b0ac9
-
8b0ac9
-    def _find_existing_connection(self):
8b0ac9
-        return self._udev.find_nvme_dc_device(self.tid)
8b0ac9
-
8b0ac9
-    # --------------------------------------------------------------------------
8b0ac9
-    def _on_connect_success(self, op_obj, data):
8b0ac9
-        '''@brief Function called when we successfully connect to the
8b0ac9
-        Discovery Controller.
8b0ac9
-        '''
8b0ac9
-        super()._on_connect_success(op_obj, data)
8b0ac9
-
8b0ac9
-        if self._alive():
8b0ac9
-            if self._ctrl.is_registration_supported():
8b0ac9
-                self._register_op = gutil.AsyncOperationWithRetry(
8b0ac9
-                    self._on_registration_success,
8b0ac9
-                    self._on_registration_fail,
8b0ac9
-                    self._ctrl.registration_ctlr,
8b0ac9
-                    nvme.NVMF_DIM_TAS_REGISTER,
8b0ac9
-                )
8b0ac9
-                self._register_op.run_async()
8b0ac9
-            else:
8b0ac9
-                self._get_log_op = gutil.AsyncOperationWithRetry(
8b0ac9
-                    self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover
8b0ac9
-                )
8b0ac9
-                self._get_log_op.run_async()
8b0ac9
-
8b0ac9
-    # --------------------------------------------------------------------------
8b0ac9
-    def _on_registration_success(self, op_obj, data):  # pylint: disable=unused-argument
8b0ac9
-        '''@brief Function called when we successfully register with the
8b0ac9
-        Discovery Controller. See self._register_op object
8b0ac9
-        for details.
8b0ac9
-        '''
8b0ac9
-        if self._alive():
8b0ac9
-            if data is not None:
8b0ac9
-                logging.warning('%s | %s - Registration error. %s.', self.id, self.device, data)
8b0ac9
-            else:
8b0ac9
-                logging.debug('Dc._on_registration_success()      - %s | %s', self.id, self.device)
8b0ac9
-            self._get_log_op = gutil.AsyncOperationWithRetry(
8b0ac9
-                self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover
8b0ac9
-            )
8b0ac9
-            self._get_log_op.run_async()
8b0ac9
-        else:
8b0ac9
-            logging.debug(
8b0ac9
-                'Dc._on_registration_success()      - %s | %s Received event on dead object.', self.id, self.device
8b0ac9
-            )
8b0ac9
-
8b0ac9
-    def _on_registration_fail(self, op_obj, err, fail_cnt):
8b0ac9
-        '''@brief Function called when we fail to register with the
8b0ac9
-        Discovery Controller. See self._register_op object
8b0ac9
-        for details.
8b0ac9
-        '''
8b0ac9
-        if self._alive():
8b0ac9
-            logging.debug(
8b0ac9
-                'Dc._on_registration_fail()         - %s | %s: %s. Retry in %s sec',
8b0ac9
-                self.id,
8b0ac9
-                self.device,
8b0ac9
-                err,
8b0ac9
-                Dc.REGISTRATION_RETRY_RERIOD_SEC,
8b0ac9
-            )
8b0ac9
-            if fail_cnt == 1:  # Throttle the logs. Only print the first time we fail to connect
8b0ac9
-                logging.error('%s | %s - Failed to register with Discovery Controller. %s', self.id, self.device, err)
8b0ac9
-            # op_obj.retry(Dc.REGISTRATION_RETRY_RERIOD_SEC)
8b0ac9
-        else:
8b0ac9
-            logging.debug(
8b0ac9
-                'Dc._on_registration_fail()         - %s | %s Received event on dead object. %s',
8b0ac9
-                self.id,
8b0ac9
-                self.device,
8b0ac9
-                err,
8b0ac9
-            )
8b0ac9
-            op_obj.kill()
8b0ac9
-
8b0ac9
-    # --------------------------------------------------------------------------
8b0ac9
-    def _on_get_log_success(self, op_obj, data):  # pylint: disable=unused-argument
8b0ac9
-        '''@brief Function called when we successfully retrieve the log pages
8b0ac9
-        from the Discovery Controller. See self._get_log_op object
8b0ac9
-        for details.
8b0ac9
-        '''
8b0ac9
-        if self._alive():
8b0ac9
-            # Note that for historical reasons too long to explain, the CDC may
8b0ac9
-            # return invalid addresses ("0.0.0.0", "::", or ""). Those need to be
8b0ac9
-            # filtered out.
8b0ac9
-            referrals_before = self.referrals()
8b0ac9
-            self._log_pages = (
8b0ac9
-                [
8b0ac9
-                    {k: str(v) for k, v in dictionary.items()}
8b0ac9
-                    for dictionary in data
8b0ac9
-                    if dictionary.get('traddr') not in ('0.0.0.0', '::', '')
8b0ac9
-                ]
8b0ac9
-                if data
8b0ac9
-                else list()
8b0ac9
-            )
8b0ac9
-            logging.info(
8b0ac9
-                '%s | %s - Received discovery log pages (num records=%s).', self.id, self.device, len(self._log_pages)
8b0ac9
-            )
8b0ac9
-            referrals_after = self.referrals()
8b0ac9
-            STAF.log_pages_changed(self, self.device)
8b0ac9
-            if referrals_after != referrals_before:
8b0ac9
-                logging.debug(
8b0ac9
-                    'Dc._on_get_log_success()           - %s | %s Referrals before = %s',
8b0ac9
-                    self.id,
8b0ac9
-                    self.device,
8b0ac9
-                    referrals_before,
8b0ac9
-                )
8b0ac9
-                logging.debug(
8b0ac9
-                    'Dc._on_get_log_success()           - %s | %s Referrals after  = %s',
8b0ac9
-                    self.id,
8b0ac9
-                    self.device,
8b0ac9
-                    referrals_after,
8b0ac9
-                )
8b0ac9
-                STAF.referrals_changed()
8b0ac9
-        else:
8b0ac9
-            logging.debug(
8b0ac9
-                'Dc._on_get_log_success()           - %s | %s Received event on dead object.', self.id, self.device
8b0ac9
-            )
8b0ac9
-
8b0ac9
-    def _on_get_log_fail(self, op_obj, err, fail_cnt):
8b0ac9
-        '''@brief Function called when we fail to retrieve the log pages
8b0ac9
-        from the Discovery Controller. See self._get_log_op object
8b0ac9
-        for details.
8b0ac9
-        '''
8b0ac9
-        if self._alive():
8b0ac9
-            logging.debug(
8b0ac9
-                'Dc._on_get_log_fail()              - %s | %s: %s. Retry in %s sec',
8b0ac9
-                self.id,
8b0ac9
-                self.device,
8b0ac9
-                err,
8b0ac9
-                Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC,
8b0ac9
-            )
8b0ac9
-            if fail_cnt == 1:  # Throttle the logs. Only print the first time we fail to connect
8b0ac9
-                logging.error('%s | %s - Failed to retrieve log pages. %s', self.id, self.device, err)
8b0ac9
-            op_obj.retry(Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC)
8b0ac9
-        else:
8b0ac9
-            logging.debug(
8b0ac9
-                'Dc._on_get_log_fail()              - %s | %s Received event on dead object. %s',
8b0ac9
-                self.id,
8b0ac9
-                self.device,
8b0ac9
-                err,
8b0ac9
-            )
8b0ac9
-            op_obj.kill()
8b0ac9
-
8b0ac9
-
8b0ac9
-# ******************************************************************************
8b0ac9
-class Staf(service.Service):
8b0ac9
-    '''STorage Appliance Finder (STAF)'''
8b0ac9
+if __name__ == '__main__':
8b0ac9
+    import json
8b0ac9
+    import logging
8b0ac9
+    import dasbus.server.interface
8b0ac9
+    from staslib import log, service, stas, udev  # pylint: disable=ungrouped-imports
8b0ac9
 
8b0ac9
-    CONF_STABILITY_SOAK_TIME_SEC = 1.5
8b0ac9
+    # Before going any further, make sure the script is allowed to run.
8b0ac9
+    stas.check_if_allowed_to_continue()
8b0ac9
 
8b0ac9
     class Dbus:
8b0ac9
         '''This is the DBus interface that external programs can use to
8b0ac9
@@ -431,148 +198,8 @@ class Staf(service.Service):
8b0ac9
                 for controller in STAF.get_controllers()
8b0ac9
             ]
8b0ac9
 
8b0ac9
-    # ==========================================================================
8b0ac9
-    def __init__(self, args):
8b0ac9
-        super().__init__(args, self._reload_hdlr)
8b0ac9
-
8b0ac9
-        self._avahi = avahi.Avahi(self._sysbus, self._avahi_change)
8b0ac9
-        self._avahi.config_stypes(conf.SvcConf().get_stypes())
8b0ac9
-
8b0ac9
-        # We don't want to apply configuration changes to nvme-cli right away.
8b0ac9
-        # Often, multiple changes will occur in a short amount of time (sub-second).
8b0ac9
-        # We want to wait until there are no more changes before applying them
8b0ac9
-        # to the system. The following timer acts as a "soak period". Changes
8b0ac9
-        # will be applied by calling self._on_config_ctrls() at the end of
8b0ac9
-        # the soak period.
8b0ac9
-        self._cfg_soak_tmr = gutil.GTimer(Staf.CONF_STABILITY_SOAK_TIME_SEC, self._on_config_ctrls)
8b0ac9
-        self._cfg_soak_tmr.start()
8b0ac9
-
8b0ac9
-        # Create the D-Bus instance.
8b0ac9
-        self._config_dbus(Staf.Dbus(), defs.STAFD_DBUS_NAME, defs.STAFD_DBUS_PATH)
8b0ac9
-
8b0ac9
-    def info(self) -> dict:
8b0ac9
-        '''@brief Get the status info for this object (used for debug)'''
8b0ac9
-        info = super().info()
8b0ac9
-        info['avahi'] = self._avahi.info()
8b0ac9
-        return info
8b0ac9
-
8b0ac9
-    def _release_resources(self):
8b0ac9
-        logging.debug('Staf._release_resources()')
8b0ac9
-        super()._release_resources()
8b0ac9
-        if self._avahi:
8b0ac9
-            self._avahi.kill()
8b0ac9
-            self._avahi = None
8b0ac9
-
8b0ac9
-    def _load_last_known_config(self):
8b0ac9
-        try:
8b0ac9
-            with open(self._lkc_file, 'rb') as file:
8b0ac9
-                config = pickle.load(file)
8b0ac9
-        except (FileNotFoundError, AttributeError):
8b0ac9
-            return dict()
8b0ac9
-
8b0ac9
-        logging.debug('Staf._load_last_known_config()     - DC count = %s', len(config))
8b0ac9
-        return {tid: Dc(self._root, self._host, tid, log_pages) for tid, log_pages in config.items()}
8b0ac9
-
8b0ac9
-    def _dump_last_known_config(self, controllers):
8b0ac9
-        try:
8b0ac9
-            with open(self._lkc_file, 'wb') as file:
8b0ac9
-                config = {tid: dc.log_pages() for tid, dc in controllers.items()}
8b0ac9
-                logging.debug('Staf._dump_last_known_config()     - DC count = %s', len(config))
8b0ac9
-                pickle.dump(config, file)
8b0ac9
-        except FileNotFoundError as ex:
8b0ac9
-            logging.error('Unable to save last known config: %s', ex)
8b0ac9
-
8b0ac9
-    def _keep_connections_on_exit(self):
8b0ac9
-        '''@brief Determine whether connections should remain when the
8b0ac9
-        process exits.
8b0ac9
-        '''
8b0ac9
-        return conf.SvcConf().persistent_connections
8b0ac9
-
8b0ac9
-    def _reload_hdlr(self):
8b0ac9
-        '''@brief Reload configuration file. This is triggered by the SIGHUP
8b0ac9
-        signal, which can be sent with "systemctl reload stafd".
8b0ac9
-        '''
8b0ac9
-        systemd.daemon.notify('RELOADING=1')
8b0ac9
-        service_cnf = conf.SvcConf()
8b0ac9
-        service_cnf.reload()
8b0ac9
-        self.tron = service_cnf.tron
8b0ac9
-        self._avahi.kick_start()  # Make sure Avahi is running
8b0ac9
-        self._avahi.config_stypes(service_cnf.get_stypes())
8b0ac9
-        self._cfg_soak_tmr.start()
8b0ac9
-        systemd.daemon.notify('READY=1')
8b0ac9
-        return GLib.SOURCE_CONTINUE
8b0ac9
-
8b0ac9
-    def log_pages_changed(self, controller, device):
8b0ac9
-        '''@brief Function invoked when a controller's cached log pages
8b0ac9
-        have changed. This will emit a D-Bus signal to inform
8b0ac9
-        other applications that the cached log pages have changed.
8b0ac9
-        '''
8b0ac9
-        self._dbus_iface.log_pages_changed.emit(
8b0ac9
-            controller.tid.transport,
8b0ac9
-            controller.tid.traddr,
8b0ac9
-            controller.tid.trsvcid,
8b0ac9
-            controller.tid.host_traddr,
8b0ac9
-            controller.tid.host_iface,
8b0ac9
-            controller.tid.subsysnqn,
8b0ac9
-            device,
8b0ac9
-        )
8b0ac9
-
8b0ac9
-    def referrals_changed(self):
8b0ac9
-        '''@brief Function invoked when a controller's cached referrals
8b0ac9
-        have changed.
8b0ac9
-        '''
8b0ac9
-        logging.debug('Staf.referrals_changed()')
8b0ac9
-        self._cfg_soak_tmr.start()
8b0ac9
-
8b0ac9
-    def _referrals(self) -> list:
8b0ac9
-        return [
8b0ac9
-            stas.cid_from_dlpe(dlpe, controller.tid.host_traddr, controller.tid.host_iface)
8b0ac9
-            for controller in self.get_controllers()
8b0ac9
-            for dlpe in controller.referrals()
8b0ac9
-        ]
8b0ac9
-
8b0ac9
-    def _config_ctrls_finish(self, configured_ctrl_list):
8b0ac9
-        '''@brief Finish discovery controllers configuration after
8b0ac9
-        hostnames (if any) have been resolved.
8b0ac9
-        '''
8b0ac9
-        configured_ctrl_list = [
8b0ac9
-            ctrl_dict
8b0ac9
-            for ctrl_dict in configured_ctrl_list
8b0ac9
-            if 'traddr' in ctrl_dict and ctrl_dict.setdefault('subsysnqn', defs.WELL_KNOWN_DISC_NQN)
8b0ac9
-        ]
8b0ac9
-
8b0ac9
-        discovered_ctrl_list = self._avahi.get_controllers()
8b0ac9
-        referral_ctrl_list = self._referrals()
8b0ac9
-        logging.debug('Staf._config_ctrls_finish()        - configured_ctrl_list = %s', configured_ctrl_list)
8b0ac9
-        logging.debug('Staf._config_ctrls_finish()        - discovered_ctrl_list = %s', discovered_ctrl_list)
8b0ac9
-        logging.debug('Staf._config_ctrls_finish()        - referral_ctrl_list   = %s', referral_ctrl_list)
8b0ac9
-
8b0ac9
-        controllers = stas.remove_blacklisted(configured_ctrl_list + discovered_ctrl_list + referral_ctrl_list)
8b0ac9
-        controllers = stas.remove_invalid_addresses(controllers)
8b0ac9
-
8b0ac9
-        new_controller_ids = {trid.TID(controller) for controller in controllers}
8b0ac9
-        cur_controller_ids = set(self._controllers.keys())
8b0ac9
-        controllers_to_add = new_controller_ids - cur_controller_ids
8b0ac9
-        controllers_to_del = cur_controller_ids - new_controller_ids
8b0ac9
-
8b0ac9
-        logging.debug('Staf._config_ctrls_finish()        - controllers_to_add   = %s', list(controllers_to_add))
8b0ac9
-        logging.debug('Staf._config_ctrls_finish()        - controllers_to_del   = %s', list(controllers_to_del))
8b0ac9
-
8b0ac9
-        for tid in controllers_to_del:
8b0ac9
-            controller = self._controllers.pop(tid, None)
8b0ac9
-            if controller is not None:
8b0ac9
-                controller.disconnect(self.remove_controller, conf.SvcConf().persistent_connections)
8b0ac9
-
8b0ac9
-        for tid in controllers_to_add:
8b0ac9
-            self._controllers[tid] = Dc(self._root, self._host, tid)
8b0ac9
-
8b0ac9
-    def _avahi_change(self):
8b0ac9
-        self._cfg_soak_tmr.start()
8b0ac9
-
8b0ac9
-
8b0ac9
-# ******************************************************************************
8b0ac9
-if __name__ == '__main__':
8b0ac9
-    STAF = Staf(ARGS)
8b0ac9
+    log.init(ARGS.syslog)
8b0ac9
+    STAF = service.Staf(ARGS, Dbus())
8b0ac9
     STAF.run()
8b0ac9
 
8b0ac9
     STAF = None
8b0ac9
diff --git a/staslib/avahi.py b/staslib/avahi.py
8b0ac9
index 768bbf4..90a67c8 100644
8b0ac9
--- a/staslib/avahi.py
8b0ac9
+++ b/staslib/avahi.py
8b0ac9
@@ -172,9 +172,7 @@ class Avahi:  # pylint: disable=too-many-instance-attributes
8b0ac9
         services = dict()
8b0ac9
         for service, obj in self._services.items():
8b0ac9
             interface, protocol, name, stype, domain = service
8b0ac9
-            key = '({}, {}, {}.{}, {})'.format(  # pylint: disable=consider-using-f-string
8b0ac9
-                socket.if_indextoname(interface), Avahi.protos.get(protocol, 'unknown'), name, domain, stype
8b0ac9
-            )
8b0ac9
+            key = f'({socket.if_indextoname(interface)}, {Avahi.protos.get(protocol, "unknown")}, {name}.{domain}, {stype})'
8b0ac9
             services[key] = obj.get('data', {})
8b0ac9
 
8b0ac9
         info = {
8b0ac9
@@ -316,7 +314,7 @@ class Avahi:  # pylint: disable=too-many-instance-attributes
8b0ac9
         _interface_name: str,
8b0ac9
         _signal_name: str,
8b0ac9
         args: typing.Tuple[int, int, str, str, str, int],
8b0ac9
-        *_user_data
8b0ac9
+        *_user_data,
8b0ac9
     ):
8b0ac9
         (interface, protocol, name, stype, domain, flags) = args
8b0ac9
         logging.debug(
8b0ac9
@@ -352,7 +350,7 @@ class Avahi:  # pylint: disable=too-many-instance-attributes
8b0ac9
         _interface_name: str,
8b0ac9
         _signal_name: str,
8b0ac9
         args: typing.Tuple[int, int, str, str, str, int],
8b0ac9
-        *_user_data
8b0ac9
+        *_user_data,
8b0ac9
     ):
8b0ac9
         (interface, protocol, name, stype, domain, flags) = args
8b0ac9
         logging.debug(
8b0ac9
@@ -386,7 +384,7 @@ class Avahi:  # pylint: disable=too-many-instance-attributes
8b0ac9
         _interface_name: str,
8b0ac9
         _signal_name: str,
8b0ac9
         args: typing.Tuple[int, int, str, str, str, str, int, str, int, list, int],
8b0ac9
-        *_user_data
8b0ac9
+        *_user_data,
8b0ac9
     ):
8b0ac9
         (interface, protocol, name, stype, domain, host, aprotocol, address, port, txt, flags) = args
8b0ac9
         txt = _txt2dict(txt)
8b0ac9
@@ -428,7 +426,7 @@ class Avahi:  # pylint: disable=too-many-instance-attributes
8b0ac9
         interface_name: str,
8b0ac9
         _signal_name: str,
8b0ac9
         args: typing.Tuple[str],
8b0ac9
-        *_user_data
8b0ac9
+        *_user_data,
8b0ac9
     ):
8b0ac9
         (error,) = args
8b0ac9
         if 'ServiceResolver' not in interface_name or 'TimeoutError' not in error:
8b0ac9
diff --git a/staslib/conf.py b/staslib/conf.py
8b0ac9
index 3f52e4f..c314a9e 100644
8b0ac9
--- a/staslib/conf.py
8b0ac9
+++ b/staslib/conf.py
8b0ac9
@@ -74,7 +74,7 @@ class SvcConf(metaclass=singleton.Singleton):
8b0ac9
             ('Global', 'ignore-iface'): 'false',
8b0ac9
             ('Global', 'ip-family'): 'ipv4+ipv6',
8b0ac9
             ('Global', 'udev-rule'): 'enabled',
8b0ac9
-            ('Global', 'sticky-connections'): 'disabled',
8b0ac9
+            ('Global', 'sticky-connections'): 'enabled',
8b0ac9
             ('Service Discovery', 'zeroconf'): 'enabled',
8b0ac9
             ('Controllers', 'controller'): list(),
8b0ac9
             ('Controllers', 'blacklist'): list(),
8b0ac9
diff --git a/staslib/ctrl.py b/staslib/ctrl.py
8b0ac9
index 5504baa..dbc1973 100644
8b0ac9
--- a/staslib/ctrl.py
8b0ac9
+++ b/staslib/ctrl.py
8b0ac9
@@ -10,69 +10,76 @@
8b0ac9
 Dc (Discovery Controller) and Ioc (I/O Controller) objects are derived.'''
8b0ac9
 
8b0ac9
 import logging
8b0ac9
-from gi.repository import Gio, GLib
8b0ac9
+from gi.repository import GLib
8b0ac9
 from libnvme import nvme
8b0ac9
-from staslib import conf, gutil, trid, udev
8b0ac9
+from staslib import conf, gutil, trid, udev, stas
8b0ac9
 
8b0ac9
 
8b0ac9
 DC_KATO_DEFAULT = 30  # seconds
8b0ac9
 
8b0ac9
 
8b0ac9
 # ******************************************************************************
8b0ac9
-class Controller:  # pylint: disable=too-many-instance-attributes
8b0ac9
+class Controller(stas.ControllerABC):
8b0ac9
     '''@brief Base class used to manage the connection to a controller.'''
8b0ac9
 
8b0ac9
-    CONNECT_RETRY_PERIOD_SEC = 60
8b0ac9
-    FAST_CONNECT_RETRY_PERIOD_SEC = 3
8b0ac9
-
8b0ac9
     def __init__(self, root, host, tid: trid.TID, discovery_ctrl=False):
8b0ac9
-        self._root              = root
8b0ac9
-        self._host              = host
8b0ac9
-        self._udev              = udev.UDEV
8b0ac9
-        self._tid               = tid
8b0ac9
-        self._cancellable       = Gio.Cancellable()
8b0ac9
-        self._connect_op        = None
8b0ac9
-        self._connect_attempts  = 0
8b0ac9
-        self._retry_connect_tmr = gutil.GTimer(Controller.CONNECT_RETRY_PERIOD_SEC, self._on_try_to_connect)
8b0ac9
-        self._device            = None
8b0ac9
-        self._ctrl              = None
8b0ac9
-        self._discovery_ctrl    = discovery_ctrl
8b0ac9
-        self._try_to_connect_deferred = gutil.Deferred(self._try_to_connect)
8b0ac9
-        self._try_to_connect_deferred.schedule()
8b0ac9
+        self._udev       = udev.UDEV
8b0ac9
+        self._device     = None  # Refers to the nvme device (e.g. /dev/nvme[n])
8b0ac9
+        self._ctrl       = None  # libnvme's nvme.ctrl object
8b0ac9
+        self._connect_op = None
8b0ac9
+
8b0ac9
+        super().__init__(root, host, tid, discovery_ctrl)
8b0ac9
 
8b0ac9
     def _release_resources(self):
8b0ac9
         logging.debug('Controller._release_resources()    - %s', self.id)
8b0ac9
 
8b0ac9
-        # Remove pending deferred from main loop
8b0ac9
-        if self._try_to_connect_deferred:
8b0ac9
-            self._try_to_connect_deferred.cancel()
8b0ac9
-        self._try_to_connect_deferred = None
8b0ac9
-
8b0ac9
         if self._udev:
8b0ac9
             self._udev.unregister_for_device_events(self._on_udev_notification)
8b0ac9
 
8b0ac9
-        if self._retry_connect_tmr is not None:
8b0ac9
-            self._retry_connect_tmr.kill()
8b0ac9
-
8b0ac9
-        if self._cancellable and not self._cancellable.is_cancelled():
8b0ac9
-            self._cancellable.cancel()
8b0ac9
-
8b0ac9
         self._kill_ops()
8b0ac9
 
8b0ac9
-        self._tid = None
8b0ac9
+        super()._release_resources()
8b0ac9
+
8b0ac9
         self._ctrl = None
8b0ac9
-        self._device = None
8b0ac9
-        self._retry_connect_tmr = None
8b0ac9
-        self._cancellable = None
8b0ac9
         self._udev = None
8b0ac9
 
8b0ac9
-    def _alive(self):
8b0ac9
-        '''There may be race condition where a queued event gets processed
8b0ac9
-        after the object is no longer configured (i.e. alive). This method
8b0ac9
-        can be used by callback functions to make sure the object is still
8b0ac9
-        alive before processing further.
8b0ac9
-        '''
8b0ac9
-        return self._cancellable and not self._cancellable.is_cancelled()
8b0ac9
+    @property
8b0ac9
+    def device(self) -> str:
8b0ac9
+        '''@brief return the Linux nvme device id (e.g. nvme3), or the
8b0ac9
+        placeholder "nvme?" if no device is associated with this controller'''
8b0ac9
+        if not self._device and self._ctrl and self._ctrl.name:
8b0ac9
+            self._device = self._ctrl.name
8b0ac9
+
8b0ac9
+        return self._device or 'nvme?'
8b0ac9
+
8b0ac9
+    def controller_id_dict(self) -> dict:
8b0ac9
+        '''@brief return the controller ID as a dict.'''
8b0ac9
+        cid = super().controller_id_dict()
8b0ac9
+        cid['device'] = self.device
8b0ac9
+        return cid
8b0ac9
+
8b0ac9
+    def details(self) -> dict:
8b0ac9
+        '''@brief return detailed debug info about this controller'''
8b0ac9
+        details = super().details()
8b0ac9
+        details.update(
8b0ac9
+            self._udev.get_attributes(self.device,
8b0ac9
+                                      ('hostid', 'hostnqn', 'model',
8b0ac9
+                                       'serial', 'dctype', 'cntrltype'))
8b0ac9
+        )
8b0ac9
+        return details
8b0ac9
+
8b0ac9
+    def info(self) -> dict:
8b0ac9
+        '''@brief Get the controller info for this object'''
8b0ac9
+        info = super().info()
8b0ac9
+        if self._connect_op:
8b0ac9
+            info['connect operation'] = self._connect_op.as_dict()
8b0ac9
+        return info
8b0ac9
+
8b0ac9
+    def cancel(self):
8b0ac9
+        '''@brief Used to cancel pending operations.'''
8b0ac9
+        super().cancel()
8b0ac9
+        if self._connect_op:
8b0ac9
+            self._connect_op.cancel()
8b0ac9
 
8b0ac9
     def _kill_ops(self):
8b0ac9
         if self._connect_op:
8b0ac9
@@ -91,7 +98,7 @@ class Controller:  # pylint: disable=too-many-instance-attributes
8b0ac9
                     self._on_nvme_event(nvme_event)
8b0ac9
             elif udev_obj.action == 'remove':
8b0ac9
                 logging.info('%s | %s - Received "remove" event', self.id, udev_obj.sys_name)
8b0ac9
-                self._on_udev_remove(udev_obj)
8b0ac9
+                self._on_ctrl_removed(udev_obj)
8b0ac9
             else:
8b0ac9
                 logging.debug(
8b0ac9
                     'Controller._on_udev_notification() - %s | %s - Received "%s" notification.',
8b0ac9
@@ -108,33 +115,12 @@ class Controller:  # pylint: disable=too-many-instance-attributes
8b0ac9
                 udev_obj.sys_name,
8b0ac9
             )
8b0ac9
 
8b0ac9
-    def _on_aen(self, aen: int):
8b0ac9
-        pass
8b0ac9
-
8b0ac9
-    def _on_nvme_event(self, nvme_event):
8b0ac9
-        pass
8b0ac9
-
8b0ac9
-    def _on_udev_remove(self, udev_obj):  # pylint: disable=unused-argument
8b0ac9
+    def _on_ctrl_removed(self, obj):  # pylint: disable=unused-argument
8b0ac9
         self._udev.unregister_for_device_events(self._on_udev_notification)
8b0ac9
         self._kill_ops()  # Kill all pending operations
8b0ac9
         self._ctrl = None
8b0ac9
 
8b0ac9
-    def _find_existing_connection(self):
8b0ac9
-        raise NotImplementedError()
8b0ac9
-
8b0ac9
-    def _on_try_to_connect(self):
8b0ac9
-        self._try_to_connect_deferred.schedule()
8b0ac9
-        return GLib.SOURCE_REMOVE
8b0ac9
-
8b0ac9
-    def _try_to_connect(self):
8b0ac9
-        # This is a deferred function call. Make sure
8b0ac9
-        # the source of the deferred is still good.
8b0ac9
-        source = GLib.main_current_source()
8b0ac9
-        if source and source.is_destroyed():
8b0ac9
-            return
8b0ac9
-
8b0ac9
-        self._connect_attempts += 1
8b0ac9
-
8b0ac9
+    def _do_connect(self):
8b0ac9
         host_iface = (
8b0ac9
             self.tid.host_iface
8b0ac9
             if (self.tid.host_iface and not conf.SvcConf().ignore_iface and conf.NvmeOptions().host_iface_supp)
8b0ac9
@@ -164,7 +150,6 @@ class Controller:  # pylint: disable=too-many-instance-attributes
8b0ac9
                 self._on_connect_success, self._on_connect_fail, self._ctrl.init, self._host, int(udev_obj.sys_number)
8b0ac9
             )
8b0ac9
         else:
8b0ac9
-            self._device = None
8b0ac9
             service_conf = conf.SvcConf()
8b0ac9
             cfg = { 'hdr_digest':  service_conf.hdr_digest,
8b0ac9
                     'data_digest': service_conf.data_digest }
8b0ac9
@@ -198,11 +183,10 @@ class Controller:  # pylint: disable=too-many-instance-attributes
8b0ac9
         self._connect_op = None
8b0ac9
 
8b0ac9
         if self._alive():
8b0ac9
-            if not self._device:
8b0ac9
-                self._device = self._ctrl.name
8b0ac9
+            self._device = self._ctrl.name
8b0ac9
             logging.info('%s | %s - Connection established!', self.id, self.device)
8b0ac9
             self._connect_attempts = 0
8b0ac9
-            self._udev.register_for_device_events(self.device, self._on_udev_notification)
8b0ac9
+            self._udev.register_for_device_events(self._device, self._on_udev_notification)
8b0ac9
         else:
8b0ac9
             logging.debug(
8b0ac9
                 'Controller._on_connect_success()   - %s | %s Received event on dead object. data=%s',
8b0ac9
@@ -227,11 +211,11 @@ class Controller:  # pylint: disable=too-many-instance-attributes
8b0ac9
                 # the same time. This is perfectly fine, except that we may get a bogus
8b0ac9
                 # failed to connect error. By doing a fast re-try, stacd can quickly
8b0ac9
                 # verify that the connection was actually successful.
8b0ac9
-                self._retry_connect_tmr.set_timeout(Controller.FAST_CONNECT_RETRY_PERIOD_SEC)
8b0ac9
+                self._retry_connect_tmr.set_timeout(self.FAST_CONNECT_RETRY_PERIOD_SEC)
8b0ac9
             elif self._connect_attempts == 2:
8b0ac9
                 # If the fast connect re-try fails, then we can print a message to
8b0ac9
                 # indicate the failure, and start a slow re-try period.
8b0ac9
-                self._retry_connect_tmr.set_timeout(Controller.CONNECT_RETRY_PERIOD_SEC)
8b0ac9
+                self._retry_connect_tmr.set_timeout(self.CONNECT_RETRY_PERIOD_SEC)
8b0ac9
                 logging.error('%s Failed to connect to controller. %s', self.id, getattr(err, 'message', err))
8b0ac9
 
8b0ac9
             logging.debug(
8b0ac9
@@ -248,53 +232,6 @@ class Controller:  # pylint: disable=too-many-instance-attributes
8b0ac9
                 getattr(err, 'message', err),
8b0ac9
             )
8b0ac9
 
8b0ac9
-    @property
8b0ac9
-    def id(self) -> str:  # pylint: disable=missing-function-docstring
8b0ac9
-        return str(self.tid)
8b0ac9
-
8b0ac9
-    @property
8b0ac9
-    def tid(self):  # pylint: disable=missing-function-docstring
8b0ac9
-        return self._tid
8b0ac9
-
8b0ac9
-    @property
8b0ac9
-    def device(self) -> str:  # pylint: disable=missing-function-docstring
8b0ac9
-        return self._device if self._device else ''
8b0ac9
-
8b0ac9
-    def controller_id_dict(self) -> dict:
8b0ac9
-        '''@brief return the controller ID as a dict.'''
8b0ac9
-        cid = self.tid.as_dict()
8b0ac9
-        cid['device'] = self.device
8b0ac9
-        return cid
8b0ac9
-
8b0ac9
-    def details(self) -> dict:
8b0ac9
-        '''@brief return detailed debug info about this controller'''
8b0ac9
-        details = self.controller_id_dict()
8b0ac9
-        details.update(self._udev.get_attributes(self.device, ('hostid', 'hostnqn', 'model', 'serial')))
8b0ac9
-        details['connect attempts'] = str(self._connect_attempts)
8b0ac9
-        details['retry connect timer'] = str(self._retry_connect_tmr)
8b0ac9
-        return details
8b0ac9
-
8b0ac9
-    def info(self) -> dict:
8b0ac9
-        '''@brief Get the controller info for this object'''
8b0ac9
-        info = self.details()
8b0ac9
-        if self._connect_op:
8b0ac9
-            info['connect operation'] = self._connect_op.as_dict()
8b0ac9
-        return info
8b0ac9
-
8b0ac9
-    def cancel(self):
8b0ac9
-        '''@brief Used to cancel pending operations.'''
8b0ac9
-        if self._cancellable and not self._cancellable.is_cancelled():
8b0ac9
-            logging.debug('Controller.cancel()                - %s', self.id)
8b0ac9
-            self._cancellable.cancel()
8b0ac9
-
8b0ac9
-        if self._connect_op:
8b0ac9
-            self._connect_op.cancel()
8b0ac9
-
8b0ac9
-    def kill(self):
8b0ac9
-        '''@brief Used to release all resources associated with this object.'''
8b0ac9
-        logging.debug('Controller.kill()                  - %s', self.id)
8b0ac9
-        self._release_resources()
8b0ac9
-
8b0ac9
     def disconnect(self, disconnected_cb, keep_connection):
8b0ac9
         '''@brief Issue an asynchronous disconnect command to a Controller.
8b0ac9
         Once the async command has completed, the callback 'disconnected_cb'
8b0ac9
@@ -313,7 +250,7 @@ class Controller:  # pylint: disable=too-many-instance-attributes
8b0ac9
             # cannot be called directly as the current Controller object is in the
8b0ac9
             # process of being disconnected and the callback will in fact delete
8b0ac9
             # the object. This would invariably lead to unpredictable outcome.
8b0ac9
-            GLib.idle_add(disconnected_cb, self)
8b0ac9
+            GLib.idle_add(disconnected_cb, self, True)
8b0ac9
 
8b0ac9
     def _on_disconn_success(self, op_obj, data, disconnected_cb):  # pylint: disable=unused-argument
8b0ac9
         logging.debug('Controller._on_disconn_success()   - %s | %s', self.id, self.device)
8b0ac9
@@ -322,7 +259,7 @@ class Controller:  # pylint: disable=too-many-instance-attributes
8b0ac9
         # cannot be called directly as the current Controller object is in the
8b0ac9
         # process of being disconnected and the callback will in fact delete
8b0ac9
         # the object. This would invariably lead to unpredictable outcome.
8b0ac9
-        GLib.idle_add(disconnected_cb, self)
8b0ac9
+        GLib.idle_add(disconnected_cb, self, True)
8b0ac9
 
8b0ac9
     def _on_disconn_fail(self, op_obj, err, fail_cnt, disconnected_cb):  # pylint: disable=unused-argument
8b0ac9
         logging.debug('Controller._on_disconn_fail()      - %s | %s: %s', self.id, self.device, err)
8b0ac9
@@ -331,4 +268,249 @@ class Controller:  # pylint: disable=too-many-instance-attributes
8b0ac9
         # cannot be called directly as the current Controller object is in the
8b0ac9
         # process of being disconnected and the callback will in fact delete
8b0ac9
         # the object. This would invariably lead to unpredictable outcome.
8b0ac9
-        GLib.idle_add(disconnected_cb, self)
8b0ac9
+        GLib.idle_add(disconnected_cb, self, False)
8b0ac9
+
8b0ac9
+
8b0ac9
+# ******************************************************************************
8b0ac9
+class Dc(Controller):
8b0ac9
+    '''@brief This object establishes a connection to one Discovery Controller (DC).
8b0ac9
+    It retrieves the discovery log pages and caches them.
8b0ac9
+    It also monitors udev events associated with that DC and updates
8b0ac9
+    the cached discovery log pages accordingly.
8b0ac9
+    '''
8b0ac9
+
8b0ac9
+    DLP_CHANGED = (
8b0ac9
+        (nvme.NVME_LOG_LID_DISCOVER << 16) | (nvme.NVME_AER_NOTICE_DISC_CHANGED << 8) | nvme.NVME_AER_NOTICE
8b0ac9
+    )  # 0x70f002
8b0ac9
+    GET_LOG_PAGE_RETRY_RERIOD_SEC = 20
8b0ac9
+    REGISTRATION_RETRY_RERIOD_SEC = 10
8b0ac9
+
8b0ac9
+    def __init__(self, staf, root, host, tid: trid.TID, log_pages=None):  # pylint: disable=too-many-arguments
8b0ac9
+        super().__init__(root, host, tid, discovery_ctrl=True)
8b0ac9
+        self._staf = staf
8b0ac9
+        self._register_op = None
8b0ac9
+        self._get_log_op = None
8b0ac9
+        self._log_pages = log_pages if log_pages else list()  # Log pages cache
8b0ac9
+
8b0ac9
+    def _release_resources(self):
8b0ac9
+        logging.debug('Dc._release_resources()            - %s | %s', self.id, self.device)
8b0ac9
+        super()._release_resources()
8b0ac9
+        self._log_pages = list()
8b0ac9
+        self._staf = None
8b0ac9
+
8b0ac9
+    def _kill_ops(self):
8b0ac9
+        super()._kill_ops()
8b0ac9
+        if self._get_log_op:
8b0ac9
+            self._get_log_op.kill()
8b0ac9
+            self._get_log_op = None
8b0ac9
+        if self._register_op:
8b0ac9
+            self._register_op.kill()
8b0ac9
+            self._register_op = None
8b0ac9
+
8b0ac9
+    def info(self) -> dict:
8b0ac9
+        '''@brief Get the controller info for this object'''
8b0ac9
+        info = super().info()
8b0ac9
+        if self._get_log_op:
8b0ac9
+            info['get log page operation'] = self._get_log_op.as_dict()
8b0ac9
+        if self._register_op:
8b0ac9
+            info['register operation'] = self._register_op.as_dict()
8b0ac9
+        return info
8b0ac9
+
8b0ac9
+    def cancel(self):
8b0ac9
+        '''@brief Used to cancel pending operations.'''
8b0ac9
+        super().cancel()
8b0ac9
+        if self._get_log_op:
8b0ac9
+            self._get_log_op.cancel()
8b0ac9
+        if self._register_op:
8b0ac9
+            self._register_op.cancel()
8b0ac9
+
8b0ac9
+    def log_pages(self) -> list:
8b0ac9
+        '''@brief Get the cached log pages for this object'''
8b0ac9
+        return self._log_pages
8b0ac9
+
8b0ac9
+    def referrals(self) -> list:
8b0ac9
+        '''@brief Return the list of referrals'''
8b0ac9
+        return [page for page in self._log_pages if page['subtype'] == 'referral']
8b0ac9
+
8b0ac9
+    def _on_aen(self, aen: int):
8b0ac9
+        if aen == self.DLP_CHANGED and self._get_log_op:
8b0ac9
+            self._get_log_op.run_async()
8b0ac9
+
8b0ac9
+    def _on_nvme_event(self, nvme_event: str):
8b0ac9
+        if nvme_event == 'connected' and self._register_op:
8b0ac9
+            self._register_op.run_async()
8b0ac9
+
8b0ac9
+    def _on_ctrl_removed(self, obj):
8b0ac9
+        super()._on_ctrl_removed(obj)
8b0ac9
+        if self._try_to_connect_deferred:
8b0ac9
+            self._try_to_connect_deferred.schedule()
8b0ac9
+
8b0ac9
+    def _find_existing_connection(self):
8b0ac9
+        return self._udev.find_nvme_dc_device(self.tid)
8b0ac9
+
8b0ac9
+    # --------------------------------------------------------------------------
8b0ac9
+    def _on_connect_success(self, op_obj, data):
8b0ac9
+        '''@brief Function called when we successfully connect to the
8b0ac9
+        Discovery Controller.
8b0ac9
+        '''
8b0ac9
+        super()._on_connect_success(op_obj, data)
8b0ac9
+
8b0ac9
+        if self._alive():
8b0ac9
+            if self._ctrl.is_registration_supported():
8b0ac9
+                self._register_op = gutil.AsyncOperationWithRetry(
8b0ac9
+                    self._on_registration_success,
8b0ac9
+                    self._on_registration_fail,
8b0ac9
+                    self._ctrl.registration_ctlr,
8b0ac9
+                    nvme.NVMF_DIM_TAS_REGISTER,
8b0ac9
+                )
8b0ac9
+                self._register_op.run_async()
8b0ac9
+            else:
8b0ac9
+                self._get_log_op = gutil.AsyncOperationWithRetry(
8b0ac9
+                    self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover
8b0ac9
+                )
8b0ac9
+                self._get_log_op.run_async()
8b0ac9
+
8b0ac9
+    # --------------------------------------------------------------------------
8b0ac9
+    def _on_registration_success(self, op_obj, data):  # pylint: disable=unused-argument
8b0ac9
+        '''@brief Function called when we successfully register with the
8b0ac9
+        Discovery Controller. See self._register_op object
8b0ac9
+        for details.
8b0ac9
+        '''
8b0ac9
+        if self._alive():
8b0ac9
+            if data is not None:
8b0ac9
+                logging.warning('%s | %s - Registration error. %s.', self.id, self.device, data)
8b0ac9
+            else:
8b0ac9
+                logging.debug('Dc._on_registration_success()      - %s | %s', self.id, self.device)
8b0ac9
+            self._get_log_op = gutil.AsyncOperationWithRetry(
8b0ac9
+                self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover
8b0ac9
+            )
8b0ac9
+            self._get_log_op.run_async()
8b0ac9
+        else:
8b0ac9
+            logging.debug(
8b0ac9
+                'Dc._on_registration_success()      - %s | %s Received event on dead object.', self.id, self.device
8b0ac9
+            )
8b0ac9
+
8b0ac9
+    def _on_registration_fail(self, op_obj, err, fail_cnt):
8b0ac9
+        '''@brief Function called when we fail to register with the
8b0ac9
+        Discovery Controller. See self._register_op object
8b0ac9
+        for details.
8b0ac9
+        '''
8b0ac9
+        if self._alive():
8b0ac9
+            logging.debug(
8b0ac9
+                'Dc._on_registration_fail()         - %s | %s: %s. Retry in %s sec',
8b0ac9
+                self.id,
8b0ac9
+                self.device,
8b0ac9
+                err,
8b0ac9
+                Dc.REGISTRATION_RETRY_RERIOD_SEC,
8b0ac9
+            )
8b0ac9
+            if fail_cnt == 1:  # Throttle the logs. Only print the first time we fail to register
8b0ac9
+                logging.error('%s | %s - Failed to register with Discovery Controller. %s', self.id, self.device, err)
8b0ac9
+            # op_obj.retry(Dc.REGISTRATION_RETRY_RERIOD_SEC)  # NOTE(review): retry is disabled, yet the debug log above says "Retry in %s sec" - confirm intent
8b0ac9
+        else:
8b0ac9
+            logging.debug(
8b0ac9
+                'Dc._on_registration_fail()         - %s | %s Received event on dead object. %s',
8b0ac9
+                self.id,
8b0ac9
+                self.device,
8b0ac9
+                err,
8b0ac9
+            )
8b0ac9
+            op_obj.kill()
8b0ac9
+
8b0ac9
+    # --------------------------------------------------------------------------
8b0ac9
+    def _on_get_log_success(self, op_obj, data):  # pylint: disable=unused-argument
8b0ac9
+        '''@brief Function called when we successfully retrieve the log pages
8b0ac9
+        from the Discovery Controller. See self._get_log_op object
8b0ac9
+        for details.
8b0ac9
+        '''
8b0ac9
+        if self._alive():
8b0ac9
+            # Note that for historical reasons too long to explain, the CDC may
8b0ac9
+            # return invalid addresses ("0.0.0.0", "::", or ""). Those need to be
8b0ac9
+            # filtered out.
8b0ac9
+            referrals_before = self.referrals()
8b0ac9
+            self._log_pages = (
8b0ac9
+                [
8b0ac9
+                    {k: str(v) for k, v in dictionary.items()}
8b0ac9
+                    for dictionary in data
8b0ac9
+                    if dictionary.get('traddr') not in ('0.0.0.0', '::', '')
8b0ac9
+                ]
8b0ac9
+                if data
8b0ac9
+                else list()
8b0ac9
+            )
8b0ac9
+            logging.info(
8b0ac9
+                '%s | %s - Received discovery log pages (num records=%s).', self.id, self.device, len(self._log_pages)
8b0ac9
+            )
8b0ac9
+            referrals_after = self.referrals()
8b0ac9
+            self._staf.log_pages_changed(self, self.device)
8b0ac9
+            if referrals_after != referrals_before:
8b0ac9
+                logging.debug(
8b0ac9
+                    'Dc._on_get_log_success()           - %s | %s Referrals before = %s',
8b0ac9
+                    self.id,
8b0ac9
+                    self.device,
8b0ac9
+                    referrals_before,
8b0ac9
+                )
8b0ac9
+                logging.debug(
8b0ac9
+                    'Dc._on_get_log_success()           - %s | %s Referrals after  = %s',
8b0ac9
+                    self.id,
8b0ac9
+                    self.device,
8b0ac9
+                    referrals_after,
8b0ac9
+                )
8b0ac9
+                self._staf.referrals_changed()
8b0ac9
+        else:
8b0ac9
+            logging.debug(
8b0ac9
+                'Dc._on_get_log_success()           - %s | %s Received event on dead object.', self.id, self.device
8b0ac9
+            )
8b0ac9
+
8b0ac9
+    def _on_get_log_fail(self, op_obj, err, fail_cnt):
8b0ac9
+        '''@brief Function called when we fail to retrieve the log pages
8b0ac9
+        from the Discovery Controller. See self._get_log_op object
8b0ac9
+        for details.
8b0ac9
+        '''
8b0ac9
+        if self._alive():
8b0ac9
+            logging.debug(
8b0ac9
+                'Dc._on_get_log_fail()              - %s | %s: %s. Retry in %s sec',
8b0ac9
+                self.id,
8b0ac9
+                self.device,
8b0ac9
+                err,
8b0ac9
+                Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC,
8b0ac9
+            )
8b0ac9
+            if fail_cnt == 1:  # Throttle the logs. Only print the first time we fail to retrieve the log pages
8b0ac9
+                logging.error('%s | %s - Failed to retrieve log pages. %s', self.id, self.device, err)
8b0ac9
+            op_obj.retry(Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC)
8b0ac9
+        else:
8b0ac9
+            logging.debug(
8b0ac9
+                'Dc._on_get_log_fail()              - %s | %s Received event on dead object. %s',
8b0ac9
+                self.id,
8b0ac9
+                self.device,
8b0ac9
+                err,
8b0ac9
+            )
8b0ac9
+            op_obj.kill()
8b0ac9
+
8b0ac9
+
8b0ac9
+# ******************************************************************************
8b0ac9
+class Ioc(Controller):
8b0ac9
+    '''@brief This object establishes a connection to one I/O Controller.'''
8b0ac9
+
8b0ac9
+    def __init__(self, stac, root, host, tid: trid.TID):
8b0ac9
+        self._stac = stac
8b0ac9
+        super().__init__(root, host, tid)
8b0ac9
+
8b0ac9
+    def _release_resources(self):
8b0ac9
+        super()._release_resources()
8b0ac9
+        self._stac = None
8b0ac9
+
8b0ac9
+    def _on_ctrl_removed(self, obj):
8b0ac9
+        '''Called when the associated nvme device (/dev/nvmeX) is removed
8b0ac9
+        from the system.
8b0ac9
+        '''
8b0ac9
+        super()._on_ctrl_removed(obj)
8b0ac9
+
8b0ac9
+        # Defer removal of this object to the next main loop's idle period.
8b0ac9
+        GLib.idle_add(self._stac.remove_controller, self, True)
8b0ac9
+
8b0ac9
+    def _find_existing_connection(self):
8b0ac9
+        return self._udev.find_nvme_ioc_device(self.tid)
8b0ac9
+
8b0ac9
+    def _on_aen(self, aen: int):
8b0ac9
+        pass
8b0ac9
+
8b0ac9
+    def _on_nvme_event(self, nvme_event):
8b0ac9
+        pass
8b0ac9
diff --git a/staslib/gutil.py b/staslib/gutil.py
8b0ac9
index b302f3a..36ce2c7 100644
8b0ac9
--- a/staslib/gutil.py
8b0ac9
+++ b/staslib/gutil.py
8b0ac9
@@ -104,8 +104,7 @@ class GTimer:
8b0ac9
 
8b0ac9
 
8b0ac9
 # ******************************************************************************
8b0ac9
-class NameResolver:
8b0ac9
-    # pylint: disable=too-few-public-methods
8b0ac9
+class NameResolver:  # pylint: disable=too-few-public-methods
8b0ac9
     '''@brief DNS resolver to convert host names to IP addresses.'''
8b0ac9
 
8b0ac9
     def __init__(self):
8b0ac9
@@ -133,8 +132,10 @@ class NameResolver:
8b0ac9
                 else:
8b0ac9
                     logging.error('Cannot resolve traddr: %s', hostname)
8b0ac9
 
8b0ac9
-            except GLib.GError:
8b0ac9
-                logging.error('Cannot resolve traddr: %s', hostname)
8b0ac9
+            except GLib.GError as err:
8b0ac9
+                # We don't need to report "cancellation" errors.
8b0ac9
+                if not err.matches(Gio.io_error_quark(), Gio.IOErrorEnum.CANCELLED):
8b0ac9
+                    logging.error('Cannot resolve traddr: %s. %s', hostname, err.message)  # pylint: disable=no-member
8b0ac9
 
8b0ac9
             logging.debug('NameResolver.resolve_ctrl_async()  - resolved \'%s\' -> %s', hostname, traddr)
8b0ac9
             controllers[indx]['traddr'] = traddr
8b0ac9
diff --git a/staslib/log.py b/staslib/log.py
8b0ac9
index c624978..9622e98 100644
8b0ac9
--- a/staslib/log.py
8b0ac9
+++ b/staslib/log.py
8b0ac9
@@ -24,7 +24,7 @@ def init(syslog: bool):
8b0ac9
     if syslog:
8b0ac9
         try:
8b0ac9
             # Try journal logger first
8b0ac9
-            import systemd.journal  # pylint: disable=redefined-outer-name,import-outside-toplevel
8b0ac9
+            import systemd.journal  # pylint: disable=import-outside-toplevel
8b0ac9
 
8b0ac9
             handler = systemd.journal.JournalHandler(SYSLOG_IDENTIFIER=defs.PROG_NAME)
8b0ac9
         except ModuleNotFoundError:
8b0ac9
@@ -32,9 +32,7 @@ def init(syslog: bool):
8b0ac9
             from logging.handlers import SysLogHandler  # pylint: disable=import-outside-toplevel
8b0ac9
 
8b0ac9
             handler = SysLogHandler(address="/dev/log")
8b0ac9
-            handler.setFormatter(
8b0ac9
-                logging.Formatter('{}: %(message)s'.format(defs.PROG_NAME))  # pylint: disable=consider-using-f-string
8b0ac9
-            )
8b0ac9
+            handler.setFormatter(logging.Formatter(f'{defs.PROG_NAME}: %(message)s'))
8b0ac9
     else:
8b0ac9
         # Log to stdout
8b0ac9
         handler = logging.StreamHandler(stream=sys.stdout)
8b0ac9
diff --git a/staslib/service.py b/staslib/service.py
8b0ac9
index 556a9f9..a48e66d 100644
8b0ac9
--- a/staslib/service.py
8b0ac9
+++ b/staslib/service.py
8b0ac9
@@ -9,248 +9,416 @@
8b0ac9
 '''This module defines the base Service object from
8b0ac9
 which the Staf and the Stac objects are derived.'''
8b0ac9
 
8b0ac9
-import os
8b0ac9
-import signal
8b0ac9
+import json
8b0ac9
+import pickle
8b0ac9
 import logging
8b0ac9
+import pathlib
8b0ac9
 import systemd.daemon
8b0ac9
-import dasbus.connection
8b0ac9
+import dasbus.error
8b0ac9
+import dasbus.client.observer
8b0ac9
+import dasbus.client.proxy
8b0ac9
 
8b0ac9
-from gi.repository import Gio, GLib
8b0ac9
+from gi.repository import GLib
8b0ac9
 from libnvme import nvme
8b0ac9
-from staslib import conf, ctrl, defs, gutil, log, stas, trid, udev
8b0ac9
+from staslib import avahi, conf, ctrl, defs, gutil, stas, trid, udev
8b0ac9
 
8b0ac9
 
8b0ac9
 # ******************************************************************************
8b0ac9
-class Service:  # pylint: disable=too-many-instance-attributes
8b0ac9
+class Service(stas.ServiceABC):
8b0ac9
     '''@brief Base class used to manage a STorage Appliance Service'''
8b0ac9
 
8b0ac9
     def __init__(self, args, reload_hdlr):
8b0ac9
-
8b0ac9
         sysconf = conf.SysConf()
8b0ac9
         self._root = nvme.root()
8b0ac9
         self._host = nvme.host(self._root, sysconf.hostnqn, sysconf.hostid, sysconf.hostsymname)
8b0ac9
 
8b0ac9
-        service_conf = conf.SvcConf()
8b0ac9
-        service_conf.set_conf_file(args.conf_file) # reload configuration
8b0ac9
-        self._tron = args.tron or service_conf.tron
8b0ac9
-        log.set_level_from_tron(self._tron)
8b0ac9
-        self._root.log_level("debug" if self._tron else "err")
8b0ac9
+        super().__init__(args, reload_hdlr)
8b0ac9
 
8b0ac9
-        self._lkc_file     = os.path.join(os.environ.get('RUNTIME_DIRECTORY', os.path.join('/run', defs.PROG_NAME)), 'last-known-config.pickle')
8b0ac9
-        self._loop         = GLib.MainLoop()
8b0ac9
-        self._udev         = udev.UDEV
8b0ac9
-        self._cancellable  = Gio.Cancellable()
8b0ac9
-        self._resolver     = gutil.NameResolver()
8b0ac9
-        self._controllers  = self._load_last_known_config()
8b0ac9
-        self._dbus_iface   = None
8b0ac9
-        self._cfg_soak_tmr = None
8b0ac9
-        self._sysbus       = dasbus.connection.SystemMessageBus()
8b0ac9
-
8b0ac9
-        GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGINT, self._stop_hdlr)  # CTRL-C
8b0ac9
-        GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGTERM, self._stop_hdlr)  # systemctl stop stafd
8b0ac9
-        GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGHUP, reload_hdlr)  # systemctl reload stafd
8b0ac9
-
8b0ac9
-        nvme_options = conf.NvmeOptions()
8b0ac9
-        if not nvme_options.host_iface_supp or not nvme_options.discovery_supp:
8b0ac9
-            logging.warning(
8b0ac9
-                'Kernel does not appear to support all the options needed to run this program. Consider updating to a later kernel version.'
8b0ac9
-            )
8b0ac9
+        self._root.log_level("debug" if self._tron else "err")
8b0ac9
 
8b0ac9
     def _release_resources(self):
8b0ac9
         logging.debug('Service._release_resources()')
8b0ac9
+        super()._release_resources()
8b0ac9
 
8b0ac9
-        if self._cancellable and not self._cancellable.is_cancelled():
8b0ac9
-            self._cancellable.cancel()
8b0ac9
+        self._host = None
8b0ac9
+        self._root = None
8b0ac9
 
8b0ac9
-        if self._cfg_soak_tmr is not None:
8b0ac9
-            self._cfg_soak_tmr.kill()
8b0ac9
+    @stas.ServiceABC.tron.setter
8b0ac9
+    def tron(self, value):
8b0ac9
+        '''@brief Set Trace ON property'''
8b0ac9
+        super(__class__, self.__class__).tron.__set__(self, value)
8b0ac9
+        self._root.log_level("debug" if self._tron else "err")
8b0ac9
 
8b0ac9
-        self._controllers.clear()
8b0ac9
 
8b0ac9
-        if self._sysbus:
8b0ac9
-            self._sysbus.disconnect()
8b0ac9
+# ******************************************************************************
8b0ac9
+def udev_rule_ctrl(enable):
8b0ac9
+    '''@brief We add an empty udev rule to /run/udev/rules.d to suppress
8b0ac9
+    nvme-cli's udev rule that is used to tell udevd to automatically
8b0ac9
+    connect to I/O controller. This is to avoid race conditions between
8b0ac9
+    stacd and udevd. This is configurable. See "udev-rule" in stacd.conf
8b0ac9
+    for details.
8b0ac9
+    '''
8b0ac9
+    udev_rule_suppress = pathlib.Path('/run/udev/rules.d', '70-nvmf-autoconnect.rules')
8b0ac9
+    if enable:
8b0ac9
+        try:
8b0ac9
+            udev_rule_suppress.unlink()
8b0ac9
+        except FileNotFoundError:
8b0ac9
+            pass
8b0ac9
+    else:
8b0ac9
+        if not udev_rule_suppress.exists():
8b0ac9
+            pathlib.Path('/run/udev/rules.d').mkdir(parents=True, exist_ok=True)
8b0ac9
+            udev_rule_suppress.symlink_to('/dev/null')
8b0ac9
 
8b0ac9
-        self._cfg_soak_tmr = None
8b0ac9
-        self._cancellable = None
8b0ac9
-        self._resolver = None
8b0ac9
-        self._lkc_file = None
8b0ac9
-        self._sysbus = None
8b0ac9
-        self._udev = None
8b0ac9
 
8b0ac9
-    def _config_dbus(self, iface_obj, bus_name: str, obj_name: str):
8b0ac9
-        self._dbus_iface = iface_obj
8b0ac9
-        self._sysbus.publish_object(obj_name, iface_obj)
8b0ac9
-        self._sysbus.register_service(bus_name)
8b0ac9
+# ******************************************************************************
8b0ac9
+class Stac(Service):
8b0ac9
+    '''STorage Appliance Connector (STAC)'''
8b0ac9
 
8b0ac9
-    @property
8b0ac9
-    def tron(self):
8b0ac9
-        '''@brief Get Trace ON property'''
8b0ac9
-        return self._tron
8b0ac9
+    CONF_STABILITY_LONG_SOAK_TIME_SEC = 10  # pylint: disable=invalid-name
8b0ac9
+    ADD_EVENT_SOAK_TIME_SEC = 1
8b0ac9
 
8b0ac9
-    @tron.setter
8b0ac9
-    def tron(self, value):  # pylint: disable=no-self-use
8b0ac9
-        '''@brief Set Trace ON property'''
8b0ac9
-        self._tron = value
8b0ac9
-        log.set_level_from_tron(self._tron)
8b0ac9
-        self._root.log_level("debug" if self._tron else "err")
8b0ac9
+    def __init__(self, args, dbus):
8b0ac9
+        super().__init__(args, self._reload_hdlr)
8b0ac9
 
8b0ac9
-    def run(self):
8b0ac9
-        '''@brief Start the main loop execution'''
8b0ac9
-        try:
8b0ac9
-            self._loop.run()
8b0ac9
-        except Exception as ex:  # pylint: disable=broad-except
8b0ac9
-            logging.critical('exception: %s', ex)
8b0ac9
+        self._udev = udev.UDEV
8b0ac9
 
8b0ac9
-        self._loop = None
8b0ac9
+        self._add_event_soak_tmr = gutil.GTimer(self.ADD_EVENT_SOAK_TIME_SEC, self._on_add_event_soaked)
8b0ac9
 
8b0ac9
-    def info(self) -> dict:
8b0ac9
-        '''@brief Get the status info for this object (used for debug)'''
8b0ac9
-        nvme_options = conf.NvmeOptions()
8b0ac9
-        return {
8b0ac9
-            'last known config file': self._lkc_file,
8b0ac9
-            'config soak timer': str(self._cfg_soak_tmr),
8b0ac9
-            'kernel support': {
8b0ac9
-                'TP8013': nvme_options.discovery_supp,
8b0ac9
-                'host_iface': nvme_options.host_iface_supp,
8b0ac9
-            },
8b0ac9
-            'system config': conf.SysConf().as_dict(),
8b0ac9
-        }
8b0ac9
-
8b0ac9
-    def get_controllers(self):
8b0ac9
-        '''@brief return the list of controller objects'''
8b0ac9
-        return self._controllers.values()
8b0ac9
-
8b0ac9
-    def get_controller(
8b0ac9
-        self, transport: str, traddr: str, trsvcid: str, host_traddr: str, host_iface: str, subsysnqn: str
8b0ac9
-    ):  # pylint: disable=too-many-arguments
8b0ac9
-        '''@brief get the specified controller object from the list of controllers'''
8b0ac9
-        cid = {
8b0ac9
-            'transport': transport,
8b0ac9
-            'traddr': traddr,
8b0ac9
-            'trsvcid': trsvcid,
8b0ac9
-            'host-traddr': host_traddr,
8b0ac9
-            'host-iface': host_iface,
8b0ac9
-            'subsysnqn': subsysnqn,
8b0ac9
-        }
8b0ac9
-        return self._controllers.get(trid.TID(cid))
8b0ac9
-
8b0ac9
-    def _remove_ctrl_from_dict(self, controller):
8b0ac9
-        tid_to_pop = controller.tid
8b0ac9
-        if not tid_to_pop:
8b0ac9
-            # Being paranoid. This should not happen, but let's say the
8b0ac9
-            # controller object has been purged, but it is somehow still
8b0ac9
-            # listed in self._controllers.
8b0ac9
-            for tid, _controller in self._controllers.items():
8b0ac9
-                if _controller is controller:
8b0ac9
-                    tid_to_pop = tid
8b0ac9
-                    break
8b0ac9
-
8b0ac9
-        if tid_to_pop:
8b0ac9
-            logging.debug('Service._remove_ctrl_from_dict()   - %s | %s', tid_to_pop, controller.device)
8b0ac9
-            self._controllers.pop(tid_to_pop, None)
8b0ac9
-        else:
8b0ac9
-            logging.debug('Service._remove_ctrl_from_dict()   - already removed')
8b0ac9
+        self._config_connections_audit()
8b0ac9
 
8b0ac9
-    def remove_controller(self, controller):
8b0ac9
-        '''@brief remove the specified controller object from the list of controllers'''
8b0ac9
-        logging.debug('Service.remove_controller()')
8b0ac9
-        if isinstance(controller, ctrl.Controller):
8b0ac9
-            self._remove_ctrl_from_dict(controller)
8b0ac9
+        # Create the D-Bus instance.
8b0ac9
+        self._config_dbus(dbus, defs.STACD_DBUS_NAME, defs.STACD_DBUS_PATH)
8b0ac9
 
8b0ac9
-            controller.kill()
8b0ac9
+        # Connect to STAF D-Bus interface
8b0ac9
+        self._staf = None
8b0ac9
+        self._staf_watcher = dasbus.client.observer.DBusObserver(self._sysbus, defs.STAFD_DBUS_NAME)
8b0ac9
+        self._staf_watcher.service_available.connect(self._connect_to_staf)
8b0ac9
+        self._staf_watcher.service_unavailable.connect(self._disconnect_from_staf)
8b0ac9
+        self._staf_watcher.connect_once_available()
8b0ac9
 
8b0ac9
-        if self._cfg_soak_tmr:
8b0ac9
-            self._cfg_soak_tmr.start()
8b0ac9
+        # Suppress udev rule to auto-connect when AEN is received.
8b0ac9
+        udev_rule_ctrl(conf.SvcConf().udev_rule_enabled)
8b0ac9
 
8b0ac9
-    def _cancel(self):
8b0ac9
-        logging.debug('Service._cancel()')
8b0ac9
-        if not self._cancellable.is_cancelled():
8b0ac9
-            self._cancellable.cancel()
8b0ac9
+    def _release_resources(self):
8b0ac9
+        logging.debug('Stac._release_resources()')
8b0ac9
+
8b0ac9
+        if self._add_event_soak_tmr:
8b0ac9
+            self._add_event_soak_tmr.kill()
8b0ac9
+
8b0ac9
+        udev_rule_ctrl(True)
8b0ac9
+
8b0ac9
+        if self._udev:
8b0ac9
+            self._udev.unregister_for_action_events('add')
8b0ac9
+
8b0ac9
+        self._destroy_staf_comlink(self._staf_watcher)
8b0ac9
+        if self._staf_watcher is not None:
8b0ac9
+            self._staf_watcher.disconnect()
8b0ac9
 
8b0ac9
-        for controller in self._controllers.values():
8b0ac9
-            controller.cancel()
8b0ac9
+        super()._release_resources()
8b0ac9
+
8b0ac9
+        self._udev = None
8b0ac9
+        self._staf = None
8b0ac9
+        self._staf_watcher = None
8b0ac9
+        self._add_event_soak_tmr = None
8b0ac9
+
8b0ac9
+    def _audit_connections(self, tids):
8b0ac9
+        '''A host should only connect to I/O controllers that have been zoned
8b0ac9
+        for that host or a manual "controller" entry exists in stacd.conf.
8b0ac9
+        A host should disconnect from an I/O controller when that I/O controller
8b0ac9
+        is removed from the zone or a manual "controller" entry is removed from
8b0ac9
+        stacd.conf. stacd will audit connections if "sticky-connections=disabled".
8b0ac9
+        stacd will delete any connection that is not supposed to exist.
8b0ac9
+        '''
8b0ac9
+        logging.debug('Stac._audit_connections()          - tids = %s', tids)
8b0ac9
+        num_controllers = len(self._controllers)
8b0ac9
+        for tid in tids:
8b0ac9
+            if tid not in self._controllers:
8b0ac9
+                self._controllers[tid] = ctrl.Ioc(self, self._root, self._host, tid)
8b0ac9
+
8b0ac9
+        if num_controllers != len(self._controllers):
8b0ac9
+            self._cfg_soak_tmr.start(self.CONF_STABILITY_SOAK_TIME_SEC)
8b0ac9
+
8b0ac9
+    def _on_add_event(self, udev_obj):  # pylint: disable=unused-argument
8b0ac9
+        '''@brief This function is called when an "add" event is received from
8b0ac9
+        the kernel for an NVMe device. This is used to trigger an audit and make
8b0ac9
+        sure that the connection to an I/O controller is allowed.
8b0ac9
+
8b0ac9
+        WARNING: There is a race condition with the "add" event from the kernel.
8b0ac9
+        The kernel sends the "add" event a bit early and the sysfs attributes
8b0ac9
+        associated with the nvme object are not always fully initialized.
8b0ac9
+        To work around this problem we use a soaking timer to give time for the
8b0ac9
+        sysfs attributes to stabilize.
8b0ac9
+        '''
8b0ac9
+        self._add_event_soak_tmr.start()
8b0ac9
+
8b0ac9
+    def _on_add_event_soaked(self):
8b0ac9
+        '''@brief After the add event has been soaking for ADD_EVENT_SOAK_TIME_SEC
8b0ac9
+        seconds, we can audit the connections.
8b0ac9
+        '''
8b0ac9
+        if not conf.SvcConf().sticky_connections:
8b0ac9
+            self._audit_connections(self._udev.get_nvme_ioc_tids())
8b0ac9
+        return GLib.SOURCE_REMOVE
8b0ac9
+
8b0ac9
+    def _config_connections_audit(self):
8b0ac9
+        '''This function checks the "sticky_connections" parameter to determine
8b0ac9
+        whether audits should be performed. Audits are enabled when
8b0ac9
+        "sticky_connections" is disabled.
8b0ac9
+        '''
8b0ac9
+        if not conf.SvcConf().sticky_connections:
8b0ac9
+            if self._udev.get_registered_action_cback('add') is None:
8b0ac9
+                self._udev.register_for_action_events('add', self._on_add_event)
8b0ac9
+                self._audit_connections(self._udev.get_nvme_ioc_tids())
8b0ac9
+        else:
8b0ac9
+            self._udev.unregister_for_action_events('add')
8b0ac9
 
8b0ac9
     def _keep_connections_on_exit(self):
8b0ac9
         '''@brief Determine whether connections should remain when the
8b0ac9
         process exits.
8b0ac9
-
8b0ac9
-        NOTE) This is the base class method used to define the interface.
8b0ac9
-        It must be overloaded by a child class.
8b0ac9
         '''
8b0ac9
-        raise NotImplementedError()
8b0ac9
+        return True
8b0ac9
 
8b0ac9
-    def _stop_hdlr(self):
8b0ac9
-        systemd.daemon.notify('STOPPING=1')
8b0ac9
+    def _reload_hdlr(self):
8b0ac9
+        '''@brief Reload configuration file. This is triggered by the SIGHUP
8b0ac9
+        signal, which can be sent with "systemctl reload stacd".
8b0ac9
+        '''
8b0ac9
+        systemd.daemon.notify('RELOADING=1')
8b0ac9
+        service_cnf = conf.SvcConf()
8b0ac9
+        service_cnf.reload()
8b0ac9
+        self.tron = service_cnf.tron
8b0ac9
+        self._config_connections_audit()
8b0ac9
+        self._cfg_soak_tmr.start(self.CONF_STABILITY_SOAK_TIME_SEC)
8b0ac9
+        udev_rule_ctrl(service_cnf.udev_rule_enabled)
8b0ac9
+        systemd.daemon.notify('READY=1')
8b0ac9
+        return GLib.SOURCE_CONTINUE
8b0ac9
+
8b0ac9
+    def _get_log_pages_from_stafd(self):
8b0ac9
+        if self._staf:
8b0ac9
+            try:
8b0ac9
+                return json.loads(self._staf.get_all_log_pages(True))
8b0ac9
+            except dasbus.error.DBusError:
8b0ac9
+                pass
8b0ac9
+
8b0ac9
+        return list()
8b0ac9
 
8b0ac9
-        self._cancel()  # Cancel pending operations
8b0ac9
+    def _config_ctrls_finish(self, configured_ctrl_list):
8b0ac9
+        configured_ctrl_list = [
8b0ac9
+            ctrl_dict for ctrl_dict in configured_ctrl_list if 'traddr' in ctrl_dict and 'subsysnqn' in ctrl_dict
8b0ac9
+        ]
8b0ac9
+        logging.debug('Stac._config_ctrls_finish()        - configured_ctrl_list = %s', configured_ctrl_list)
8b0ac9
+
8b0ac9
+        discovered_ctrl_list = list()
8b0ac9
+        for staf_data in self._get_log_pages_from_stafd():
8b0ac9
+            host_traddr = staf_data['discovery-controller']['host-traddr']
8b0ac9
+            host_iface = staf_data['discovery-controller']['host-iface']
8b0ac9
+            for dlpe in staf_data['log-pages']:
8b0ac9
+                if dlpe.get('subtype') == 'nvme':  # eliminate discovery controllers
8b0ac9
+                    discovered_ctrl_list.append(stas.cid_from_dlpe(dlpe, host_traddr, host_iface))
8b0ac9
+
8b0ac9
+        logging.debug('Stac._config_ctrls_finish()        - discovered_ctrl_list = %s', discovered_ctrl_list)
8b0ac9
+
8b0ac9
+        controllers = stas.remove_blacklisted(configured_ctrl_list + discovered_ctrl_list)
8b0ac9
+        controllers = stas.remove_invalid_addresses(controllers)
8b0ac9
+
8b0ac9
+        new_controller_ids = {trid.TID(controller) for controller in controllers}
8b0ac9
+        cur_controller_ids = set(self._controllers.keys())
8b0ac9
+        controllers_to_add = new_controller_ids - cur_controller_ids
8b0ac9
+        controllers_to_del = cur_controller_ids - new_controller_ids
8b0ac9
+
8b0ac9
+        logging.debug('Stac._config_ctrls_finish()        - controllers_to_add   = %s', list(controllers_to_add))
8b0ac9
+        logging.debug('Stac._config_ctrls_finish()        - controllers_to_del   = %s', list(controllers_to_del))
8b0ac9
+
8b0ac9
+        for tid in controllers_to_del:
8b0ac9
+            controller = self._controllers.pop(tid, None)
8b0ac9
+            if controller is not None:
8b0ac9
+                controller.disconnect(self.remove_controller, conf.SvcConf().sticky_connections)
8b0ac9
+
8b0ac9
+        for tid in controllers_to_add:
8b0ac9
+            self._controllers[tid] = ctrl.Ioc(self, self._root, self._host, tid)
8b0ac9
+
8b0ac9
+    def _connect_to_staf(self, _):
8b0ac9
+        '''@brief Hook up DBus signal handlers for signals from stafd.'''
8b0ac9
+        try:
8b0ac9
+            self._staf = self._sysbus.get_proxy(defs.STAFD_DBUS_NAME, defs.STAFD_DBUS_PATH)
8b0ac9
+            self._staf.log_pages_changed.connect(self._log_pages_changed)
8b0ac9
+            self._cfg_soak_tmr.start()
8b0ac9
 
8b0ac9
-        self._dump_last_known_config(self._controllers)
8b0ac9
+            # Make sure timer is set back to its normal value.
8b0ac9
+            self._cfg_soak_tmr.set_timeout(self.CONF_STABILITY_SOAK_TIME_SEC)
8b0ac9
+            logging.debug('Stac._connect_to_staf()            - Connected to staf')
8b0ac9
+        except dasbus.error.DBusError:
8b0ac9
+            logging.error('Failed to connect to staf')
8b0ac9
+
8b0ac9
+    def _destroy_staf_comlink(self, watcher):  # pylint: disable=unused-argument
8b0ac9
+        if self._staf:
8b0ac9
+            self._staf.log_pages_changed.disconnect(self._log_pages_changed)
8b0ac9
+            dasbus.client.proxy.disconnect_proxy(self._staf)
8b0ac9
+            self._staf = None
8b0ac9
+
8b0ac9
+    def _disconnect_from_staf(self, watcher):
8b0ac9
+        self._destroy_staf_comlink(watcher)
8b0ac9
+
8b0ac9
+        # When we lose connectivity with stafd, the most logical explanation
8b0ac9
+        # is that stafd restarted. In that case, it may take some time for stafd
8b0ac9
+        # to re-populate its log pages cache. So let's give stafd plenty of time
8b0ac9
+        # to update its log pages cache and send log pages change notifications
8b0ac9
+        # before triggering a stacd re-config. We do this by momentarily
8b0ac9
+        # increasing the config soak timer to a longer period.
8b0ac9
+        if self._cfg_soak_tmr:
8b0ac9
+            self._cfg_soak_tmr.set_timeout(self.CONF_STABILITY_LONG_SOAK_TIME_SEC)
8b0ac9
+
8b0ac9
+        logging.debug('Stac._disconnect_from_staf()       - Disconnected from staf')
8b0ac9
+
8b0ac9
+    def _log_pages_changed(  # pylint: disable=too-many-arguments
8b0ac9
+        self, transport, traddr, trsvcid, host_traddr, host_iface, subsysnqn, device
8b0ac9
+    ):
8b0ac9
+        logging.debug(
8b0ac9
+            'Stac._log_pages_changed()          - transport=%s, traddr=%s, trsvcid=%s, host_traddr=%s, host_iface=%s, subsysnqn=%s, device=%s',
8b0ac9
+            transport,
8b0ac9
+            traddr,
8b0ac9
+            trsvcid,
8b0ac9
+            host_traddr,
8b0ac9
+            host_iface,
8b0ac9
+            subsysnqn,
8b0ac9
+            device,
8b0ac9
+        )
8b0ac9
+        if self._cfg_soak_tmr:
8b0ac9
+            self._cfg_soak_tmr.start(self.CONF_STABILITY_SOAK_TIME_SEC)
8b0ac9
 
8b0ac9
-        if len(self._controllers) == 0:
8b0ac9
-            GLib.idle_add(self._exit)
8b0ac9
-        else:
8b0ac9
-            # Tell all controller objects to disconnect
8b0ac9
-            keep_connections = self._keep_connections_on_exit()
8b0ac9
-            controllers = self._controllers.values()
8b0ac9
-            for controller in controllers:
8b0ac9
-                controller.disconnect(self._on_final_disconnect, keep_connections)
8b0ac9
+    def _load_last_known_config(self):
8b0ac9
+        return dict()
8b0ac9
 
8b0ac9
-        return GLib.SOURCE_REMOVE
8b0ac9
+    def _dump_last_known_config(self, controllers):
8b0ac9
+        pass
8b0ac9
 
8b0ac9
-    def _on_final_disconnect(self, controller):
8b0ac9
-        '''Callback invoked after a controller is disconnected.
8b0ac9
-        THIS IS USED DURING PROCESS SHUTDOWN TO WAIT FOR ALL CONTROLLERS TO BE
8b0ac9
-        DISCONNECTED BEFORE EXITING THE PROGRAM. ONLY CALL ON SHUTDOWN!
8b0ac9
-        '''
8b0ac9
-        logging.debug('Service._on_final_disconnect()')
8b0ac9
-        self._remove_ctrl_from_dict(controller)
8b0ac9
 
8b0ac9
-        controller.kill()
8b0ac9
+# ******************************************************************************
8b0ac9
+class Staf(Service):
8b0ac9
+    '''STorage Appliance Finder (STAF)'''
8b0ac9
 
8b0ac9
-        # When all controllers have disconnected, we can finish the clean up
8b0ac9
-        if len(self._controllers) == 0:
8b0ac9
-            # Defer exit to the next main loop's idle period.
8b0ac9
-            GLib.idle_add(self._exit)
8b0ac9
+    def __init__(self, args, dbus):
8b0ac9
+        super().__init__(args, self._reload_hdlr)
8b0ac9
 
8b0ac9
-    def _exit(self):
8b0ac9
-        logging.debug('Service._exit()')
8b0ac9
-        self._release_resources()
8b0ac9
-        self._loop.quit()
8b0ac9
+        self._avahi = avahi.Avahi(self._sysbus, self._avahi_change)
8b0ac9
+        self._avahi.config_stypes(conf.SvcConf().get_stypes())
8b0ac9
 
8b0ac9
-    def _on_config_ctrls(self, *_user_data):
8b0ac9
-        self._config_ctrls()
8b0ac9
-        return GLib.SOURCE_REMOVE
8b0ac9
+        # Create the D-Bus instance.
8b0ac9
+        self._config_dbus(dbus, defs.STAFD_DBUS_NAME, defs.STAFD_DBUS_PATH)
8b0ac9
 
8b0ac9
-    def _config_ctrls(self):
8b0ac9
-        '''@brief Start controllers configuration.'''
8b0ac9
-        # The configuration file may contain controllers and/or blacklist
8b0ac9
-        # elements with traddr specified as hostname instead of IP address.
8b0ac9
-        # Because of this, we need to remove those blacklisted elements before
8b0ac9
-        # running name resolution. And we will need to remove blacklisted
8b0ac9
-        # elements after name resolution is complete (i.e. in the calback
8b0ac9
-        # function _config_ctrls_finish)
8b0ac9
-        logging.debug('Service._config_ctrls()')
8b0ac9
-        configured_controllers = stas.remove_blacklisted(conf.SvcConf().get_controllers())
8b0ac9
-        self._resolver.resolve_ctrl_async(self._cancellable, configured_controllers, self._config_ctrls_finish)
8b0ac9
+    def info(self) -> dict:
8b0ac9
+        '''@brief Get the status info for this object (used for debug)'''
8b0ac9
+        info = super().info()
8b0ac9
+        info['avahi'] = self._avahi.info()
8b0ac9
+        return info
8b0ac9
 
8b0ac9
-    def _config_ctrls_finish(self, configured_ctrl_list):
8b0ac9
-        '''@brief Finish controllers configuration after hostnames (if any)
8b0ac9
-        have been resolved.
8b0ac9
-
8b0ac9
-        Configuring controllers must be done asynchronously in 2 steps.
8b0ac9
-        In the first step, host names get resolved to find their IP addresses.
8b0ac9
-        Name resolution can take a while, especially when an external name
8b0ac9
-        resolution server is used. Once that step completed, the callback
8b0ac9
-        method _config_ctrls_finish() (i.e. this method), gets invoked to
8b0ac9
-        complete the controller configuration.
8b0ac9
-
8b0ac9
-        NOTE) This is the base class method used to define the interface.
8b0ac9
-        It must be overloaded by a child class.
8b0ac9
-        '''
8b0ac9
-        raise NotImplementedError()
8b0ac9
+    def _release_resources(self):
8b0ac9
+        logging.debug('Staf._release_resources()')
8b0ac9
+        super()._release_resources()
8b0ac9
+        if self._avahi:
8b0ac9
+            self._avahi.kill()
8b0ac9
+            self._avahi = None
8b0ac9
 
8b0ac9
     def _load_last_known_config(self):
8b0ac9
-        raise NotImplementedError()
8b0ac9
+        try:
8b0ac9
+            with open(self._lkc_file, 'rb') as file:
8b0ac9
+                config = pickle.load(file)
8b0ac9
+        except (FileNotFoundError, AttributeError):
8b0ac9
+            return dict()
8b0ac9
+
8b0ac9
+        logging.debug('Staf._load_last_known_config()     - DC count = %s', len(config))
8b0ac9
+        return {tid: ctrl.Dc(self, self._root, self._host, tid, log_pages) for tid, log_pages in config.items()}
8b0ac9
 
8b0ac9
     def _dump_last_known_config(self, controllers):
8b0ac9
-        raise NotImplementedError()
8b0ac9
+        try:
8b0ac9
+            with open(self._lkc_file, 'wb') as file:
8b0ac9
+                config = {tid: dc.log_pages() for tid, dc in controllers.items()}
8b0ac9
+                logging.debug('Staf._dump_last_known_config()     - DC count = %s', len(config))
8b0ac9
+                pickle.dump(config, file)
8b0ac9
+        except FileNotFoundError as ex:
8b0ac9
+            logging.error('Unable to save last known config: %s', ex)
8b0ac9
+
8b0ac9
+    def _keep_connections_on_exit(self):
8b0ac9
+        '''@brief Determine whether connections should remain when the
8b0ac9
+        process exits.
8b0ac9
+        '''
8b0ac9
+        return conf.SvcConf().persistent_connections
8b0ac9
+
8b0ac9
+    def _reload_hdlr(self):
8b0ac9
+        '''@brief Reload configuration file. This is triggered by the SIGHUP
8b0ac9
+        signal, which can be sent with "systemctl reload stafd".
8b0ac9
+        '''
8b0ac9
+        systemd.daemon.notify('RELOADING=1')
8b0ac9
+        service_cnf = conf.SvcConf()
8b0ac9
+        service_cnf.reload()
8b0ac9
+        self.tron = service_cnf.tron
8b0ac9
+        self._avahi.kick_start()  # Make sure Avahi is running
8b0ac9
+        self._avahi.config_stypes(service_cnf.get_stypes())
8b0ac9
+        self._cfg_soak_tmr.start()
8b0ac9
+        systemd.daemon.notify('READY=1')
8b0ac9
+        return GLib.SOURCE_CONTINUE
8b0ac9
+
8b0ac9
+    def log_pages_changed(self, controller, device):
8b0ac9
+        '''@brief Function invoked when a controller's cached log pages
8b0ac9
+        have changed. This will emit a D-Bus signal to inform
8b0ac9
+        other applications that the cached log pages have changed.
8b0ac9
+        '''
8b0ac9
+        self._dbus_iface.log_pages_changed.emit(
8b0ac9
+            controller.tid.transport,
8b0ac9
+            controller.tid.traddr,
8b0ac9
+            controller.tid.trsvcid,
8b0ac9
+            controller.tid.host_traddr,
8b0ac9
+            controller.tid.host_iface,
8b0ac9
+            controller.tid.subsysnqn,
8b0ac9
+            device,
8b0ac9
+        )
8b0ac9
+
8b0ac9
+    def referrals_changed(self):
8b0ac9
+        '''@brief Function invoked when a controller's cached referrals
8b0ac9
+        have changed.
8b0ac9
+        '''
8b0ac9
+        logging.debug('Staf.referrals_changed()')
8b0ac9
+        self._cfg_soak_tmr.start()
8b0ac9
+
8b0ac9
+    def _referrals(self) -> list:
8b0ac9
+        return [
8b0ac9
+            stas.cid_from_dlpe(dlpe, controller.tid.host_traddr, controller.tid.host_iface)
8b0ac9
+            for controller in self.get_controllers()
8b0ac9
+            for dlpe in controller.referrals()
8b0ac9
+        ]
8b0ac9
+
8b0ac9
+    def _config_ctrls_finish(self, configured_ctrl_list):
8b0ac9
+        '''@brief Finish discovery controllers configuration after
8b0ac9
+        hostnames (if any) have been resolved.
8b0ac9
+        '''
8b0ac9
+        configured_ctrl_list = [
8b0ac9
+            ctrl_dict
8b0ac9
+            for ctrl_dict in configured_ctrl_list
8b0ac9
+            if 'traddr' in ctrl_dict and ctrl_dict.setdefault('subsysnqn', defs.WELL_KNOWN_DISC_NQN)
8b0ac9
+        ]
8b0ac9
+
8b0ac9
+        discovered_ctrl_list = self._avahi.get_controllers()
8b0ac9
+        referral_ctrl_list = self._referrals()
8b0ac9
+        logging.debug('Staf._config_ctrls_finish()        - configured_ctrl_list = %s', configured_ctrl_list)
8b0ac9
+        logging.debug('Staf._config_ctrls_finish()        - discovered_ctrl_list = %s', discovered_ctrl_list)
8b0ac9
+        logging.debug('Staf._config_ctrls_finish()        - referral_ctrl_list   = %s', referral_ctrl_list)
8b0ac9
+
8b0ac9
+        controllers = stas.remove_blacklisted(configured_ctrl_list + discovered_ctrl_list + referral_ctrl_list)
8b0ac9
+        controllers = stas.remove_invalid_addresses(controllers)
8b0ac9
+
8b0ac9
+        new_controller_ids = {trid.TID(controller) for controller in controllers}
8b0ac9
+        cur_controller_ids = set(self._controllers.keys())
8b0ac9
+        controllers_to_add = new_controller_ids - cur_controller_ids
8b0ac9
+        controllers_to_del = cur_controller_ids - new_controller_ids
8b0ac9
+
8b0ac9
+        logging.debug('Staf._config_ctrls_finish()        - controllers_to_add   = %s', list(controllers_to_add))
8b0ac9
+        logging.debug('Staf._config_ctrls_finish()        - controllers_to_del   = %s', list(controllers_to_del))
8b0ac9
+
8b0ac9
+        for tid in controllers_to_del:
8b0ac9
+            controller = self._controllers.pop(tid, None)
8b0ac9
+            if controller is not None:
8b0ac9
+                controller.disconnect(self.remove_controller, conf.SvcConf().persistent_connections)
8b0ac9
+
8b0ac9
+        for tid in controllers_to_add:
8b0ac9
+            self._controllers[tid] = ctrl.Dc(self, self._root, self._host, tid)
8b0ac9
+
8b0ac9
+    def _avahi_change(self):
8b0ac9
+        self._cfg_soak_tmr.start()
8b0ac9
diff --git a/staslib/stas.py b/staslib/stas.py
8b0ac9
index 7bf91e0..496f063 100644
8b0ac9
--- a/staslib/stas.py
8b0ac9
+++ b/staslib/stas.py
8b0ac9
@@ -6,14 +6,19 @@
8b0ac9
 #
8b0ac9
 # Authors: Martin Belanger <Martin.Belanger@dell.com>
8b0ac9
 #
8b0ac9
-'''Library for staf/stac'''
8b0ac9
+'''Library for staf/stac. You will find here common code for stafd and stacd
8b0ac9
+including the Abstract Base Classes (ABC) for Controllers and Services'''
8b0ac9
 
8b0ac9
 import os
8b0ac9
 import sys
8b0ac9
-import ipaddress
8b0ac9
+import abc
8b0ac9
+import signal
8b0ac9
 import logging
8b0ac9
-
8b0ac9
-from staslib import conf, defs, trid
8b0ac9
+import ipaddress
8b0ac9
+import systemd.daemon
8b0ac9
+import dasbus.connection
8b0ac9
+from gi.repository import Gio, GLib
8b0ac9
+from staslib import conf, defs, gutil, log, trid
8b0ac9
 
8b0ac9
 
8b0ac9
 # ******************************************************************************
8b0ac9
@@ -108,3 +113,379 @@ def remove_invalid_addresses(controllers: list):
8b0ac9
             logging.warning('Invalid transport %s', transport)
8b0ac9
 
8b0ac9
     return valid_controllers
8b0ac9
+
8b0ac9
+
8b0ac9
+# ******************************************************************************
8b0ac9
+class ControllerABC(abc.ABC):  # pylint: disable=too-many-instance-attributes
8b0ac9
+    '''@brief Base class used to manage the connection to a controller.'''
8b0ac9
+
8b0ac9
+    CONNECT_RETRY_PERIOD_SEC = 60
8b0ac9
+    FAST_CONNECT_RETRY_PERIOD_SEC = 3
8b0ac9
+
8b0ac9
+    def __init__(self, root, host, tid: trid.TID, discovery_ctrl=False):
8b0ac9
+        self._root              = root
8b0ac9
+        self._host              = host
8b0ac9
+        self._tid               = tid
8b0ac9
+        self._cancellable       = Gio.Cancellable()
8b0ac9
+        self._connect_attempts  = 0
8b0ac9
+        self._retry_connect_tmr = gutil.GTimer(self.CONNECT_RETRY_PERIOD_SEC, self._on_try_to_connect)
8b0ac9
+        self._discovery_ctrl    = discovery_ctrl
8b0ac9
+        self._try_to_connect_deferred = gutil.Deferred(self._try_to_connect)
8b0ac9
+        self._try_to_connect_deferred.schedule()
8b0ac9
+
8b0ac9
+    def _release_resources(self):
8b0ac9
+        # Remove pending deferred from main loop
8b0ac9
+        if self._try_to_connect_deferred:
8b0ac9
+            self._try_to_connect_deferred.cancel()
8b0ac9
+
8b0ac9
+        if self._retry_connect_tmr is not None:
8b0ac9
+            self._retry_connect_tmr.kill()
8b0ac9
+
8b0ac9
+        if self._cancellable and not self._cancellable.is_cancelled():
8b0ac9
+            self._cancellable.cancel()
8b0ac9
+
8b0ac9
+        self._tid = None
8b0ac9
+        self._cancellable = None
8b0ac9
+        self._retry_connect_tmr = None
8b0ac9
+        self._try_to_connect_deferred = None
8b0ac9
+
8b0ac9
+    @property
8b0ac9
+    def id(self) -> str:
8b0ac9
+        '''@brief Return the Transport ID as a printable string'''
8b0ac9
+        return str(self.tid)
8b0ac9
+
8b0ac9
+    @property
8b0ac9
+    def tid(self):
8b0ac9
+        '''@brief Return the Transport ID object'''
8b0ac9
+        return self._tid
8b0ac9
+
8b0ac9
+    def controller_id_dict(self) -> dict:
8b0ac9
+        '''@brief return the controller ID as a dict.'''
8b0ac9
+        return self.tid.as_dict()
8b0ac9
+
8b0ac9
+    def details(self) -> dict:
8b0ac9
+        '''@brief return detailed debug info about this controller'''
8b0ac9
+        details = self.controller_id_dict()
8b0ac9
+        details['connect attempts'] = str(self._connect_attempts)
8b0ac9
+        details['retry connect timer'] = str(self._retry_connect_tmr)
8b0ac9
+        return details
8b0ac9
+
8b0ac9
+    def info(self) -> dict:
8b0ac9
+        '''@brief Get the controller info for this object'''
8b0ac9
+        return self.details()
8b0ac9
+
8b0ac9
+    def cancel(self):
8b0ac9
+        '''@brief Used to cancel pending operations.'''
8b0ac9
+        if self._cancellable and not self._cancellable.is_cancelled():
8b0ac9
+            logging.debug('ControllerABC.cancel()             - %s', self.id)
8b0ac9
+            self._cancellable.cancel()
8b0ac9
+
8b0ac9
+    def kill(self):
8b0ac9
+        '''@brief Used to release all resources associated with this object.'''
8b0ac9
+        logging.debug('ControllerABC.kill()               - %s', self.id)
8b0ac9
+        self._release_resources()
8b0ac9
+
8b0ac9
+    def _alive(self):
8b0ac9
+        '''There may be a race condition where a queued event gets processed
8b0ac9
+        after the object is no longer configured (i.e. alive). This method
8b0ac9
+        can be used by callback functions to make sure the object is still
8b0ac9
+        alive before processing further.
8b0ac9
+        '''
8b0ac9
+        return self._cancellable and not self._cancellable.is_cancelled()
8b0ac9
+
8b0ac9
+    def _on_try_to_connect(self):
8b0ac9
+        self._try_to_connect_deferred.schedule()
8b0ac9
+        return GLib.SOURCE_REMOVE
8b0ac9
+
8b0ac9
+    def _try_to_connect(self):
8b0ac9
+        # This is a deferred function call. Make sure
8b0ac9
+        # the source of the deferred is still good.
8b0ac9
+        source = GLib.main_current_source()
8b0ac9
+        if source and source.is_destroyed():
8b0ac9
+            return
8b0ac9
+
8b0ac9
+        self._connect_attempts += 1
8b0ac9
+
8b0ac9
+        self._do_connect()
8b0ac9
+
8b0ac9
+    @abc.abstractmethod
8b0ac9
+    def _do_connect(self):
8b0ac9
+        raise NotImplementedError()
8b0ac9
+
8b0ac9
+    @abc.abstractmethod
8b0ac9
+    def _on_aen(self, aen: int):
8b0ac9
+        raise NotImplementedError()
8b0ac9
+
8b0ac9
+    @abc.abstractmethod
8b0ac9
+    def _on_nvme_event(self, nvme_event):
8b0ac9
+        raise NotImplementedError()
8b0ac9
+
8b0ac9
+    @abc.abstractmethod
8b0ac9
+    def _on_ctrl_removed(self, obj):
8b0ac9
+        raise NotImplementedError()
8b0ac9
+
8b0ac9
+    @abc.abstractmethod
8b0ac9
+    def _find_existing_connection(self):
8b0ac9
+        raise NotImplementedError()
8b0ac9
+
8b0ac9
+    @abc.abstractmethod
8b0ac9
+    def disconnect(self, disconnected_cb, keep_connection):
8b0ac9
+        '''@brief Issue an asynchronous disconnect command to a Controller.
8b0ac9
+        Once the async command has completed, the callback 'disconnected_cb'
8b0ac9
+        will be invoked. If a controller is already disconnected, then the
8b0ac9
+        callback will be added to the main loop's next idle slot to be executed
8b0ac9
+        ASAP.
8b0ac9
+        '''
8b0ac9
+        raise NotImplementedError()
8b0ac9
+
8b0ac9
+
8b0ac9
+# ******************************************************************************
8b0ac9
+class ServiceABC(abc.ABC):  # pylint: disable=too-many-instance-attributes
8b0ac9
+    '''@brief Base class used to manage a STorage Appliance Service'''
8b0ac9
+
8b0ac9
+    CONF_STABILITY_SOAK_TIME_SEC = 1.5
8b0ac9
+
8b0ac9
+    def __init__(self, args, reload_hdlr):
8b0ac9
+
8b0ac9
+        service_conf = conf.SvcConf()
8b0ac9
+        service_conf.set_conf_file(args.conf_file)  # reload configuration
8b0ac9
+        self._tron = args.tron or service_conf.tron
8b0ac9
+        log.set_level_from_tron(self._tron)
8b0ac9
+
8b0ac9
+        self._lkc_file     = os.path.join(os.environ.get('RUNTIME_DIRECTORY', os.path.join('/run', defs.PROG_NAME)), 'last-known-config.pickle')
8b0ac9
+        self._loop         = GLib.MainLoop()
8b0ac9
+        self._cancellable  = Gio.Cancellable()
8b0ac9
+        self._resolver     = gutil.NameResolver()
8b0ac9
+        self._controllers  = self._load_last_known_config()
8b0ac9
+        self._dbus_iface   = None
8b0ac9
+        self._cfg_soak_tmr = gutil.GTimer(self.CONF_STABILITY_SOAK_TIME_SEC, self._on_config_ctrls)
8b0ac9
+        self._sysbus       = dasbus.connection.SystemMessageBus()
8b0ac9
+
8b0ac9
+        GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGINT, self._stop_hdlr)  # CTRL-C
8b0ac9
+        GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGTERM, self._stop_hdlr)  # systemctl stop stafd
8b0ac9
+        GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGHUP, reload_hdlr)  # systemctl reload stafd
8b0ac9
+
8b0ac9
+        nvme_options = conf.NvmeOptions()
8b0ac9
+        if not nvme_options.host_iface_supp or not nvme_options.discovery_supp:
8b0ac9
+            logging.warning(
8b0ac9
+                'Kernel does not appear to support all the options needed to run this program. Consider updating to a later kernel version.'
8b0ac9
+            )
8b0ac9
+
8b0ac9
+        # We don't want to apply configuration changes to nvme-cli right away.
8b0ac9
+        # Often, multiple changes will occur in a short amount of time (sub-second).
8b0ac9
+        # We want to wait until there are no more changes before applying them
8b0ac9
+        # to the system. The following timer acts as a "soak period". Changes
8b0ac9
+        # will be applied by calling self._on_config_ctrls() at the end of
8b0ac9
+        # the soak period.
8b0ac9
+        self._cfg_soak_tmr.start()
8b0ac9
+
8b0ac9
+    def _release_resources(self):
8b0ac9
+        logging.debug('ServiceABC._release_resources()')
8b0ac9
+
8b0ac9
+        if self._cancellable and not self._cancellable.is_cancelled():
8b0ac9
+            self._cancellable.cancel()
8b0ac9
+
8b0ac9
+        if self._cfg_soak_tmr is not None:
8b0ac9
+            self._cfg_soak_tmr.kill()
8b0ac9
+
8b0ac9
+        self._controllers.clear()
8b0ac9
+
8b0ac9
+        if self._sysbus:
8b0ac9
+            self._sysbus.disconnect()
8b0ac9
+
8b0ac9
+        self._cfg_soak_tmr = None
8b0ac9
+        self._cancellable = None
8b0ac9
+        self._resolver = None
8b0ac9
+        self._lkc_file = None
8b0ac9
+        self._sysbus = None
8b0ac9
+
8b0ac9
+    def _config_dbus(self, iface_obj, bus_name: str, obj_name: str):
8b0ac9
+        self._dbus_iface = iface_obj
8b0ac9
+        self._sysbus.publish_object(obj_name, iface_obj)
8b0ac9
+        self._sysbus.register_service(bus_name)
8b0ac9
+
8b0ac9
+    @property
8b0ac9
+    def tron(self):
8b0ac9
+        '''@brief Get Trace ON property'''
8b0ac9
+        return self._tron
8b0ac9
+
8b0ac9
+    @tron.setter
8b0ac9
+    def tron(self, value):
8b0ac9
+        '''@brief Set Trace ON property'''
8b0ac9
+        self._tron = value
8b0ac9
+        log.set_level_from_tron(self._tron)
8b0ac9
+
8b0ac9
+    def run(self):
8b0ac9
+        '''@brief Start the main loop execution'''
8b0ac9
+        try:
8b0ac9
+            self._loop.run()
8b0ac9
+        except Exception as ex:  # pylint: disable=broad-except
8b0ac9
+            logging.critical('exception: %s', ex)
8b0ac9
+
8b0ac9
+        self._loop = None
8b0ac9
+
8b0ac9
+    def info(self) -> dict:
8b0ac9
+        '''@brief Get the status info for this object (used for debug)'''
8b0ac9
+        nvme_options = conf.NvmeOptions()
8b0ac9
+        return {
8b0ac9
+            'last known config file': self._lkc_file,
8b0ac9
+            'config soak timer': str(self._cfg_soak_tmr),
8b0ac9
+            'kernel support': {
8b0ac9
+                'TP8013': nvme_options.discovery_supp,
8b0ac9
+                'host_iface': nvme_options.host_iface_supp,
8b0ac9
+            },
8b0ac9
+            'system config': conf.SysConf().as_dict(),
8b0ac9
+        }
8b0ac9
+
8b0ac9
+    def get_controllers(self) -> dict:
8b0ac9
+        '''@brief return the list of controller objects'''
8b0ac9
+        return self._controllers.values()
8b0ac9
+
8b0ac9
+    def get_controller(
8b0ac9
+        self, transport: str, traddr: str, trsvcid: str, host_traddr: str, host_iface: str, subsysnqn: str
8b0ac9
+    ):  # pylint: disable=too-many-arguments
8b0ac9
+        '''@brief get the specified controller object from the list of controllers'''
8b0ac9
+        cid = {
8b0ac9
+            'transport': transport,
8b0ac9
+            'traddr': traddr,
8b0ac9
+            'trsvcid': trsvcid,
8b0ac9
+            'host-traddr': host_traddr,
8b0ac9
+            'host-iface': host_iface,
8b0ac9
+            'subsysnqn': subsysnqn,
8b0ac9
+        }
8b0ac9
+        return self._controllers.get(trid.TID(cid))
8b0ac9
+
8b0ac9
+    def _remove_ctrl_from_dict(self, controller):
8b0ac9
+        tid_to_pop = controller.tid
8b0ac9
+        if not tid_to_pop:
8b0ac9
+            # Being paranoid. This should not happen, but let's say the
8b0ac9
+            # controller object has been purged, but it is somehow still
8b0ac9
+            # listed in self._controllers.
8b0ac9
+            for tid, _controller in self._controllers.items():
8b0ac9
+                if _controller is controller:
8b0ac9
+                    tid_to_pop = tid
8b0ac9
+                    break
8b0ac9
+
8b0ac9
+        if tid_to_pop:
8b0ac9
+            logging.debug('ServiceABC._remove_ctrl_from_dict()- %s | %s', tid_to_pop, controller.device)
8b0ac9
+            self._controllers.pop(tid_to_pop, None)
8b0ac9
+        else:
8b0ac9
+            logging.debug('ServiceABC._remove_ctrl_from_dict()- already removed')
8b0ac9
+
8b0ac9
+    def remove_controller(self, controller, success):  # pylint: disable=unused-argument
8b0ac9
+        '''@brief remove the specified controller object from the list of controllers
8b0ac9
+        @param controller: the controller object
8b0ac9
+        @param success: whether the disconnect was successful'''
8b0ac9
+        logging.debug('ServiceABC.remove_controller()')
8b0ac9
+        if isinstance(controller, ControllerABC):
8b0ac9
+            self._remove_ctrl_from_dict(controller)
8b0ac9
+
8b0ac9
+            controller.kill()
8b0ac9
+
8b0ac9
+        if self._cfg_soak_tmr:
8b0ac9
+            self._cfg_soak_tmr.start()
8b0ac9
+
8b0ac9
+    def _cancel(self):
8b0ac9
+        logging.debug('ServiceABC._cancel()')
8b0ac9
+        if not self._cancellable.is_cancelled():
8b0ac9
+            self._cancellable.cancel()
8b0ac9
+
8b0ac9
+        for controller in self._controllers.values():
8b0ac9
+            controller.cancel()
8b0ac9
+
8b0ac9
+    def _stop_hdlr(self):
8b0ac9
+        logging.debug('ServiceABC._stop_hdlr()')
8b0ac9
+        systemd.daemon.notify('STOPPING=1')
8b0ac9
+
8b0ac9
+        self._cancel()  # Cancel pending operations
8b0ac9
+
8b0ac9
+        self._dump_last_known_config(self._controllers)
8b0ac9
+
8b0ac9
+        if len(self._controllers) == 0:
8b0ac9
+            GLib.idle_add(self._exit)
8b0ac9
+        else:
8b0ac9
+            # Tell all controller objects to disconnect
8b0ac9
+            keep_connections = self._keep_connections_on_exit()
8b0ac9
+            controllers = self._controllers.values()
8b0ac9
+            logging.debug(
8b0ac9
+                'ServiceABC._stop_hdlr()            - Controller count = %s, keep_connections = %s',
8b0ac9
+                len(controllers), keep_connections
8b0ac9
+            )
8b0ac9
+            for controller in controllers:
8b0ac9
+                controller.disconnect(self._on_final_disconnect, keep_connections)
8b0ac9
+
8b0ac9
+        return GLib.SOURCE_REMOVE
8b0ac9
+
8b0ac9
+    def _on_final_disconnect(self, controller, success):
8b0ac9
+        '''Callback invoked after a controller is disconnected.
8b0ac9
+        THIS IS USED DURING PROCESS SHUTDOWN TO WAIT FOR ALL CONTROLLERS TO BE
8b0ac9
+        DISCONNECTED BEFORE EXITING THE PROGRAM. ONLY CALL ON SHUTDOWN!
8b0ac9
+        @param controller: the controller object
8b0ac9
+        @param success: whether the disconnect operation was successful
8b0ac9
+        '''
8b0ac9
+        logging.debug('ServiceABC._on_final_disconnect()  - %s | %s disconnect %s',
8b0ac9
+                      controller.id, controller.device, 'succeeded' if success else 'failed')
8b0ac9
+        self._remove_ctrl_from_dict(controller)
8b0ac9
+
8b0ac9
+        controller.kill()
8b0ac9
+
8b0ac9
+        # When all controllers have disconnected, we can finish the clean up
8b0ac9
+        if len(self._controllers) == 0:
8b0ac9
+            # Defer exit to the next main loop's idle period.
8b0ac9
+            GLib.idle_add(self._exit)
8b0ac9
+
8b0ac9
+    def _exit(self):
8b0ac9
+        logging.debug('ServiceABC._exit()')
8b0ac9
+        self._release_resources()
8b0ac9
+        self._loop.quit()
8b0ac9
+
8b0ac9
+    def _on_config_ctrls(self, *_user_data):
8b0ac9
+        self._config_ctrls()
8b0ac9
+        return GLib.SOURCE_REMOVE
8b0ac9
+
8b0ac9
+    def _config_ctrls(self):
8b0ac9
+        '''@brief Start controllers configuration.'''
8b0ac9
+        # The configuration file may contain controllers and/or blacklist
8b0ac9
+        # elements with traddr specified as hostname instead of IP address.
8b0ac9
+        # Because of this, we need to remove those blacklisted elements before
8b0ac9
+        # running name resolution. And we will need to remove blacklisted
8b0ac9
+        # elements after name resolution is complete (i.e. in the callback
8b0ac9
+        # function _config_ctrls_finish)
8b0ac9
+        logging.debug('ServiceABC._config_ctrls()')
8b0ac9
+        configured_controllers = remove_blacklisted(conf.SvcConf().get_controllers())
8b0ac9
+        self._resolver.resolve_ctrl_async(self._cancellable, configured_controllers, self._config_ctrls_finish)
8b0ac9
+
8b0ac9
+    @abc.abstractmethod
8b0ac9
+    def _keep_connections_on_exit(self):
8b0ac9
+        '''@brief Determine whether connections should remain when the
8b0ac9
+        process exits.
8b0ac9
+
8b0ac9
+        NOTE) This is the base class method used to define the interface.
8b0ac9
+        It must be overloaded by a child class.
8b0ac9
+        '''
8b0ac9
+        raise NotImplementedError()
8b0ac9
+
8b0ac9
+    @abc.abstractmethod
8b0ac9
+    def _config_ctrls_finish(self, configured_ctrl_list):
8b0ac9
+        '''@brief Finish controllers configuration after hostnames (if any)
8b0ac9
+        have been resolved.
8b0ac9
+
8b0ac9
+        Configuring controllers must be done asynchronously in 2 steps.
8b0ac9
+        In the first step, host names get resolved to find their IP addresses.
8b0ac9
+        Name resolution can take a while, especially when an external name
8b0ac9
+        resolution server is used. Once that step is completed, the callback
8b0ac9
+        method _config_ctrls_finish() (i.e. this method), gets invoked to
8b0ac9
+        complete the controller configuration.
8b0ac9
+
8b0ac9
+        NOTE) This is the base class method used to define the interface.
8b0ac9
+        It must be overloaded by a child class.
8b0ac9
+        '''
8b0ac9
+        raise NotImplementedError()
8b0ac9
+
8b0ac9
+    @abc.abstractmethod
8b0ac9
+    def _load_last_known_config(self):
8b0ac9
+        raise NotImplementedError()
8b0ac9
+
8b0ac9
+    @abc.abstractmethod
8b0ac9
+    def _dump_last_known_config(self, controllers):
8b0ac9
+        raise NotImplementedError()
8b0ac9
diff --git a/staslib/trid.py b/staslib/trid.py
8b0ac9
index def6ab2..38619e7 100644
8b0ac9
--- a/staslib/trid.py
8b0ac9
+++ b/staslib/trid.py
8b0ac9
@@ -12,8 +12,7 @@ throughout nvme-stas to uniquely identify a Controller'''
8b0ac9
 import hashlib
8b0ac9
 from staslib import conf
8b0ac9
 
8b0ac9
-class TID:
8b0ac9
-    # pylint: disable=too-many-instance-attributes
8b0ac9
+class TID:  # pylint: disable=too-many-instance-attributes
8b0ac9
     '''Transport Identifier'''
8b0ac9
     RDMA_IP_PORT = '4420'
8b0ac9
     DISC_IP_PORT = '8009'
8b0ac9
diff --git a/staslib/udev.py b/staslib/udev.py
8b0ac9
index 29370b8..37b63cc 100644
8b0ac9
--- a/staslib/udev.py
8b0ac9
+++ b/staslib/udev.py
8b0ac9
@@ -16,7 +16,7 @@ from staslib import defs, trid
8b0ac9
 try:
8b0ac9
     from pyudev.glib import MonitorObserver
8b0ac9
 except (ModuleNotFoundError, AttributeError):
8b0ac9
-    from staslib.glibudev import MonitorObserver  # pylint: disable=relative-beyond-top-level,ungrouped-imports
8b0ac9
+    from staslib.glibudev import MonitorObserver  # pylint: disable=ungrouped-imports
8b0ac9
 
8b0ac9
 # ******************************************************************************
8b0ac9
 class Udev:
8b0ac9
@@ -99,7 +99,7 @@ class Udev:
8b0ac9
     def get_attributes(self, sys_name: str, attr_ids) -> dict:
8b0ac9
         '''@brief Get all the attributes associated with device @sys_name'''
8b0ac9
         attrs = {attr_id: '' for attr_id in attr_ids}
8b0ac9
-        if sys_name:
8b0ac9
+        if sys_name and sys_name != 'nvme?':
8b0ac9
             udev = self.get_nvme_device(sys_name)
8b0ac9
             if udev is not None:
8b0ac9
                 for attr_id in attr_ids:
8b0ac9
diff --git a/test/test-config.py b/test/test-config.py
8b0ac9
index dad0ebd..db58883 100755
8b0ac9
--- a/test/test-config.py
8b0ac9
+++ b/test/test-config.py
8b0ac9
@@ -40,7 +40,7 @@ class StasProcessConfUnitTest(unittest.TestCase):
8b0ac9
         self.assertFalse(service_conf.data_digest)
8b0ac9
         self.assertTrue(service_conf.persistent_connections)
8b0ac9
         self.assertTrue(service_conf.udev_rule_enabled)
8b0ac9
-        self.assertFalse(service_conf.sticky_connections)
8b0ac9
+        self.assertTrue(service_conf.sticky_connections)
8b0ac9
         self.assertFalse(service_conf.ignore_iface)
8b0ac9
         self.assertIn(6, service_conf.ip_family)
8b0ac9
         self.assertNotIn(4, service_conf.ip_family)
8b0ac9
diff --git a/test/test-controller.py b/test/test-controller.py
8b0ac9
index f23125e..f55781a 100755
8b0ac9
--- a/test/test-controller.py
8b0ac9
+++ b/test/test-controller.py
8b0ac9
@@ -8,24 +8,43 @@ from pyfakefs.fake_filesystem_unittest import TestCase
8b0ac9
 
8b0ac9
 LOOP = GLib.MainLoop()
8b0ac9
 
8b0ac9
+
8b0ac9
+class TestController(ctrl.Controller):
8b0ac9
+    def _find_existing_connection(self):
8b0ac9
+        pass
8b0ac9
+
8b0ac9
+    def _on_aen(self, aen: int):
8b0ac9
+        pass
8b0ac9
+
8b0ac9
+    def _on_nvme_event(self, nvme_event):
8b0ac9
+        pass
8b0ac9
+
8b0ac9
+
8b0ac9
 class Test(TestCase):
8b0ac9
     '''Unit tests for class Controller'''
8b0ac9
 
8b0ac9
     def setUp(self):
8b0ac9
         self.setUpPyfakefs()
8b0ac9
 
8b0ac9
-        self.fs.create_file('/etc/nvme/hostnqn', contents='nqn.2014-08.org.nvmexpress:uuid:01234567-0123-0123-0123-0123456789ab\n')
8b0ac9
-        self.fs.create_file('/etc/nvme/hostid',  contents='01234567-89ab-cdef-0123-456789abcdef\n')
8b0ac9
-        self.fs.create_file('/dev/nvme-fabrics', contents='instance=-1,cntlid=-1,transport=%s,traddr=%s,trsvcid=%s,nqn=%s,queue_size=%d,nr_io_queues=%d,reconnect_delay=%d,ctrl_loss_tmo=%d,keep_alive_tmo=%d,hostnqn=%s,host_traddr=%s,host_iface=%s,hostid=%s,duplicate_connect,disable_sqflow,hdr_digest,data_digest,nr_write_queues=%d,nr_poll_queues=%d,tos=%d,fast_io_fail_tmo=%d,discovery,dhchap_secret=%s,dhchap_ctrl_secret=%s\n')
8b0ac9
+        self.fs.create_file(
8b0ac9
+            '/etc/nvme/hostnqn', contents='nqn.2014-08.org.nvmexpress:uuid:01234567-0123-0123-0123-0123456789ab\n'
8b0ac9
+        )
8b0ac9
+        self.fs.create_file('/etc/nvme/hostid', contents='01234567-89ab-cdef-0123-456789abcdef\n')
8b0ac9
+        self.fs.create_file(
8b0ac9
+            '/dev/nvme-fabrics',
8b0ac9
+            contents='instance=-1,cntlid=-1,transport=%s,traddr=%s,trsvcid=%s,nqn=%s,queue_size=%d,nr_io_queues=%d,reconnect_delay=%d,ctrl_loss_tmo=%d,keep_alive_tmo=%d,hostnqn=%s,host_traddr=%s,host_iface=%s,hostid=%s,duplicate_connect,disable_sqflow,hdr_digest,data_digest,nr_write_queues=%d,nr_poll_queues=%d,tos=%d,fast_io_fail_tmo=%d,discovery,dhchap_secret=%s,dhchap_ctrl_secret=%s\n',
8b0ac9
+        )
8b0ac9
 
8b0ac9
-        self.NVME_TID = trid.TID({
8b0ac9
-            'transport':   'tcp',
8b0ac9
-            'traddr':      '10.10.10.10',
8b0ac9
-            'subsysnqn':   'nqn.1988-11.com.dell:SFSS:2:20220208134025e8',
8b0ac9
-            'trsvcid':     '8009',
8b0ac9
-            'host-traddr': '1.2.3.4',
8b0ac9
-            'host-iface':  'wlp0s20f3',
8b0ac9
-        })
8b0ac9
+        self.NVME_TID = trid.TID(
8b0ac9
+            {
8b0ac9
+                'transport': 'tcp',
8b0ac9
+                'traddr': '10.10.10.10',
8b0ac9
+                'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8',
8b0ac9
+                'trsvcid': '8009',
8b0ac9
+                'host-traddr': '1.2.3.4',
8b0ac9
+                'host-iface': 'wlp0s20f3',
8b0ac9
+            }
8b0ac9
+        )
8b0ac9
 
8b0ac9
         sysconf = conf.SysConf()
8b0ac9
         self.root = nvme.root()
8b0ac9
@@ -34,32 +53,92 @@ class Test(TestCase):
8b0ac9
     def tearDown(self):
8b0ac9
         LOOP.quit()
8b0ac9
 
8b0ac9
+    def test_cannot_instantiate_concrete_classes_if_abstract_method_are_not_implemented(self):
8b0ac9
+        # Make sure we can't instantiate the ABC directly (Abstract Base Class).
8b0ac9
+        class Controller(ctrl.Controller):
8b0ac9
+            pass
8b0ac9
+
8b0ac9
+        self.assertRaises(TypeError, lambda: ctrl.Controller(root=self.root, host=self.host, tid=self.NVME_TID))
8b0ac9
+
8b0ac9
     def test_get_device(self):
8b0ac9
-        controller = ctrl.Controller(root=self.root, host=self.host, tid=self.NVME_TID)
8b0ac9
+        controller = TestController(root=self.root, host=self.host, tid=self.NVME_TID)
8b0ac9
         self.assertEqual(controller._connect_attempts, 0)
8b0ac9
-        self.assertRaises(NotImplementedError, controller._try_to_connect)
8b0ac9
+        controller._try_to_connect()
8b0ac9
         self.assertEqual(controller._connect_attempts, 1)
8b0ac9
-        self.assertRaises(NotImplementedError, controller._find_existing_connection)
8b0ac9
-        self.assertEqual(controller.id, "(tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4)")
8b0ac9
+        self.assertEqual(
8b0ac9
+            controller.id, "(tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4)"
8b0ac9
+        )
8b0ac9
         # raise Exception(controller._connect_op)
8b0ac9
-        self.assertEqual(str(controller.tid), "(tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4)")
8b0ac9
-        self.assertEqual(controller.device, '')
8b0ac9
-        self.assertEqual(str(controller.controller_id_dict()), "{'transport': 'tcp', 'traddr': '10.10.10.10', 'trsvcid': '8009', 'host-traddr': '1.2.3.4', 'host-iface': 'wlp0s20f3', 'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8', 'device': ''}")
8b0ac9
-        # self.assertEqual(controller.details(), "{'transport': 'tcp', 'traddr': '10.10.10.[265 chars]ff]'}")
8b0ac9
-        self.assertEqual(controller.info(), {'transport': 'tcp', 'traddr': '10.10.10.10', 'trsvcid': '8009', 'host-traddr': '1.2.3.4', 'host-iface': 'wlp0s20f3', 'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8', 'device': '', 'hostid': '', 'hostnqn': '', 'model': '', 'serial': '', 'connect attempts': '1', 'retry connect timer': '60.0s [off]'})
8b0ac9
+        self.assertEqual(
8b0ac9
+            str(controller.tid),
8b0ac9
+            "(tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4)",
8b0ac9
+        )
8b0ac9
+        self.assertEqual(controller.device, 'nvme?')
8b0ac9
+        self.assertEqual(
8b0ac9
+            str(controller.controller_id_dict()),
8b0ac9
+            "{'transport': 'tcp', 'traddr': '10.10.10.10', 'trsvcid': '8009', 'host-traddr': '1.2.3.4', 'host-iface': 'wlp0s20f3', 'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8', 'device': 'nvme?'}",
8b0ac9
+        )
8b0ac9
+        self.assertEqual(
8b0ac9
+            controller.details(),
8b0ac9
+            {
8b0ac9
+                'dctype': '',
8b0ac9
+                'cntrltype': '',
8b0ac9
+                'transport': 'tcp',
8b0ac9
+                'traddr': '10.10.10.10',
8b0ac9
+                'trsvcid': '8009',
8b0ac9
+                'host-traddr': '1.2.3.4',
8b0ac9
+                'host-iface': 'wlp0s20f3',
8b0ac9
+                'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8',
8b0ac9
+                'device': 'nvme?',
8b0ac9
+                'connect attempts': '1',
8b0ac9
+                'retry connect timer': '60.0s [off]',
8b0ac9
+                'hostid': '',
8b0ac9
+                'hostnqn': '',
8b0ac9
+                'model': '',
8b0ac9
+                'serial': '',
8b0ac9
+            },
8b0ac9
+        )
8b0ac9
+        self.assertEqual(
8b0ac9
+            controller.info(),
8b0ac9
+            {
8b0ac9
+                'dctype': '',
8b0ac9
+                'cntrltype': '',
8b0ac9
+                'transport': 'tcp',
8b0ac9
+                'traddr': '10.10.10.10',
8b0ac9
+                'trsvcid': '8009',
8b0ac9
+                'host-traddr': '1.2.3.4',
8b0ac9
+                'host-iface': 'wlp0s20f3',
8b0ac9
+                'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8',
8b0ac9
+                'device': 'nvme?',
8b0ac9
+                'connect attempts': '1',
8b0ac9
+                'retry connect timer': '60.0s [off]',
8b0ac9
+                'hostid': '',
8b0ac9
+                'hostnqn': '',
8b0ac9
+                'model': '',
8b0ac9
+                'serial': '',
8b0ac9
+                'connect operation': {'fail count': 0},
8b0ac9
+            },
8b0ac9
+        )
8b0ac9
+
8b0ac9
         # print(controller._connect_op)
8b0ac9
         self.assertEqual(controller.cancel(), None)
8b0ac9
         self.assertEqual(controller.kill(), None)
8b0ac9
         # self.assertEqual(controller.disconnect(), 0)
8b0ac9
 
8b0ac9
     def test_connect(self):
8b0ac9
-        controller = ctrl.Controller(root=self.root, host=self.host, tid=self.NVME_TID)
8b0ac9
+        controller = TestController(root=self.root, host=self.host, tid=self.NVME_TID)
8b0ac9
         self.assertEqual(controller._connect_attempts, 0)
8b0ac9
-        controller._find_existing_connection = lambda : None
8b0ac9
+        controller._find_existing_connection = lambda: None
8b0ac9
         with self.assertLogs(logger=logging.getLogger(), level='DEBUG') as captured:
8b0ac9
             controller._try_to_connect()
8b0ac9
         self.assertEqual(len(captured.records), 1)
8b0ac9
-        self.assertTrue(captured.records[0].getMessage().startswith("Controller._try_to_connect()       - (tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4) Connecting to nvme control with cfg={'hdr_digest': False, 'data_digest': False"))
8b0ac9
+        self.assertTrue(
8b0ac9
+            captured.records[0]
8b0ac9
+            .getMessage()
8b0ac9
+            .startswith(
8b0ac9
+                "Controller._try_to_connect()       - (tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4) Connecting to nvme control with cfg={'hdr_digest': False, 'data_digest': False"
8b0ac9
+            )
8b0ac9
+        )
8b0ac9
         self.assertEqual(controller._connect_attempts, 1)
8b0ac9
 
8b0ac9
 
8b0ac9
diff --git a/test/test-service.py b/test/test-service.py
8b0ac9
index 19f9b0c..4ce37be 100755
8b0ac9
--- a/test/test-service.py
8b0ac9
+++ b/test/test-service.py
8b0ac9
@@ -4,6 +4,7 @@ import unittest
8b0ac9
 from staslib import service
8b0ac9
 from pyfakefs.fake_filesystem_unittest import TestCase
8b0ac9
 
8b0ac9
+
8b0ac9
 class Args:
8b0ac9
     def __init__(self):
8b0ac9
         self.tron = True
8b0ac9
@@ -11,6 +12,20 @@ class Args:
8b0ac9
         self.conf_file = '/dev/null'
8b0ac9
 
8b0ac9
 
8b0ac9
+class TestService(service.Service):
8b0ac9
+    def _config_ctrls_finish(self, configured_ctrl_list):
8b0ac9
+        pass
8b0ac9
+
8b0ac9
+    def _dump_last_known_config(self, controllers):
8b0ac9
+        pass
8b0ac9
+
8b0ac9
+    def _keep_connections_on_exit(self):
8b0ac9
+        pass
8b0ac9
+
8b0ac9
+    def _load_last_known_config(self):
8b0ac9
+        return dict()
8b0ac9
+
8b0ac9
+
8b0ac9
 class Test(TestCase):
8b0ac9
     '''Unit tests for class Service'''
8b0ac9
 
8b0ac9
@@ -18,22 +33,39 @@ class Test(TestCase):
8b0ac9
         self.setUpPyfakefs()
8b0ac9
 
8b0ac9
         os.environ['RUNTIME_DIRECTORY'] = "/run"
8b0ac9
-        self.fs.create_file('/etc/nvme/hostnqn', contents='nqn.2014-08.org.nvmexpress:uuid:01234567-0123-0123-0123-0123456789ab\n')
8b0ac9
-        self.fs.create_file('/etc/nvme/hostid',  contents='01234567-89ab-cdef-0123-456789abcdef\n')
8b0ac9
-        self.fs.create_file('/dev/nvme-fabrics', contents='instance=-1,cntlid=-1,transport=%s,traddr=%s,trsvcid=%s,nqn=%s,queue_size=%d,nr_io_queues=%d,reconnect_delay=%d,ctrl_loss_tmo=%d,keep_alive_tmo=%d,hostnqn=%s,host_traddr=%s,host_iface=%s,hostid=%s,duplicate_connect,disable_sqflow,hdr_digest,data_digest,nr_write_queues=%d,nr_poll_queues=%d,tos=%d,fast_io_fail_tmo=%d,discovery,dhchap_secret=%s,dhchap_ctrl_secret=%s\n')
8b0ac9
+        self.fs.create_file(
8b0ac9
+            '/etc/nvme/hostnqn', contents='nqn.2014-08.org.nvmexpress:uuid:01234567-0123-0123-0123-0123456789ab\n'
8b0ac9
+        )
8b0ac9
+        self.fs.create_file('/etc/nvme/hostid', contents='01234567-89ab-cdef-0123-456789abcdef\n')
8b0ac9
+        self.fs.create_file(
8b0ac9
+            '/dev/nvme-fabrics',
8b0ac9
+            contents='instance=-1,cntlid=-1,transport=%s,traddr=%s,trsvcid=%s,nqn=%s,queue_size=%d,nr_io_queues=%d,reconnect_delay=%d,ctrl_loss_tmo=%d,keep_alive_tmo=%d,hostnqn=%s,host_traddr=%s,host_iface=%s,hostid=%s,duplicate_connect,disable_sqflow,hdr_digest,data_digest,nr_write_queues=%d,nr_poll_queues=%d,tos=%d,fast_io_fail_tmo=%d,discovery,dhchap_secret=%s,dhchap_ctrl_secret=%s\n',
8b0ac9
+        )
8b0ac9
+
8b0ac9
+    def test_cannot_instantiate_concrete_classes_if_abstract_method_are_not_implemented(self):
8b0ac9
+        # A subclass that does not implement the ABC's (Abstract Base Class) abstract methods must not be instantiable.
8b0ac9
+        class Service(service.Service):
8b0ac9
+            pass
8b0ac9
+
8b0ac9
+        self.assertRaises(TypeError, lambda: Service(Args(), reload_hdlr=lambda x: x))
8b0ac9
 
8b0ac9
     def test_get_controller(self):
8b0ac9
-        # FIXME: this is hack, fix it later
8b0ac9
-        service.Service._load_last_known_config = lambda x : dict()
8b0ac9
-        # start the test
8b0ac9
-
8b0ac9
-        srv = service.Service(Args(), reload_hdlr=lambda x : x)
8b0ac9
-        self.assertRaises(NotImplementedError, srv._keep_connections_on_exit)
8b0ac9
-        self.assertRaises(NotImplementedError, srv._dump_last_known_config, [])
8b0ac9
-        self.assertRaises(NotImplementedError, srv._on_config_ctrls)
8b0ac9
-        #self.assertEqual(srv.get_controllers(), dict())
8b0ac9
-        self.assertEqual(srv.get_controller(transport='tcp', traddr='10.10.10.10', trsvcid='8009', host_traddr='1.2.3.4', host_iface='wlp0s20f3', subsysnqn='nqn.1988-11.com.dell:SFSS:2:20220208134025e8'), None)
8b0ac9
-        self.assertEqual(srv.remove_controller(controller=None), None)
8b0ac9
+        srv = TestService(Args(), reload_hdlr=lambda x: x)
8b0ac9
+
8b0ac9
+        self.assertEqual(list(srv.get_controllers()), list())
8b0ac9
+        self.assertEqual(
8b0ac9
+            srv.get_controller(
8b0ac9
+                transport='tcp',
8b0ac9
+                traddr='10.10.10.10',
8b0ac9
+                trsvcid='8009',
8b0ac9
+                host_traddr='1.2.3.4',
8b0ac9
+                host_iface='wlp0s20f3',
8b0ac9
+                subsysnqn='nqn.1988-11.com.dell:SFSS:2:20220208134025e8',
8b0ac9
+            ),
8b0ac9
+            None,
8b0ac9
+        )
8b0ac9
+        self.assertEqual(srv.remove_controller(controller=None, success=True), None)
8b0ac9
+
8b0ac9
 
8b0ac9
 if __name__ == '__main__':
8b0ac9
     unittest.main()