diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0d0b884
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+SOURCES/nvme-stas-1.1.6.tar.gz
diff --git a/.nvme-stas.metadata b/.nvme-stas.metadata
new file mode 100644
index 0000000..487f8a9
--- /dev/null
+++ b/.nvme-stas.metadata
@@ -0,0 +1 @@
+829d844d8ee2f797fdbf557be1815310cd37a1e3 SOURCES/nvme-stas-1.1.6.tar.gz
diff --git a/SOURCES/0001-sync-with-1.1.6.patch b/SOURCES/0001-sync-with-1.1.6.patch
new file mode 100644
index 0000000..c28df7f
--- /dev/null
+++ b/SOURCES/0001-sync-with-1.1.6.patch
@@ -0,0 +1,3307 @@
+diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
+index 93604e8..4e1b6c5 100644
+--- a/.github/workflows/pylint.yml
++++ b/.github/workflows/pylint.yml
+@@ -1,9 +1,19 @@
+-name: Pylint
++name: Linters
+
+ on: [push]
+
+ jobs:
+- build:
++
++ docker-lint:
++ runs-on: ubuntu-latest
++ steps:
++ - uses: actions/checkout@v3
++ - uses: hadolint/hadolint-action@v2.1.0
++ with:
++ recursive: true
++ ignore: DL3041
++
++ python-lint:
+ runs-on: ubuntu-latest
+
+ strategy:
+diff --git a/Dockerfile b/Dockerfile
+index ad6742e..0ab5138 100644
+--- a/Dockerfile
++++ b/Dockerfile
+@@ -2,12 +2,12 @@ FROM fedora:36
+
+ WORKDIR /root
+
+-# for nvme-stas
+-RUN dnf install -y python3-dasbus python3-pyudev python3-systemd python3-gobject meson
+-# for libnvme
+-RUN dnf install -y git gcc g++ cmake openssl-devel libuuid-devel json-c-devel swig python-devel meson
++# first line for nvme-stas
++# second line for libnvme
++RUN dnf install -y python3-dasbus python3-pyudev python3-systemd python3-gobject meson \
++ git gcc g++ cmake openssl-devel libuuid-devel json-c-devel swig python-devel meson && dnf clean all
+
+ COPY . .
+-RUN meson .build && ninja -C .build && cd .build && meson install
++RUN meson .build && ninja -C .build && meson install -C .build
+
+ ENTRYPOINT ["python3"]
+diff --git a/NEWS.md b/NEWS.md
+index d1515cd..f56a7c9 100644
+--- a/NEWS.md
++++ b/NEWS.md
+@@ -5,6 +5,7 @@
+ - Fix issues with I/O controller connection audits
+ - Eliminate pcie devices from list of I/O controller connections to audit
+ - Add soaking timer to work around a race condition between the kernel and user-space applications on "add" uevents. When the kernel adds a new nvme device (e.g. `/dev/nvme7`) and sends an "add" uevent to notify user-space applications, the attributes associated with that device (e.g. `/sys/class/nvme/nvme7/cntrltype`) may not be fully initialized, which can lead `stacd` to dismiss a device that should get audited (sketched below).
++- Make `sticky-connections=enabled` the default (see `stacd.conf`)
+
+ ## Changes with release 1.1.5
+
+@@ -32,7 +33,7 @@ stacd: Bug fix. Check that self._cfg_soak_tmr is not None before dereferencing i
+
+ ## Changes with release 1.1.1
+
+-Make `sticky-connections-disabled` by default
++Make `sticky-connections=disabled` the default (see `stacd.conf`)
+
+ ## Changes with release 1.1
+
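The soaking-timer entry above, reduced to a minimal standalone Python sketch (assumes a running GLib main loop; AddEventSoaker and audit_cb are illustrative names, not the patch's actual gutil.GTimer API):

from gi.repository import GLib

ADD_EVENT_SOAK_TIME_SEC = 1  # same soak period stacd uses in this patch

class AddEventSoaker:
    '''Debounce "add" uevents so sysfs attributes can settle before auditing.'''

    def __init__(self, audit_cb):
        self._audit_cb = audit_cb
        self._source = None

    def on_add_uevent(self, *_args):
        # Restart the soak period on every "add" event instead of auditing
        # right away; attributes such as .../cntrltype may not be ready yet.
        if self._source is not None:
            GLib.source_remove(self._source)
        self._source = GLib.timeout_add_seconds(ADD_EVENT_SOAK_TIME_SEC, self._soaked)

    def _soaked(self):
        self._source = None
        self._audit_cb()           # audit I/O controller connections now
        return GLib.SOURCE_REMOVE  # one-shot: remove the timeout source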
+diff --git a/coverage.sh.in b/coverage.sh.in
+index 96b8c53..5ba2ebe 100755
+--- a/coverage.sh.in
++++ b/coverage.sh.in
+@@ -38,14 +38,24 @@ PRIMARY_GRP=$( id -ng )
+ PRIMARY_USR=$( id -nu )
+ PYTHON_PATH=.:./subprojects/libnvme
+
++log() {
++ msg="$1"
++ printf "%b[1;36m%s%b[0m\n" "\0033" "${msg}" "\0033"
++ sudo logger -i "@@@@@ COVERAGE -" -p 4 "${msg}"
++}
++
+ sd_stop() {
+- unit="$1"-cov.service
++ app="$1"
++ unit="${app}"-cov.service
++ log "Stop ${app}"
+ sudo systemctl stop "${unit}" >/dev/null 2>&1
+ sudo systemctl reset-failed "${unit}" >/dev/null 2>&1
+ }
+
+ sd_restart() {
+- unit="$1"-cov.service
++ app="$1"
++ unit="${app}"-cov.service
++ log "Restart ${app}"
+ sudo systemctl restart "${unit}" >/dev/null 2>&1
+ }
+
+@@ -61,7 +71,7 @@ sd_start() {
+ cmd="${app} --syslog -f ${conf}"
+ fi
+
+- printf "\n%b[1;36m%s%b[0m\n" "\0033" "Start ${app}" "\0033"
++ log "Start ${app}"
+
+ RUNTIME_DIRECTORY=/tmp/${app}
+ rm -rf ${RUNTIME_DIRECTORY}
+@@ -75,7 +85,7 @@ reload_cfg() {
+ app="$1"
+ unit="${app}"-cov.service
+ pid=$( systemctl show --property MainPID --value "${unit}" )
+- printf "%b[1;36m%s%b[0m\n" "\0033" "Reload config ${app}" "\0033"
++ log "Reload config ${app}"
+ sudo kill -HUP "${pid}"
+ }
+
+@@ -83,15 +93,24 @@ if [ ! -d coverage ]; then
+ mkdir coverage
+ fi
+
++
++log "START-START-START-START-START-START-START-START-START-START-START-START"
++
++
++
+ ################################################################################
+ # Load nvme kernel module
++log "modprobe nvme-tcp"
+ sudo /usr/sbin/modprobe nvme-tcp
+
++log "nvme disconnect-all"
+ sudo nvme disconnect-all
+
+ ################################################################################
+ # Create a dummy config file for @STAFD_PROCNAME@
+-stafd_conf_fname=$(mktemp /tmp/@STAFD_PROCNAME@.conf.XXXXXX)
++file=/tmp/@STAFD_PROCNAME@.conf.XXXXXX
++stafd_conf_fname=$(mktemp $file)
++log "Create dummy config file ${stafd_conf_fname}"
+ cat > "${stafd_conf_fname}" <<'EOF'
+ [Global]
+ tron=true
+@@ -102,7 +121,9 @@ EOF
+
+ ################################################################################
+ # Create a dummy config file for @STACD_PROCNAME@
+-stacd_conf_fname=$(mktemp /tmp/@STACD_PROCNAME@.conf.XXXXXX)
++file=/tmp/@STACD_PROCNAME@.conf.XXXXXX
++stacd_conf_fname=$(mktemp $file)
++log "Create dummy config file ${stacd_conf_fname}"
+ cat > "${stacd_conf_fname}" <<'EOF'
+ [Global]
+ tron=true
+@@ -111,6 +132,7 @@ udev-rule=disabled
+ sticky-connections=enabled
+ EOF
+
++log "Stop & Mask Avahi daemon"
+ sudo systemctl stop avahi-daemon.service
+ sudo systemctl stop avahi-daemon.socket
+ sudo systemctl mask avahi-daemon.service
+@@ -118,11 +140,11 @@ sudo systemctl mask avahi-daemon.socket
+ sleep 1
+
+
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ status while @STAFD_PROCNAME@ is not running" "\0033"
++log "Invoking @STAFD_CTLNAME@ status while @STAFD_PROCNAME@ is not running"
+ coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ ls >/dev/null 2>&1
+ coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ invalid-command >/dev/null 2>&1
+
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ status while @STACD_PROCNAME@ is not running" "\0033"
++log "Invoking @STACD_CTLNAME@ status while @STACD_PROCNAME@ is not running"
+ coverage run --rcfile=.coveragerc @STACD_CTLNAME@ ls >/dev/null 2>&1
+ coverage run --rcfile=.coveragerc @STACD_CTLNAME@ invalid-command >/dev/null 2>&1
+
+@@ -132,30 +154,33 @@ sd_start "@STAFD_PROCNAME@" "@STAFD_DBUS_NAME@" "${stafd_conf_fname}"
+ sd_start "@STACD_PROCNAME@" "@STACD_DBUS_NAME@" "${stacd_conf_fname}"
+ sleep 3
+
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ status" "\0033"
++log "Invoking @STAFD_CTLNAME@ status"
+ coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ status >/dev/null 2>&1
+
+ reload_cfg "@STAFD_PROCNAME@"
+ sleep 1
+
++log "Restart Avahi daemon"
+ sudo systemctl unmask avahi-daemon.socket
+ sudo systemctl unmask avahi-daemon.service
+ sudo systemctl start avahi-daemon.socket
+ sudo systemctl start avahi-daemon.service
+ sleep 2
+
++log "Change stafd config: tron=true, persistent-connections=false, zeroconf=enable"
+ cat > "${stafd_conf_fname}" <<'EOF'
+ [Global]
+ tron=true
+ persistent-connections=false
+
+ [Service Discovery]
+-zeroconf=disabled
++zeroconf=enabled
+ EOF
+ reload_cfg "@STAFD_PROCNAME@"
+
+ sleep 1
+
++log "Change stafd config: ip-family=ipv4, kato=10, adding multiple controllers"
+ cat > "${stafd_conf_fname}" <<'EOF'
+ [Global]
+ tron=true
+@@ -172,11 +197,15 @@ controller=transport=tcp;traddr=abracadabra
+ controller=
+ controller=trsvcid
+ controller=transport=rdma;traddr=!@#$
++controller=transport=fc;traddr=21:00:00:00:00:00:00:00;host-traddr=20:00:00:00:00:00:00:00
++controller=transport=XM;traddr=2.2.2.2
+ blacklist=transport=tcp;traddr=1.1.1.1
+ blacklist=transport=tcp;traddr=1000.1000.1000.1000
+ EOF
+ reload_cfg "@STAFD_PROCNAME@"
+
++
++log "Change stacd config: tron=true, udev-rule=disabled, sticky-connections=disabled"
+ cat > "${stacd_conf_fname}" <<'EOF'
+ [Global]
+ tron=true
+@@ -186,12 +215,12 @@ EOF
+ reload_cfg "@STACD_PROCNAME@"
+ sleep 3
+
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ status" "\0033"
++log "Invoking @STAFD_CTLNAME@ status"
+ coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ status >/dev/null 2>&1
+
+ ################################################################################
+ # Fake mDNS packets from a CDC
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Start Avahi publisher" "\0033"
++log "Start Avahi publisher"
+ AVAHI_PUBLISHER=mdns_publisher.service
+ sudo systemctl stop ${AVAHI_PUBLISHER} >/dev/null 2>&1
+ sudo systemctl reset-failed ${AVAHI_PUBLISHER} >/dev/null 2>&1
+@@ -200,7 +229,7 @@ sleep 1
+
+ ################################################################################
+ # Start nvme target simulator
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Start nvmet" "\0033"
++log "Start nvmet"
+ sudo ../utils/nvmet/nvmet.py clean
+ sudo ../utils/nvmet/nvmet.py create -f ../utils/nvmet/nvmet.conf
+ sleep 2
+@@ -210,76 +239,76 @@ reload_cfg "@STACD_PROCNAME@"
+ sleep 3
+
+ ################################################################################
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_PROCNAME@ --version" "\0033"
++log "Invoking @STAFD_PROCNAME@ --version"
+ coverage run --rcfile=.coveragerc @STAFD_PROCNAME@ --version
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_PROCNAME@ --idl" "\0033"
++log "Invoking @STAFD_PROCNAME@ --idl"
+ coverage run --rcfile=.coveragerc @STAFD_PROCNAME@ --idl /tmp/@STAFD_PROCNAME@.idl
+
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_PROCNAME@ --version" "\0033"
++log "Invoking @STACD_PROCNAME@ --version"
+ coverage run --rcfile=.coveragerc @STACD_PROCNAME@ --version
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_PROCNAME@ --idl" "\0033"
++log "Invoking @STACD_PROCNAME@ --idl"
+ coverage run --rcfile=.coveragerc @STACD_PROCNAME@ --idl /tmp/@STACD_PROCNAME@.idl
+
+ ################################################################################
+ # Stimulate D-Bus activity
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ --version" "\0033"
++log "Invoking @STAFD_CTLNAME@ --version"
+ sudo coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ --version
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ with a bad command" "\0033"
++log "Invoking @STAFD_CTLNAME@ with a bad command"
+ sudo coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ blah
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ troff" "\0033"
++log "Invoking @STAFD_CTLNAME@ troff"
+ sudo coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ troff
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ status" "\0033"
++log "Invoking @STAFD_CTLNAME@ status"
+ coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ status >/dev/null 2>&1
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ tron" "\0033"
++log "Invoking @STAFD_CTLNAME@ tron"
+ sudo coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ tron
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ ls" "\0033"
++log "Invoking @STAFD_CTLNAME@ ls"
+ coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ ls -d >/dev/null 2>&1
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ adlp" "\0033"
++log "Invoking @STAFD_CTLNAME@ adlp"
+ coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ adlp -d >/dev/null 2>&1
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STAFD_CTLNAME@ dlp" "\0033"
++log "Invoking @STAFD_CTLNAME@ dlp"
+ coverage run --rcfile=.coveragerc @STAFD_CTLNAME@ dlp -t tcp -a ::1 -s 8009 >/dev/null 2>&1
+
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ --version" "\0033"
++log "Invoking @STACD_CTLNAME@ --version"
+ sudo coverage run --rcfile=.coveragerc @STACD_CTLNAME@ --version
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ with a bad command" "\0033"
++log "Invoking @STACD_CTLNAME@ with a bad command"
+ sudo coverage run --rcfile=.coveragerc @STACD_CTLNAME@ blah
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ troff" "\0033"
++log "Invoking @STACD_CTLNAME@ troff"
+ sudo coverage run --rcfile=.coveragerc @STACD_CTLNAME@ troff
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ status" "\0033"
++log "Invoking @STACD_CTLNAME@ status"
+ coverage run --rcfile=.coveragerc @STACD_CTLNAME@ status >/dev/null 2>&1
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ tron" "\0033"
++log "Invoking @STACD_CTLNAME@ tron"
+ sudo coverage run --rcfile=.coveragerc @STACD_CTLNAME@ tron
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ ls" "\0033"
++log "Invoking @STACD_CTLNAME@ ls"
+ coverage run --rcfile=.coveragerc @STACD_CTLNAME@ ls -d >/dev/null 2>&1
+
+ ################################################################################
+ # Stimulate AENs activity by removing/restoring namespaces
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Remove namespace: klingons" "\0033"
++log "Remove namespace: klingons"
+ sudo ../utils/nvmet/nvmet.py unlink -p 1 -s klingons
+ sleep 2
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ ls" "\0033"
++log "Invoking @STACD_CTLNAME@ ls"
+ coverage run --rcfile=.coveragerc @STACD_CTLNAME@ ls -d >/dev/null 2>&1
+
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Restore namespace: klingons" "\0033"
++log "Restore namespace: klingons"
+ sudo ../utils/nvmet/nvmet.py link -p 1 -s klingons
+ sleep 2
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Invoking @STACD_CTLNAME@ ls" "\0033"
++log "Invoking @STACD_CTLNAME@ ls"
+ coverage run --rcfile=.coveragerc @STACD_CTLNAME@ ls -d >/dev/null 2>&1
+
+ ################################################################################
+ # Stop Avahi Publisher
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Stop Avahi publisher" "\0033"
++log "Stop Avahi publisher"
+ sudo systemctl stop ${AVAHI_PUBLISHER}
+ sleep 1
+
+ ################################################################################
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Restart Avahi publisher" "\0033"
++log "Restart Avahi publisher"
+ sudo systemd-run --unit=${AVAHI_PUBLISHER} --working-directory=. avahi-publish -s SFSS _nvme-disc._tcp 8009 "p=tcp"
+ sleep 2
+
+ ################################################################################
+ # Make config changes for @STAFD_PROCNAME@
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Empty configuration and disable zeroconf for @STAFD_PROCNAME@" "\0033"
++log "Empty configuration and disable zeroconf for @STAFD_PROCNAME@"
+ cat > "${stafd_conf_fname}" <<'EOF'
+ [Global]
+ tron=true
+@@ -293,7 +322,7 @@ sleep 1
+
+ ################################################################################
+ # Make more config changes for @STAFD_PROCNAME@
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Add single controller (::1) and re-enable zeroconf for @STAFD_PROCNAME@" "\0033"
++log "Add single controller (::1) and re-enable zeroconf for @STAFD_PROCNAME@"
+ cat > "${stafd_conf_fname}" <<'EOF'
+ [Global]
+ tron=true
+@@ -307,24 +336,23 @@ sleep 2
+
+ ################################################################################
+ # Stop Avahi Publisher
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Stop Avahi publisher" "\0033"
++log "Stop Avahi publisher"
+ sudo systemctl stop ${AVAHI_PUBLISHER}
+ sleep 2
+
+ ################################################################################
+# Remove one of the NVMe devices
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Remove (disconnect) nvme1" "\0033"
++log "Remove (disconnect) nvme1"
+ sudo nvme disconnect -d nvme1
+ sleep 2
+
+
+ ################################################################################
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Restart @STAFD_PROCNAME@ and @STACD_PROCNAME@" "\0033"
+ sd_restart "@STAFD_PROCNAME@"
+ sd_restart "@STACD_PROCNAME@"
+ sleep 1
+
+-printf "%b[1;36m%s%b[0m\n" "\0033" "Create invalid conditions for saving/loading @STAFD_PROCNAME@'s last known config" "\0033"
++log "Create invalid conditions for saving/loading @STAFD_PROCNAME@'s last known config"
+ rm -rf "/tmp/@STAFD_PROCNAME@"
+ sd_stop "@STAFD_PROCNAME@"
+ sd_restart "@STACD_PROCNAME@"
+@@ -334,7 +362,7 @@ sleep 2
+
+ ################################################################################
+ # Stop everything and collect coverage stats
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Stop @STAFD_PROCNAME@ and @STACD_PROCNAME@" "\0033"
++log "Stop @STAFD_PROCNAME@ and @STACD_PROCNAME@"
+ sd_stop "@STAFD_PROCNAME@"
+ sd_stop "@STACD_PROCNAME@"
+ sleep 1
+@@ -345,33 +373,49 @@ sudo chown -R "${PRIMARY_USR}":"${PRIMARY_GRP}" coverage >/dev/null 2>&1
+ sudo chown -R "${PRIMARY_USR}":"${PRIMARY_GRP}" staslib/__pycache__ >/dev/null 2>&1
+ sudo chown -R "${PRIMARY_USR}":"${PRIMARY_GRP}" subprojects/libnvme/libnvme/__pycache__ >/dev/null 2>&1
+
++log "nvme disconnect-all"
+ sudo nvme disconnect-all
+
++log "Remove ${stafd_conf_fname} and ${stacd_conf_fname}"
+ rm "${stafd_conf_fname}"
+ rm "${stacd_conf_fname}"
+
++log "Run unit test: test-udev"
+ PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-udev.py
++log "Run unit test: test-avahi"
+ PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-avahi.py
++log "Run unit test: test-gtimer"
+ PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-gtimer.py
++log "Run unit test: test-version"
+ PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-version.py
++log "Run unit test: test-transport_id"
+ PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-transport_id.py
++log "Run unit test: test-config"
+ PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-config.py
++log "Run unit test: test-controller"
+ PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-controller.py
++log "Run unit test: test-service"
+ PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-service.py
++log "Run unit test: test-log"
+ PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-log.py
++log "Run unit test: test-nvme_options"
+ sudo PYTHONPATH=${PYTHON_PATH} coverage run --rcfile=.coveragerc ../test/test-nvme_options.py
+
+ ################################################################################
+ # Stop nvme target simulator
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Stop nvmet" "\0033"
++log "Stop nvmet"
+ sudo ../utils/nvmet/nvmet.py clean
+
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Collect all coverage data" "\0033"
++log "Collect all coverage data"
+ coverage combine --rcfile=.coveragerc
+
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Generating coverage report" "\0033"
++log "Generating coverage report"
+ coverage report -i --rcfile=.coveragerc
+
+-printf "\n%b[1;36m%s%b[0m\n" "\0033" "Generating coverage report (HTML)" "\0033"
++log "Generating coverage report (HTML)"
+ coverage html -i --rcfile=.coveragerc
+
++
++log "All done!!!"
++
++log "FINISHED-FINISHED-FINISHED-FINISHED-FINISHED-FINISHED-FINISHED-FINISHED"
+diff --git a/doc/man/stacd.conf.xml b/doc/man/stacd.conf.xml
+index 60622f6..65ee71a 100644
+--- a/doc/man/stacd.conf.xml
++++ b/doc/man/stacd.conf.xml
+@@ -378,7 +378,7 @@
+ entries in stacd.conf have been removed.
+
+
+- With sticky-connections=disabled (default)
++ With sticky-connections=disabled
+
+ stacd immediately disconnects from
+ a previously connected IOC if the response to a
+@@ -411,7 +411,7 @@
+
+
+
+- With sticky-connections=enabled
++ With sticky-connections=enabled (default)
+
+ stacd
does not disconnect from IOCs
+ when a DPLE is removed or a controller=
+diff --git a/etc/stas/stacd.conf b/etc/stas/stacd.conf
+index 02e7b3e..0434671 100644
+--- a/etc/stas/stacd.conf
++++ b/etc/stas/stacd.conf
+@@ -202,8 +202,8 @@
+ #
+ # Type: String
+ # Range: [disabled, enabled]
+-# Default: disabled
+-#sticky-connections=disabled
++# Default: enabled
++#sticky-connections=enabled
+
+ [Controllers]
+ # controller: I/O Controllers (IOC) are specified with this keyword.
+diff --git a/stacd.py b/stacd.py
+index 708e372..28cefac 100755
+--- a/stacd.py
++++ b/stacd.py
+@@ -10,14 +10,12 @@
+ ''' STorage Appliance Connector Daemon
+ '''
+ import sys
+-import logging
+ from argparse import ArgumentParser
+ from staslib import defs
+
+-# pylint: disable=consider-using-f-string
+-DBUS_IDL = '''
++DBUS_IDL = f'''
+
+-
++
+
+
+
+@@ -34,19 +32,16 @@ DBUS_IDL = '''
+
+
+
+-
++
+
+
+-
++
+
+
+
+-''' % (
+- defs.STACD_DBUS_NAME,
+- defs.STACD_DBUS_NAME,
+-)
+-
++'''
+
++# ******************************************************************************
+ def parse_args(conf_file: str): # pylint: disable=missing-function-docstring
+ parser = ArgumentParser(
+ description=f'{defs.STAC_DESCRIPTION} ({defs.STAC_ACRONYM}). Must be root to run this program.'
+@@ -77,6 +72,12 @@ ARGS = parse_args(defs.STACD_CONFIG_FILE)
+
+ if ARGS.version:
+ print(f'{defs.PROJECT_NAME} {defs.VERSION}')
++ try:
++ import libnvme
++
++ print(f'libnvme {libnvme.__version__}')
++ except (AttributeError, ModuleNotFoundError):
++ pass
+ sys.exit(0)
+
+ if ARGS.idl:
+@@ -85,78 +86,14 @@ if ARGS.idl:
+ sys.exit(0)
+
+
+-# There is a reason for having this import here and not at the top of the file.
+-# We want to allow running stafd with the --version and --idl options and exit
+-# without having to import stas.
+-from staslib import stas # pylint: disable=wrong-import-position
+-
+-# Before going any further, make sure the script is allowed to run.
+-stas.check_if_allowed_to_continue()
+-
+-
+-################################################################################
+-# Preliminary checks have passed. Let her rip!
+-# pylint: disable=wrong-import-position
+-# pylint: disable=wrong-import-order
+-import json
+-import pathlib
+-import systemd.daemon
+-import dasbus.error
+-import dasbus.client.observer
+-import dasbus.client.proxy
+-from gi.repository import GLib
+-from staslib import conf, log, gutil, trid, udev, ctrl, service # pylint: disable=ungrouped-imports
+-
+-log.init(ARGS.syslog)
+-
+-UDEV_RULE_SUPPRESS = pathlib.Path('/run/udev/rules.d', '70-nvmf-autoconnect.rules')
+-
+-
+-def udev_rule_ctrl(enable):
+- '''@brief We add an empty udev rule to /run/udev/rules.d to suppress
+- nvme-cli's udev rule that is used to tell udevd to automatically
+- connect to I/O controller. This is to avoid race conditions between
+- stacd and udevd. This is configurable. See "udev-rule" in stacd.conf
+- for details.
+- '''
+- if enable:
+- try:
+- UDEV_RULE_SUPPRESS.unlink()
+- except FileNotFoundError:
+- pass
+- else:
+- if not UDEV_RULE_SUPPRESS.exists():
+- pathlib.Path('/run/udev/rules.d').mkdir(parents=True, exist_ok=True)
+- UDEV_RULE_SUPPRESS.symlink_to('/dev/null')
+-
+-
+ # ******************************************************************************
+-class Ioc(ctrl.Controller):
+- '''@brief This object establishes a connection to one I/O Controller.'''
+-
+- def __init__(self, root, host, tid: trid.TID):
+- super().__init__(root, host, tid)
+-
+- def _on_udev_remove(self, udev_obj):
+- '''Called when the associated nvme device (/dev/nvmeX) is removed
+- from the system.
+- '''
+- super()._on_udev_remove(udev_obj)
+-
+- # Defer removal of this object to the next main loop's idle period.
+- GLib.idle_add(STAC.remove_controller, self)
+-
+- def _find_existing_connection(self):
+- return self._udev.find_nvme_ioc_device(self.tid)
+-
+-
+-# ******************************************************************************
+-class Stac(service.Service):
+- '''STorage Appliance Connector (STAC)'''
++if __name__ == '__main__':
++ import json
++ import logging
++ from staslib import log, service, stas, udev # pylint: disable=ungrouped-imports
+
+- CONF_STABILITY_SOAK_TIME_SEC = 1.5
+- CONF_STABILITY_LONG_SOAK_TIME_SEC = 10 # pylint: disable=invalid-name
+- ADD_EVENT_SOAK_TIME_SEC = 1
++ # Before going any further, make sure the script is allowed to run.
++ stas.check_if_allowed_to_continue()
+
+ class Dbus:
+ '''This is the DBus interface that external programs can use to
+@@ -205,229 +142,8 @@ class Stac(service.Service):
+ for controller in STAC.get_controllers()
+ ]
+
+- # ==========================================================================
+- def __init__(self, args):
+- super().__init__(args, self._reload_hdlr)
+-
+- # We don't want to apply configuration changes to nvme-cli right away.
+- # Often, multiple changes will occur in a short amount of time (sub-second).
+- # We want to wait until there are no more changes before applying them
+- # to the system. The following timer acts as a "soak period". Changes
+- # will be applied by calling self._on_config_ctrls() at the end of
+- # the soak period.
+- self._cfg_soak_tmr = gutil.GTimer(Stac.CONF_STABILITY_SOAK_TIME_SEC, self._on_config_ctrls)
+- self._cfg_soak_tmr.start()
+-
+- self._add_event_soak_tmr = gutil.GTimer(Stac.ADD_EVENT_SOAK_TIME_SEC, self._on_add_event_soaked)
+-
+- self._config_connections_audit()
+-
+- # Create the D-Bus instance.
+- self._config_dbus(Stac.Dbus(), defs.STACD_DBUS_NAME, defs.STACD_DBUS_PATH)
+-
+- # Connect to STAF D-Bus interface
+- self._staf = None
+- self._staf_watcher = dasbus.client.observer.DBusObserver(self._sysbus, defs.STAFD_DBUS_NAME)
+- self._staf_watcher.service_available.connect(self._connect_to_staf)
+- self._staf_watcher.service_unavailable.connect(self._disconnect_from_staf)
+- self._staf_watcher.connect_once_available()
+-
+- # Suppress udev rule to auto-connect when AEN is received.
+- udev_rule_ctrl(conf.SvcConf().udev_rule_enabled)
+-
+- def _release_resources(self):
+- logging.debug('Stac._release_resources()')
+-
+- if self._add_event_soak_tmr:
+- self._add_event_soak_tmr.kill()
+-
+- udev_rule_ctrl(True)
+-
+- if self._udev:
+- self._udev.unregister_for_action_events('add')
+-
+- self._destroy_staf_comlink(self._staf_watcher)
+- if self._staf_watcher is not None:
+- self._staf_watcher.disconnect()
+-
+- super()._release_resources()
+-
+- self._staf = None
+- self._staf_watcher = None
+- self._add_event_soak_tmr = None
+-
+- def _audit_connections(self, tids):
+- '''A host should only connect to I/O controllers that have been zoned
+- for that host or a manual "controller" entry exists in stcd.conf.
+- A host should disconnect from an I/O controller when that I/O controller
+- is removed from the zone or a manual "controller" entry is removed from
+- stacd.conf. stacd will audit connections if "sticky-connections=disabled".
+- stacd will delete any connection that is not supposed to exist.
+- '''
+- logging.debug('Stac._audit_connections() - tids = %s', tids)
+- num_controllers = len(self._controllers)
+- for tid in tids:
+- if tid not in self._controllers:
+- self._controllers[tid] = Ioc(self._root, self._host, tid)
+-
+- if num_controllers != len(self._controllers):
+- self._cfg_soak_tmr.start(Stac.CONF_STABILITY_SOAK_TIME_SEC)
+-
+- def _on_add_event(self, udev_obj): # pylint: disable=unused-argument
+- '''@brief This function is called when a "add" event is received from
+- the kernel for an NVMe device. This is used to trigger an audit and make
+- sure that the connection to an I/O controller is allowed.
+-
+- WARNING: There is a race condition with the "add" event from the kernel.
+- The kernel sends the "add" event a bit early and the sysfs attributes
+- associated with the nvme object are not always fully initialized.
+- To workaround this problem we use a soaking timer to give time for the
+- sysfs attributes to stabilize.
+- '''
+- self._add_event_soak_tmr.start()
+-
+- def _on_add_event_soaked(self):
+- '''@brief After the add event has been soaking for ADD_EVENT_SOAK_TIME_SEC
+- seconds, we can audit the connections.
+- '''
+- if not conf.SvcConf().sticky_connections:
+- self._audit_connections(self._udev.get_nvme_ioc_tids())
+- return GLib.SOURCE_REMOVE
+-
+- def _config_connections_audit(self):
+- '''This function checks the "sticky_connections" parameter to determine
+- whether audits should be performed. Audits are enabled when
+- "sticky_connections" is disabled.
+- '''
+- if not conf.SvcConf().sticky_connections:
+- if self._udev.get_registered_action_cback('add') is None:
+- self._udev.register_for_action_events('add', self._on_add_event)
+- self._audit_connections(self._udev.get_nvme_ioc_tids())
+- else:
+- self._udev.unregister_for_action_events('add')
+-
+- def _keep_connections_on_exit(self):
+- '''@brief Determine whether connections should remain when the
+- process exits.
+- '''
+- return True
+-
+- def _reload_hdlr(self):
+- '''@brief Reload configuration file. This is triggered by the SIGHUP
+- signal, which can be sent with "systemctl reload stacd".
+- '''
+- systemd.daemon.notify('RELOADING=1')
+- service_cnf = conf.SvcConf()
+- service_cnf.reload()
+- self.tron = service_cnf.tron
+- self._config_connections_audit()
+- self._cfg_soak_tmr.start(Stac.CONF_STABILITY_SOAK_TIME_SEC)
+- udev_rule_ctrl(service_cnf.udev_rule_enabled)
+- systemd.daemon.notify('READY=1')
+- return GLib.SOURCE_CONTINUE
+-
+- def _get_log_pages_from_stafd(self):
+- if self._staf:
+- try:
+- return json.loads(self._staf.get_all_log_pages(True))
+- except dasbus.error.DBusError:
+- pass
+-
+- return list()
+-
+- def _config_ctrls_finish(self, configured_ctrl_list):
+- configured_ctrl_list = [
+- ctrl_dict for ctrl_dict in configured_ctrl_list if 'traddr' in ctrl_dict and 'subsysnqn' in ctrl_dict
+- ]
+- logging.debug('Stac._config_ctrls_finish() - configured_ctrl_list = %s', configured_ctrl_list)
+-
+- discovered_ctrl_list = list()
+- for staf_data in self._get_log_pages_from_stafd():
+- host_traddr = staf_data['discovery-controller']['host-traddr']
+- host_iface = staf_data['discovery-controller']['host-iface']
+- for dlpe in staf_data['log-pages']:
+- if dlpe.get('subtype') == 'nvme': # eliminate discovery controllers
+- discovered_ctrl_list.append(stas.cid_from_dlpe(dlpe, host_traddr, host_iface))
+-
+- logging.debug('Stac._config_ctrls_finish() - discovered_ctrl_list = %s', discovered_ctrl_list)
+-
+- controllers = stas.remove_blacklisted(configured_ctrl_list + discovered_ctrl_list)
+- controllers = stas.remove_invalid_addresses(controllers)
+-
+- new_controller_ids = {trid.TID(controller) for controller in controllers}
+- cur_controller_ids = set(self._controllers.keys())
+- controllers_to_add = new_controller_ids - cur_controller_ids
+- controllers_to_del = cur_controller_ids - new_controller_ids
+-
+- logging.debug('Stac._config_ctrls_finish() - controllers_to_add = %s', list(controllers_to_add))
+- logging.debug('Stac._config_ctrls_finish() - controllers_to_del = %s', list(controllers_to_del))
+-
+- for tid in controllers_to_del:
+- controller = self._controllers.pop(tid, None)
+- if controller is not None:
+- controller.disconnect(self.remove_controller, conf.SvcConf().sticky_connections)
+-
+- for tid in controllers_to_add:
+- self._controllers[tid] = Ioc(self._root, self._host, tid)
+-
+- def _connect_to_staf(self, _):
+- '''@brief Hook up DBus signal handlers for signals from stafd.'''
+- try:
+- self._staf = self._sysbus.get_proxy(defs.STAFD_DBUS_NAME, defs.STAFD_DBUS_PATH)
+- self._staf.log_pages_changed.connect(self._log_pages_changed)
+- self._cfg_soak_tmr.start()
+-
+- # Make sure timer is set back to its normal value.
+- self._cfg_soak_tmr.set_timeout(Stac.CONF_STABILITY_SOAK_TIME_SEC)
+- logging.debug('Stac._connect_to_staf() - Connected to staf')
+- except dasbus.error.DBusError:
+- logging.error('Failed to connect to staf')
+-
+- def _destroy_staf_comlink(self, watcher): # pylint: disable=unused-argument
+- if self._staf:
+- self._staf.log_pages_changed.disconnect(self._log_pages_changed)
+- dasbus.client.proxy.disconnect_proxy(self._staf)
+- self._staf = None
+-
+- def _disconnect_from_staf(self, watcher):
+- self._destroy_staf_comlink(watcher)
+-
+- # When we lose connectivity with stafd, the most logical explanation
+- # is that stafd restarted. In that case, it may take some time for stafd
+- # to re-populate its log pages cache. So let's give stafd plenty of time
+- # to update its log pages cache and send log pages change notifications
+- # before triggering a stacd re-config. We do this by momentarily
+- # increasing the config soak timer to a longer period.
+- if self._cfg_soak_tmr:
+- self._cfg_soak_tmr.set_timeout(Stac.CONF_STABILITY_LONG_SOAK_TIME_SEC)
+-
+- logging.debug('Stac._disconnect_from_staf() - Disconnected from staf')
+-
+- def _log_pages_changed( # pylint: disable=too-many-arguments
+- self, transport, traddr, trsvcid, host_traddr, host_iface, subsysnqn, device
+- ):
+- logging.debug(
+- 'Stac._log_pages_changed() - transport=%s, traddr=%s, trsvcid=%s, host_traddr=%s, host_iface=%s, subsysnqn=%s, device=%s',
+- transport,
+- traddr,
+- trsvcid,
+- host_traddr,
+- host_iface,
+- subsysnqn,
+- device,
+- )
+- self._cfg_soak_tmr.start(Stac.CONF_STABILITY_SOAK_TIME_SEC)
+-
+- def _load_last_known_config(self):
+- return dict()
+-
+- def _dump_last_known_config(self, controllers):
+- pass
+-
+-
+-# ******************************************************************************
+-if __name__ == '__main__':
+- STAC = Stac(ARGS)
++ log.init(ARGS.syslog)
++ STAC = service.Stac(ARGS, Dbus())
+ STAC.run()
+
+ STAC = None
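The slimmed-down stacd.py above keeps only the Dbus interface class and hands it to service.Stac(). For readers unfamiliar with dasbus, a minimal, hypothetical publication sketch (the interface name, object path, and bus name below are placeholders, not the patch's actual wiring, which now lives in staslib):

from dasbus.connection import SystemMessageBus
from dasbus.server.interface import dbus_interface
from gi.repository import GLib

@dbus_interface('org.example.stacd.debug')  # placeholder interface name
class Dbus:
    @property
    def tron(self) -> bool:
        '''TRace-ON state (read-only in this sketch).'''
        return True

bus = SystemMessageBus()
bus.publish_object('/org/example/stacd', Dbus())  # placeholder object path
bus.register_service('org.example.stacd')         # placeholder bus name
GLib.MainLoop().run()  # a main loop must run for calls to be served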
+diff --git a/stafd.py b/stafd.py
+index aff64fd..8a77c51 100755
+--- a/stafd.py
++++ b/stafd.py
+@@ -10,14 +10,12 @@
+ ''' STorage Appliance Finder Daemon
+ '''
+ import sys
+-import logging
+ from argparse import ArgumentParser
+ from staslib import defs
+
+-# pylint: disable=consider-using-f-string
+-DBUS_IDL = '''
++DBUS_IDL = f'''
+
+-
++
+
+
+
+@@ -34,10 +32,10 @@ DBUS_IDL = '''
+
+
+
+-
++
+
+
+-
++
+
+
+
+@@ -46,7 +44,7 @@ DBUS_IDL = '''
+
+
+
+-
++
+
+
+
+@@ -63,12 +61,10 @@ DBUS_IDL = '''
+
+
+
+-''' % (
+- defs.STAFD_DBUS_NAME,
+- defs.STAFD_DBUS_NAME,
+-)
++'''
+
+
++# ******************************************************************************
+ def parse_args(conf_file: str): # pylint: disable=missing-function-docstring
+ parser = ArgumentParser(
+ description=f'{defs.STAF_DESCRIPTION} ({defs.STAF_ACRONYM}). Must be root to run this program.'
+@@ -99,6 +95,12 @@ ARGS = parse_args(defs.STAFD_CONFIG_FILE)
+
+ if ARGS.version:
+ print(f'{defs.PROJECT_NAME} {defs.VERSION}')
++ try:
++ import libnvme
++
++ print(f'libnvme {libnvme.__version__}')
++ except (AttributeError, ModuleNotFoundError):
++ pass
+ sys.exit(0)
+
+ if ARGS.idl:
+@@ -107,250 +109,15 @@ if ARGS.idl:
+ sys.exit(0)
+
+
+-# There is a reason for having this import here and not at the top of the file.
+-# We want to allow running stafd with the --version and --idl options and exit
+-# without having to import stas and avahi.
+-from staslib import stas, avahi # pylint: disable=wrong-import-position
+-
+-# Before going any further, make sure the script is allowed to run.
+-stas.check_if_allowed_to_continue()
+-
+-
+-################################################################################
+-# Preliminary checks have passed. Let her rip!
+-# pylint: disable=wrong-import-position
+-# pylint: disable=wrong-import-order
+-import json
+-import pickle
+-import dasbus.server.interface
+-import systemd.daemon
+-from libnvme import nvme
+-from gi.repository import GLib
+-from staslib import conf, log, gutil, trid, udev, ctrl, service # pylint: disable=ungrouped-imports
+-
+-log.init(ARGS.syslog)
+-
+-DLP_CHANGED = (
+- (nvme.NVME_LOG_LID_DISCOVER << 16) | (nvme.NVME_AER_NOTICE_DISC_CHANGED << 8) | nvme.NVME_AER_NOTICE
+-) # 0x70f002
+-
+-
+ # ******************************************************************************
+-class Dc(ctrl.Controller):
+- '''@brief This object establishes a connection to one Discover Controller (DC).
+- It retrieves the discovery log pages and caches them.
+- It also monitors udev events associated with that DC and updates
+- the cached discovery log pages accordingly.
+- '''
+-
+- GET_LOG_PAGE_RETRY_RERIOD_SEC = 20
+- REGISTRATION_RETRY_RERIOD_SEC = 10
+-
+- def __init__(self, root, host, tid: trid.TID, log_pages=None):
+- super().__init__(root, host, tid, discovery_ctrl=True)
+- self._register_op = None
+- self._get_log_op = None
+- self._log_pages = log_pages if log_pages else list() # Log pages cache
+-
+- def _release_resources(self):
+- logging.debug('Dc._release_resources() - %s | %s', self.id, self.device)
+- super()._release_resources()
+- self._log_pages = list()
+-
+- def _kill_ops(self):
+- super()._kill_ops()
+- if self._get_log_op:
+- self._get_log_op.kill()
+- self._get_log_op = None
+- if self._register_op:
+- self._register_op.kill()
+- self._register_op = None
+-
+- def info(self) -> dict:
+- '''@brief Get the controller info for this object'''
+- info = super().info()
+- if self._get_log_op:
+- info['get log page operation'] = self._get_log_op.as_dict()
+- if self._register_op:
+- info['register operation'] = self._register_op.as_dict()
+- return info
+-
+- def cancel(self):
+- '''@brief Used to cancel pending operations.'''
+- super().cancel()
+- if self._get_log_op:
+- self._get_log_op.cancel()
+- if self._register_op:
+- self._register_op.cancel()
+-
+- def log_pages(self) -> list:
+- '''@brief Get the cached log pages for this object'''
+- return self._log_pages
+-
+- def referrals(self) -> list:
+- '''@brief Return the list of referrals'''
+- return [page for page in self._log_pages if page['subtype'] == 'referral']
+-
+- def _on_aen(self, aen: int):
+- super()._on_aen(aen)
+- if aen == DLP_CHANGED and self._get_log_op:
+- self._get_log_op.run_async()
+-
+- def _on_nvme_event(self, nvme_event: str):
+- super()._on_nvme_event(nvme_event)
+- if nvme_event == 'connected' and self._register_op:
+- self._register_op.run_async()
+-
+- def _on_udev_remove(self, udev_obj):
+- super()._on_udev_remove(udev_obj)
+- if self._try_to_connect_deferred:
+- self._try_to_connect_deferred.schedule()
+-
+- def _find_existing_connection(self):
+- return self._udev.find_nvme_dc_device(self.tid)
+-
+- # --------------------------------------------------------------------------
+- def _on_connect_success(self, op_obj, data):
+- '''@brief Function called when we successfully connect to the
+- Discovery Controller.
+- '''
+- super()._on_connect_success(op_obj, data)
+-
+- if self._alive():
+- if self._ctrl.is_registration_supported():
+- self._register_op = gutil.AsyncOperationWithRetry(
+- self._on_registration_success,
+- self._on_registration_fail,
+- self._ctrl.registration_ctlr,
+- nvme.NVMF_DIM_TAS_REGISTER,
+- )
+- self._register_op.run_async()
+- else:
+- self._get_log_op = gutil.AsyncOperationWithRetry(
+- self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover
+- )
+- self._get_log_op.run_async()
+-
+- # --------------------------------------------------------------------------
+- def _on_registration_success(self, op_obj, data): # pylint: disable=unused-argument
+- '''@brief Function called when we successfully register with the
+- Discovery Controller. See self._register_op object
+- for details.
+- '''
+- if self._alive():
+- if data is not None:
+- logging.warning('%s | %s - Registration error. %s.', self.id, self.device, data)
+- else:
+- logging.debug('Dc._on_registration_success() - %s | %s', self.id, self.device)
+- self._get_log_op = gutil.AsyncOperationWithRetry(
+- self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover
+- )
+- self._get_log_op.run_async()
+- else:
+- logging.debug(
+- 'Dc._on_registration_success() - %s | %s Received event on dead object.', self.id, self.device
+- )
+-
+- def _on_registration_fail(self, op_obj, err, fail_cnt):
+- '''@brief Function called when we fail to register with the
+- Discovery Controller. See self._register_op object
+- for details.
+- '''
+- if self._alive():
+- logging.debug(
+- 'Dc._on_registration_fail() - %s | %s: %s. Retry in %s sec',
+- self.id,
+- self.device,
+- err,
+- Dc.REGISTRATION_RETRY_RERIOD_SEC,
+- )
+- if fail_cnt == 1: # Throttle the logs. Only print the first time we fail to connect
+- logging.error('%s | %s - Failed to register with Discovery Controller. %s', self.id, self.device, err)
+- # op_obj.retry(Dc.REGISTRATION_RETRY_RERIOD_SEC)
+- else:
+- logging.debug(
+- 'Dc._on_registration_fail() - %s | %s Received event on dead object. %s',
+- self.id,
+- self.device,
+- err,
+- )
+- op_obj.kill()
+-
+- # --------------------------------------------------------------------------
+- def _on_get_log_success(self, op_obj, data): # pylint: disable=unused-argument
+- '''@brief Function called when we successfully retrieve the log pages
+- from the Discovery Controller. See self._get_log_op object
+- for details.
+- '''
+- if self._alive():
+- # Note that for historical reasons too long to explain, the CDC may
+- # return invalid addresses ("0.0.0.0", "::", or ""). Those need to be
+- # filtered out.
+- referrals_before = self.referrals()
+- self._log_pages = (
+- [
+- {k: str(v) for k, v in dictionary.items()}
+- for dictionary in data
+- if dictionary.get('traddr') not in ('0.0.0.0', '::', '')
+- ]
+- if data
+- else list()
+- )
+- logging.info(
+- '%s | %s - Received discovery log pages (num records=%s).', self.id, self.device, len(self._log_pages)
+- )
+- referrals_after = self.referrals()
+- STAF.log_pages_changed(self, self.device)
+- if referrals_after != referrals_before:
+- logging.debug(
+- 'Dc._on_get_log_success() - %s | %s Referrals before = %s',
+- self.id,
+- self.device,
+- referrals_before,
+- )
+- logging.debug(
+- 'Dc._on_get_log_success() - %s | %s Referrals after = %s',
+- self.id,
+- self.device,
+- referrals_after,
+- )
+- STAF.referrals_changed()
+- else:
+- logging.debug(
+- 'Dc._on_get_log_success() - %s | %s Received event on dead object.', self.id, self.device
+- )
+-
+- def _on_get_log_fail(self, op_obj, err, fail_cnt):
+- '''@brief Function called when we fail to retrieve the log pages
+- from the Discovery Controller. See self._get_log_op object
+- for details.
+- '''
+- if self._alive():
+- logging.debug(
+- 'Dc._on_get_log_fail() - %s | %s: %s. Retry in %s sec',
+- self.id,
+- self.device,
+- err,
+- Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC,
+- )
+- if fail_cnt == 1: # Throttle the logs. Only print the first time we fail to connect
+- logging.error('%s | %s - Failed to retrieve log pages. %s', self.id, self.device, err)
+- op_obj.retry(Dc.GET_LOG_PAGE_RETRY_RERIOD_SEC)
+- else:
+- logging.debug(
+- 'Dc._on_get_log_fail() - %s | %s Received event on dead object. %s',
+- self.id,
+- self.device,
+- err,
+- )
+- op_obj.kill()
+-
+-
+-# ******************************************************************************
+-class Staf(service.Service):
+- '''STorage Appliance Finder (STAF)'''
++if __name__ == '__main__':
++ import json
++ import logging
++ import dasbus.server.interface
++ from staslib import log, service, stas, udev # pylint: disable=ungrouped-imports
+
+- CONF_STABILITY_SOAK_TIME_SEC = 1.5
++ # Before going any further, make sure the script is allowed to run.
++ stas.check_if_allowed_to_continue()
+
+ class Dbus:
+ '''This is the DBus interface that external programs can use to
+@@ -431,148 +198,8 @@ class Staf(service.Service):
+ for controller in STAF.get_controllers()
+ ]
+
+- # ==========================================================================
+- def __init__(self, args):
+- super().__init__(args, self._reload_hdlr)
+-
+- self._avahi = avahi.Avahi(self._sysbus, self._avahi_change)
+- self._avahi.config_stypes(conf.SvcConf().get_stypes())
+-
+- # We don't want to apply configuration changes to nvme-cli right away.
+- # Often, multiple changes will occur in a short amount of time (sub-second).
+- # We want to wait until there are no more changes before applying them
+- # to the system. The following timer acts as a "soak period". Changes
+- # will be applied by calling self._on_config_ctrls() at the end of
+- # the soak period.
+- self._cfg_soak_tmr = gutil.GTimer(Staf.CONF_STABILITY_SOAK_TIME_SEC, self._on_config_ctrls)
+- self._cfg_soak_tmr.start()
+-
+- # Create the D-Bus instance.
+- self._config_dbus(Staf.Dbus(), defs.STAFD_DBUS_NAME, defs.STAFD_DBUS_PATH)
+-
+- def info(self) -> dict:
+- '''@brief Get the status info for this object (used for debug)'''
+- info = super().info()
+- info['avahi'] = self._avahi.info()
+- return info
+-
+- def _release_resources(self):
+- logging.debug('Staf._release_resources()')
+- super()._release_resources()
+- if self._avahi:
+- self._avahi.kill()
+- self._avahi = None
+-
+- def _load_last_known_config(self):
+- try:
+- with open(self._lkc_file, 'rb') as file:
+- config = pickle.load(file)
+- except (FileNotFoundError, AttributeError):
+- return dict()
+-
+- logging.debug('Staf._load_last_known_config() - DC count = %s', len(config))
+- return {tid: Dc(self._root, self._host, tid, log_pages) for tid, log_pages in config.items()}
+-
+- def _dump_last_known_config(self, controllers):
+- try:
+- with open(self._lkc_file, 'wb') as file:
+- config = {tid: dc.log_pages() for tid, dc in controllers.items()}
+- logging.debug('Staf._dump_last_known_config() - DC count = %s', len(config))
+- pickle.dump(config, file)
+- except FileNotFoundError as ex:
+- logging.error('Unable to save last known config: %s', ex)
+-
+- def _keep_connections_on_exit(self):
+- '''@brief Determine whether connections should remain when the
+- process exits.
+- '''
+- return conf.SvcConf().persistent_connections
+-
+- def _reload_hdlr(self):
+- '''@brief Reload configuration file. This is triggered by the SIGHUP
+- signal, which can be sent with "systemctl reload stafd".
+- '''
+- systemd.daemon.notify('RELOADING=1')
+- service_cnf = conf.SvcConf()
+- service_cnf.reload()
+- self.tron = service_cnf.tron
+- self._avahi.kick_start() # Make sure Avahi is running
+- self._avahi.config_stypes(service_cnf.get_stypes())
+- self._cfg_soak_tmr.start()
+- systemd.daemon.notify('READY=1')
+- return GLib.SOURCE_CONTINUE
+-
+- def log_pages_changed(self, controller, device):
+- '''@brief Function invoked when a controller's cached log pages
+- have changed. This will emit a D-Bus signal to inform
+- other applications that the cached log pages have changed.
+- '''
+- self._dbus_iface.log_pages_changed.emit(
+- controller.tid.transport,
+- controller.tid.traddr,
+- controller.tid.trsvcid,
+- controller.tid.host_traddr,
+- controller.tid.host_iface,
+- controller.tid.subsysnqn,
+- device,
+- )
+-
+- def referrals_changed(self):
+- '''@brief Function invoked when a controller's cached referrals
+- have changed.
+- '''
+- logging.debug('Staf.referrals_changed()')
+- self._cfg_soak_tmr.start()
+-
+- def _referrals(self) -> list:
+- return [
+- stas.cid_from_dlpe(dlpe, controller.tid.host_traddr, controller.tid.host_iface)
+- for controller in self.get_controllers()
+- for dlpe in controller.referrals()
+- ]
+-
+- def _config_ctrls_finish(self, configured_ctrl_list):
+- '''@brief Finish discovery controllers configuration after
+- hostnames (if any) have been resolved.
+- '''
+- configured_ctrl_list = [
+- ctrl_dict
+- for ctrl_dict in configured_ctrl_list
+- if 'traddr' in ctrl_dict and ctrl_dict.setdefault('subsysnqn', defs.WELL_KNOWN_DISC_NQN)
+- ]
+-
+- discovered_ctrl_list = self._avahi.get_controllers()
+- referral_ctrl_list = self._referrals()
+- logging.debug('Staf._config_ctrls_finish() - configured_ctrl_list = %s', configured_ctrl_list)
+- logging.debug('Staf._config_ctrls_finish() - discovered_ctrl_list = %s', discovered_ctrl_list)
+- logging.debug('Staf._config_ctrls_finish() - referral_ctrl_list = %s', referral_ctrl_list)
+-
+- controllers = stas.remove_blacklisted(configured_ctrl_list + discovered_ctrl_list + referral_ctrl_list)
+- controllers = stas.remove_invalid_addresses(controllers)
+-
+- new_controller_ids = {trid.TID(controller) for controller in controllers}
+- cur_controller_ids = set(self._controllers.keys())
+- controllers_to_add = new_controller_ids - cur_controller_ids
+- controllers_to_del = cur_controller_ids - new_controller_ids
+-
+- logging.debug('Staf._config_ctrls_finish() - controllers_to_add = %s', list(controllers_to_add))
+- logging.debug('Staf._config_ctrls_finish() - controllers_to_del = %s', list(controllers_to_del))
+-
+- for tid in controllers_to_del:
+- controller = self._controllers.pop(tid, None)
+- if controller is not None:
+- controller.disconnect(self.remove_controller, conf.SvcConf().persistent_connections)
+-
+- for tid in controllers_to_add:
+- self._controllers[tid] = Dc(self._root, self._host, tid)
+-
+- def _avahi_change(self):
+- self._cfg_soak_tmr.start()
+-
+-
+-# ******************************************************************************
+-if __name__ == '__main__':
+- STAF = Staf(ARGS)
++ log.init(ARGS.syslog)
++ STAF = service.Staf(ARGS, Dbus())
+ STAF.run()
+
+ STAF = None
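stafd.py's last-known-config handling (the _load_last_known_config/_dump_last_known_config methods removed above, which this patch relocates into staslib) boils down to pickling the per-controller log-page cache. A standalone sketch under that assumption:

import pickle

def dump_last_known_config(lkc_file, controllers):
    '''Persist each discovery controller's cached log pages, keyed by TID.'''
    try:
        with open(lkc_file, 'wb') as file:
            pickle.dump({tid: dc.log_pages() for tid, dc in controllers.items()}, file)
    except FileNotFoundError as ex:
        print(f'Unable to save last known config: {ex}')

def load_last_known_config(lkc_file):
    '''Return the pickled {tid: log_pages} dict, or an empty dict on error.'''
    try:
        with open(lkc_file, 'rb') as file:
            return pickle.load(file)
    except (FileNotFoundError, AttributeError):
        return dict()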
+diff --git a/staslib/avahi.py b/staslib/avahi.py
+index 768bbf4..90a67c8 100644
+--- a/staslib/avahi.py
++++ b/staslib/avahi.py
+@@ -172,9 +172,7 @@ class Avahi: # pylint: disable=too-many-instance-attributes
+ services = dict()
+ for service, obj in self._services.items():
+ interface, protocol, name, stype, domain = service
+- key = '({}, {}, {}.{}, {})'.format( # pylint: disable=consider-using-f-string
+- socket.if_indextoname(interface), Avahi.protos.get(protocol, 'unknown'), name, domain, stype
+- )
++ key = f'({socket.if_indextoname(interface)}, {Avahi.protos.get(protocol, "unknown")}, {name}.{domain}, {stype})'
+ services[key] = obj.get('data', {})
+
+ info = {
+@@ -316,7 +314,7 @@ class Avahi: # pylint: disable=too-many-instance-attributes
+ _interface_name: str,
+ _signal_name: str,
+ args: typing.Tuple[int, int, str, str, str, int],
+- *_user_data
++ *_user_data,
+ ):
+ (interface, protocol, name, stype, domain, flags) = args
+ logging.debug(
+@@ -352,7 +350,7 @@ class Avahi: # pylint: disable=too-many-instance-attributes
+ _interface_name: str,
+ _signal_name: str,
+ args: typing.Tuple[int, int, str, str, str, int],
+- *_user_data
++ *_user_data,
+ ):
+ (interface, protocol, name, stype, domain, flags) = args
+ logging.debug(
+@@ -386,7 +384,7 @@ class Avahi: # pylint: disable=too-many-instance-attributes
+ _interface_name: str,
+ _signal_name: str,
+ args: typing.Tuple[int, int, str, str, str, str, int, str, int, list, int],
+- *_user_data
++ *_user_data,
+ ):
+ (interface, protocol, name, stype, domain, host, aprotocol, address, port, txt, flags) = args
+ txt = _txt2dict(txt)
+@@ -428,7 +426,7 @@ class Avahi: # pylint: disable=too-many-instance-attributes
+ interface_name: str,
+ _signal_name: str,
+ args: typing.Tuple[str],
+- *_user_data
++ *_user_data,
+ ):
+ (error,) = args
+ if 'ServiceResolver' not in interface_name or 'TimeoutError' not in error:
+diff --git a/staslib/conf.py b/staslib/conf.py
+index 3f52e4f..c314a9e 100644
+--- a/staslib/conf.py
++++ b/staslib/conf.py
+@@ -74,7 +74,7 @@ class SvcConf(metaclass=singleton.Singleton):
+ ('Global', 'ignore-iface'): 'false',
+ ('Global', 'ip-family'): 'ipv4+ipv6',
+ ('Global', 'udev-rule'): 'enabled',
+- ('Global', 'sticky-connections'): 'disabled',
++ ('Global', 'sticky-connections'): 'enabled',
+ ('Service Discovery', 'zeroconf'): 'enabled',
+ ('Controllers', 'controller'): list(),
+ ('Controllers', 'blacklist'): list(),
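The (section, option) defaults table above is what makes the flipped sticky-connections default take effect when stacd.conf leaves the option commented out. A minimal sketch of that fallback pattern with plain configparser (not the project's singleton SvcConf class):

import configparser

DEFAULTS = {
    ('Global', 'sticky-connections'): 'enabled',  # new default in this patch
    ('Service Discovery', 'zeroconf'): 'enabled',
}

def get_option(cp, section, option):
    # Fall back to the table when the file omits the section or option.
    return cp.get(section, option, fallback=DEFAULTS[(section, option)])

cp = configparser.ConfigParser()
cp.read('/etc/stas/stacd.conf')
sticky = get_option(cp, 'Global', 'sticky-connections') == 'enabled'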
+diff --git a/staslib/ctrl.py b/staslib/ctrl.py
+index 5504baa..dbc1973 100644
+--- a/staslib/ctrl.py
++++ b/staslib/ctrl.py
+@@ -10,69 +10,76 @@
+ Dc (Discovery Controller) and Ioc (I/O Controller) objects are derived.'''
+
+ import logging
+-from gi.repository import Gio, GLib
++from gi.repository import GLib
+ from libnvme import nvme
+-from staslib import conf, gutil, trid, udev
++from staslib import conf, gutil, trid, udev, stas
+
+
+ DC_KATO_DEFAULT = 30 # seconds
+
+
+ # ******************************************************************************
+-class Controller: # pylint: disable=too-many-instance-attributes
++class Controller(stas.ControllerABC):
+ '''@brief Base class used to manage the connection to a controller.'''
+
+- CONNECT_RETRY_PERIOD_SEC = 60
+- FAST_CONNECT_RETRY_PERIOD_SEC = 3
+-
+ def __init__(self, root, host, tid: trid.TID, discovery_ctrl=False):
+- self._root = root
+- self._host = host
+- self._udev = udev.UDEV
+- self._tid = tid
+- self._cancellable = Gio.Cancellable()
+- self._connect_op = None
+- self._connect_attempts = 0
+- self._retry_connect_tmr = gutil.GTimer(Controller.CONNECT_RETRY_PERIOD_SEC, self._on_try_to_connect)
+- self._device = None
+- self._ctrl = None
+- self._discovery_ctrl = discovery_ctrl
+- self._try_to_connect_deferred = gutil.Deferred(self._try_to_connect)
+- self._try_to_connect_deferred.schedule()
++ self._udev = udev.UDEV
++ self._device = None # Refers to the nvme device (e.g. /dev/nvme[n])
++ self._ctrl = None # libnvme's nvme.ctrl object
++ self._connect_op = None
++
++ super().__init__(root, host, tid, discovery_ctrl)
+
+ def _release_resources(self):
+ logging.debug('Controller._release_resources() - %s', self.id)
+
+- # Remove pending deferred from main loop
+- if self._try_to_connect_deferred:
+- self._try_to_connect_deferred.cancel()
+- self._try_to_connect_deferred = None
+-
+ if self._udev:
+ self._udev.unregister_for_device_events(self._on_udev_notification)
+
+- if self._retry_connect_tmr is not None:
+- self._retry_connect_tmr.kill()
+-
+- if self._cancellable and not self._cancellable.is_cancelled():
+- self._cancellable.cancel()
+-
+ self._kill_ops()
+
+- self._tid = None
++ super()._release_resources()
++
+ self._ctrl = None
+- self._device = None
+- self._retry_connect_tmr = None
+- self._cancellable = None
+ self._udev = None
+
+- def _alive(self):
+- '''There may be race condition where a queued event gets processed
+- after the object is no longer configured (i.e. alive). This method
+- can be used by callback functions to make sure the object is still
+- alive before processing further.
+- '''
+- return self._cancellable and not self._cancellable.is_cancelled()
++ @property
++ def device(self) -> str:
++ '''@brief return the Linux nvme device id (e.g. nvme3), or the
++ placeholder 'nvme?' if no device is associated with this controller'''
++ if not self._device and self._ctrl and self._ctrl.name:
++ self._device = self._ctrl.name
++
++ return self._device or 'nvme?'
++
++ def controller_id_dict(self) -> dict:
++ '''@brief return the controller ID as a dict.'''
++ cid = super().controller_id_dict()
++ cid['device'] = self.device
++ return cid
++
++ def details(self) -> dict:
++ '''@brief return detailed debug info about this controller'''
++ details = super().details()
++ details.update(
++ self._udev.get_attributes(self.device,
++ ('hostid', 'hostnqn', 'model',
++ 'serial', 'dctype', 'cntrltype'))
++ )
++ return details
++
++ def info(self) -> dict:
++ '''@brief Get the controller info for this object'''
++ info = super().info()
++ if self._connect_op:
++ info['connect operation'] = self._connect_op.as_dict()
++ return info
++
++ def cancel(self):
++ '''@brief Used to cancel pending operations.'''
++ super().cancel()
++ if self._connect_op:
++ self._connect_op.cancel()
+
+ def _kill_ops(self):
+ if self._connect_op:
+@@ -91,7 +98,7 @@ class Controller: # pylint: disable=too-many-instance-attributes
+ self._on_nvme_event(nvme_event)
+ elif udev_obj.action == 'remove':
+ logging.info('%s | %s - Received "remove" event', self.id, udev_obj.sys_name)
+- self._on_udev_remove(udev_obj)
++ self._on_ctrl_removed(udev_obj)
+ else:
+ logging.debug(
+ 'Controller._on_udev_notification() - %s | %s - Received "%s" notification.',
+@@ -108,33 +115,12 @@ class Controller: # pylint: disable=too-many-instance-attributes
+ udev_obj.sys_name,
+ )
+
+- def _on_aen(self, aen: int):
+- pass
+-
+- def _on_nvme_event(self, nvme_event):
+- pass
+-
+- def _on_udev_remove(self, udev_obj): # pylint: disable=unused-argument
++ def _on_ctrl_removed(self, obj): # pylint: disable=unused-argument
+ self._udev.unregister_for_device_events(self._on_udev_notification)
+ self._kill_ops() # Kill all pending operations
+ self._ctrl = None
+
+- def _find_existing_connection(self):
+- raise NotImplementedError()
+-
+- def _on_try_to_connect(self):
+- self._try_to_connect_deferred.schedule()
+- return GLib.SOURCE_REMOVE
+-
+- def _try_to_connect(self):
+- # This is a deferred function call. Make sure
+- # the source of the deferred is still good.
+- source = GLib.main_current_source()
+- if source and source.is_destroyed():
+- return
+-
+- self._connect_attempts += 1
+-
++ def _do_connect(self):
+ host_iface = (
+ self.tid.host_iface
+ if (self.tid.host_iface and not conf.SvcConf().ignore_iface and conf.NvmeOptions().host_iface_supp)
+@@ -164,7 +150,6 @@ class Controller: # pylint: disable=too-many-instance-attributes
+ self._on_connect_success, self._on_connect_fail, self._ctrl.init, self._host, int(udev_obj.sys_number)
+ )
+ else:
+- self._device = None
+ service_conf = conf.SvcConf()
+ cfg = { 'hdr_digest': service_conf.hdr_digest,
+ 'data_digest': service_conf.data_digest }
+@@ -198,11 +183,10 @@ class Controller: # pylint: disable=too-many-instance-attributes
+ self._connect_op = None
+
+ if self._alive():
+- if not self._device:
+- self._device = self._ctrl.name
++ self._device = self._ctrl.name
+ logging.info('%s | %s - Connection established!', self.id, self.device)
+ self._connect_attempts = 0
+- self._udev.register_for_device_events(self.device, self._on_udev_notification)
++ self._udev.register_for_device_events(self._device, self._on_udev_notification)
+ else:
+ logging.debug(
+ 'Controller._on_connect_success() - %s | %s Received event on dead object. data=%s',
+@@ -227,11 +211,11 @@ class Controller: # pylint: disable=too-many-instance-attributes
+ # the same time. This is perfectly fine, except that we may get a bogus
+ # failed to connect error. By doing a fast re-try, stacd can quickly
+ # verify that the connection was actually successful.
+- self._retry_connect_tmr.set_timeout(Controller.FAST_CONNECT_RETRY_PERIOD_SEC)
++ self._retry_connect_tmr.set_timeout(self.FAST_CONNECT_RETRY_PERIOD_SEC)
+ elif self._connect_attempts == 2:
+ # If the fast connect re-try fails, then we can print a message to
+ # indicate the failure, and start a slow re-try period.
+- self._retry_connect_tmr.set_timeout(Controller.CONNECT_RETRY_PERIOD_SEC)
++ self._retry_connect_tmr.set_timeout(self.CONNECT_RETRY_PERIOD_SEC)
+ logging.error('%s Failed to connect to controller. %s', self.id, getattr(err, 'message', err))
+
+ logging.debug(
+@@ -248,53 +232,6 @@ class Controller: # pylint: disable=too-many-instance-attributes
+ getattr(err, 'message', err),
+ )
+
+- @property
+- def id(self) -> str: # pylint: disable=missing-function-docstring
+- return str(self.tid)
+-
+- @property
+- def tid(self): # pylint: disable=missing-function-docstring
+- return self._tid
+-
+- @property
+- def device(self) -> str: # pylint: disable=missing-function-docstring
+- return self._device if self._device else ''
+-
+- def controller_id_dict(self) -> dict:
+- '''@brief return the controller ID as a dict.'''
+- cid = self.tid.as_dict()
+- cid['device'] = self.device
+- return cid
+-
+- def details(self) -> dict:
+- '''@brief return detailed debug info about this controller'''
+- details = self.controller_id_dict()
+- details.update(self._udev.get_attributes(self.device, ('hostid', 'hostnqn', 'model', 'serial')))
+- details['connect attempts'] = str(self._connect_attempts)
+- details['retry connect timer'] = str(self._retry_connect_tmr)
+- return details
+-
+- def info(self) -> dict:
+- '''@brief Get the controller info for this object'''
+- info = self.details()
+- if self._connect_op:
+- info['connect operation'] = self._connect_op.as_dict()
+- return info
+-
+- def cancel(self):
+- '''@brief Used to cancel pending operations.'''
+- if self._cancellable and not self._cancellable.is_cancelled():
+- logging.debug('Controller.cancel() - %s', self.id)
+- self._cancellable.cancel()
+-
+- if self._connect_op:
+- self._connect_op.cancel()
+-
+- def kill(self):
+- '''@brief Used to release all resources associated with this object.'''
+- logging.debug('Controller.kill() - %s', self.id)
+- self._release_resources()
+-
+ def disconnect(self, disconnected_cb, keep_connection):
+ '''@brief Issue an asynchronous disconnect command to a Controller.
+ Once the async command has completed, the callback 'disconnected_cb'
+@@ -313,7 +250,7 @@ class Controller: # pylint: disable=too-many-instance-attributes
+ # cannot be called directly as the current Controller object is in the
+ # process of being disconnected and the callback will in fact delete
+ # the object. This would invariably lead to unpredictable outcome.
+- GLib.idle_add(disconnected_cb, self)
++ GLib.idle_add(disconnected_cb, self, True)
+
+ def _on_disconn_success(self, op_obj, data, disconnected_cb): # pylint: disable=unused-argument
+ logging.debug('Controller._on_disconn_success() - %s | %s', self.id, self.device)
+@@ -322,7 +259,7 @@ class Controller: # pylint: disable=too-many-instance-attributes
+ # cannot be called directly as the current Controller object is in the
+ # process of being disconnected and the callback will in fact delete
+ # the object. This would invariably lead to unpredictable outcome.
+- GLib.idle_add(disconnected_cb, self)
++ GLib.idle_add(disconnected_cb, self, True)
+
+ def _on_disconn_fail(self, op_obj, err, fail_cnt, disconnected_cb): # pylint: disable=unused-argument
+ logging.debug('Controller._on_disconn_fail() - %s | %s: %s', self.id, self.device, err)
+@@ -331,4 +268,249 @@ class Controller: # pylint: disable=too-many-instance-attributes
+ # cannot be called directly as the current Controller object is in the
+ # process of being disconnected and the callback will in fact delete
+ # the object. This would invariably lead to unpredictable outcome.
+- GLib.idle_add(disconnected_cb, self)
++ GLib.idle_add(disconnected_cb, self, False)
++
++
++# ******************************************************************************
++class Dc(Controller):
++    '''@brief This object establishes a connection to one Discovery Controller (DC).
++ It retrieves the discovery log pages and caches them.
++ It also monitors udev events associated with that DC and updates
++ the cached discovery log pages accordingly.
++ '''
++
++ DLP_CHANGED = (
++ (nvme.NVME_LOG_LID_DISCOVER << 16) | (nvme.NVME_AER_NOTICE_DISC_CHANGED << 8) | nvme.NVME_AER_NOTICE
++ ) # 0x70f002
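++    # Derivation of the 0x70f002 value noted above:
++    #   NVME_LOG_LID_DISCOVER        = 0x70  ->  0x70 << 16  =  0x700000
++    #   NVME_AER_NOTICE_DISC_CHANGED = 0xf0  ->  0xf0 <<  8  =  0x00f000
++    #   NVME_AER_NOTICE              = 0x02  ->                 0x000002
++    #   OR'ed together                                       =  0x70f002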
++    GET_LOG_PAGE_RETRY_PERIOD_SEC = 20
++    REGISTRATION_RETRY_PERIOD_SEC = 10
++
++ def __init__(self, staf, root, host, tid: trid.TID, log_pages=None): # pylint: disable=too-many-arguments
++ super().__init__(root, host, tid, discovery_ctrl=True)
++ self._staf = staf
++ self._register_op = None
++ self._get_log_op = None
++ self._log_pages = log_pages if log_pages else list() # Log pages cache
++
++ def _release_resources(self):
++ logging.debug('Dc._release_resources() - %s | %s', self.id, self.device)
++ super()._release_resources()
++ self._log_pages = list()
++ self._staf = None
++
++ def _kill_ops(self):
++ super()._kill_ops()
++ if self._get_log_op:
++ self._get_log_op.kill()
++ self._get_log_op = None
++ if self._register_op:
++ self._register_op.kill()
++ self._register_op = None
++
++ def info(self) -> dict:
++ '''@brief Get the controller info for this object'''
++ info = super().info()
++ if self._get_log_op:
++ info['get log page operation'] = self._get_log_op.as_dict()
++ if self._register_op:
++ info['register operation'] = self._register_op.as_dict()
++ return info
++
++ def cancel(self):
++ '''@brief Used to cancel pending operations.'''
++ super().cancel()
++ if self._get_log_op:
++ self._get_log_op.cancel()
++ if self._register_op:
++ self._register_op.cancel()
++
++ def log_pages(self) -> list:
++ '''@brief Get the cached log pages for this object'''
++ return self._log_pages
++
++ def referrals(self) -> list:
++ '''@brief Return the list of referrals'''
++ return [page for page in self._log_pages if page['subtype'] == 'referral']
++
++ def _on_aen(self, aen: int):
++ if aen == self.DLP_CHANGED and self._get_log_op:
++ self._get_log_op.run_async()
++
++ def _on_nvme_event(self, nvme_event: str):
++ if nvme_event == 'connected' and self._register_op:
++ self._register_op.run_async()
++
++ def _on_ctrl_removed(self, obj):
++ super()._on_ctrl_removed(obj)
++ if self._try_to_connect_deferred:
++ self._try_to_connect_deferred.schedule()
++
++ def _find_existing_connection(self):
++ return self._udev.find_nvme_dc_device(self.tid)
++
++ # --------------------------------------------------------------------------
++ def _on_connect_success(self, op_obj, data):
++ '''@brief Function called when we successfully connect to the
++ Discovery Controller.
++ '''
++ super()._on_connect_success(op_obj, data)
++
++ if self._alive():
++ if self._ctrl.is_registration_supported():
++ self._register_op = gutil.AsyncOperationWithRetry(
++ self._on_registration_success,
++ self._on_registration_fail,
++ self._ctrl.registration_ctlr,
++ nvme.NVMF_DIM_TAS_REGISTER,
++ )
++ self._register_op.run_async()
++ else:
++ self._get_log_op = gutil.AsyncOperationWithRetry(
++ self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover
++ )
++ self._get_log_op.run_async()
++
++ # --------------------------------------------------------------------------
++ def _on_registration_success(self, op_obj, data): # pylint: disable=unused-argument
++ '''@brief Function called when we successfully register with the
++ Discovery Controller. See self._register_op object
++ for details.
++ '''
++ if self._alive():
++ if data is not None:
++ logging.warning('%s | %s - Registration error. %s.', self.id, self.device, data)
++ else:
++ logging.debug('Dc._on_registration_success() - %s | %s', self.id, self.device)
++ self._get_log_op = gutil.AsyncOperationWithRetry(
++ self._on_get_log_success, self._on_get_log_fail, self._ctrl.discover
++ )
++ self._get_log_op.run_async()
++ else:
++ logging.debug(
++ 'Dc._on_registration_success() - %s | %s Received event on dead object.', self.id, self.device
++ )
++
++ def _on_registration_fail(self, op_obj, err, fail_cnt):
++ '''@brief Function called when we fail to register with the
++ Discovery Controller. See self._register_op object
++ for details.
++ '''
++ if self._alive():
++ logging.debug(
++ 'Dc._on_registration_fail() - %s | %s: %s. Retry in %s sec',
++ self.id,
++ self.device,
++ err,
++                Dc.REGISTRATION_RETRY_PERIOD_SEC,
++ )
++            if fail_cnt == 1: # Throttle the logs. Only print the first time registration fails
++ logging.error('%s | %s - Failed to register with Discovery Controller. %s', self.id, self.device, err)
++            # op_obj.retry(Dc.REGISTRATION_RETRY_PERIOD_SEC)
++ else:
++ logging.debug(
++ 'Dc._on_registration_fail() - %s | %s Received event on dead object. %s',
++ self.id,
++ self.device,
++ err,
++ )
++ op_obj.kill()
++
++ # --------------------------------------------------------------------------
++ def _on_get_log_success(self, op_obj, data): # pylint: disable=unused-argument
++ '''@brief Function called when we successfully retrieve the log pages
++ from the Discovery Controller. See self._get_log_op object
++ for details.
++ '''
++ if self._alive():
++ # Note that for historical reasons too long to explain, the CDC may
++ # return invalid addresses ("0.0.0.0", "::", or ""). Those need to be
++ # filtered out.
++ referrals_before = self.referrals()
++ self._log_pages = (
++ [
++ {k: str(v) for k, v in dictionary.items()}
++ for dictionary in data
++ if dictionary.get('traddr') not in ('0.0.0.0', '::', '')
++ ]
++ if data
++ else list()
++ )
++ logging.info(
++ '%s | %s - Received discovery log pages (num records=%s).', self.id, self.device, len(self._log_pages)
++ )
++ referrals_after = self.referrals()
++ self._staf.log_pages_changed(self, self.device)
++ if referrals_after != referrals_before:
++ logging.debug(
++ 'Dc._on_get_log_success() - %s | %s Referrals before = %s',
++ self.id,
++ self.device,
++ referrals_before,
++ )
++ logging.debug(
++ 'Dc._on_get_log_success() - %s | %s Referrals after = %s',
++ self.id,
++ self.device,
++ referrals_after,
++ )
++ self._staf.referrals_changed()
++ else:
++ logging.debug(
++ 'Dc._on_get_log_success() - %s | %s Received event on dead object.', self.id, self.device
++ )
++
++ def _on_get_log_fail(self, op_obj, err, fail_cnt):
++ '''@brief Function called when we fail to retrieve the log pages
++ from the Discovery Controller. See self._get_log_op object
++ for details.
++ '''
++ if self._alive():
++ logging.debug(
++ 'Dc._on_get_log_fail() - %s | %s: %s. Retry in %s sec',
++ self.id,
++ self.device,
++ err,
++                Dc.GET_LOG_PAGE_RETRY_PERIOD_SEC,
++ )
++            if fail_cnt == 1: # Throttle the logs. Only print the first time the retrieval fails
++ logging.error('%s | %s - Failed to retrieve log pages. %s', self.id, self.device, err)
++            op_obj.retry(Dc.GET_LOG_PAGE_RETRY_PERIOD_SEC)
++ else:
++ logging.debug(
++ 'Dc._on_get_log_fail() - %s | %s Received event on dead object. %s',
++ self.id,
++ self.device,
++ err,
++ )
++ op_obj.kill()
++
++
++# ******************************************************************************
++class Ioc(Controller):
++ '''@brief This object establishes a connection to one I/O Controller.'''
++
++ def __init__(self, stac, root, host, tid: trid.TID):
++ self._stac = stac
++ super().__init__(root, host, tid)
++
++ def _release_resources(self):
++ super()._release_resources()
++ self._stac = None
++
++ def _on_ctrl_removed(self, obj):
++ '''Called when the associated nvme device (/dev/nvmeX) is removed
++ from the system.
++ '''
++ super()._on_ctrl_removed(obj)
++
++ # Defer removal of this object to the next main loop's idle period.
++ GLib.idle_add(self._stac.remove_controller, self, True)
++
++ def _find_existing_connection(self):
++ return self._udev.find_nvme_ioc_device(self.tid)
++
++ def _on_aen(self, aen: int):
++ pass
++
++ def _on_nvme_event(self, nvme_event):
++ pass
+diff --git a/staslib/gutil.py b/staslib/gutil.py
+index b302f3a..36ce2c7 100644
+--- a/staslib/gutil.py
++++ b/staslib/gutil.py
+@@ -104,8 +104,7 @@ class GTimer:
+
+
+ # ******************************************************************************
+-class NameResolver:
+- # pylint: disable=too-few-public-methods
++class NameResolver: # pylint: disable=too-few-public-methods
+ '''@brief DNS resolver to convert host names to IP addresses.'''
+
+ def __init__(self):
+@@ -133,8 +132,10 @@ class NameResolver:
+ else:
+ logging.error('Cannot resolve traddr: %s', hostname)
+
+- except GLib.GError:
+- logging.error('Cannot resolve traddr: %s', hostname)
++ except GLib.GError as err:
++ # We don't need to report "cancellation" errors.
++ if not err.matches(Gio.io_error_quark(), Gio.IOErrorEnum.CANCELLED):
++ logging.error('Cannot resolve traddr: %s. %s', hostname, err.message) # pylint: disable=no-member
+
+ logging.debug('NameResolver.resolve_ctrl_async() - resolved \'%s\' -> %s', hostname, traddr)
+ controllers[indx]['traddr'] = traddr
+diff --git a/staslib/log.py b/staslib/log.py
+index c624978..9622e98 100644
+--- a/staslib/log.py
++++ b/staslib/log.py
+@@ -24,7 +24,7 @@ def init(syslog: bool):
+ if syslog:
+ try:
+ # Try journal logger first
+- import systemd.journal # pylint: disable=redefined-outer-name,import-outside-toplevel
++ import systemd.journal # pylint: disable=import-outside-toplevel
+
+ handler = systemd.journal.JournalHandler(SYSLOG_IDENTIFIER=defs.PROG_NAME)
+ except ModuleNotFoundError:
+@@ -32,9 +32,7 @@ def init(syslog: bool):
+ from logging.handlers import SysLogHandler # pylint: disable=import-outside-toplevel
+
+ handler = SysLogHandler(address="/dev/log")
+- handler.setFormatter(
+- logging.Formatter('{}: %(message)s'.format(defs.PROG_NAME)) # pylint: disable=consider-using-f-string
+- )
++ handler.setFormatter(logging.Formatter(f'{defs.PROG_NAME}: %(message)s'))
+ else:
+ # Log to stdout
+ handler = logging.StreamHandler(stream=sys.stdout)
+diff --git a/staslib/service.py b/staslib/service.py
+index 556a9f9..a48e66d 100644
+--- a/staslib/service.py
++++ b/staslib/service.py
+@@ -9,248 +9,416 @@
+ '''This module defines the base Service object from
+ which the Staf and the Stac objects are derived.'''
+
+-import os
+-import signal
++import json
++import pickle
+ import logging
++import pathlib
+ import systemd.daemon
+-import dasbus.connection
++import dasbus.error
++import dasbus.client.observer
++import dasbus.client.proxy
+
+-from gi.repository import Gio, GLib
++from gi.repository import GLib
+ from libnvme import nvme
+-from staslib import conf, ctrl, defs, gutil, log, stas, trid, udev
++from staslib import avahi, conf, ctrl, defs, gutil, stas, trid, udev
+
+
+ # ******************************************************************************
+-class Service: # pylint: disable=too-many-instance-attributes
++class Service(stas.ServiceABC):
+ '''@brief Base class used to manage a STorage Appliance Service'''
+
+ def __init__(self, args, reload_hdlr):
+-
+ sysconf = conf.SysConf()
+ self._root = nvme.root()
+ self._host = nvme.host(self._root, sysconf.hostnqn, sysconf.hostid, sysconf.hostsymname)
+
+- service_conf = conf.SvcConf()
+- service_conf.set_conf_file(args.conf_file) # reload configuration
+- self._tron = args.tron or service_conf.tron
+- log.set_level_from_tron(self._tron)
+- self._root.log_level("debug" if self._tron else "err")
++ super().__init__(args, reload_hdlr)
+
+- self._lkc_file = os.path.join(os.environ.get('RUNTIME_DIRECTORY', os.path.join('/run', defs.PROG_NAME)), 'last-known-config.pickle')
+- self._loop = GLib.MainLoop()
+- self._udev = udev.UDEV
+- self._cancellable = Gio.Cancellable()
+- self._resolver = gutil.NameResolver()
+- self._controllers = self._load_last_known_config()
+- self._dbus_iface = None
+- self._cfg_soak_tmr = None
+- self._sysbus = dasbus.connection.SystemMessageBus()
+-
+- GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGINT, self._stop_hdlr) # CTRL-C
+- GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGTERM, self._stop_hdlr) # systemctl stop stafd
+- GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGHUP, reload_hdlr) # systemctl reload stafd
+-
+- nvme_options = conf.NvmeOptions()
+- if not nvme_options.host_iface_supp or not nvme_options.discovery_supp:
+- logging.warning(
+- 'Kernel does not appear to support all the options needed to run this program. Consider updating to a later kernel version.'
+- )
++ self._root.log_level("debug" if self._tron else "err")
+
+ def _release_resources(self):
+ logging.debug('Service._release_resources()')
++ super()._release_resources()
+
+- if self._cancellable and not self._cancellable.is_cancelled():
+- self._cancellable.cancel()
++ self._host = None
++ self._root = None
+
+- if self._cfg_soak_tmr is not None:
+- self._cfg_soak_tmr.kill()
++ @stas.ServiceABC.tron.setter
++ def tron(self, value):
++ '''@brief Set Trace ON property'''
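++        # Assigning through "super().tron = value" does not work for property
++        # setters (super objects do not support attribute assignment), hence
++        # the explicit descriptor __set__ call below.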
++ super(__class__, self.__class__).tron.__set__(self, value)
++ self._root.log_level("debug" if self._tron else "err")
+
+- self._controllers.clear()
+
+- if self._sysbus:
+- self._sysbus.disconnect()
++# ******************************************************************************
++def udev_rule_ctrl(enable):
++    '''@brief We add a masking udev rule (a symlink to /dev/null) under
++        /run/udev/rules.d to suppress nvme-cli's udev rule, which tells
++        udevd to automatically connect to I/O controllers. This avoids
++        race conditions between stacd and udevd. This behavior is
++        configurable; see "udev-rule" in stacd.conf for details.
++    '''
++ udev_rule_suppress = pathlib.Path('/run/udev/rules.d', '70-nvmf-autoconnect.rules')
++ if enable:
++ try:
++ udev_rule_suppress.unlink()
++ except FileNotFoundError:
++ pass
++ else:
++ if not udev_rule_suppress.exists():
++ pathlib.Path('/run/udev/rules.d').mkdir(parents=True, exist_ok=True)
++ udev_rule_suppress.symlink_to('/dev/null')
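++
++# Illustrative usage (a sketch mirroring the calls made by Stac below):
++#
++#     udev_rule_ctrl(conf.SvcConf().udev_rule_enabled)  # honor stacd.conf
++#     ...                                               # daemon runs
++#     udev_rule_ctrl(True)                              # restore the rule on exit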
+
+- self._cfg_soak_tmr = None
+- self._cancellable = None
+- self._resolver = None
+- self._lkc_file = None
+- self._sysbus = None
+- self._udev = None
+
+- def _config_dbus(self, iface_obj, bus_name: str, obj_name: str):
+- self._dbus_iface = iface_obj
+- self._sysbus.publish_object(obj_name, iface_obj)
+- self._sysbus.register_service(bus_name)
++# ******************************************************************************
++class Stac(Service):
++ '''STorage Appliance Connector (STAC)'''
+
+- @property
+- def tron(self):
+- '''@brief Get Trace ON property'''
+- return self._tron
++ CONF_STABILITY_LONG_SOAK_TIME_SEC = 10 # pylint: disable=invalid-name
++ ADD_EVENT_SOAK_TIME_SEC = 1
+
+- @tron.setter
+- def tron(self, value): # pylint: disable=no-self-use
+- '''@brief Set Trace ON property'''
+- self._tron = value
+- log.set_level_from_tron(self._tron)
+- self._root.log_level("debug" if self._tron else "err")
++ def __init__(self, args, dbus):
++ super().__init__(args, self._reload_hdlr)
+
+- def run(self):
+- '''@brief Start the main loop execution'''
+- try:
+- self._loop.run()
+- except Exception as ex: # pylint: disable=broad-except
+- logging.critical('exception: %s', ex)
++ self._udev = udev.UDEV
+
+- self._loop = None
++ self._add_event_soak_tmr = gutil.GTimer(self.ADD_EVENT_SOAK_TIME_SEC, self._on_add_event_soaked)
+
+- def info(self) -> dict:
+- '''@brief Get the status info for this object (used for debug)'''
+- nvme_options = conf.NvmeOptions()
+- return {
+- 'last known config file': self._lkc_file,
+- 'config soak timer': str(self._cfg_soak_tmr),
+- 'kernel support': {
+- 'TP8013': nvme_options.discovery_supp,
+- 'host_iface': nvme_options.host_iface_supp,
+- },
+- 'system config': conf.SysConf().as_dict(),
+- }
+-
+- def get_controllers(self):
+- '''@brief return the list of controller objects'''
+- return self._controllers.values()
+-
+- def get_controller(
+- self, transport: str, traddr: str, trsvcid: str, host_traddr: str, host_iface: str, subsysnqn: str
+- ): # pylint: disable=too-many-arguments
+- '''@brief get the specified controller object from the list of controllers'''
+- cid = {
+- 'transport': transport,
+- 'traddr': traddr,
+- 'trsvcid': trsvcid,
+- 'host-traddr': host_traddr,
+- 'host-iface': host_iface,
+- 'subsysnqn': subsysnqn,
+- }
+- return self._controllers.get(trid.TID(cid))
+-
+- def _remove_ctrl_from_dict(self, controller):
+- tid_to_pop = controller.tid
+- if not tid_to_pop:
+- # Being paranoid. This should not happen, but let's say the
+- # controller object has been purged, but it is somehow still
+- # listed in self._controllers.
+- for tid, _controller in self._controllers.items():
+- if _controller is controller:
+- tid_to_pop = tid
+- break
+-
+- if tid_to_pop:
+- logging.debug('Service._remove_ctrl_from_dict() - %s | %s', tid_to_pop, controller.device)
+- self._controllers.pop(tid_to_pop, None)
+- else:
+- logging.debug('Service._remove_ctrl_from_dict() - already removed')
++ self._config_connections_audit()
+
+- def remove_controller(self, controller):
+- '''@brief remove the specified controller object from the list of controllers'''
+- logging.debug('Service.remove_controller()')
+- if isinstance(controller, ctrl.Controller):
+- self._remove_ctrl_from_dict(controller)
++ # Create the D-Bus instance.
++ self._config_dbus(dbus, defs.STACD_DBUS_NAME, defs.STACD_DBUS_PATH)
+
+- controller.kill()
++ # Connect to STAF D-Bus interface
++ self._staf = None
++ self._staf_watcher = dasbus.client.observer.DBusObserver(self._sysbus, defs.STAFD_DBUS_NAME)
++ self._staf_watcher.service_available.connect(self._connect_to_staf)
++ self._staf_watcher.service_unavailable.connect(self._disconnect_from_staf)
++ self._staf_watcher.connect_once_available()
+
+- if self._cfg_soak_tmr:
+- self._cfg_soak_tmr.start()
++ # Suppress udev rule to auto-connect when AEN is received.
++ udev_rule_ctrl(conf.SvcConf().udev_rule_enabled)
+
+- def _cancel(self):
+- logging.debug('Service._cancel()')
+- if not self._cancellable.is_cancelled():
+- self._cancellable.cancel()
++ def _release_resources(self):
++ logging.debug('Stac._release_resources()')
++
++ if self._add_event_soak_tmr:
++ self._add_event_soak_tmr.kill()
++
++ udev_rule_ctrl(True)
++
++ if self._udev:
++ self._udev.unregister_for_action_events('add')
++
++ self._destroy_staf_comlink(self._staf_watcher)
++ if self._staf_watcher is not None:
++ self._staf_watcher.disconnect()
+
+- for controller in self._controllers.values():
+- controller.cancel()
++ super()._release_resources()
++
++ self._udev = None
++ self._staf = None
++ self._staf_watcher = None
++ self._add_event_soak_tmr = None
++
++ def _audit_connections(self, tids):
++        '''A host should only connect to I/O controllers that have been zoned
++        for that host, or for which a manual "controller" entry exists in
++        stacd.conf. Conversely, a host should disconnect from an I/O
++        controller when that controller is removed from the zone, or when
++        its manual "controller" entry is removed from stacd.conf. stacd
++        audits connections when "sticky-connections=disabled" and deletes
++        any connection that is not supposed to exist.
++        '''
++ logging.debug('Stac._audit_connections() - tids = %s', tids)
++ num_controllers = len(self._controllers)
++ for tid in tids:
++ if tid not in self._controllers:
++ self._controllers[tid] = ctrl.Ioc(self, self._root, self._host, tid)
++
++ if num_controllers != len(self._controllers):
++ self._cfg_soak_tmr.start(self.CONF_STABILITY_SOAK_TIME_SEC)
++
++ def _on_add_event(self, udev_obj): # pylint: disable=unused-argument
++        '''@brief This function is called when an "add" event is received from
++ the kernel for an NVMe device. This is used to trigger an audit and make
++ sure that the connection to an I/O controller is allowed.
++
++ WARNING: There is a race condition with the "add" event from the kernel.
++ The kernel sends the "add" event a bit early and the sysfs attributes
++ associated with the nvme object are not always fully initialized.
++        To work around this problem, we use a soaking timer to give time for the
++ sysfs attributes to stabilize.
++ '''
++ self._add_event_soak_tmr.start()
++
++ def _on_add_event_soaked(self):
++ '''@brief After the add event has been soaking for ADD_EVENT_SOAK_TIME_SEC
++ seconds, we can audit the connections.
++ '''
++ if not conf.SvcConf().sticky_connections:
++ self._audit_connections(self._udev.get_nvme_ioc_tids())
++ return GLib.SOURCE_REMOVE
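++
++    # Soak pattern, as a sketch (assuming GTimer.start() re-arms the countdown):
++    #   t=0.0s  "add" nvme6  ->  timer armed, due at t=1.0s
++    #   t=0.4s  "add" nvme7  ->  timer re-armed, now due at t=1.4s
++    #   t=1.4s  _on_add_event_soaked() runs: a single audit covers both devices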
++
++ def _config_connections_audit(self):
++ '''This function checks the "sticky_connections" parameter to determine
++ whether audits should be performed. Audits are enabled when
++ "sticky_connections" is disabled.
++ '''
++ if not conf.SvcConf().sticky_connections:
++ if self._udev.get_registered_action_cback('add') is None:
++ self._udev.register_for_action_events('add', self._on_add_event)
++ self._audit_connections(self._udev.get_nvme_ioc_tids())
++ else:
++ self._udev.unregister_for_action_events('add')
+
+ def _keep_connections_on_exit(self):
+ '''@brief Determine whether connections should remain when the
+ process exits.
+-
+- NOTE) This is the base class method used to define the interface.
+- It must be overloaded by a child class.
+ '''
+- raise NotImplementedError()
++ return True
+
+- def _stop_hdlr(self):
+- systemd.daemon.notify('STOPPING=1')
++ def _reload_hdlr(self):
++ '''@brief Reload configuration file. This is triggered by the SIGHUP
++ signal, which can be sent with "systemctl reload stacd".
++ '''
++ systemd.daemon.notify('RELOADING=1')
++ service_cnf = conf.SvcConf()
++ service_cnf.reload()
++ self.tron = service_cnf.tron
++ self._config_connections_audit()
++ self._cfg_soak_tmr.start(self.CONF_STABILITY_SOAK_TIME_SEC)
++ udev_rule_ctrl(service_cnf.udev_rule_enabled)
++ systemd.daemon.notify('READY=1')
++ return GLib.SOURCE_CONTINUE
++
++ def _get_log_pages_from_stafd(self):
++ if self._staf:
++ try:
++ return json.loads(self._staf.get_all_log_pages(True))
++ except dasbus.error.DBusError:
++ pass
++
++ return list()
+
+- self._cancel() # Cancel pending operations
++ def _config_ctrls_finish(self, configured_ctrl_list):
++ configured_ctrl_list = [
++ ctrl_dict for ctrl_dict in configured_ctrl_list if 'traddr' in ctrl_dict and 'subsysnqn' in ctrl_dict
++ ]
++ logging.debug('Stac._config_ctrls_finish() - configured_ctrl_list = %s', configured_ctrl_list)
++
++ discovered_ctrl_list = list()
++ for staf_data in self._get_log_pages_from_stafd():
++ host_traddr = staf_data['discovery-controller']['host-traddr']
++ host_iface = staf_data['discovery-controller']['host-iface']
++ for dlpe in staf_data['log-pages']:
++ if dlpe.get('subtype') == 'nvme': # eliminate discovery controllers
++ discovered_ctrl_list.append(stas.cid_from_dlpe(dlpe, host_traddr, host_iface))
++
++ logging.debug('Stac._config_ctrls_finish() - discovered_ctrl_list = %s', discovered_ctrl_list)
++
++ controllers = stas.remove_blacklisted(configured_ctrl_list + discovered_ctrl_list)
++ controllers = stas.remove_invalid_addresses(controllers)
++
++ new_controller_ids = {trid.TID(controller) for controller in controllers}
++ cur_controller_ids = set(self._controllers.keys())
++ controllers_to_add = new_controller_ids - cur_controller_ids
++ controllers_to_del = cur_controller_ids - new_controller_ids
++
++ logging.debug('Stac._config_ctrls_finish() - controllers_to_add = %s', list(controllers_to_add))
++ logging.debug('Stac._config_ctrls_finish() - controllers_to_del = %s', list(controllers_to_del))
++
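++        # Reconciliation by set difference, e.g. with current = {A, B} and
++        # new = {B, C}: C gets connected below, A gets disconnected, and B
++        # is left untouched.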
++ for tid in controllers_to_del:
++ controller = self._controllers.pop(tid, None)
++ if controller is not None:
++ controller.disconnect(self.remove_controller, conf.SvcConf().sticky_connections)
++
++ for tid in controllers_to_add:
++ self._controllers[tid] = ctrl.Ioc(self, self._root, self._host, tid)
++
++ def _connect_to_staf(self, _):
++ '''@brief Hook up DBus signal handlers for signals from stafd.'''
++ try:
++ self._staf = self._sysbus.get_proxy(defs.STAFD_DBUS_NAME, defs.STAFD_DBUS_PATH)
++ self._staf.log_pages_changed.connect(self._log_pages_changed)
++ self._cfg_soak_tmr.start()
+
+- self._dump_last_known_config(self._controllers)
++ # Make sure timer is set back to its normal value.
++ self._cfg_soak_tmr.set_timeout(self.CONF_STABILITY_SOAK_TIME_SEC)
++ logging.debug('Stac._connect_to_staf() - Connected to staf')
++ except dasbus.error.DBusError:
++ logging.error('Failed to connect to staf')
++
++ def _destroy_staf_comlink(self, watcher): # pylint: disable=unused-argument
++ if self._staf:
++ self._staf.log_pages_changed.disconnect(self._log_pages_changed)
++ dasbus.client.proxy.disconnect_proxy(self._staf)
++ self._staf = None
++
++ def _disconnect_from_staf(self, watcher):
++ self._destroy_staf_comlink(watcher)
++
++ # When we lose connectivity with stafd, the most logical explanation
++ # is that stafd restarted. In that case, it may take some time for stafd
++ # to re-populate its log pages cache. So let's give stafd plenty of time
++ # to update its log pages cache and send log pages change notifications
++ # before triggering a stacd re-config. We do this by momentarily
++ # increasing the config soak timer to a longer period.
++ if self._cfg_soak_tmr:
++ self._cfg_soak_tmr.set_timeout(self.CONF_STABILITY_LONG_SOAK_TIME_SEC)
++
++ logging.debug('Stac._disconnect_from_staf() - Disconnected from staf')
++
++ def _log_pages_changed( # pylint: disable=too-many-arguments
++ self, transport, traddr, trsvcid, host_traddr, host_iface, subsysnqn, device
++ ):
++ logging.debug(
++ 'Stac._log_pages_changed() - transport=%s, traddr=%s, trsvcid=%s, host_traddr=%s, host_iface=%s, subsysnqn=%s, device=%s',
++ transport,
++ traddr,
++ trsvcid,
++ host_traddr,
++ host_iface,
++ subsysnqn,
++ device,
++ )
++ if self._cfg_soak_tmr:
++ self._cfg_soak_tmr.start(self.CONF_STABILITY_SOAK_TIME_SEC)
+
+- if len(self._controllers) == 0:
+- GLib.idle_add(self._exit)
+- else:
+- # Tell all controller objects to disconnect
+- keep_connections = self._keep_connections_on_exit()
+- controllers = self._controllers.values()
+- for controller in controllers:
+- controller.disconnect(self._on_final_disconnect, keep_connections)
++ def _load_last_known_config(self):
++ return dict()
+
+- return GLib.SOURCE_REMOVE
++ def _dump_last_known_config(self, controllers):
++ pass
+
+- def _on_final_disconnect(self, controller):
+- '''Callback invoked after a controller is disconnected.
+- THIS IS USED DURING PROCESS SHUTDOWN TO WAIT FOR ALL CONTROLLERS TO BE
+- DISCONNECTED BEFORE EXITING THE PROGRAM. ONLY CALL ON SHUTDOWN!
+- '''
+- logging.debug('Service._on_final_disconnect()')
+- self._remove_ctrl_from_dict(controller)
+
+- controller.kill()
++# ******************************************************************************
++class Staf(Service):
++ '''STorage Appliance Finder (STAF)'''
+
+- # When all controllers have disconnected, we can finish the clean up
+- if len(self._controllers) == 0:
+- # Defer exit to the next main loop's idle period.
+- GLib.idle_add(self._exit)
++ def __init__(self, args, dbus):
++ super().__init__(args, self._reload_hdlr)
+
+- def _exit(self):
+- logging.debug('Service._exit()')
+- self._release_resources()
+- self._loop.quit()
++ self._avahi = avahi.Avahi(self._sysbus, self._avahi_change)
++ self._avahi.config_stypes(conf.SvcConf().get_stypes())
+
+- def _on_config_ctrls(self, *_user_data):
+- self._config_ctrls()
+- return GLib.SOURCE_REMOVE
++ # Create the D-Bus instance.
++ self._config_dbus(dbus, defs.STAFD_DBUS_NAME, defs.STAFD_DBUS_PATH)
+
+- def _config_ctrls(self):
+- '''@brief Start controllers configuration.'''
+- # The configuration file may contain controllers and/or blacklist
+- # elements with traddr specified as hostname instead of IP address.
+- # Because of this, we need to remove those blacklisted elements before
+- # running name resolution. And we will need to remove blacklisted
+- # elements after name resolution is complete (i.e. in the calback
+- # function _config_ctrls_finish)
+- logging.debug('Service._config_ctrls()')
+- configured_controllers = stas.remove_blacklisted(conf.SvcConf().get_controllers())
+- self._resolver.resolve_ctrl_async(self._cancellable, configured_controllers, self._config_ctrls_finish)
++ def info(self) -> dict:
++ '''@brief Get the status info for this object (used for debug)'''
++ info = super().info()
++ info['avahi'] = self._avahi.info()
++ return info
+
+- def _config_ctrls_finish(self, configured_ctrl_list):
+- '''@brief Finish controllers configuration after hostnames (if any)
+- have been resolved.
+-
+- Configuring controllers must be done asynchronously in 2 steps.
+- In the first step, host names get resolved to find their IP addresses.
+- Name resolution can take a while, especially when an external name
+- resolution server is used. Once that step completed, the callback
+- method _config_ctrls_finish() (i.e. this method), gets invoked to
+- complete the controller configuration.
+-
+- NOTE) This is the base class method used to define the interface.
+- It must be overloaded by a child class.
+- '''
+- raise NotImplementedError()
++ def _release_resources(self):
++ logging.debug('Staf._release_resources()')
++ super()._release_resources()
++ if self._avahi:
++ self._avahi.kill()
++ self._avahi = None
+
+ def _load_last_known_config(self):
+- raise NotImplementedError()
++ try:
++ with open(self._lkc_file, 'rb') as file:
++ config = pickle.load(file)
++ except (FileNotFoundError, AttributeError):
++ return dict()
++
++ logging.debug('Staf._load_last_known_config() - DC count = %s', len(config))
++ return {tid: ctrl.Dc(self, self._root, self._host, tid, log_pages) for tid, log_pages in config.items()}
+
+ def _dump_last_known_config(self, controllers):
+- raise NotImplementedError()
++ try:
++ with open(self._lkc_file, 'wb') as file:
++ config = {tid: dc.log_pages() for tid, dc in controllers.items()}
++ logging.debug('Staf._dump_last_known_config() - DC count = %s', len(config))
++ pickle.dump(config, file)
++ except FileNotFoundError as ex:
++ logging.error('Unable to save last known config: %s', ex)
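++
++    # The pickled payload is a plain {TID: log_pages} mapping; the round-trip
++    # with _load_last_known_config() above is simply (sketch):
++    #     pickle.dump({tid: dc.log_pages() for tid, dc in controllers.items()}, file)
++    #     config = pickle.load(file)  # on the next startup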
++
++ def _keep_connections_on_exit(self):
++ '''@brief Determine whether connections should remain when the
++ process exits.
++ '''
++ return conf.SvcConf().persistent_connections
++
++ def _reload_hdlr(self):
++ '''@brief Reload configuration file. This is triggered by the SIGHUP
++ signal, which can be sent with "systemctl reload stafd".
++ '''
++ systemd.daemon.notify('RELOADING=1')
++ service_cnf = conf.SvcConf()
++ service_cnf.reload()
++ self.tron = service_cnf.tron
++ self._avahi.kick_start() # Make sure Avahi is running
++ self._avahi.config_stypes(service_cnf.get_stypes())
++ self._cfg_soak_tmr.start()
++ systemd.daemon.notify('READY=1')
++ return GLib.SOURCE_CONTINUE
++
++ def log_pages_changed(self, controller, device):
++ '''@brief Function invoked when a controller's cached log pages
++ have changed. This will emit a D-Bus signal to inform
++ other applications that the cached log pages have changed.
++ '''
++ self._dbus_iface.log_pages_changed.emit(
++ controller.tid.transport,
++ controller.tid.traddr,
++ controller.tid.trsvcid,
++ controller.tid.host_traddr,
++ controller.tid.host_iface,
++ controller.tid.subsysnqn,
++ device,
++ )
++
++ def referrals_changed(self):
++ '''@brief Function invoked when a controller's cached referrals
++ have changed.
++ '''
++ logging.debug('Staf.referrals_changed()')
++ self._cfg_soak_tmr.start()
++
++ def _referrals(self) -> list:
++ return [
++ stas.cid_from_dlpe(dlpe, controller.tid.host_traddr, controller.tid.host_iface)
++ for controller in self.get_controllers()
++ for dlpe in controller.referrals()
++ ]
++
++ def _config_ctrls_finish(self, configured_ctrl_list):
++ '''@brief Finish discovery controllers configuration after
++ hostnames (if any) have been resolved.
++ '''
++ configured_ctrl_list = [
++ ctrl_dict
++ for ctrl_dict in configured_ctrl_list
++ if 'traddr' in ctrl_dict and ctrl_dict.setdefault('subsysnqn', defs.WELL_KNOWN_DISC_NQN)
++ ]
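++        # Note: dict.setdefault() returns the (truthy) value it stores, so the
++        # comprehension above filters on the presence of 'traddr' while also
++        # normalizing a missing 'subsysnqn' to the well-known discovery NQN
++        # in a single pass.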
++
++ discovered_ctrl_list = self._avahi.get_controllers()
++ referral_ctrl_list = self._referrals()
++ logging.debug('Staf._config_ctrls_finish() - configured_ctrl_list = %s', configured_ctrl_list)
++ logging.debug('Staf._config_ctrls_finish() - discovered_ctrl_list = %s', discovered_ctrl_list)
++ logging.debug('Staf._config_ctrls_finish() - referral_ctrl_list = %s', referral_ctrl_list)
++
++ controllers = stas.remove_blacklisted(configured_ctrl_list + discovered_ctrl_list + referral_ctrl_list)
++ controllers = stas.remove_invalid_addresses(controllers)
++
++ new_controller_ids = {trid.TID(controller) for controller in controllers}
++ cur_controller_ids = set(self._controllers.keys())
++ controllers_to_add = new_controller_ids - cur_controller_ids
++ controllers_to_del = cur_controller_ids - new_controller_ids
++
++ logging.debug('Staf._config_ctrls_finish() - controllers_to_add = %s', list(controllers_to_add))
++ logging.debug('Staf._config_ctrls_finish() - controllers_to_del = %s', list(controllers_to_del))
++
++ for tid in controllers_to_del:
++ controller = self._controllers.pop(tid, None)
++ if controller is not None:
++ controller.disconnect(self.remove_controller, conf.SvcConf().persistent_connections)
++
++ for tid in controllers_to_add:
++ self._controllers[tid] = ctrl.Dc(self, self._root, self._host, tid)
++
++ def _avahi_change(self):
++ self._cfg_soak_tmr.start()
+diff --git a/staslib/stas.py b/staslib/stas.py
+index 7bf91e0..496f063 100644
+--- a/staslib/stas.py
++++ b/staslib/stas.py
+@@ -6,14 +6,19 @@
+ #
+ # Authors: Martin Belanger
+ #
+-'''Library for staf/stac'''
++'''Library for staf/stac: common code shared by stafd and stacd, including
++the Abstract Base Classes (ABCs) for Controllers and Services.'''
+
+ import os
+ import sys
+-import ipaddress
++import abc
++import signal
+ import logging
+-
+-from staslib import conf, defs, trid
++import ipaddress
++import systemd.daemon
++import dasbus.connection
++from gi.repository import Gio, GLib
++from staslib import conf, defs, gutil, log, trid
+
+
+ # ******************************************************************************
+@@ -108,3 +113,379 @@ def remove_invalid_addresses(controllers: list):
+ logging.warning('Invalid transport %s', transport)
+
+ return valid_controllers
++
++
++# ******************************************************************************
++class ControllerABC(abc.ABC): # pylint: disable=too-many-instance-attributes
++ '''@brief Base class used to manage the connection to a controller.'''
++
++ CONNECT_RETRY_PERIOD_SEC = 60
++ FAST_CONNECT_RETRY_PERIOD_SEC = 3
++
++ def __init__(self, root, host, tid: trid.TID, discovery_ctrl=False):
++ self._root = root
++ self._host = host
++ self._tid = tid
++ self._cancellable = Gio.Cancellable()
++ self._connect_attempts = 0
++ self._retry_connect_tmr = gutil.GTimer(self.CONNECT_RETRY_PERIOD_SEC, self._on_try_to_connect)
++ self._discovery_ctrl = discovery_ctrl
++ self._try_to_connect_deferred = gutil.Deferred(self._try_to_connect)
++ self._try_to_connect_deferred.schedule()
++
++ def _release_resources(self):
++ # Remove pending deferred from main loop
++ if self._try_to_connect_deferred:
++ self._try_to_connect_deferred.cancel()
++
++ if self._retry_connect_tmr is not None:
++ self._retry_connect_tmr.kill()
++
++ if self._cancellable and not self._cancellable.is_cancelled():
++ self._cancellable.cancel()
++
++ self._tid = None
++ self._cancellable = None
++ self._retry_connect_tmr = None
++ self._try_to_connect_deferred = None
++
++ @property
++ def id(self) -> str:
++ '''@brief Return the Transport ID as a printable string'''
++ return str(self.tid)
++
++ @property
++ def tid(self):
++ '''@brief Return the Transport ID object'''
++ return self._tid
++
++ def controller_id_dict(self) -> dict:
++ '''@brief return the controller ID as a dict.'''
++ return self.tid.as_dict()
++
++ def details(self) -> dict:
++ '''@brief return detailed debug info about this controller'''
++ details = self.controller_id_dict()
++ details['connect attempts'] = str(self._connect_attempts)
++ details['retry connect timer'] = str(self._retry_connect_tmr)
++ return details
++
++ def info(self) -> dict:
++ '''@brief Get the controller info for this object'''
++ return self.details()
++
++ def cancel(self):
++ '''@brief Used to cancel pending operations.'''
++ if self._cancellable and not self._cancellable.is_cancelled():
++ logging.debug('ControllerABC.cancel() - %s', self.id)
++ self._cancellable.cancel()
++
++ def kill(self):
++ '''@brief Used to release all resources associated with this object.'''
++ logging.debug('ControllerABC.kill() - %s', self.id)
++ self._release_resources()
++
++ def _alive(self):
++        '''There may be a race condition where a queued event gets processed
++ after the object is no longer configured (i.e. alive). This method
++ can be used by callback functions to make sure the object is still
++ alive before processing further.
++ '''
++ return self._cancellable and not self._cancellable.is_cancelled()
++
++ def _on_try_to_connect(self):
++ self._try_to_connect_deferred.schedule()
++ return GLib.SOURCE_REMOVE
++
++ def _try_to_connect(self):
++ # This is a deferred function call. Make sure
++ # the source of the deferred is still good.
++ source = GLib.main_current_source()
++ if source and source.is_destroyed():
++ return
++
++ self._connect_attempts += 1
++
++ self._do_connect()
++
++ @abc.abstractmethod
++ def _do_connect(self):
++ raise NotImplementedError()
++
++ @abc.abstractmethod
++ def _on_aen(self, aen: int):
++ raise NotImplementedError()
++
++ @abc.abstractmethod
++ def _on_nvme_event(self, nvme_event):
++ raise NotImplementedError()
++
++ @abc.abstractmethod
++ def _on_ctrl_removed(self, obj):
++ raise NotImplementedError()
++
++ @abc.abstractmethod
++ def _find_existing_connection(self):
++ raise NotImplementedError()
++
++ @abc.abstractmethod
++ def disconnect(self, disconnected_cb, keep_connection):
++ '''@brief Issue an asynchronous disconnect command to a Controller.
++ Once the async command has completed, the callback 'disconnected_cb'
++ will be invoked. If a controller is already disconnected, then the
++ callback will be added to the main loop's next idle slot to be executed
++ ASAP.
++ '''
++ raise NotImplementedError()
++
++
++# ******************************************************************************
++class ServiceABC(abc.ABC): # pylint: disable=too-many-instance-attributes
++ '''@brief Base class used to manage a STorage Appliance Service'''
++
++ CONF_STABILITY_SOAK_TIME_SEC = 1.5
++
++ def __init__(self, args, reload_hdlr):
++
++ service_conf = conf.SvcConf()
++ service_conf.set_conf_file(args.conf_file) # reload configuration
++ self._tron = args.tron or service_conf.tron
++ log.set_level_from_tron(self._tron)
++
++ self._lkc_file = os.path.join(os.environ.get('RUNTIME_DIRECTORY', os.path.join('/run', defs.PROG_NAME)), 'last-known-config.pickle')
++ self._loop = GLib.MainLoop()
++ self._cancellable = Gio.Cancellable()
++ self._resolver = gutil.NameResolver()
++ self._controllers = self._load_last_known_config()
++ self._dbus_iface = None
++ self._cfg_soak_tmr = gutil.GTimer(self.CONF_STABILITY_SOAK_TIME_SEC, self._on_config_ctrls)
++ self._sysbus = dasbus.connection.SystemMessageBus()
++
++ GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGINT, self._stop_hdlr) # CTRL-C
++ GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGTERM, self._stop_hdlr) # systemctl stop stafd
++ GLib.unix_signal_add(GLib.PRIORITY_HIGH, signal.SIGHUP, reload_hdlr) # systemctl reload stafd
++
++ nvme_options = conf.NvmeOptions()
++ if not nvme_options.host_iface_supp or not nvme_options.discovery_supp:
++ logging.warning(
++ 'Kernel does not appear to support all the options needed to run this program. Consider updating to a later kernel version.'
++ )
++
++ # We don't want to apply configuration changes to nvme-cli right away.
++ # Often, multiple changes will occur in a short amount of time (sub-second).
++ # We want to wait until there are no more changes before applying them
++ # to the system. The following timer acts as a "soak period". Changes
++ # will be applied by calling self._on_config_ctrls() at the end of
++ # the soak period.
++ self._cfg_soak_tmr.start()
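++
++        # GTimer usage in this module, as a sketch of the assumed API:
++        #     tmr = gutil.GTimer(1.5, callback)  # created; fires after start()
++        #     tmr.start()                        # (re)arm; start(sec) re-arms too
++        #     tmr.set_timeout(10)                # change the period (long soak)
++        #     tmr.kill()                         # cancel and release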
++
++ def _release_resources(self):
++ logging.debug('ServiceABC._release_resources()')
++
++ if self._cancellable and not self._cancellable.is_cancelled():
++ self._cancellable.cancel()
++
++ if self._cfg_soak_tmr is not None:
++ self._cfg_soak_tmr.kill()
++
++ self._controllers.clear()
++
++ if self._sysbus:
++ self._sysbus.disconnect()
++
++ self._cfg_soak_tmr = None
++ self._cancellable = None
++ self._resolver = None
++ self._lkc_file = None
++ self._sysbus = None
++
++ def _config_dbus(self, iface_obj, bus_name: str, obj_name: str):
++ self._dbus_iface = iface_obj
++ self._sysbus.publish_object(obj_name, iface_obj)
++ self._sysbus.register_service(bus_name)
++
++ @property
++ def tron(self):
++ '''@brief Get Trace ON property'''
++ return self._tron
++
++ @tron.setter
++ def tron(self, value):
++ '''@brief Set Trace ON property'''
++ self._tron = value
++ log.set_level_from_tron(self._tron)
++
++ def run(self):
++ '''@brief Start the main loop execution'''
++ try:
++ self._loop.run()
++ except Exception as ex: # pylint: disable=broad-except
++ logging.critical('exception: %s', ex)
++
++ self._loop = None
++
++ def info(self) -> dict:
++ '''@brief Get the status info for this object (used for debug)'''
++ nvme_options = conf.NvmeOptions()
++ return {
++ 'last known config file': self._lkc_file,
++ 'config soak timer': str(self._cfg_soak_tmr),
++ 'kernel support': {
++ 'TP8013': nvme_options.discovery_supp,
++ 'host_iface': nvme_options.host_iface_supp,
++ },
++ 'system config': conf.SysConf().as_dict(),
++ }
++
++    def get_controllers(self):
++        '''@brief return the collection of controller objects'''
++        return self._controllers.values()
++
++ def get_controller(
++ self, transport: str, traddr: str, trsvcid: str, host_traddr: str, host_iface: str, subsysnqn: str
++ ): # pylint: disable=too-many-arguments
++ '''@brief get the specified controller object from the list of controllers'''
++ cid = {
++ 'transport': transport,
++ 'traddr': traddr,
++ 'trsvcid': trsvcid,
++ 'host-traddr': host_traddr,
++ 'host-iface': host_iface,
++ 'subsysnqn': subsysnqn,
++ }
++ return self._controllers.get(trid.TID(cid))
++
++ def _remove_ctrl_from_dict(self, controller):
++ tid_to_pop = controller.tid
++ if not tid_to_pop:
++ # Being paranoid. This should not happen, but let's say the
++ # controller object has been purged, but it is somehow still
++ # listed in self._controllers.
++ for tid, _controller in self._controllers.items():
++ if _controller is controller:
++ tid_to_pop = tid
++ break
++
++ if tid_to_pop:
++            logging.debug('ServiceABC._remove_ctrl_from_dict() - %s | %s', tid_to_pop, controller.device)
++ self._controllers.pop(tid_to_pop, None)
++ else:
++            logging.debug('ServiceABC._remove_ctrl_from_dict() - already removed')
++
++ def remove_controller(self, controller, success): # pylint: disable=unused-argument
++ '''@brief remove the specified controller object from the list of controllers
++ @param controller: the controller object
++ @param success: whether the disconnect was successful'''
++ logging.debug('ServiceABC.remove_controller()')
++ if isinstance(controller, ControllerABC):
++ self._remove_ctrl_from_dict(controller)
++
++ controller.kill()
++
++ if self._cfg_soak_tmr:
++ self._cfg_soak_tmr.start()
++
++ def _cancel(self):
++ logging.debug('ServiceABC._cancel()')
++ if not self._cancellable.is_cancelled():
++ self._cancellable.cancel()
++
++ for controller in self._controllers.values():
++ controller.cancel()
++
++ def _stop_hdlr(self):
++ logging.debug('ServiceABC._stop_hdlr()')
++ systemd.daemon.notify('STOPPING=1')
++
++ self._cancel() # Cancel pending operations
++
++ self._dump_last_known_config(self._controllers)
++
++ if len(self._controllers) == 0:
++ GLib.idle_add(self._exit)
++ else:
++ # Tell all controller objects to disconnect
++ keep_connections = self._keep_connections_on_exit()
++ controllers = self._controllers.values()
++ logging.debug(
++ 'ServiceABC._stop_hdlr() - Controller count = %s, keep_connections = %s',
++ len(controllers), keep_connections
++ )
++ for controller in controllers:
++ controller.disconnect(self._on_final_disconnect, keep_connections)
++
++ return GLib.SOURCE_REMOVE
++
++ def _on_final_disconnect(self, controller, success):
++ '''Callback invoked after a controller is disconnected.
++ THIS IS USED DURING PROCESS SHUTDOWN TO WAIT FOR ALL CONTROLLERS TO BE
++ DISCONNECTED BEFORE EXITING THE PROGRAM. ONLY CALL ON SHUTDOWN!
++ @param controller: the controller object
++ @param success: whether the disconnect operation was successful
++ '''
++ logging.debug('ServiceABC._on_final_disconnect() - %s | %s disconnect %s',
++ controller.id, controller.device, 'succeeded' if success else 'failed')
++ self._remove_ctrl_from_dict(controller)
++
++ controller.kill()
++
++ # When all controllers have disconnected, we can finish the clean up
++ if len(self._controllers) == 0:
++ # Defer exit to the next main loop's idle period.
++ GLib.idle_add(self._exit)
++
++ def _exit(self):
++ logging.debug('ServiceABC._exit()')
++ self._release_resources()
++ self._loop.quit()
++
++ def _on_config_ctrls(self, *_user_data):
++ self._config_ctrls()
++ return GLib.SOURCE_REMOVE
++
++ def _config_ctrls(self):
++ '''@brief Start controllers configuration.'''
++ # The configuration file may contain controllers and/or blacklist
++ # elements with traddr specified as hostname instead of IP address.
++ # Because of this, we need to remove those blacklisted elements before
++ # running name resolution. And we will need to remove blacklisted
++        # elements after name resolution is complete (i.e. in the callback
++ # function _config_ctrls_finish)
++ logging.debug('ServiceABC._config_ctrls()')
++ configured_controllers = remove_blacklisted(conf.SvcConf().get_controllers())
++ self._resolver.resolve_ctrl_async(self._cancellable, configured_controllers, self._config_ctrls_finish)
++
++ @abc.abstractmethod
++ def _keep_connections_on_exit(self):
++ '''@brief Determine whether connections should remain when the
++ process exits.
++
++ NOTE) This is the base class method used to define the interface.
++ It must be overloaded by a child class.
++ '''
++ raise NotImplementedError()
++
++ @abc.abstractmethod
++ def _config_ctrls_finish(self, configured_ctrl_list):
++ '''@brief Finish controllers configuration after hostnames (if any)
++ have been resolved.
++
++ Configuring controllers must be done asynchronously in 2 steps.
++ In the first step, host names get resolved to find their IP addresses.
++ Name resolution can take a while, especially when an external name
++        resolution server is used. Once that step has completed, the callback
++ method _config_ctrls_finish() (i.e. this method), gets invoked to
++ complete the controller configuration.
++
++ NOTE) This is the base class method used to define the interface.
++ It must be overloaded by a child class.
++ '''
++ raise NotImplementedError()
++
++ @abc.abstractmethod
++ def _load_last_known_config(self):
++ raise NotImplementedError()
++
++ @abc.abstractmethod
++ def _dump_last_known_config(self, controllers):
++ raise NotImplementedError()
+diff --git a/staslib/trid.py b/staslib/trid.py
+index def6ab2..38619e7 100644
+--- a/staslib/trid.py
++++ b/staslib/trid.py
+@@ -12,8 +12,7 @@ throughout nvme-stas to uniquely identify a Controller'''
+ import hashlib
+ from staslib import conf
+
+-class TID:
+- # pylint: disable=too-many-instance-attributes
++class TID: # pylint: disable=too-many-instance-attributes
+ '''Transport Identifier'''
+ RDMA_IP_PORT = '4420'
+ DISC_IP_PORT = '8009'
+diff --git a/staslib/udev.py b/staslib/udev.py
+index 29370b8..37b63cc 100644
+--- a/staslib/udev.py
++++ b/staslib/udev.py
+@@ -16,7 +16,7 @@ from staslib import defs, trid
+ try:
+ from pyudev.glib import MonitorObserver
+ except (ModuleNotFoundError, AttributeError):
+- from staslib.glibudev import MonitorObserver # pylint: disable=relative-beyond-top-level,ungrouped-imports
++ from staslib.glibudev import MonitorObserver # pylint: disable=ungrouped-imports
+
+ # ******************************************************************************
+ class Udev:
+@@ -99,7 +99,7 @@ class Udev:
+ def get_attributes(self, sys_name: str, attr_ids) -> dict:
+ '''@brief Get all the attributes associated with device @sys_name'''
+ attrs = {attr_id: '' for attr_id in attr_ids}
+- if sys_name:
++ if sys_name and sys_name != 'nvme?':
+ udev = self.get_nvme_device(sys_name)
+ if udev is not None:
+ for attr_id in attr_ids:
+diff --git a/test/test-config.py b/test/test-config.py
+index dad0ebd..db58883 100755
+--- a/test/test-config.py
++++ b/test/test-config.py
+@@ -40,7 +40,7 @@ class StasProcessConfUnitTest(unittest.TestCase):
+ self.assertFalse(service_conf.data_digest)
+ self.assertTrue(service_conf.persistent_connections)
+ self.assertTrue(service_conf.udev_rule_enabled)
+- self.assertFalse(service_conf.sticky_connections)
++ self.assertTrue(service_conf.sticky_connections)
+ self.assertFalse(service_conf.ignore_iface)
+ self.assertIn(6, service_conf.ip_family)
+ self.assertNotIn(4, service_conf.ip_family)
+diff --git a/test/test-controller.py b/test/test-controller.py
+index f23125e..f55781a 100755
+--- a/test/test-controller.py
++++ b/test/test-controller.py
+@@ -8,24 +8,43 @@ from pyfakefs.fake_filesystem_unittest import TestCase
+
+ LOOP = GLib.MainLoop()
+
++
++class TestController(ctrl.Controller):
++ def _find_existing_connection(self):
++ pass
++
++ def _on_aen(self, aen: int):
++ pass
++
++ def _on_nvme_event(self, nvme_event):
++ pass
++
++
+ class Test(TestCase):
+ '''Unit tests for class Controller'''
+
+ def setUp(self):
+ self.setUpPyfakefs()
+
+- self.fs.create_file('/etc/nvme/hostnqn', contents='nqn.2014-08.org.nvmexpress:uuid:01234567-0123-0123-0123-0123456789ab\n')
+- self.fs.create_file('/etc/nvme/hostid', contents='01234567-89ab-cdef-0123-456789abcdef\n')
+- self.fs.create_file('/dev/nvme-fabrics', contents='instance=-1,cntlid=-1,transport=%s,traddr=%s,trsvcid=%s,nqn=%s,queue_size=%d,nr_io_queues=%d,reconnect_delay=%d,ctrl_loss_tmo=%d,keep_alive_tmo=%d,hostnqn=%s,host_traddr=%s,host_iface=%s,hostid=%s,duplicate_connect,disable_sqflow,hdr_digest,data_digest,nr_write_queues=%d,nr_poll_queues=%d,tos=%d,fast_io_fail_tmo=%d,discovery,dhchap_secret=%s,dhchap_ctrl_secret=%s\n')
++ self.fs.create_file(
++ '/etc/nvme/hostnqn', contents='nqn.2014-08.org.nvmexpress:uuid:01234567-0123-0123-0123-0123456789ab\n'
++ )
++ self.fs.create_file('/etc/nvme/hostid', contents='01234567-89ab-cdef-0123-456789abcdef\n')
++ self.fs.create_file(
++ '/dev/nvme-fabrics',
++ contents='instance=-1,cntlid=-1,transport=%s,traddr=%s,trsvcid=%s,nqn=%s,queue_size=%d,nr_io_queues=%d,reconnect_delay=%d,ctrl_loss_tmo=%d,keep_alive_tmo=%d,hostnqn=%s,host_traddr=%s,host_iface=%s,hostid=%s,duplicate_connect,disable_sqflow,hdr_digest,data_digest,nr_write_queues=%d,nr_poll_queues=%d,tos=%d,fast_io_fail_tmo=%d,discovery,dhchap_secret=%s,dhchap_ctrl_secret=%s\n',
++ )
+
+- self.NVME_TID = trid.TID({
+- 'transport': 'tcp',
+- 'traddr': '10.10.10.10',
+- 'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8',
+- 'trsvcid': '8009',
+- 'host-traddr': '1.2.3.4',
+- 'host-iface': 'wlp0s20f3',
+- })
++ self.NVME_TID = trid.TID(
++ {
++ 'transport': 'tcp',
++ 'traddr': '10.10.10.10',
++ 'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8',
++ 'trsvcid': '8009',
++ 'host-traddr': '1.2.3.4',
++ 'host-iface': 'wlp0s20f3',
++ }
++ )
+
+ sysconf = conf.SysConf()
+ self.root = nvme.root()
+@@ -34,32 +53,92 @@ class Test(TestCase):
+ def tearDown(self):
+ LOOP.quit()
+
++    def test_cannot_instantiate_concrete_classes_if_abstract_methods_are_not_implemented(self):
++        # A subclass that does not implement the abstract methods cannot be
++        # instantiated (and neither can the ABC itself).
++        class Controller(ctrl.Controller):
++            pass
++
++        self.assertRaises(TypeError, lambda: Controller(root=self.root, host=self.host, tid=self.NVME_TID))
++        self.assertRaises(TypeError, lambda: ctrl.Controller(root=self.root, host=self.host, tid=self.NVME_TID))
++
+ def test_get_device(self):
+- controller = ctrl.Controller(root=self.root, host=self.host, tid=self.NVME_TID)
++ controller = TestController(root=self.root, host=self.host, tid=self.NVME_TID)
+ self.assertEqual(controller._connect_attempts, 0)
+- self.assertRaises(NotImplementedError, controller._try_to_connect)
++ controller._try_to_connect()
+ self.assertEqual(controller._connect_attempts, 1)
+- self.assertRaises(NotImplementedError, controller._find_existing_connection)
+- self.assertEqual(controller.id, "(tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4)")
++ self.assertEqual(
++ controller.id, "(tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4)"
++ )
+ # raise Exception(controller._connect_op)
+- self.assertEqual(str(controller.tid), "(tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4)")
+- self.assertEqual(controller.device, '')
+- self.assertEqual(str(controller.controller_id_dict()), "{'transport': 'tcp', 'traddr': '10.10.10.10', 'trsvcid': '8009', 'host-traddr': '1.2.3.4', 'host-iface': 'wlp0s20f3', 'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8', 'device': ''}")
+- # self.assertEqual(controller.details(), "{'transport': 'tcp', 'traddr': '10.10.10.[265 chars]ff]'}")
+- self.assertEqual(controller.info(), {'transport': 'tcp', 'traddr': '10.10.10.10', 'trsvcid': '8009', 'host-traddr': '1.2.3.4', 'host-iface': 'wlp0s20f3', 'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8', 'device': '', 'hostid': '', 'hostnqn': '', 'model': '', 'serial': '', 'connect attempts': '1', 'retry connect timer': '60.0s [off]'})
++ self.assertEqual(
++ str(controller.tid),
++ "(tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4)",
++ )
++ self.assertEqual(controller.device, 'nvme?')
++ self.assertEqual(
++ str(controller.controller_id_dict()),
++ "{'transport': 'tcp', 'traddr': '10.10.10.10', 'trsvcid': '8009', 'host-traddr': '1.2.3.4', 'host-iface': 'wlp0s20f3', 'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8', 'device': 'nvme?'}",
++ )
++ self.assertEqual(
++ controller.details(),
++ {
++ 'dctype': '',
++ 'cntrltype': '',
++ 'transport': 'tcp',
++ 'traddr': '10.10.10.10',
++ 'trsvcid': '8009',
++ 'host-traddr': '1.2.3.4',
++ 'host-iface': 'wlp0s20f3',
++ 'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8',
++ 'device': 'nvme?',
++ 'connect attempts': '1',
++ 'retry connect timer': '60.0s [off]',
++ 'hostid': '',
++ 'hostnqn': '',
++ 'model': '',
++ 'serial': '',
++ },
++ )
++ self.assertEqual(
++ controller.info(),
++ {
++ 'dctype': '',
++ 'cntrltype': '',
++ 'transport': 'tcp',
++ 'traddr': '10.10.10.10',
++ 'trsvcid': '8009',
++ 'host-traddr': '1.2.3.4',
++ 'host-iface': 'wlp0s20f3',
++ 'subsysnqn': 'nqn.1988-11.com.dell:SFSS:2:20220208134025e8',
++ 'device': 'nvme?',
++ 'connect attempts': '1',
++ 'retry connect timer': '60.0s [off]',
++ 'hostid': '',
++ 'hostnqn': '',
++ 'model': '',
++ 'serial': '',
++ 'connect operation': {'fail count': 0},
++ },
++ )
++
+ # print(controller._connect_op)
+ self.assertEqual(controller.cancel(), None)
+ self.assertEqual(controller.kill(), None)
+ # self.assertEqual(controller.disconnect(), 0)
+
+ def test_connect(self):
+- controller = ctrl.Controller(root=self.root, host=self.host, tid=self.NVME_TID)
++ controller = TestController(root=self.root, host=self.host, tid=self.NVME_TID)
+ self.assertEqual(controller._connect_attempts, 0)
+- controller._find_existing_connection = lambda : None
++ controller._find_existing_connection = lambda: None
+ with self.assertLogs(logger=logging.getLogger(), level='DEBUG') as captured:
+ controller._try_to_connect()
+ self.assertEqual(len(captured.records), 1)
+- self.assertTrue(captured.records[0].getMessage().startswith("Controller._try_to_connect() - (tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4) Connecting to nvme control with cfg={'hdr_digest': False, 'data_digest': False"))
++ self.assertTrue(
++ captured.records[0]
++ .getMessage()
++ .startswith(
++ "Controller._try_to_connect() - (tcp, 10.10.10.10, 8009, nqn.1988-11.com.dell:SFSS:2:20220208134025e8, wlp0s20f3, 1.2.3.4) Connecting to nvme control with cfg={'hdr_digest': False, 'data_digest': False"
++ )
++ )
+ self.assertEqual(controller._connect_attempts, 1)
+
+
+diff --git a/test/test-service.py b/test/test-service.py
+index 19f9b0c..4ce37be 100755
+--- a/test/test-service.py
++++ b/test/test-service.py
+@@ -4,6 +4,7 @@ import unittest
+ from staslib import service
+ from pyfakefs.fake_filesystem_unittest import TestCase
+
++
+ class Args:
+ def __init__(self):
+ self.tron = True
+@@ -11,6 +12,20 @@ class Args:
+ self.conf_file = '/dev/null'
+
+
++class TestService(service.Service):
++ def _config_ctrls_finish(self, configured_ctrl_list):
++ pass
++
++ def _dump_last_known_config(self, controllers):
++ pass
++
++ def _keep_connections_on_exit(self):
++ pass
++
++ def _load_last_known_config(self):
++ return dict()
++
++
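++# TestService mirrors TestController in test-controller.py: concrete no-op
++# overrides of the abstract Service methods replace the lambda
++# monkey-patching of _load_last_known_config (the "FIXME: this is hack"
++# removed below), so tests exercise Service through its normal class
++# machinery.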
+ class Test(TestCase):
+ '''Unit tests for class Service'''
+
+@@ -18,22 +33,39 @@ class Test(TestCase):
+ self.setUpPyfakefs()
+
+ os.environ['RUNTIME_DIRECTORY'] = "/run"
+- self.fs.create_file('/etc/nvme/hostnqn', contents='nqn.2014-08.org.nvmexpress:uuid:01234567-0123-0123-0123-0123456789ab\n')
+- self.fs.create_file('/etc/nvme/hostid', contents='01234567-89ab-cdef-0123-456789abcdef\n')
+- self.fs.create_file('/dev/nvme-fabrics', contents='instance=-1,cntlid=-1,transport=%s,traddr=%s,trsvcid=%s,nqn=%s,queue_size=%d,nr_io_queues=%d,reconnect_delay=%d,ctrl_loss_tmo=%d,keep_alive_tmo=%d,hostnqn=%s,host_traddr=%s,host_iface=%s,hostid=%s,duplicate_connect,disable_sqflow,hdr_digest,data_digest,nr_write_queues=%d,nr_poll_queues=%d,tos=%d,fast_io_fail_tmo=%d,discovery,dhchap_secret=%s,dhchap_ctrl_secret=%s\n')
++ self.fs.create_file(
++ '/etc/nvme/hostnqn', contents='nqn.2014-08.org.nvmexpress:uuid:01234567-0123-0123-0123-0123456789ab\n'
++ )
++ self.fs.create_file('/etc/nvme/hostid', contents='01234567-89ab-cdef-0123-456789abcdef\n')
++ self.fs.create_file(
++ '/dev/nvme-fabrics',
++ contents='instance=-1,cntlid=-1,transport=%s,traddr=%s,trsvcid=%s,nqn=%s,queue_size=%d,nr_io_queues=%d,reconnect_delay=%d,ctrl_loss_tmo=%d,keep_alive_tmo=%d,hostnqn=%s,host_traddr=%s,host_iface=%s,hostid=%s,duplicate_connect,disable_sqflow,hdr_digest,data_digest,nr_write_queues=%d,nr_poll_queues=%d,tos=%d,fast_io_fail_tmo=%d,discovery,dhchap_secret=%s,dhchap_ctrl_secret=%s\n',
++ )
++
++ def test_cannot_instantiate_concrete_classes_if_abstract_method_are_not_implemented(self):
++ # Make sure we can't instantiate the ABC directly (Abstract Base Class).
++ class Service(service.Service):
++ pass
++
++ self.assertRaises(TypeError, lambda: Service(Args(), reload_hdlr=lambda x: x))
+
+ def test_get_controller(self):
+- # FIXME: this is hack, fix it later
+- service.Service._load_last_known_config = lambda x : dict()
+- # start the test
+-
+- srv = service.Service(Args(), reload_hdlr=lambda x : x)
+- self.assertRaises(NotImplementedError, srv._keep_connections_on_exit)
+- self.assertRaises(NotImplementedError, srv._dump_last_known_config, [])
+- self.assertRaises(NotImplementedError, srv._on_config_ctrls)
+- #self.assertEqual(srv.get_controllers(), dict())
+- self.assertEqual(srv.get_controller(transport='tcp', traddr='10.10.10.10', trsvcid='8009', host_traddr='1.2.3.4', host_iface='wlp0s20f3', subsysnqn='nqn.1988-11.com.dell:SFSS:2:20220208134025e8'), None)
+- self.assertEqual(srv.remove_controller(controller=None), None)
++ srv = TestService(Args(), reload_hdlr=lambda x: x)
++
++ self.assertEqual(list(srv.get_controllers()), list())
++ self.assertEqual(
++ srv.get_controller(
++ transport='tcp',
++ traddr='10.10.10.10',
++ trsvcid='8009',
++ host_traddr='1.2.3.4',
++ host_iface='wlp0s20f3',
++ subsysnqn='nqn.1988-11.com.dell:SFSS:2:20220208134025e8',
++ ),
++ None,
++ )
++ self.assertEqual(srv.remove_controller(controller=None, success=True), None)
++
+
+ if __name__ == '__main__':
+ unittest.main()
diff --git a/SPECS/nvme-stas.spec b/SPECS/nvme-stas.spec
new file mode 100644
index 0000000..f8175ea
--- /dev/null
+++ b/SPECS/nvme-stas.spec
@@ -0,0 +1,121 @@
+# RHEL 8 compatibility
+%{!?version_no_tilde: %define version_no_tilde %{shrink:%(echo '%{version}' | tr '~' '-')}}
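+# e.g. a Version of 1.0~rc7 maps to tarball name 1.0-rc7 (used by Source0
+# below); RPM sorts "~" lower than anything, so pre-releases upgrade cleanly.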
+
+Name: nvme-stas
+Summary: NVMe STorage Appliance Services
+Version: 1.1.6
+Release: 3%{?dist}
+License: ASL 2.0
+URL: https://github.com/linux-nvme/nvme-stas
+Source0: %{url}/archive/v%{version_no_tilde}/%{name}-%{version_no_tilde}.tar.gz
+Patch0: 0001-sync-with-1.1.6.patch
+
+BuildArch: noarch
+
+BuildRequires: meson >= 0.57.0
+BuildRequires: glib2-devel
+BuildRequires: libnvme-devel
+BuildRequires: libxslt
+BuildRequires: docbook-style-xsl
+BuildRequires: systemd-devel
+
+BuildRequires: python3-devel
+#BuildRequires: python3-pyflakes
+#BuildRequires: python3-pylint
+#BuildRequires: pylint
+
+BuildRequires: python3-libnvme
+BuildRequires: python3-dasbus
+BuildRequires: python3-pyudev
+BuildRequires: python3-systemd
+BuildRequires: python3-gobject-devel
+BuildRequires: python3-lxml
+
+Requires: avahi
+Requires: python3-libnvme
+Requires: python3-dasbus
+Requires: python3-pyudev
+Requires: python3-systemd
+Requires: python3-gobject
+Requires: python3-lxml
+
+%description
+nvme-stas is a Central Discovery Controller (CDC) client for Linux. It
+handles Asynchronous Event Notifications (AEN), automated NVMe subsystem
+connection controls, error handling and reporting, and automatic (zeroconf)
+and manual configuration. nvme-stas is composed of two daemons:
+stafd (STorage Appliance Finder) and stacd (STorage Appliance Connector).
+
+%prep
+%autosetup -p1 -n %{name}-%{version_no_tilde}
+
+%build
+%meson -Dman=true -Dhtml=true
+%meson_build
+
+%install
+%meson_install
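+# sys.conf.doc is the commented sample installed by upstream; ship it as the
+# live sys.conf so /etc/stas has a documented default to edit (noreplace).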
+mv %{buildroot}/%{_sysconfdir}/stas/sys.conf.doc %{buildroot}/%{_sysconfdir}/stas/sys.conf
+
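+# The systemd macros below expand to guarded systemctl calls, roughly:
+#   %systemd_post                -> "systemctl preset" on first install only
+#   %systemd_preun               -> "systemctl disable --now" on full erase
+#   %systemd_postun_with_restart -> "systemctl try-restart" on upgrade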
+%post
+%systemd_post stacd.service
+%systemd_post stafd.service
+
+%preun
+%systemd_preun stacd.service
+%systemd_preun stafd.service
+
+%postun
+%systemd_postun_with_restart stacd.service
+%systemd_postun_with_restart stafd.service
+
+%files
+%license LICENSE
+%doc README.md
+%dir %{_sysconfdir}/stas
+%config(noreplace) %{_sysconfdir}/stas/stacd.conf
+%config(noreplace) %{_sysconfdir}/stas/stafd.conf
+%config(noreplace) %{_sysconfdir}/stas/sys.conf
+%{_datadir}/dbus-1/system.d/org.nvmexpress.*.conf
+%{_bindir}/stacctl
+%{_bindir}/stafctl
+%{_bindir}/stasadm
+%{_sbindir}/stacd
+%{_sbindir}/stafd
+%{_unitdir}/stacd.service
+%{_unitdir}/stafd.service
+%{_unitdir}/stas-config.target
+%{_unitdir}/stas-config@.service
+%dir %{python3_sitelib}/staslib
+%{python3_sitelib}/staslib/*
+%doc %{_pkgdocdir}/html
+%{_mandir}/man1/sta*.1*
+%{_mandir}/man5/*.5*
+%{_mandir}/man7/nvme*.7*
+%{_mandir}/man8/sta*.8*
+
+
+%changelog
+* Thu Aug 04 2022 Maurizio Lombardi - 1.1.6-3
+- Sync with the official 1.1.6 version
+
+* Wed Jul 27 2022 Maurizio Lombardi - 1.1.6-2
+- Rebuild for CentOS Stream
+
+* Wed Jul 20 2022 Maurizio Lombardi - 1.1.6-1
+- Update to version 1.1.6
+
+* Mon Jun 27 2022 Maurizio Lombardi - 1.1.4-1
+- Update to version 1.1.4
+
+* Wed Apr 20 2022 Tomas Bzatek - 1.0-1
+- Upstream v1.0 official release
+
+* Tue Apr 05 2022 Tomas Bzatek - 1.0~rc7-1
+- Upstream v1.0 Release Candidate 7
+
+* Fri Mar 25 2022 Tomas Bzatek - 1.0~rc5-1
+- Upstream v1.0 Release Candidate 5
+
+* Mon Mar 07 2022 Tomas Bzatek - 1.0~rc3-1
+- Upstream v1.0 Release Candidate 3