Blame SOURCES/lower-load-created-by-config-files-syncing-in-pcsd.patch

f40c32
From 016aa2bb9553a9a64ec6645db40ef95dd8de7041 Mon Sep 17 00:00:00 2001
f40c32
From: Tomas Jelinek <tojeline@redhat.com>
f40c32
Date: Tue, 19 Feb 2019 17:53:17 +0100
f40c32
Subject: [PATCH 3/3] lower load created by config files syncing in pcsd
f40c32
f40c32
* make the sync less frequent (10 minutes instead of 1 minute) by
f40c32
  default
f40c32
* if previous attempt for syncing was unable to connect to other nodes,
f40c32
  try again sooner (in 1 minute by default)
f40c32
---
f40c32
 pcsd/cfgsync.rb           |  60 ++++++++++++++++----
f40c32
 pcsd/pcsd.8               |   9 ++-
f40c32
 pcsd/pcsd.rb              |  24 ++++++--
f40c32
 pcsd/test/test_cfgsync.rb | 114 ++++++++++++++++++++++++++++++--------
f40c32
 4 files changed, 167 insertions(+), 40 deletions(-)
f40c32
f40c32
diff --git a/pcsd/cfgsync.rb b/pcsd/cfgsync.rb
f40c32
index 9acd8d0f..44e6d853 100644
f40c32
--- a/pcsd/cfgsync.rb
f40c32
+++ b/pcsd/cfgsync.rb
f40c32
@@ -313,8 +313,11 @@ module Cfgsync
f40c32
 
f40c32
 
f40c32
   class ConfigSyncControl
f40c32
-    @thread_interval_default = 60
f40c32
-    @thread_interval_minimum = 20
f40c32
+    # intervals in seconds
f40c32
+    @thread_interval_default = 600
f40c32
+    @thread_interval_minimum = 60
f40c32
+    @thread_interval_previous_not_connected_default = 60
f40c32
+    @thread_interval_previous_not_connected_minimum = 20
f40c32
     @file_backup_count_default = 50
f40c32
     @file_backup_count_minimum = 0
f40c32
 
f40c32
@@ -349,6 +352,20 @@ module Cfgsync
f40c32
       return self.save(data)
f40c32
     end
f40c32
 
f40c32
+    def self.sync_thread_interval_previous_not_connected()
f40c32
+      return self.get_integer_value(
f40c32
+        self.load()['thread_interval_previous_not_connected'],
f40c32
+        @thread_interval_previous_not_connected_default,
f40c32
+        @thread_interval_previous_not_connected_minimum
f40c32
+      )
f40c32
+    end
f40c32
+
f40c32
+    def self.sync_thread_interval_previous_not_connected=(seconds)
f40c32
+      data = self.load()
f40c32
+      data['thread_interval_previous_not_connected'] = seconds
f40c32
+      return self.save(data)
f40c32
+    end
f40c32
+
f40c32
     def self.sync_thread_pause(semaphore_cfgsync, seconds=300)
f40c32
       # wait for the thread to finish current run and disable it
f40c32
       semaphore_cfgsync.synchronize {
f40c32
@@ -585,14 +602,17 @@ module Cfgsync
f40c32
     end
f40c32
 
f40c32
     def fetch_all()
f40c32
-      return self.filter_configs_cluster(
f40c32
-        self.get_configs_cluster(@nodes, @cluster_name),
f40c32
-        @config_classes
f40c32
+      node_configs, node_connected = self.get_configs_cluster(
f40c32
+        @nodes, @cluster_name
f40c32
       )
f40c32
+      filtered_configs = self.filter_configs_cluster(
f40c32
+        node_configs, @config_classes
f40c32
+      )
f40c32
+      return filtered_configs, node_connected
f40c32
     end
f40c32
 
f40c32
     def fetch()
f40c32
-      configs_cluster = self.fetch_all()
f40c32
+      configs_cluster, node_connected = self.fetch_all()
f40c32
 
f40c32
       newest_configs_cluster = {}
f40c32
       configs_cluster.each { |name, cfgs|
f40c32
@@ -613,7 +633,7 @@ module Cfgsync
f40c32
           end
f40c32
         end
f40c32
       }
f40c32
-      return to_update_locally, to_update_in_cluster
f40c32
+      return to_update_locally, to_update_in_cluster, node_connected
f40c32
     end
f40c32
 
f40c32
     protected
f40c32
@@ -630,12 +650,15 @@ module Cfgsync
f40c32
       $logger.debug 'Fetching configs from the cluster'
f40c32
       threads = []
f40c32
       node_configs = {}
f40c32
+      connected_to = {}
f40c32
       nodes.each { |node|
f40c32
         threads << Thread.new {
f40c32
           code, out = send_request_with_token(
f40c32
             @auth_user, node, 'get_configs', false, data
f40c32
           )
f40c32
+          connected_to[node] = false
f40c32
           if 200 == code
f40c32
+            connected_to[node] = true
f40c32
             begin
f40c32
               parsed = JSON::parse(out)
f40c32
               if 'ok' == parsed['status'] and cluster_name == parsed['cluster_name']
f40c32
@@ -647,7 +670,24 @@ module Cfgsync
f40c32
         }
f40c32
       }
f40c32
       threads.each { |t| t.join }
f40c32
-      return node_configs
f40c32
+
f40c32
+      node_connected = false
f40c32
+      if connected_to.empty?()
f40c32
+        node_connected = true # no nodes to connect to => no connection errors
f40c32
+      else
f40c32
+        connected_count = 0
f40c32
+        connected_to.each { |node, connected|
f40c32
+          if connected
f40c32
+            connected_count += 1
f40c32
+          end
f40c32
+        }
f40c32
+        # If we only connected to one node, consider it a fail and continue as
f40c32
+        # if we could not connect anywhere. The one node is probably the local
f40c32
+        # node.
f40c32
+        node_connected = connected_count > 1
f40c32
+      end
f40c32
+
f40c32
+      return node_configs, node_connected
f40c32
     end
f40c32
 
f40c32
     def filter_configs_cluster(node_configs, wanted_configs_classes)
f40c32
@@ -752,7 +792,7 @@ module Cfgsync
f40c32
           fetcher = ConfigFetcher.new(
f40c32
             PCSAuth.getSuperuserAuth(), [config.class], nodes, cluster_name
f40c32
           )
f40c32
-          cfgs_to_save, _ = fetcher.fetch()
f40c32
+          cfgs_to_save, _, _ = fetcher.fetch()
f40c32
           cfgs_to_save.each { |cfg_to_save|
f40c32
             cfg_to_save.save() if cfg_to_save.class == config.class
f40c32
           }
f40c32
@@ -812,7 +852,7 @@ module Cfgsync
f40c32
     fetcher = ConfigFetcher.new(
f40c32
       PCSAuth.getSuperuserAuth(), [config_new.class], nodes, cluster_name
f40c32
     )
f40c32
-    fetched_tokens = fetcher.fetch_all()[config_new.class.name]
f40c32
+    fetched_tokens = fetcher.fetch_all().first[config_new.class.name]
f40c32
     config_new = Cfgsync::merge_tokens_files(
f40c32
       config, fetched_tokens, new_tokens, new_ports
f40c32
     )
f40c32
diff --git a/pcsd/pcsd.8 b/pcsd/pcsd.8
f40c32
index e58b7ff6..bd405043 100644
f40c32
--- a/pcsd/pcsd.8
f40c32
+++ b/pcsd/pcsd.8
f40c32
@@ -63,9 +63,11 @@ Example:
f40c32
 .br
f40c32
   "thread_disabled": false,
f40c32
 .br
f40c32
-  "thread_interval": 60,
f40c32
+  "thread_interval": 600,
f40c32
 .br
f40c32
-  "thread_paused_until": 1487780453,
f40c32
+  "thread_interval_previous_not_connected": 60,
f40c32
+.br
f40c32
+  "thread_paused_until": 1487780453
f40c32
 .br
f40c32
 }
f40c32
 
f40c32
@@ -79,6 +81,9 @@ Set this to \fBtrue\fR to completely disable the synchronization.
f40c32
 .B thread_interval
f40c32
 How often in seconds should pcsd ask other nodes if the synchronized files have changed.
f40c32
 .TP
f40c32
+.B thread_interval_previous_not_connected
f40c32
+How often in seconds should pcsd ask other nodes if the synchronized files have changed when, during the previous attempt, pcsd was unable to connect to at least two nodes.
f40c32
+.TP
f40c32
 .B thread_paused_until
f40c32
 Disable the synchronization until the set unix timestamp.
f40c32
 
f40c32
diff --git a/pcsd/pcsd.rb b/pcsd/pcsd.rb
f40c32
index 9f9bd091..6e5e27e0 100644
f40c32
--- a/pcsd/pcsd.rb
f40c32
+++ b/pcsd/pcsd.rb
f40c32
@@ -132,14 +132,15 @@ set :run, false
f40c32
 
f40c32
 $thread_cfgsync = Thread.new {
f40c32
   while true
f40c32
+    node_connected = true
f40c32
     $semaphore_cfgsync.synchronize {
f40c32
-      $logger.debug('Config files sync thread started')
f40c32
       if Cfgsync::ConfigSyncControl.sync_thread_allowed?()
f40c32
+        $logger.info('Config files sync thread started')
f40c32
         begin
f40c32
           # do not sync if this host is not in a cluster
f40c32
           cluster_name = get_cluster_name()
f40c32
           cluster_nodes = get_corosync_nodes()
f40c32
-          if cluster_name and !cluster_name.empty?() and cluster_nodes and !cluster_nodes.empty?
f40c32
+          if cluster_name and !cluster_name.empty?() and cluster_nodes and cluster_nodes.count > 1
f40c32
             $logger.debug('Config files sync thread fetching')
f40c32
             fetcher = Cfgsync::ConfigFetcher.new(
f40c32
               PCSAuth.getSuperuserAuth(),
f40c32
@@ -147,18 +148,31 @@ $thread_cfgsync = Thread.new {
f40c32
               cluster_nodes,
f40c32
               cluster_name
f40c32
             )
f40c32
-            cfgs_to_save, _ = fetcher.fetch()
f40c32
+            cfgs_to_save, _, node_connected = fetcher.fetch()
f40c32
             cfgs_to_save.each { |cfg_to_save|
f40c32
               cfg_to_save.save()
f40c32
             }
f40c32
+            $logger.info('Config files sync thread finished')
f40c32
+          else
f40c32
+            $logger.info(
f40c32
+              'Config files sync skipped, this host does not seem to be in ' +
f40c32
+              'a cluster of at least 2 nodes'
f40c32
+            )
f40c32
           end
f40c32
         rescue => e
f40c32
           $logger.warn("Config files sync thread exception: #{e}")
f40c32
         end
f40c32
+      else
f40c32
+        $logger.info('Config files sync is disabled or paused, skipping')
f40c32
       end
f40c32
-      $logger.debug('Config files sync thread finished')
f40c32
     }
f40c32
-    sleep(Cfgsync::ConfigSyncControl.sync_thread_interval())
f40c32
+    if node_connected
f40c32
+      sleep(Cfgsync::ConfigSyncControl.sync_thread_interval())
f40c32
+    else
f40c32
+      sleep(
f40c32
+        Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected()
f40c32
+      )
f40c32
+    end
f40c32
   end
f40c32
 }
f40c32
 
f40c32
diff --git a/pcsd/test/test_cfgsync.rb b/pcsd/test/test_cfgsync.rb
f40c32
index 9b0317ce..b49c44d2 100644
f40c32
--- a/pcsd/test/test_cfgsync.rb
f40c32
+++ b/pcsd/test/test_cfgsync.rb
f40c32
@@ -287,8 +287,10 @@ class TestConfigSyncControll < Test::Unit::TestCase
f40c32
     file = File.open(CFG_SYNC_CONTROL, 'w')
f40c32
     file.write(JSON.pretty_generate({}))
f40c32
     file.close()
f40c32
-    @thread_interval_default = 60
f40c32
-    @thread_interval_minimum = 20
f40c32
+    @thread_interval_default = 600
f40c32
+    @thread_interval_minimum = 60
f40c32
+    @thread_interval_previous_not_connected_default = 60
f40c32
+    @thread_interval_previous_not_connected_minimum = 20
f40c32
     @file_backup_count_default = 50
f40c32
     @file_backup_count_minimum = 0
f40c32
   end
f40c32
@@ -441,6 +443,65 @@ class TestConfigSyncControll < Test::Unit::TestCase
f40c32
     )
f40c32
   end
f40c32
 
f40c32
+  def test_interval_previous_not_connected()
f40c32
+    assert_equal(
f40c32
+      @thread_interval_previous_not_connected_default,
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected()
f40c32
+    )
f40c32
+
f40c32
+    interval = (
f40c32
+      @thread_interval_previous_not_connected_default +
f40c32
+      @thread_interval_previous_not_connected_minimum
f40c32
+    )
f40c32
+    assert(
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected=(
f40c32
+        interval
f40c32
+      )
f40c32
+    )
f40c32
+    assert_equal(
f40c32
+      interval,
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected()
f40c32
+    )
f40c32
+
f40c32
+    assert(
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected=(
f40c32
+        @thread_interval_previous_not_connected_minimum / 2
f40c32
+      )
f40c32
+    )
f40c32
+    assert_equal(
f40c32
+      @thread_interval_previous_not_connected_minimum,
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected()
f40c32
+    )
f40c32
+
f40c32
+    assert(
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected=(0)
f40c32
+    )
f40c32
+    assert_equal(
f40c32
+      @thread_interval_previous_not_connected_minimum,
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected()
f40c32
+    )
f40c32
+
f40c32
+    assert(
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected=(
f40c32
+        -100
f40c32
+      )
f40c32
+    )
f40c32
+    assert_equal(
f40c32
+      @thread_interval_previous_not_connected_minimum,
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected()
f40c32
+    )
f40c32
+
f40c32
+    assert(
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected=(
f40c32
+        'abcd'
f40c32
+      )
f40c32
+    )
f40c32
+    assert_equal(
f40c32
+      @thread_interval_previous_not_connected_default,
f40c32
+      Cfgsync::ConfigSyncControl.sync_thread_interval_previous_not_connected()
f40c32
+    )
f40c32
+  end
f40c32
+
f40c32
   def test_file_backup_count()
f40c32
     assert_equal(
f40c32
       @file_backup_count_default,
f40c32
@@ -495,11 +556,12 @@ class TestConfigFetcher < Test::Unit::TestCase
f40c32
     end
f40c32
 
f40c32
     def get_configs_cluster(nodes, cluster_name)
f40c32
-      return @configs_cluster
f40c32
+      return @configs_cluster, @node_connected
f40c32
     end
f40c32
 
f40c32
-    def set_configs_cluster(configs)
f40c32
+    def set_configs_cluster(configs, node_connected=true)
f40c32
       @configs_cluster = configs
f40c32
+      @node_connected = node_connected
f40c32
       return self
f40c32
     end
f40c32
 
f40c32
@@ -569,31 +631,37 @@ class TestConfigFetcher < Test::Unit::TestCase
f40c32
     cfg_name = Cfgsync::ClusterConf.name
f40c32
     fetcher = ConfigFetcherMock.new({}, [Cfgsync::ClusterConf], nil, nil)
f40c32
 
f40c32
+    # unable to connect to any nodes
f40c32
+    fetcher.set_configs_local({cfg_name => cfg1})
f40c32
+
f40c32
+    fetcher.set_configs_cluster({}, false)
f40c32
+    assert_equal([[], [], false], fetcher.fetch())
f40c32
+
f40c32
     # local config is synced
f40c32
     fetcher.set_configs_local({cfg_name => cfg1})
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {'configs' => {cfg_name => cfg1}},
f40c32
     })
f40c32
-    assert_equal([[], []], fetcher.fetch())
f40c32
+    assert_equal([[], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {'configs' => {cfg_name => cfg2}},
f40c32
     })
f40c32
-    assert_equal([[], []], fetcher.fetch())
f40c32
+    assert_equal([[], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {'configs' => {cfg_name => cfg1}},
f40c32
       'node2' => {'configs' => {cfg_name => cfg2}},
f40c32
     })
f40c32
-    assert_equal([[], []], fetcher.fetch())
f40c32
+    assert_equal([[], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {'configs' => {cfg_name => cfg1}},
f40c32
       'node2' => {'configs' => {cfg_name => cfg2}},
f40c32
       'node3' => {'configs' => {cfg_name => cfg2}},
f40c32
     })
f40c32
-    assert_equal([[], []], fetcher.fetch())
f40c32
+    assert_equal([[], [], true], fetcher.fetch())
f40c32
 
f40c32
     # local config is older
f40c32
     fetcher.set_configs_local({cfg_name => cfg1})
f40c32
@@ -601,20 +669,20 @@ class TestConfigFetcher < Test::Unit::TestCase
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
     })
f40c32
-    assert_equal([[cfg3], []], fetcher.fetch())
f40c32
+    assert_equal([[cfg3], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
       'node2' => {cfg_name => cfg4},
f40c32
     })
f40c32
-    assert_equal([[cfg4], []], fetcher.fetch())
f40c32
+    assert_equal([[cfg4], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
       'node2' => {cfg_name => cfg4},
f40c32
       'node3' => {cfg_name => cfg3},
f40c32
     })
f40c32
-    assert_equal([[cfg3], []], fetcher.fetch())
f40c32
+    assert_equal([[cfg3], [], true], fetcher.fetch())
f40c32
 
f40c32
     # local config is newer
f40c32
     fetcher.set_configs_local({cfg_name => cfg3})
f40c32
@@ -622,13 +690,13 @@ class TestConfigFetcher < Test::Unit::TestCase
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg1},
f40c32
     })
f40c32
-    assert_equal([[], [cfg3]], fetcher.fetch())
f40c32
+    assert_equal([[], [cfg3], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg1},
f40c32
       'node2' => {cfg_name => cfg1},
f40c32
     })
f40c32
-    assert_equal([[], [cfg3]], fetcher.fetch())
f40c32
+    assert_equal([[], [cfg3], true], fetcher.fetch())
f40c32
 
f40c32
     # local config is the same version
f40c32
     fetcher.set_configs_local({cfg_name => cfg3})
f40c32
@@ -636,32 +704,32 @@ class TestConfigFetcher < Test::Unit::TestCase
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
     })
f40c32
-    assert_equal([[], []], fetcher.fetch())
f40c32
+    assert_equal([[], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg4},
f40c32
     })
f40c32
-    assert_equal([[cfg4], []], fetcher.fetch())
f40c32
+    assert_equal([[cfg4], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
       'node2' => {cfg_name => cfg4},
f40c32
     })
f40c32
-    assert_equal([[cfg4], []], fetcher.fetch())
f40c32
+    assert_equal([[cfg4], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
       'node2' => {cfg_name => cfg4},
f40c32
       'node3' => {cfg_name => cfg3},
f40c32
     })
f40c32
-    assert_equal([[], []], fetcher.fetch())
f40c32
+    assert_equal([[], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
       'node2' => {cfg_name => cfg4},
f40c32
       'node3' => {cfg_name => cfg4},
f40c32
     })
f40c32
-    assert_equal([[cfg4], []], fetcher.fetch())
f40c32
+    assert_equal([[cfg4], [], true], fetcher.fetch())
f40c32
 
f40c32
     # local config is the same version
f40c32
     fetcher.set_configs_local({cfg_name => cfg4})
f40c32
@@ -669,32 +737,32 @@ class TestConfigFetcher < Test::Unit::TestCase
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
     })
f40c32
-    assert_equal([[cfg3], []], fetcher.fetch())
f40c32
+    assert_equal([[cfg3], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg4},
f40c32
     })
f40c32
-    assert_equal([[], []], fetcher.fetch())
f40c32
+    assert_equal([[], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
       'node2' => {cfg_name => cfg4},
f40c32
     })
f40c32
-    assert_equal([[], []], fetcher.fetch())
f40c32
+    assert_equal([[], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
       'node2' => {cfg_name => cfg4},
f40c32
       'node3' => {cfg_name => cfg3},
f40c32
     })
f40c32
-    assert_equal([[cfg3], []], fetcher.fetch())
f40c32
+    assert_equal([[cfg3], [], true], fetcher.fetch())
f40c32
 
f40c32
     fetcher.set_configs_cluster({
f40c32
       'node1' => {cfg_name => cfg3},
f40c32
       'node2' => {cfg_name => cfg4},
f40c32
       'node3' => {cfg_name => cfg4},
f40c32
     })
f40c32
-    assert_equal([[], []], fetcher.fetch())
f40c32
+    assert_equal([[], [], true], fetcher.fetch())
f40c32
   end
f40c32
 end
f40c32
 
f40c32
-- 
f40c32
2.17.0
f40c32