f65af0
From 3e8427f3aa392f961923f5c9d509bab64ce3c9ab Mon Sep 17 00:00:00 2001
979ee0
From: Christian Heimes <cheimes@redhat.com>
979ee0
Date: Sun, 8 Jul 2018 11:53:58 +0200
979ee0
Subject: [PATCH] Auto-retry failed certmonger requests
979ee0
979ee0
During parallel replica installation, a request sometimes fails with
979ee0
CA_REJECTED or CA_UNREACHABLE. The error occurs when the master is
979ee0
either busy or some information hasn't been replicated yet. Even
979ee0
a stuck request can be recovered, e.g. when permission and group
979ee0
information have been replicated.
979ee0
979ee0
A new function request_and_retry_cert() automatically resubmits failing
979ee0
requests until it times out.
979ee0
979ee0
Fixes: https://pagure.io/freeipa/issue/7623
979ee0
Signed-off-by: Christian Heimes <cheimes@redhat.com>
f65af0
Reviewed-By: Stanislav Laznicka <slaznick@redhat.com>
979ee0
---
979ee0
 ipalib/install/certmonger.py      | 64 ++++++++++++++++++++++++-------
979ee0
 ipaserver/install/cainstance.py   |  4 +-
979ee0
 ipaserver/install/certs.py        | 19 ++++++---
979ee0
 ipaserver/install/dsinstance.py   |  4 +-
979ee0
 ipaserver/install/httpinstance.py |  5 ++-
979ee0
 ipaserver/install/krbinstance.py  |  4 +-
979ee0
 6 files changed, 76 insertions(+), 24 deletions(-)
979ee0
979ee0
diff --git a/ipalib/install/certmonger.py b/ipalib/install/certmonger.py
f65af0
index c07242412affa29eca3312fd27985f65869d3f7a..3e1862192eb0d9245797ebbe8abf2ff69d7e7767 100644
979ee0
--- a/ipalib/install/certmonger.py
979ee0
+++ b/ipalib/install/certmonger.py
f65af0
@@ -305,20 +305,56 @@ def add_subject(request_id, subject):
979ee0
 def request_and_wait_for_cert(
979ee0
         certpath, subject, principal, nickname=None, passwd_fname=None,
979ee0
         dns=None, ca='IPA', profile=None,
979ee0
-        pre_command=None, post_command=None, storage='NSSDB', perms=None):
979ee0
-    """
979ee0
-    Execute certmonger to request a server certificate.
979ee0
-
979ee0
-    The method also waits for the certificate to be available.
979ee0
-    """
979ee0
-    reqId = request_cert(certpath, subject, principal, nickname,
979ee0
-                         passwd_fname, dns, ca, profile,
979ee0
-                         pre_command, post_command, storage, perms)
979ee0
-    state = wait_for_request(reqId, api.env.startup_timeout)
979ee0
-    ca_error = get_request_value(reqId, 'ca-error')
979ee0
-    if state != 'MONITORING' or ca_error:
979ee0
-        raise RuntimeError("Certificate issuance failed ({})".format(state))
979ee0
-    return reqId
979ee0
+        pre_command=None, post_command=None, storage='NSSDB', perms=None,
979ee0
+        resubmit_timeout=0):
979ee0
+    """Request certificate, wait and possibly resubmit failing requests
979ee0
+
979ee0
+    Submit a cert request to certmonger and wait until the request has
979ee0
+    finished.
979ee0
+
979ee0
+    With timeout, a failed request is resubmitted. During parallel replica
979ee0
+    installation, a request sometimes fails with CA_REJECTED or
979ee0
+    CA_UNREACHABLE. The error occurs when the master is either busy or some
979ee0
+    information haven't been replicated yet. Even a stuck request can be
979ee0
+    recovered, e.g. when permission and group information have been
979ee0
+    replicated.
979ee0
+    """
979ee0
+    req_id = request_cert(
979ee0
+        certpath, subject, principal, nickname, passwd_fname, dns, ca,
979ee0
+        profile, pre_command, post_command, storage, perms
979ee0
+    )
979ee0
+
979ee0
+    deadline = time.time() + resubmit_timeout
979ee0
+    while True:  # until success, timeout, or error
979ee0
+        state = wait_for_request(req_id, api.env.replication_wait_timeout)
979ee0
+        ca_error = get_request_value(req_id, 'ca-error')
979ee0
+        if state == 'MONITORING' and ca_error is None:
979ee0
+            # we got a winner, exiting
f65af0
+            logger.debug("Cert request %s was successful", req_id)
979ee0
+            return req_id
979ee0
+
f65af0
+        logger.debug(
979ee0
+            "Cert request %s failed: %s (%s)", req_id, state, ca_error
979ee0
+        )
979ee0
+        if state not in {'CA_REJECTED', 'CA_UNREACHABLE'}:
979ee0
+            # probably unrecoverable error
f65af0
+            logger.debug("Giving up on cert request %s", req_id)
979ee0
+            break
979ee0
+        elif not resubmit_timeout:
979ee0
+            # no resubmit
979ee0
+            break
979ee0
+        elif time.time() > deadline:
f65af0
+            logger.debug("Request %s reached resubmit dead line", req_id)
979ee0
+            break
979ee0
+        else:
979ee0
+            # sleep and resubmit
f65af0
+            logger.debug("Sleep and resubmit cert request %s", req_id)
979ee0
+            time.sleep(10)
979ee0
+            resubmit_request(req_id)
979ee0
+
979ee0
+    raise RuntimeError(
979ee0
+        "Certificate issuance failed ({}: {})".format(state, ca_error)
979ee0
+    )
979ee0
 
979ee0
 
979ee0
 def request_cert(
979ee0
diff --git a/ipaserver/install/cainstance.py b/ipaserver/install/cainstance.py
f65af0
index 8193f3da854b3a20d175de523fbc453f5c5104d8..6dbf69b3e5833f220a4d7d640b66a8fcf824f445 100644
979ee0
--- a/ipaserver/install/cainstance.py
979ee0
+++ b/ipaserver/install/cainstance.py
f65af0
@@ -917,7 +917,9 @@ class CAInstance(DogtagInstance):
979ee0
                 profile='caServerCert',
979ee0
                 pre_command='renew_ra_cert_pre',
979ee0
                 post_command='renew_ra_cert',
979ee0
-                storage="FILE")
979ee0
+                storage="FILE",
979ee0
+                resubmit_timeout=api.env.replication_wait_timeout
979ee0
+            )
979ee0
             self.__set_ra_cert_perms()
979ee0
 
979ee0
             self.requestId = str(reqId)
979ee0
diff --git a/ipaserver/install/certs.py b/ipaserver/install/certs.py
f65af0
index 3f843399f4f964223f52242d610e842a5dc473e8..30b2aa0d3e7b2cafbcc17ad3d04764a342ae8002 100644
979ee0
--- a/ipaserver/install/certs.py
979ee0
+++ b/ipaserver/install/certs.py
f65af0
@@ -600,12 +600,19 @@ class CertDB(object):
979ee0
     def export_pem_cert(self, nickname, location):
979ee0
         return self.nssdb.export_pem_cert(nickname, location)
979ee0
 
979ee0
-    def request_service_cert(self, nickname, principal, host):
979ee0
-        certmonger.request_and_wait_for_cert(certpath=self.secdir,
979ee0
-                                             nickname=nickname,
979ee0
-                                             principal=principal,
979ee0
-                                             subject=host,
979ee0
-                                             passwd_fname=self.passwd_fname)
979ee0
+    def request_service_cert(self, nickname, principal, host,
979ee0
+                             resubmit_timeout=None):
979ee0
+        if resubmit_timeout is None:
979ee0
+            resubmit_timeout = api.env.replication_wait_timeout
979ee0
+        return certmonger.request_and_wait_for_cert(
979ee0
+            certpath=self.secdir,
979ee0
+            storage='NSSDB',
979ee0
+            nickname=nickname,
979ee0
+            principal=principal,
979ee0
+            subject=host,
979ee0
+            passwd_fname=self.passwd_fname,
979ee0
+            resubmit_timeout=resubmit_timeout
979ee0
+        )
979ee0
 
979ee0
     def is_ipa_issued_cert(self, api, nickname):
979ee0
         """
979ee0
diff --git a/ipaserver/install/dsinstance.py b/ipaserver/install/dsinstance.py
f65af0
index eefbde3356e1077d490d09c4ea47d961ce3ce8e6..ac95f8c746477da375de518526dea0d02d51d984 100644
979ee0
--- a/ipaserver/install/dsinstance.py
979ee0
+++ b/ipaserver/install/dsinstance.py
f65af0
@@ -851,7 +851,9 @@ class DsInstance(service.Service):
979ee0
                     ca='IPA',
979ee0
                     profile=dogtag.DEFAULT_PROFILE,
979ee0
                     dns=[self.fqdn],
979ee0
-                    post_command=cmd)
979ee0
+                    post_command=cmd,
979ee0
+                    resubmit_timeout=api.env.replication_wait_timeout
979ee0
+                )
979ee0
             finally:
979ee0
                 if prev_helper is not None:
979ee0
                     certmonger.modify_ca_helper('IPA', prev_helper)
979ee0
diff --git a/ipaserver/install/httpinstance.py b/ipaserver/install/httpinstance.py
f65af0
index 0b7023c2f1b0feb996e0dd0adbefbd49c51da757..3f83248dd89118aeecfbf458c5079dde8b2cb93d 100644
979ee0
--- a/ipaserver/install/httpinstance.py
979ee0
+++ b/ipaserver/install/httpinstance.py
f65af0
@@ -447,7 +447,10 @@ class HTTPInstance(service.Service):
979ee0
                     ca='IPA',
979ee0
                     profile=dogtag.DEFAULT_PROFILE,
979ee0
                     dns=[self.fqdn],
979ee0
-                    post_command='restart_httpd')
979ee0
+                    post_command='restart_httpd',
979ee0
+                    storage='NSSDB',
979ee0
+                    resubmit_timeout=api.env.replication_wait_timeout
979ee0
+                )
979ee0
             finally:
979ee0
                 if prev_helper is not None:
979ee0
                     certmonger.modify_ca_helper('IPA', prev_helper)
979ee0
diff --git a/ipaserver/install/krbinstance.py b/ipaserver/install/krbinstance.py
f65af0
index 33d66fb94b0a1f7571b22120e5159a0e0ad2e675..09cafb7b84623594fe88083f5b914cee0f050409 100644
979ee0
--- a/ipaserver/install/krbinstance.py
979ee0
+++ b/ipaserver/install/krbinstance.py
f65af0
@@ -445,7 +445,9 @@ class KrbInstance(service.Service):
979ee0
                 storage='FILE',
979ee0
                 profile=KDC_PROFILE,
979ee0
                 post_command='renew_kdc_cert',
979ee0
-                perms=(0o644, 0o600))
979ee0
+                perms=(0o644, 0o600),
979ee0
+                resubmit_timeout=api.env.replication_wait_timeout
979ee0
+            )
979ee0
         except dbus.DBusException as e:
979ee0
             # if the certificate is already tracked, ignore the error
979ee0
             name = e.get_dbus_name()
979ee0
-- 
979ee0
2.17.1
979ee0