Blame SOURCES/0035-BE-Send-refresh-requests-in-batches.patch

d6181b
From bf3f506d14cf89b9d1d1e3504524b231a6cef2b1 Mon Sep 17 00:00:00 2001
d6181b
From: Jakub Hrozek <jhrozek@redhat.com>
d6181b
Date: Wed, 8 May 2019 23:16:07 +0200
d6181b
Subject: [PATCH 35/48] BE: Send refresh requests in batches
d6181b
d6181b
As we extend the background refresh into larger domains, the amount of
d6181b
data that SSSD refreshes on the background might be larger. And
d6181b
refreshing all expired entries in a single request might block sssd_be
d6181b
for a long time, either triggering the watchdog or starving other
d6181b
legitimate requests.
d6181b
d6181b
Therefore the background refresh will be done in batches of 200 entries.
d6181b
The first batch of every type (up to 200 users, up to 200 groups, ...)
d6181b
will be scheduled imediatelly and subsequent batches with a 0.5 second
d6181b
delay.
d6181b
d6181b
Related:
d6181b
https://pagure.io/SSSD/sssd/issue/4012
d6181b
d6181b
Reviewed-by: Sumit Bose <sbose@redhat.com>
d6181b
---
d6181b
 src/providers/be_refresh.c            | 131 ++++++++++++++++++++++----
d6181b
 src/tests/cmocka/test_expire_common.c |   6 +-
d6181b
 src/tests/sss_idmap-tests.c           |   8 +-
d6181b
 src/util/util.h                       |   8 ++
d6181b
 4 files changed, 128 insertions(+), 25 deletions(-)
d6181b
d6181b
diff --git a/src/providers/be_refresh.c b/src/providers/be_refresh.c
d6181b
index c4ff71e1f..5d86509bb 100644
d6181b
--- a/src/providers/be_refresh.c
d6181b
+++ b/src/providers/be_refresh.c
d6181b
@@ -204,8 +204,21 @@ struct be_refresh_state {
d6181b
     struct sss_domain_info *domain;
d6181b
     enum be_refresh_type index;
d6181b
     time_t period;
d6181b
+
d6181b
+    char **refresh_values;
d6181b
+    size_t refresh_val_size;
d6181b
+    size_t refresh_index;
d6181b
+
d6181b
+    size_t batch_size;
d6181b
+    char **refresh_batch;
d6181b
 };
d6181b
 
d6181b
+static errno_t be_refresh_batch_step(struct tevent_req *req,
d6181b
+                                     uint32_t msec_delay);
d6181b
+static void be_refresh_batch_step_wakeup(struct tevent_context *ev,
d6181b
+                                         struct tevent_timer *tt,
d6181b
+                                         struct timeval tv,
d6181b
+                                         void *pvt);
d6181b
 static errno_t be_refresh_step(struct tevent_req *req);
d6181b
 static void be_refresh_done(struct tevent_req *subreq);
d6181b
 
d6181b
@@ -236,6 +249,13 @@ struct tevent_req *be_refresh_send(TALLOC_CTX *mem_ctx,
d6181b
         goto immediately;
d6181b
     }
d6181b
 
d6181b
+    state->batch_size = 200;
d6181b
+    state->refresh_batch = talloc_zero_array(state, char *, state->batch_size+1);
d6181b
+    if (state->refresh_batch == NULL) {
d6181b
+        ret = ENOMEM;
d6181b
+        goto immediately;
d6181b
+    }
d6181b
+
d6181b
     ret = be_refresh_step(req);
d6181b
     if (ret == EOK) {
d6181b
         goto immediately;
d6181b
@@ -261,8 +281,6 @@ immediately:
d6181b
 static errno_t be_refresh_step(struct tevent_req *req)
d6181b
 {
d6181b
     struct be_refresh_state *state = NULL;
d6181b
-    struct tevent_req *subreq = NULL;
d6181b
-    char **values = NULL;
d6181b
     errno_t ret;
d6181b
 
d6181b
     state = tevent_req_data(req, struct be_refresh_state);
d6181b
@@ -289,42 +307,103 @@ static errno_t be_refresh_step(struct tevent_req *req)
d6181b
             goto done;
d6181b
         }
d6181b
 
d6181b
+        talloc_zfree(state->refresh_values);
d6181b
         ret = be_refresh_get_values(state, state->index, state->ctx->attr_name,
d6181b
-                                    state->domain, state->period, &values);
d6181b
+                                    state->domain, state->period,
d6181b
+                                    &state->refresh_values);
d6181b
         if (ret != EOK) {
d6181b
             DEBUG(SSSDBG_CRIT_FAILURE, "Unable to obtain DN list [%d]: %s\n",
d6181b
                                         ret, sss_strerror(ret));
d6181b
             goto done;
d6181b
         }
d6181b
 
d6181b
-        DEBUG(SSSDBG_TRACE_FUNC, "Refreshing %s in domain %s\n",
d6181b
-              state->cb->name, state->domain->name);
d6181b
+        for (state->refresh_val_size = 0;
d6181b
+             state->refresh_values[state->refresh_val_size] != NULL;
d6181b
+             state->refresh_val_size++);
d6181b
+
d6181b
+        DEBUG(SSSDBG_TRACE_FUNC, "Refreshing %zu %s in domain %s\n",
d6181b
+              state->refresh_val_size, state->cb->name, state->domain->name);
d6181b
 
d6181b
-        subreq = state->cb->send_fn(state, state->ev, state->be_ctx,
d6181b
-                                    state->domain, values, state->cb->pvt);
d6181b
-        if (subreq == NULL) {
d6181b
-            ret = ENOMEM;
d6181b
+        ret = be_refresh_batch_step(req, 0);
d6181b
+        if (ret == EOK) {
d6181b
+            state->index++;
d6181b
+            continue;
d6181b
+        } else if (ret != EAGAIN) {
d6181b
             goto done;
d6181b
         }
d6181b
-
d6181b
-        /* make the list disappear with subreq */
d6181b
-        talloc_steal(subreq, values);
d6181b
-
d6181b
-        tevent_req_set_callback(subreq, be_refresh_done, req);
d6181b
+        /* EAGAIN only, refreshing something.. */
d6181b
 
d6181b
         state->index++;
d6181b
-        ret = EAGAIN;
d6181b
         goto done;
d6181b
     }
d6181b
 
d6181b
     ret = EOK;
d6181b
 
d6181b
 done:
d6181b
-    if (ret != EOK && ret != EAGAIN) {
d6181b
-        talloc_free(values);
d6181b
+    return ret;
d6181b
+}
d6181b
+
d6181b
+static errno_t be_refresh_batch_step(struct tevent_req *req,
d6181b
+                                     uint32_t msec_delay)
d6181b
+{
d6181b
+    struct be_refresh_state *state = tevent_req_data(req, struct be_refresh_state);
d6181b
+    struct timeval tv;
d6181b
+    struct tevent_timer *timeout = NULL;
d6181b
+
d6181b
+    size_t remaining;
d6181b
+    size_t batch_size;
d6181b
+
d6181b
+    memset(state->refresh_batch, 0, sizeof(char *) * state->batch_size);
d6181b
+
d6181b
+    if (state->refresh_index >= state->refresh_val_size) {
d6181b
+        DEBUG(SSSDBG_FUNC_DATA, "The batch is done\n");
d6181b
+        state->refresh_index = 0;
d6181b
+        return EOK;
d6181b
     }
d6181b
 
d6181b
-    return ret;
d6181b
+    remaining = state->refresh_val_size - state->refresh_index;
d6181b
+    batch_size = MIN(remaining, state->batch_size);
d6181b
+    DEBUG(SSSDBG_FUNC_DATA,
d6181b
+          "This batch will refresh %zu entries (so far %zu/%zu)\n",
d6181b
+          batch_size, state->refresh_index, state->refresh_val_size);
d6181b
+
d6181b
+    for (size_t i = 0; i < batch_size; i++) {
d6181b
+        state->refresh_batch[i] = state->refresh_values[state->refresh_index];
d6181b
+        state->refresh_index++;
d6181b
+    }
d6181b
+
d6181b
+    tv = tevent_timeval_current_ofs(0, msec_delay * 1000);
d6181b
+    timeout = tevent_add_timer(state->be_ctx->ev, req, tv,
d6181b
+                               be_refresh_batch_step_wakeup, req);
d6181b
+    if (timeout == NULL) {
d6181b
+        return ENOMEM;
d6181b
+    }
d6181b
+
d6181b
+    return EAGAIN;
d6181b
+}
d6181b
+
d6181b
+static void be_refresh_batch_step_wakeup(struct tevent_context *ev,
d6181b
+                                         struct tevent_timer *tt,
d6181b
+                                         struct timeval tv,
d6181b
+                                         void *pvt)
d6181b
+{
d6181b
+    struct tevent_req *req;
d6181b
+    struct tevent_req *subreq = NULL;
d6181b
+    struct be_refresh_state *state = NULL;
d6181b
+
d6181b
+    req = talloc_get_type(pvt, struct tevent_req);
d6181b
+    state = tevent_req_data(req, struct be_refresh_state);
d6181b
+
d6181b
+    DEBUG(SSSDBG_TRACE_INTERNAL, "Issuing refresh\n");
d6181b
+    subreq = state->cb->send_fn(state, state->ev, state->be_ctx,
d6181b
+                                state->domain,
d6181b
+                                state->refresh_batch,
d6181b
+                                state->cb->pvt);
d6181b
+    if (subreq == NULL) {
d6181b
+        tevent_req_error(req, ENOMEM);
d6181b
+        return;
d6181b
+    }
d6181b
+    tevent_req_set_callback(subreq, be_refresh_done, req);
d6181b
 }
d6181b
 
d6181b
 static void be_refresh_done(struct tevent_req *subreq)
d6181b
@@ -342,8 +421,24 @@ static void be_refresh_done(struct tevent_req *subreq)
d6181b
         goto done;
d6181b
     }
d6181b
 
d6181b
+    ret = be_refresh_batch_step(req, 500);
d6181b
+    if (ret == EAGAIN) {
d6181b
+        DEBUG(SSSDBG_TRACE_INTERNAL,
d6181b
+              "Another batch in this step in progress\n");
d6181b
+        return;
d6181b
+    } else if (ret != EOK) {
d6181b
+        DEBUG(SSSDBG_OP_FAILURE,
d6181b
+              "be_refresh_batch_step failed [%d]: %s\n",
d6181b
+              ret, sss_strerror(ret));
d6181b
+        goto done;
d6181b
+    }
d6181b
+
d6181b
+    DEBUG(SSSDBG_TRACE_INTERNAL, "All batches in this step refreshed\n");
d6181b
+
d6181b
+    /* Proceed to the next step */
d6181b
     ret = be_refresh_step(req);
d6181b
     if (ret == EAGAIN) {
d6181b
+        DEBUG(SSSDBG_TRACE_INTERNAL, "Another step in progress\n");
d6181b
         return;
d6181b
     }
d6181b
 
d6181b
diff --git a/src/tests/cmocka/test_expire_common.c b/src/tests/cmocka/test_expire_common.c
d6181b
index 5d3ea02f3..4f6168190 100644
d6181b
--- a/src/tests/cmocka/test_expire_common.c
d6181b
+++ b/src/tests/cmocka/test_expire_common.c
d6181b
@@ -32,7 +32,7 @@
d6181b
 #include "tests/common_check.h"
d6181b
 #include "tests/cmocka/test_expire_common.h"
d6181b
 
d6181b
-#define MAX 100
d6181b
+#define MAX_VAL 100
d6181b
 
d6181b
 static char *now_str(TALLOC_CTX *mem_ctx, const char* format, int s)
d6181b
 {
d6181b
@@ -41,10 +41,10 @@ static char *now_str(TALLOC_CTX *mem_ctx, const char* format, int s)
d6181b
     size_t len;
d6181b
     char *timestr;
d6181b
 
d6181b
-    timestr = talloc_array(mem_ctx, char, MAX);
d6181b
+    timestr = talloc_array(mem_ctx, char, MAX_VAL);
d6181b
 
d6181b
     tm = gmtime(&t);
d6181b
-    len = strftime(timestr, MAX, format, tm);
d6181b
+    len = strftime(timestr, MAX_VAL, format, tm);
d6181b
     if (len == 0) {
d6181b
         return NULL;
d6181b
     }
d6181b
diff --git a/src/tests/sss_idmap-tests.c b/src/tests/sss_idmap-tests.c
d6181b
index 96f0861ac..e5f3f7041 100644
d6181b
--- a/src/tests/sss_idmap-tests.c
d6181b
+++ b/src/tests/sss_idmap-tests.c
d6181b
@@ -140,8 +140,8 @@ void idmap_add_domain_with_sec_slices_setup_cb_fail(void)
d6181b
 }
d6181b
 
d6181b
 
d6181b
-#define MAX 1000
d6181b
-char data[MAX];
d6181b
+#define DATA_MAX 1000
d6181b
+char data[DATA_MAX];
d6181b
 
d6181b
 enum idmap_error_code cb2(const char *dom_name,
d6181b
                           const char *dom_sid,
d6181b
@@ -154,10 +154,10 @@ enum idmap_error_code cb2(const char *dom_name,
d6181b
     char *p = (char*)pvt;
d6181b
     size_t len;
d6181b
 
d6181b
-    len = snprintf(p, MAX, "%s, %s %s, %"PRIu32", %"PRIu32", %" PRIu32,
d6181b
+    len = snprintf(p, DATA_MAX, "%s, %s %s, %"PRIu32", %"PRIu32", %" PRIu32,
d6181b
                    dom_name, dom_sid, range_id, min_id, max_id, first_rid);
d6181b
 
d6181b
-    if (len >= MAX) {
d6181b
+    if (len >= DATA_MAX) {
d6181b
         return IDMAP_OUT_OF_MEMORY;
d6181b
     }
d6181b
     return IDMAP_SUCCESS;
d6181b
diff --git a/src/util/util.h b/src/util/util.h
d6181b
index c5680d89a..13e434b62 100644
d6181b
--- a/src/util/util.h
d6181b
+++ b/src/util/util.h
d6181b
@@ -67,6 +67,14 @@
d6181b
 #define NULL 0
d6181b
 #endif
d6181b
 
d6181b
+#ifndef MIN
d6181b
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
d6181b
+#endif
d6181b
+
d6181b
+#ifndef MAX
d6181b
+#define MAX(a, b)  (((a) > (b)) ? (a) : (b))
d6181b
+#endif
d6181b
+
d6181b
 #define SSSD_MAIN_OPTS SSSD_DEBUG_OPTS
d6181b
 
d6181b
 #define SSSD_SERVER_OPTS(uid, gid) \
d6181b
-- 
d6181b
2.20.1
d6181b