Blame SOURCES/0054-BE-Send-refresh-requests-in-batches.patch

8d3578
From 84268efb9de2befd87b5e251bf2c99eb583923b6 Mon Sep 17 00:00:00 2001
8d3578
From: Jakub Hrozek <jhrozek@redhat.com>
8d3578
Date: Wed, 8 May 2019 23:16:07 +0200
8d3578
Subject: [PATCH 54/64] BE: Send refresh requests in batches
8d3578
8d3578
As we extend the background refresh into larger domains, the amount of
8d3578
data that SSSD refreshes on the background might be larger. And
8d3578
refreshing all expired entries in a single request might block sssd_be
8d3578
for a long time, either triggering the watchdog or starving other
8d3578
legitimate requests.
8d3578
8d3578
Therefore the background refresh will be done in batches of 200 entries.
8d3578
The first batch of every type (up to 200 users, up to 200 groups, ...)
8d3578
will be scheduled imediatelly and subsequent batches with a 0.5 second
8d3578
delay.
8d3578
8d3578
Related:
8d3578
https://pagure.io/SSSD/sssd/issue/4012
8d3578
8d3578
Reviewed-by: Sumit Bose <sbose@redhat.com>
8d3578
(cherry picked from commit 7443498cc074c323e3b307f47ed49d59a5001f64)
8d3578
8d3578
Reviewed-by: Sumit Bose <sbose@redhat.com>
8d3578
---
8d3578
 src/providers/be_refresh.c            | 131 ++++++++++++++++++++++----
8d3578
 src/tests/cmocka/test_expire_common.c |   6 +-
8d3578
 src/tests/sss_idmap-tests.c           |   8 +-
8d3578
 src/util/util.h                       |   8 ++
8d3578
 4 files changed, 128 insertions(+), 25 deletions(-)
8d3578
8d3578
diff --git a/src/providers/be_refresh.c b/src/providers/be_refresh.c
8d3578
index c4ff71e1f..5d86509bb 100644
8d3578
--- a/src/providers/be_refresh.c
8d3578
+++ b/src/providers/be_refresh.c
8d3578
@@ -204,8 +204,21 @@ struct be_refresh_state {
8d3578
     struct sss_domain_info *domain;
8d3578
     enum be_refresh_type index;
8d3578
     time_t period;
8d3578
+
8d3578
+    char **refresh_values;
8d3578
+    size_t refresh_val_size;
8d3578
+    size_t refresh_index;
8d3578
+
8d3578
+    size_t batch_size;
8d3578
+    char **refresh_batch;
8d3578
 };
8d3578
 
8d3578
+static errno_t be_refresh_batch_step(struct tevent_req *req,
8d3578
+                                     uint32_t msec_delay);
8d3578
+static void be_refresh_batch_step_wakeup(struct tevent_context *ev,
8d3578
+                                         struct tevent_timer *tt,
8d3578
+                                         struct timeval tv,
8d3578
+                                         void *pvt);
8d3578
 static errno_t be_refresh_step(struct tevent_req *req);
8d3578
 static void be_refresh_done(struct tevent_req *subreq);
8d3578
 
8d3578
@@ -236,6 +249,13 @@ struct tevent_req *be_refresh_send(TALLOC_CTX *mem_ctx,
8d3578
         goto immediately;
8d3578
     }
8d3578
 
8d3578
+    state->batch_size = 200;
8d3578
+    state->refresh_batch = talloc_zero_array(state, char *, state->batch_size+1);
8d3578
+    if (state->refresh_batch == NULL) {
8d3578
+        ret = ENOMEM;
8d3578
+        goto immediately;
8d3578
+    }
8d3578
+
8d3578
     ret = be_refresh_step(req);
8d3578
     if (ret == EOK) {
8d3578
         goto immediately;
8d3578
@@ -261,8 +281,6 @@ immediately:
8d3578
 static errno_t be_refresh_step(struct tevent_req *req)
8d3578
 {
8d3578
     struct be_refresh_state *state = NULL;
8d3578
-    struct tevent_req *subreq = NULL;
8d3578
-    char **values = NULL;
8d3578
     errno_t ret;
8d3578
 
8d3578
     state = tevent_req_data(req, struct be_refresh_state);
8d3578
@@ -289,42 +307,103 @@ static errno_t be_refresh_step(struct tevent_req *req)
8d3578
             goto done;
8d3578
         }
8d3578
 
8d3578
+        talloc_zfree(state->refresh_values);
8d3578
         ret = be_refresh_get_values(state, state->index, state->ctx->attr_name,
8d3578
-                                    state->domain, state->period, &values);
8d3578
+                                    state->domain, state->period,
8d3578
+                                    &state->refresh_values);
8d3578
         if (ret != EOK) {
8d3578
             DEBUG(SSSDBG_CRIT_FAILURE, "Unable to obtain DN list [%d]: %s\n",
8d3578
                                         ret, sss_strerror(ret));
8d3578
             goto done;
8d3578
         }
8d3578
 
8d3578
-        DEBUG(SSSDBG_TRACE_FUNC, "Refreshing %s in domain %s\n",
8d3578
-              state->cb->name, state->domain->name);
8d3578
+        for (state->refresh_val_size = 0;
8d3578
+             state->refresh_values[state->refresh_val_size] != NULL;
8d3578
+             state->refresh_val_size++);
8d3578
+
8d3578
+        DEBUG(SSSDBG_TRACE_FUNC, "Refreshing %zu %s in domain %s\n",
8d3578
+              state->refresh_val_size, state->cb->name, state->domain->name);
8d3578
 
8d3578
-        subreq = state->cb->send_fn(state, state->ev, state->be_ctx,
8d3578
-                                    state->domain, values, state->cb->pvt);
8d3578
-        if (subreq == NULL) {
8d3578
-            ret = ENOMEM;
8d3578
+        ret = be_refresh_batch_step(req, 0);
8d3578
+        if (ret == EOK) {
8d3578
+            state->index++;
8d3578
+            continue;
8d3578
+        } else if (ret != EAGAIN) {
8d3578
             goto done;
8d3578
         }
8d3578
-
8d3578
-        /* make the list disappear with subreq */
8d3578
-        talloc_steal(subreq, values);
8d3578
-
8d3578
-        tevent_req_set_callback(subreq, be_refresh_done, req);
8d3578
+        /* EAGAIN only, refreshing something.. */
8d3578
 
8d3578
         state->index++;
8d3578
-        ret = EAGAIN;
8d3578
         goto done;
8d3578
     }
8d3578
 
8d3578
     ret = EOK;
8d3578
 
8d3578
 done:
8d3578
-    if (ret != EOK && ret != EAGAIN) {
8d3578
-        talloc_free(values);
8d3578
+    return ret;
8d3578
+}
8d3578
+
8d3578
+static errno_t be_refresh_batch_step(struct tevent_req *req,
8d3578
+                                     uint32_t msec_delay)
8d3578
+{
8d3578
+    struct be_refresh_state *state = tevent_req_data(req, struct be_refresh_state);
8d3578
+    struct timeval tv;
8d3578
+    struct tevent_timer *timeout = NULL;
8d3578
+
8d3578
+    size_t remaining;
8d3578
+    size_t batch_size;
8d3578
+
8d3578
+    memset(state->refresh_batch, 0, sizeof(char *) * state->batch_size);
8d3578
+
8d3578
+    if (state->refresh_index >= state->refresh_val_size) {
8d3578
+        DEBUG(SSSDBG_FUNC_DATA, "The batch is done\n");
8d3578
+        state->refresh_index = 0;
8d3578
+        return EOK;
8d3578
     }
8d3578
 
8d3578
-    return ret;
8d3578
+    remaining = state->refresh_val_size - state->refresh_index;
8d3578
+    batch_size = MIN(remaining, state->batch_size);
8d3578
+    DEBUG(SSSDBG_FUNC_DATA,
8d3578
+          "This batch will refresh %zu entries (so far %zu/%zu)\n",
8d3578
+          batch_size, state->refresh_index, state->refresh_val_size);
8d3578
+
8d3578
+    for (size_t i = 0; i < batch_size; i++) {
8d3578
+        state->refresh_batch[i] = state->refresh_values[state->refresh_index];
8d3578
+        state->refresh_index++;
8d3578
+    }
8d3578
+
8d3578
+    tv = tevent_timeval_current_ofs(0, msec_delay * 1000);
8d3578
+    timeout = tevent_add_timer(state->be_ctx->ev, req, tv,
8d3578
+                               be_refresh_batch_step_wakeup, req);
8d3578
+    if (timeout == NULL) {
8d3578
+        return ENOMEM;
8d3578
+    }
8d3578
+
8d3578
+    return EAGAIN;
8d3578
+}
8d3578
+
8d3578
+static void be_refresh_batch_step_wakeup(struct tevent_context *ev,
8d3578
+                                         struct tevent_timer *tt,
8d3578
+                                         struct timeval tv,
8d3578
+                                         void *pvt)
8d3578
+{
8d3578
+    struct tevent_req *req;
8d3578
+    struct tevent_req *subreq = NULL;
8d3578
+    struct be_refresh_state *state = NULL;
8d3578
+
8d3578
+    req = talloc_get_type(pvt, struct tevent_req);
8d3578
+    state = tevent_req_data(req, struct be_refresh_state);
8d3578
+
8d3578
+    DEBUG(SSSDBG_TRACE_INTERNAL, "Issuing refresh\n");
8d3578
+    subreq = state->cb->send_fn(state, state->ev, state->be_ctx,
8d3578
+                                state->domain,
8d3578
+                                state->refresh_batch,
8d3578
+                                state->cb->pvt);
8d3578
+    if (subreq == NULL) {
8d3578
+        tevent_req_error(req, ENOMEM);
8d3578
+        return;
8d3578
+    }
8d3578
+    tevent_req_set_callback(subreq, be_refresh_done, req);
8d3578
 }
8d3578
 
8d3578
 static void be_refresh_done(struct tevent_req *subreq)
8d3578
@@ -342,8 +421,24 @@ static void be_refresh_done(struct tevent_req *subreq)
8d3578
         goto done;
8d3578
     }
8d3578
 
8d3578
+    ret = be_refresh_batch_step(req, 500);
8d3578
+    if (ret == EAGAIN) {
8d3578
+        DEBUG(SSSDBG_TRACE_INTERNAL,
8d3578
+              "Another batch in this step in progress\n");
8d3578
+        return;
8d3578
+    } else if (ret != EOK) {
8d3578
+        DEBUG(SSSDBG_OP_FAILURE,
8d3578
+              "be_refresh_batch_step failed [%d]: %s\n",
8d3578
+              ret, sss_strerror(ret));
8d3578
+        goto done;
8d3578
+    }
8d3578
+
8d3578
+    DEBUG(SSSDBG_TRACE_INTERNAL, "All batches in this step refreshed\n");
8d3578
+
8d3578
+    /* Proceed to the next step */
8d3578
     ret = be_refresh_step(req);
8d3578
     if (ret == EAGAIN) {
8d3578
+        DEBUG(SSSDBG_TRACE_INTERNAL, "Another step in progress\n");
8d3578
         return;
8d3578
     }
8d3578
 
8d3578
diff --git a/src/tests/cmocka/test_expire_common.c b/src/tests/cmocka/test_expire_common.c
8d3578
index 5d3ea02f3..4f6168190 100644
8d3578
--- a/src/tests/cmocka/test_expire_common.c
8d3578
+++ b/src/tests/cmocka/test_expire_common.c
8d3578
@@ -32,7 +32,7 @@
8d3578
 #include "tests/common_check.h"
8d3578
 #include "tests/cmocka/test_expire_common.h"
8d3578
 
8d3578
-#define MAX 100
8d3578
+#define MAX_VAL 100
8d3578
 
8d3578
 static char *now_str(TALLOC_CTX *mem_ctx, const char* format, int s)
8d3578
 {
8d3578
@@ -41,10 +41,10 @@ static char *now_str(TALLOC_CTX *mem_ctx, const char* format, int s)
8d3578
     size_t len;
8d3578
     char *timestr;
8d3578
 
8d3578
-    timestr = talloc_array(mem_ctx, char, MAX);
8d3578
+    timestr = talloc_array(mem_ctx, char, MAX_VAL);
8d3578
 
8d3578
     tm = gmtime(&t);
8d3578
-    len = strftime(timestr, MAX, format, tm);
8d3578
+    len = strftime(timestr, MAX_VAL, format, tm);
8d3578
     if (len == 0) {
8d3578
         return NULL;
8d3578
     }
8d3578
diff --git a/src/tests/sss_idmap-tests.c b/src/tests/sss_idmap-tests.c
8d3578
index 885913645..ef6843403 100644
8d3578
--- a/src/tests/sss_idmap-tests.c
8d3578
+++ b/src/tests/sss_idmap-tests.c
8d3578
@@ -140,8 +140,8 @@ void idmap_add_domain_with_sec_slices_setup_cb_fail(void)
8d3578
 }
8d3578
 
8d3578
 
8d3578
-#define MAX 1000
8d3578
-char data[MAX];
8d3578
+#define DATA_MAX 1000
8d3578
+char data[DATA_MAX];
8d3578
 
8d3578
 enum idmap_error_code cb2(const char *dom_name,
8d3578
                           const char *dom_sid,
8d3578
@@ -154,10 +154,10 @@ enum idmap_error_code cb2(const char *dom_name,
8d3578
     char *p = (char*)pvt;
8d3578
     size_t len;
8d3578
 
8d3578
-    len = snprintf(p, MAX, "%s, %s %s, %"PRIu32", %"PRIu32", %" PRIu32,
8d3578
+    len = snprintf(p, DATA_MAX, "%s, %s %s, %"PRIu32", %"PRIu32", %" PRIu32,
8d3578
                    dom_name, dom_sid, range_id, min_id, max_id, first_rid);
8d3578
 
8d3578
-    if (len >= MAX) {
8d3578
+    if (len >= DATA_MAX) {
8d3578
         return IDMAP_OUT_OF_MEMORY;
8d3578
     }
8d3578
     return IDMAP_SUCCESS;
8d3578
diff --git a/src/util/util.h b/src/util/util.h
8d3578
index 3003583b7..fce7e42c3 100644
8d3578
--- a/src/util/util.h
8d3578
+++ b/src/util/util.h
8d3578
@@ -68,6 +68,14 @@
8d3578
 
8d3578
 #define ZERO_STRUCT(x) memset((char *)&(x), 0, sizeof(x))
8d3578
 
8d3578
+#ifndef MIN
8d3578
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
8d3578
+#endif
8d3578
+
8d3578
+#ifndef MAX
8d3578
+#define MAX(a, b)  (((a) > (b)) ? (a) : (b))
8d3578
+#endif
8d3578
+
8d3578
 #define SSSD_MAIN_OPTS SSSD_DEBUG_OPTS
8d3578
 
8d3578
 #define SSSD_SERVER_OPTS(uid, gid) \
8d3578
-- 
8d3578
2.20.1
8d3578