ebde72
--- a/server/mpm/event/event.c	2022/05/24 08:59:30	1901198
ebde72
+++ b/server/mpm/event/event.c	2022/05/24 09:00:19	1901199
ebde72
@@ -379,7 +379,7 @@
ebde72
      * We use this value to optimize routines that have to scan the entire
ebde72
      * scoreboard.
ebde72
      */
ebde72
-    int max_daemons_limit;
ebde72
+    int max_daemon_used;
ebde72
 
ebde72
     /*
ebde72
      * All running workers, active and shutting down, including those that
ebde72
@@ -645,7 +645,7 @@
ebde72
     *rv = APR_SUCCESS;
ebde72
     switch (query_code) {
ebde72
     case AP_MPMQ_MAX_DAEMON_USED:
ebde72
-        *result = retained->max_daemons_limit;
ebde72
+        *result = retained->max_daemon_used;
ebde72
         break;
ebde72
     case AP_MPMQ_IS_THREADED:
ebde72
         *result = AP_MPMQ_STATIC;
ebde72
@@ -696,14 +696,32 @@
ebde72
     return OK;
ebde72
 }
ebde72
 
ebde72
-static void event_note_child_killed(int childnum, pid_t pid, ap_generation_t gen)
ebde72
+static void event_note_child_stopped(int slot, pid_t pid, ap_generation_t gen)
ebde72
 {
ebde72
-    if (childnum != -1) { /* child had a scoreboard slot? */
ebde72
-        ap_run_child_status(ap_server_conf,
ebde72
-                            ap_scoreboard_image->parent[childnum].pid,
ebde72
-                            ap_scoreboard_image->parent[childnum].generation,
ebde72
-                            childnum, MPM_CHILD_EXITED);
ebde72
-        ap_scoreboard_image->parent[childnum].pid = 0;
ebde72
+    if (slot != -1) { /* child had a scoreboard slot? */
ebde72
+        process_score *ps = &ap_scoreboard_image->parent[slot];
ebde72
+        int i;
ebde72
+
ebde72
+        pid = ps->pid;
ebde72
+        gen = ps->generation;
ebde72
+        for (i = 0; i < threads_per_child; i++) {
ebde72
+            ap_update_child_status_from_indexes(slot, i, SERVER_DEAD, NULL);
ebde72
+        }
ebde72
+        ap_run_child_status(ap_server_conf, pid, gen, slot, MPM_CHILD_EXITED);
ebde72
+        if (ps->quiescing != 2) { /* vs perform_idle_server_maintenance() */
ebde72
+            retained->active_daemons--;
ebde72
+        }
ebde72
+        retained->total_daemons--;
ebde72
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
ebde72
+                     "Child %d stopped: pid %d, gen %d, "
ebde72
+                     "active %d/%d, total %d/%d/%d, quiescing %d",
ebde72
+                     slot, (int)pid, (int)gen,
ebde72
+                     retained->active_daemons, active_daemons_limit,
ebde72
+                     retained->total_daemons, retained->max_daemon_used,
ebde72
+                     server_limit, ps->quiescing);
ebde72
+        ps->not_accepting = 0;
ebde72
+        ps->quiescing = 0;
ebde72
+        ps->pid = 0;
ebde72
     }
ebde72
     else {
ebde72
         ap_run_child_status(ap_server_conf, pid, gen, -1, MPM_CHILD_EXITED);
ebde72
@@ -713,9 +731,19 @@
ebde72
 static void event_note_child_started(int slot, pid_t pid)
ebde72
 {
ebde72
     ap_generation_t gen = retained->mpm->my_generation;
ebde72
+
ebde72
+    retained->total_daemons++;
ebde72
+    retained->active_daemons++;
ebde72
     ap_scoreboard_image->parent[slot].pid = pid;
ebde72
     ap_scoreboard_image->parent[slot].generation = gen;
ebde72
     ap_run_child_status(ap_server_conf, pid, gen, slot, MPM_CHILD_STARTED);
ebde72
+    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
ebde72
+                 "Child %d started: pid %d, gen %d, "
ebde72
+                 "active %d/%d, total %d/%d/%d",
ebde72
+                 slot, (int)pid, (int)gen,
ebde72
+                 retained->active_daemons, active_daemons_limit,
ebde72
+                 retained->total_daemons, retained->max_daemon_used,
ebde72
+                 server_limit);
ebde72
 }
ebde72
 
ebde72
 static const char *event_get_name(void)
ebde72
@@ -737,7 +765,7 @@
ebde72
     }
ebde72
 
ebde72
     if (one_process) {
ebde72
-        event_note_child_killed(/* slot */ 0, 0, 0);
ebde72
+        event_note_child_stopped(/* slot */ 0, 0, 0);
ebde72
     }
ebde72
 
ebde72
     exit(code);
ebde72
@@ -2712,8 +2740,8 @@
ebde72
 {
ebde72
     int pid;
ebde72
 
ebde72
-    if (slot + 1 > retained->max_daemons_limit) {
ebde72
-        retained->max_daemons_limit = slot + 1;
ebde72
+    if (slot + 1 > retained->max_daemon_used) {
ebde72
+        retained->max_daemon_used = slot + 1;
ebde72
     }
ebde72
 
ebde72
     if (ap_scoreboard_image->parent[slot].pid != 0) {
ebde72
@@ -2781,11 +2809,7 @@
ebde72
         return -1;
ebde72
     }
ebde72
 
ebde72
-    ap_scoreboard_image->parent[slot].quiescing = 0;
ebde72
-    ap_scoreboard_image->parent[slot].not_accepting = 0;
ebde72
     event_note_child_started(slot, pid);
ebde72
-    retained->active_daemons++;
ebde72
-    retained->total_daemons++;
ebde72
     return 0;
ebde72
 }
ebde72
 
ebde72
@@ -2805,7 +2829,8 @@
ebde72
     }
ebde72
 }
ebde72
 
ebde72
-static void perform_idle_server_maintenance(int child_bucket)
ebde72
+static void perform_idle_server_maintenance(int child_bucket,
ebde72
+                                            int *max_daemon_used)
ebde72
 {
ebde72
     int num_buckets = retained->mpm->num_buckets;
ebde72
     int idle_thread_count = 0;
ebde72
@@ -2821,7 +2846,7 @@
ebde72
             /* We only care about child_bucket in this call */
ebde72
             continue;
ebde72
         }
ebde72
-        if (i >= retained->max_daemons_limit &&
ebde72
+        if (i >= retained->max_daemon_used &&
ebde72
             free_length == retained->idle_spawn_rate[child_bucket]) {
ebde72
             /* short cut if all active processes have been examined and
ebde72
              * enough empty scoreboard slots have been found
ebde72
@@ -2835,6 +2860,13 @@
ebde72
             if (ps->quiescing == 1) {
ebde72
                 ps->quiescing = 2;
ebde72
                 retained->active_daemons--;
ebde72
+                ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
ebde72
+                             "Child %d quiescing: pid %d, gen %d, "
ebde72
+                             "active %d/%d, total %d/%d/%d",
ebde72
+                             i, (int)ps->pid, (int)ps->generation,
ebde72
+                             retained->active_daemons, active_daemons_limit,
ebde72
+                             retained->total_daemons, retained->max_daemon_used,
ebde72
+                             server_limit);
ebde72
             }
ebde72
             for (j = 0; j < threads_per_child; j++) {
ebde72
                 int status = ap_scoreboard_image->servers[i][j].status;
ebde72
@@ -2863,8 +2895,9 @@
ebde72
             free_slots[free_length++] = i;
ebde72
         }
ebde72
     }
ebde72
-
ebde72
-    retained->max_daemons_limit = last_non_dead + 1;
ebde72
+    if (*max_daemon_used < last_non_dead + 1) {
ebde72
+        *max_daemon_used = last_non_dead + 1;
ebde72
+    }
ebde72
 
ebde72
     if (retained->sick_child_detected) {
ebde72
         if (had_healthy_child) {
ebde72
@@ -2893,6 +2926,10 @@
ebde72
         }
ebde72
     }
ebde72
 
ebde72
+    AP_DEBUG_ASSERT(retained->active_daemons <= retained->total_daemons
ebde72
+                    && retained->total_daemons <= retained->max_daemon_used
ebde72
+                    && retained->max_daemon_used <= server_limit);
ebde72
+
ebde72
     if (idle_thread_count > max_spare_threads / num_buckets) {
ebde72
         /*
ebde72
          * Child processes that we ask to shut down won't die immediately
ebde72
@@ -2915,13 +2952,12 @@
ebde72
                            active_daemons_limit));
ebde72
         ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
ebde72
                      "%shutting down one child: "
ebde72
-                     "active daemons %d / active limit %d / "
ebde72
-                     "total daemons %d / ServerLimit %d / "
ebde72
-                     "idle threads %d / max workers %d",
ebde72
+                     "active %d/%d, total %d/%d/%d, "
ebde72
+                     "idle threads %d, max workers %d",
ebde72
                      (do_kill) ? "S" : "Not s",
ebde72
                      retained->active_daemons, active_daemons_limit,
ebde72
-                     retained->total_daemons, server_limit,
ebde72
-                     idle_thread_count, max_workers);
ebde72
+                     retained->total_daemons, retained->max_daemon_used,
ebde72
+                     server_limit, idle_thread_count, max_workers);
ebde72
         if (do_kill) {
ebde72
             ap_mpm_podx_signal(all_buckets[child_bucket].pod,
ebde72
                                AP_MPM_PODX_GRACEFUL);
ebde72
@@ -2970,10 +3006,14 @@
ebde72
                 else {
ebde72
                     ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
ebde72
                                  "server is at active daemons limit, spawning "
ebde72
-                                 "of %d children cancelled: %d/%d active, "
ebde72
-                                 "rate %d", free_length,
ebde72
+                                 "of %d children cancelled: active %d/%d, "
ebde72
+                                 "total %d/%d/%d, rate %d", free_length,
ebde72
                                  retained->active_daemons, active_daemons_limit,
ebde72
-                                 retained->idle_spawn_rate[child_bucket]);
ebde72
+                                 retained->total_daemons, retained->max_daemon_used,
ebde72
+                                 server_limit, retained->idle_spawn_rate[child_bucket]);
ebde72
+                    /* reset the spawning rate and prevent its growth below */
ebde72
+                    retained->idle_spawn_rate[child_bucket] = 1;
ebde72
+                    ++retained->hold_off_on_exponential_spawning;
ebde72
                     free_length = 0;
ebde72
                 }
ebde72
             }
ebde72
@@ -2989,12 +3029,13 @@
ebde72
                              retained->total_daemons);
ebde72
             }
ebde72
             for (i = 0; i < free_length; ++i) {
ebde72
-                ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
ebde72
-                             "Spawning new child: slot %d active / "
ebde72
-                             "total daemons: %d/%d",
ebde72
-                             free_slots[i], retained->active_daemons,
ebde72
-                             retained->total_daemons);
ebde72
-                make_child(ap_server_conf, free_slots[i], child_bucket);
ebde72
+                int slot = free_slots[i];
ebde72
+                if (make_child(ap_server_conf, slot, child_bucket) < 0) {
ebde72
+                    continue;
ebde72
+                }
ebde72
+                if (*max_daemon_used < slot + 1) {
ebde72
+                    *max_daemon_used = slot + 1;
ebde72
+                }
ebde72
             }
ebde72
             /* the next time around we want to spawn twice as many if this
ebde72
              * wasn't good enough, but not if we've just done a graceful
ebde72
@@ -3016,6 +3057,7 @@
ebde72
 static void server_main_loop(int remaining_children_to_start)
ebde72
 {
ebde72
     int num_buckets = retained->mpm->num_buckets;
ebde72
+    int max_daemon_used = 0;
ebde72
     int child_slot;
ebde72
     apr_exit_why_e exitwhy;
ebde72
     int status, processed_status;
ebde72
@@ -3061,19 +3103,8 @@
ebde72
             }
ebde72
             /* non-fatal death... note that it's gone in the scoreboard. */
ebde72
             if (child_slot >= 0) {
ebde72
-                process_score *ps;
ebde72
+                event_note_child_stopped(child_slot, 0, 0);
ebde72
 
ebde72
-                for (i = 0; i < threads_per_child; i++)
ebde72
-                    ap_update_child_status_from_indexes(child_slot, i,
ebde72
-                                                        SERVER_DEAD, NULL);
ebde72
-
ebde72
-                event_note_child_killed(child_slot, 0, 0);
ebde72
-                ps = &ap_scoreboard_image->parent[child_slot];
ebde72
-                if (ps->quiescing != 2)
ebde72
-                    retained->active_daemons--;
ebde72
-                ps->quiescing = 0;
ebde72
-                /* NOTE: We don't dec in the (child_slot < 0) case! */
ebde72
-                retained->total_daemons--;
ebde72
                 if (processed_status == APEXIT_CHILDSICK) {
ebde72
                     /* resource shortage, minimize the fork rate */
ebde72
                     retained->idle_spawn_rate[child_slot % num_buckets] = 1;
ebde72
@@ -3123,9 +3154,11 @@
ebde72
             continue;
ebde72
         }
ebde72
 
ebde72
+        max_daemon_used = 0;
ebde72
         for (i = 0; i < num_buckets; i++) {
ebde72
-            perform_idle_server_maintenance(i);
ebde72
+            perform_idle_server_maintenance(i, &max_daemon_used);
ebde72
         }
ebde72
+        retained->max_daemon_used = max_daemon_used;
ebde72
     }
ebde72
 }
ebde72
 
ebde72
@@ -3213,7 +3246,7 @@
ebde72
                                AP_MPM_PODX_RESTART);
ebde72
         }
ebde72
         ap_reclaim_child_processes(1, /* Start with SIGTERM */
ebde72
-                                   event_note_child_killed);
ebde72
+                                   event_note_child_stopped);
ebde72
 
ebde72
         if (!child_fatal) {
ebde72
             /* cleanup pid file on normal shutdown */
ebde72
@@ -3239,7 +3272,7 @@
ebde72
             ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
ebde72
                                AP_MPM_PODX_GRACEFUL);
ebde72
         }
ebde72
-        ap_relieve_child_processes(event_note_child_killed);
ebde72
+        ap_relieve_child_processes(event_note_child_stopped);
ebde72
 
ebde72
         if (!child_fatal) {
ebde72
             /* cleanup pid file on normal shutdown */
ebde72
@@ -3261,10 +3294,10 @@
ebde72
             apr_sleep(apr_time_from_sec(1));
ebde72
 
ebde72
             /* Relieve any children which have now exited */
ebde72
-            ap_relieve_child_processes(event_note_child_killed);
ebde72
+            ap_relieve_child_processes(event_note_child_stopped);
ebde72
 
ebde72
             active_children = 0;
ebde72
-            for (index = 0; index < retained->max_daemons_limit; ++index) {
ebde72
+            for (index = 0; index < retained->max_daemon_used; ++index) {
ebde72
                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
ebde72
                     active_children = 1;
ebde72
                     /* Having just one child is enough to stay around */
ebde72
@@ -3282,7 +3315,7 @@
ebde72
             ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
ebde72
                                AP_MPM_PODX_RESTART);
ebde72
         }
ebde72
-        ap_reclaim_child_processes(1, event_note_child_killed);
ebde72
+        ap_reclaim_child_processes(1, event_note_child_stopped);
ebde72
 
ebde72
         return DONE;
ebde72
     }
ebde72
@@ -3302,8 +3335,7 @@
ebde72
 
ebde72
     if (!retained->mpm->is_ungraceful) {
ebde72
         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00493)
ebde72
-                     AP_SIG_GRACEFUL_STRING
ebde72
-                     " received.  Doing graceful restart");
ebde72
+                     AP_SIG_GRACEFUL_STRING " received.  Doing graceful restart");
ebde72
         /* wake up the children...time to die.  But we'll have more soon */
ebde72
         for (i = 0; i < num_buckets; i++) {
ebde72
             ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
ebde72
@@ -3316,6 +3348,8 @@
ebde72
 
ebde72
     }
ebde72
     else {
ebde72
+        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00494)
ebde72
+                     "SIGHUP received.  Attempting to restart");
ebde72
         /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
ebde72
          * and a SIGHUP, we may as well use the same signal, because some user
ebde72
          * pthreads are stealing signals from us left and right.
ebde72
@@ -3326,9 +3360,7 @@
ebde72
         }
ebde72
 
ebde72
         ap_reclaim_child_processes(1,  /* Start with SIGTERM */
ebde72
-                                   event_note_child_killed);
ebde72
-        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00494)
ebde72
-                     "SIGHUP received.  Attempting to restart");
ebde72
+                                   event_note_child_stopped);
ebde72
     }
ebde72
 
ebde72
     return OK;