--- a/server/mpm/event/event.c 2022/05/24 08:59:30 1901198
+++ b/server/mpm/event/event.c 2022/05/24 09:00:19 1901199
@@ -379,7 +379,7 @@
* We use this value to optimize routines that have to scan the entire
* scoreboard.
*/
- int max_daemons_limit;
+ int max_daemon_used;
/*
* All running workers, active and shutting down, including those that
@@ -645,7 +645,7 @@
*rv = APR_SUCCESS;
switch (query_code) {
case AP_MPMQ_MAX_DAEMON_USED:
- *result = retained->max_daemons_limit;
+ *result = retained->max_daemon_used;
break;
case AP_MPMQ_IS_THREADED:
*result = AP_MPMQ_STATIC;
@@ -696,14 +696,32 @@
return OK;
}
-static void event_note_child_killed(int childnum, pid_t pid, ap_generation_t gen)
+static void event_note_child_stopped(int slot, pid_t pid, ap_generation_t gen)
{
- if (childnum != -1) { /* child had a scoreboard slot? */
- ap_run_child_status(ap_server_conf,
- ap_scoreboard_image->parent[childnum].pid,
- ap_scoreboard_image->parent[childnum].generation,
- childnum, MPM_CHILD_EXITED);
- ap_scoreboard_image->parent[childnum].pid = 0;
+ if (slot != -1) { /* child had a scoreboard slot? */
+ process_score *ps = &ap_scoreboard_image->parent[slot];
+ int i;
+
+ pid = ps->pid;
+ gen = ps->generation;
+ for (i = 0; i < threads_per_child; i++) {
+ ap_update_child_status_from_indexes(slot, i, SERVER_DEAD, NULL);
+ }
+ ap_run_child_status(ap_server_conf, pid, gen, slot, MPM_CHILD_EXITED);
+ if (ps->quiescing != 2) { /* when 2, perform_idle_server_maintenance() already decremented active_daemons */
+ retained->active_daemons--;
+ }
+ retained->total_daemons--;
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
+ "Child %d stopped: pid %d, gen %d, "
+ "active %d/%d, total %d/%d/%d, quiescing %d",
+ slot, (int)pid, (int)gen,
+ retained->active_daemons, active_daemons_limit,
+ retained->total_daemons, retained->max_daemon_used,
+ server_limit, ps->quiescing);
+ ps->not_accepting = 0;
+ ps->quiescing = 0;
+ ps->pid = 0;
}
else {
ap_run_child_status(ap_server_conf, pid, gen, -1, MPM_CHILD_EXITED);
@@ -713,9 +731,19 @@
static void event_note_child_started(int slot, pid_t pid)
{
ap_generation_t gen = retained->mpm->my_generation;
+
+ retained->total_daemons++;
+ retained->active_daemons++;
ap_scoreboard_image->parent[slot].pid = pid;
ap_scoreboard_image->parent[slot].generation = gen;
ap_run_child_status(ap_server_conf, pid, gen, slot, MPM_CHILD_STARTED);
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
+ "Child %d started: pid %d, gen %d, "
+ "active %d/%d, total %d/%d/%d",
+ slot, (int)pid, (int)gen,
+ retained->active_daemons, active_daemons_limit,
+ retained->total_daemons, retained->max_daemon_used,
+ server_limit);
}
static const char *event_get_name(void)
@@ -737,7 +765,7 @@
}
if (one_process) {
- event_note_child_killed(/* slot */ 0, 0, 0);
+ event_note_child_stopped(/* slot */ 0, 0, 0);
}
exit(code);
@@ -2712,8 +2740,8 @@
{
int pid;
- if (slot + 1 > retained->max_daemons_limit) {
- retained->max_daemons_limit = slot + 1;
+ if (slot + 1 > retained->max_daemon_used) {
+ retained->max_daemon_used = slot + 1;
}
if (ap_scoreboard_image->parent[slot].pid != 0) {
@@ -2781,11 +2809,7 @@
return -1;
}
- ap_scoreboard_image->parent[slot].quiescing = 0;
- ap_scoreboard_image->parent[slot].not_accepting = 0;
event_note_child_started(slot, pid);
- retained->active_daemons++;
- retained->total_daemons++;
return 0;
}
@@ -2805,7 +2829,8 @@
}
}
-static void perform_idle_server_maintenance(int child_bucket)
+static void perform_idle_server_maintenance(int child_bucket,
+ int *max_daemon_used)
{
int num_buckets = retained->mpm->num_buckets;
int idle_thread_count = 0;
@@ -2821,7 +2846,7 @@
/* We only care about child_bucket in this call */
continue;
}
- if (i >= retained->max_daemons_limit &&
+ if (i >= retained->max_daemon_used &&
free_length == retained->idle_spawn_rate[child_bucket]) {
/* short cut if all active processes have been examined and
* enough empty scoreboard slots have been found
@@ -2835,6 +2860,13 @@
if (ps->quiescing == 1) {
ps->quiescing = 2;
retained->active_daemons--;
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
+ "Child %d quiescing: pid %d, gen %d, "
+ "active %d/%d, total %d/%d/%d",
+ i, (int)ps->pid, (int)ps->generation,
+ retained->active_daemons, active_daemons_limit,
+ retained->total_daemons, retained->max_daemon_used,
+ server_limit);
}
for (j = 0; j < threads_per_child; j++) {
int status = ap_scoreboard_image->servers[i][j].status;
@@ -2863,8 +2895,9 @@
free_slots[free_length++] = i;
}
}
-
- retained->max_daemons_limit = last_non_dead + 1;
+ if (*max_daemon_used < last_non_dead + 1) {
+ *max_daemon_used = last_non_dead + 1;
+ }
if (retained->sick_child_detected) {
if (had_healthy_child) {
@@ -2893,6 +2926,10 @@
}
}
+ AP_DEBUG_ASSERT(retained->active_daemons <= retained->total_daemons
+ && retained->total_daemons <= retained->max_daemon_used
+ && retained->max_daemon_used <= server_limit);
+
if (idle_thread_count > max_spare_threads / num_buckets) {
/*
* Child processes that we ask to shut down won't die immediately
@@ -2915,13 +2952,12 @@
active_daemons_limit));
ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
"%shutting down one child: "
- "active daemons %d / active limit %d / "
- "total daemons %d / ServerLimit %d / "
- "idle threads %d / max workers %d",
+ "active %d/%d, total %d/%d/%d, "
+ "idle threads %d, max workers %d",
(do_kill) ? "S" : "Not s",
retained->active_daemons, active_daemons_limit,
- retained->total_daemons, server_limit,
- idle_thread_count, max_workers);
+ retained->total_daemons, retained->max_daemon_used,
+ server_limit, idle_thread_count, max_workers);
if (do_kill) {
ap_mpm_podx_signal(all_buckets[child_bucket].pod,
AP_MPM_PODX_GRACEFUL);
@@ -2970,10 +3006,14 @@
else {
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
"server is at active daemons limit, spawning "
- "of %d children cancelled: %d/%d active, "
- "rate %d", free_length,
+ "of %d children cancelled: active %d/%d, "
+ "total %d/%d/%d, rate %d", free_length,
retained->active_daemons, active_daemons_limit,
- retained->idle_spawn_rate[child_bucket]);
+ retained->total_daemons, retained->max_daemon_used,
+ server_limit, retained->idle_spawn_rate[child_bucket]);
+ /* reset the spawning rate and prevent its growth below */
+ retained->idle_spawn_rate[child_bucket] = 1;
+ ++retained->hold_off_on_exponential_spawning;
free_length = 0;
}
}
@@ -2989,12 +3029,13 @@
retained->total_daemons);
}
for (i = 0; i < free_length; ++i) {
- ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
- "Spawning new child: slot %d active / "
- "total daemons: %d/%d",
- free_slots[i], retained->active_daemons,
- retained->total_daemons);
- make_child(ap_server_conf, free_slots[i], child_bucket);
+ int slot = free_slots[i];
+ if (make_child(ap_server_conf, slot, child_bucket) < 0) {
+ continue;
+ }
+ if (*max_daemon_used < slot + 1) {
+ *max_daemon_used = slot + 1;
+ }
}
/* the next time around we want to spawn twice as many if this
* wasn't good enough, but not if we've just done a graceful
@@ -3016,6 +3057,7 @@
static void server_main_loop(int remaining_children_to_start)
{
int num_buckets = retained->mpm->num_buckets;
+ int max_daemon_used = 0;
int child_slot;
apr_exit_why_e exitwhy;
int status, processed_status;
@@ -3061,19 +3103,8 @@
}
/* non-fatal death... note that it's gone in the scoreboard. */
if (child_slot >= 0) {
- process_score *ps;
+ event_note_child_stopped(child_slot, 0, 0);
- for (i = 0; i < threads_per_child; i++)
- ap_update_child_status_from_indexes(child_slot, i,
- SERVER_DEAD, NULL);
-
- event_note_child_killed(child_slot, 0, 0);
- ps = &ap_scoreboard_image->parent[child_slot];
- if (ps->quiescing != 2)
- retained->active_daemons--;
- ps->quiescing = 0;
- /* NOTE: We don't dec in the (child_slot < 0) case! */
- retained->total_daemons--;
if (processed_status == APEXIT_CHILDSICK) {
/* resource shortage, minimize the fork rate */
retained->idle_spawn_rate[child_slot % num_buckets] = 1;
@@ -3123,9 +3154,11 @@
continue;
}
+ max_daemon_used = 0;
for (i = 0; i < num_buckets; i++) {
- perform_idle_server_maintenance(i);
+ perform_idle_server_maintenance(i, &max_daemon_used);
}
+ retained->max_daemon_used = max_daemon_used;
}
}
@@ -3213,7 +3246,7 @@
AP_MPM_PODX_RESTART);
}
ap_reclaim_child_processes(1, /* Start with SIGTERM */
- event_note_child_killed);
+ event_note_child_stopped);
if (!child_fatal) {
/* cleanup pid file on normal shutdown */
@@ -3239,7 +3272,7 @@
ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
AP_MPM_PODX_GRACEFUL);
}
- ap_relieve_child_processes(event_note_child_killed);
+ ap_relieve_child_processes(event_note_child_stopped);
if (!child_fatal) {
/* cleanup pid file on normal shutdown */
@@ -3261,10 +3294,10 @@
apr_sleep(apr_time_from_sec(1));
/* Relieve any children which have now exited */
- ap_relieve_child_processes(event_note_child_killed);
+ ap_relieve_child_processes(event_note_child_stopped);
active_children = 0;
- for (index = 0; index < retained->max_daemons_limit; ++index) {
+ for (index = 0; index < retained->max_daemon_used; ++index) {
if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
active_children = 1;
/* Having just one child is enough to stay around */
@@ -3282,7 +3315,7 @@
ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
AP_MPM_PODX_RESTART);
}
- ap_reclaim_child_processes(1, event_note_child_killed);
+ ap_reclaim_child_processes(1, event_note_child_stopped);
return DONE;
}
@@ -3302,8 +3335,7 @@
if (!retained->mpm->is_ungraceful) {
ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00493)
- AP_SIG_GRACEFUL_STRING
- " received. Doing graceful restart");
+ AP_SIG_GRACEFUL_STRING " received. Doing graceful restart");
/* wake up the children...time to die. But we'll have more soon */
for (i = 0; i < num_buckets; i++) {
ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
@@ -3316,6 +3348,8 @@
}
else {
+ ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00494)
+ "SIGHUP received. Attempting to restart");
/* Kill 'em all. Since the child acts the same on the parents SIGTERM
* and a SIGHUP, we may as well use the same signal, because some user
* pthreads are stealing signals from us left and right.
@@ -3326,9 +3360,7 @@
}
ap_reclaim_child_processes(1, /* Start with SIGTERM */
- event_note_child_killed);
- ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00494)
- "SIGHUP received. Attempting to restart");
+ event_note_child_stopped);
}
return OK;