diff --git a/SOURCES/0004-Doc-sbd.8.pod-use-the-generic-term-cluster-services-.patch b/SOURCES/0004-Doc-sbd.8.pod-use-the-generic-term-cluster-services-.patch new file mode 100644 index 0000000..ce9d698 --- /dev/null +++ b/SOURCES/0004-Doc-sbd.8.pod-use-the-generic-term-cluster-services-.patch @@ -0,0 +1,26 @@ +From 6d4289655dacad4b72fb64373c37bd1ad33649e6 Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Tue, 22 Jan 2019 16:33:04 +0100 +Subject: [PATCH] Doc: sbd.8.pod: use the generic term "cluster services" + instead of the specific "openais" + +--- + man/sbd.8.pod | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/man/sbd.8.pod b/man/sbd.8.pod +index ffd01c2..fed6bd3 100644 +--- a/man/sbd.8.pod ++++ b/man/sbd.8.pod +@@ -476,7 +476,7 @@ storage (with internal redundancy) anyway; the SBD device does not + introduce an additional single point of failure then. + + If the SBD device is not accessible, the daemon will fail to start and +-inhibit openais startup. ++inhibit startup of cluster services. + + =item Two devices + +-- +1.8.3.1 + diff --git a/SOURCES/0005-Doc-sbd.sysconfig-watchdog-timeout-set-in-the-on-dis.patch b/SOURCES/0005-Doc-sbd.sysconfig-watchdog-timeout-set-in-the-on-dis.patch new file mode 100644 index 0000000..8c096e0 --- /dev/null +++ b/SOURCES/0005-Doc-sbd.sysconfig-watchdog-timeout-set-in-the-on-dis.patch @@ -0,0 +1,27 @@ +From 091e10ae3f62239251b53bf7d81d47a57a9b82f2 Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Wed, 23 Jan 2019 17:21:15 +0100 +Subject: [PATCH] Doc: sbd.sysconfig: watchdog timeout set in the on-disk + metadata takes precedence + +--- + src/sbd.sysconfig | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig +index e661f96..f163f21 100644 +--- a/src/sbd.sysconfig ++++ b/src/sbd.sysconfig +@@ -68,6 +68,9 @@ SBD_WATCHDOG_DEV=/dev/watchdog + # If your sbd device(s) reside on a multipath setup or iSCSI, this + # should be the time required to detect a path failure. + # ++# Be aware that watchdog timeout set in the on-disk metadata takes ++# precedence. ++# + SBD_WATCHDOG_TIMEOUT=5 + + ## Type: string +-- +1.8.3.1 + diff --git a/SOURCES/0006-Refactor-fail-earlier-on-invalid-servants.patch b/SOURCES/0006-Refactor-fail-earlier-on-invalid-servants.patch new file mode 100644 index 0000000..cb71002 --- /dev/null +++ b/SOURCES/0006-Refactor-fail-earlier-on-invalid-servants.patch @@ -0,0 +1,142 @@ +From 8301cbafed191f30656a22876941cc7c9189b623 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Thu, 31 Jan 2019 14:42:01 +0100 +Subject: [PATCH] Refactor: fail earlier on invalid servants + +--- + src/sbd-inquisitor.c | 51 ++++++++++++++++++++++++++++++++------------------- + src/sbd-md.c | 7 +------ + src/sbd.h | 2 +- + 3 files changed, 34 insertions(+), 26 deletions(-) + +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 8e0bc87..9be6c99 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -42,19 +42,36 @@ void recruit_servant(const char *devname, pid_t pid) + struct servants_list_item *newbie; + + if (lookup_servant_by_dev(devname)) { +- cl_log(LOG_DEBUG, "Servant %s already exists", devname); +- return; ++ cl_log(LOG_DEBUG, "Servant %s already exists", devname); ++ return; + } + + newbie = malloc(sizeof(*newbie)); +- if (!newbie) { +- fprintf(stderr, "malloc failed in recruit_servant.\n"); +- exit(1); ++ if (newbie) { ++ memset(newbie, 0, sizeof(*newbie)); ++ newbie->devname = strdup(devname); ++ newbie->pid = pid; ++ newbie->first_start = 1; ++ } ++ if (!newbie || !newbie->devname) { ++ fprintf(stderr, "heap allocation failed in recruit_servant.\n"); ++ exit(1); ++ } ++ ++ /* some sanity-check on our newbie */ ++ if (sbd_is_disk(newbie)) { ++ cl_log(LOG_INFO, "Monitoring %s", devname); ++ disk_count++; ++ } else if (sbd_is_pcmk(newbie) || sbd_is_cluster(newbie)) { ++ /* alive just after pcmk and cluster servants have shown up */ ++ newbie->outdated = 1; ++ } else { ++ /* toss our newbie */ ++ cl_log(LOG_ERR, "Refusing to recruit unrecognized servant %s", devname); ++ free((void *) newbie->devname); ++ free(newbie); ++ return; + } +- memset(newbie, 0, sizeof(*newbie)); +- newbie->devname = strdup(devname); +- newbie->pid = pid; +- newbie->first_start = 1; + + if (!s) { + servants_leader = newbie; +@@ -65,12 +82,6 @@ void recruit_servant(const char *devname, pid_t pid) + } + + servant_count++; +- if(sbd_is_disk(newbie)) { +- cl_log(LOG_INFO, "Monitoring %s", devname); +- disk_count++; +- } else { +- newbie->outdated = 1; +- } + } + + int assign_servant(const char* devname, functionp_t functionp, int mode, const void* argp) +@@ -148,7 +159,7 @@ void servant_start(struct servants_list_item *s) + if (sbd_is_disk(s)) { + #if SUPPORT_SHARED_DISK + DBGLOG(LOG_INFO, "Starting servant for device %s", s->devname); +- s->pid = assign_servant(s->devname, servant, start_mode, s); ++ s->pid = assign_servant(s->devname, servant_md, start_mode, s); + #else + cl_log(LOG_ERR, "Shared disk functionality not supported"); + return; +@@ -785,12 +796,14 @@ parse_device_line(const char *line) + + if (lpc > last) { + entry = calloc(1, 1 + lpc - last); ++ if (!entry) { ++ fprintf(stderr, "heap allocation failed parsing device-line.\n"); ++ exit(1); ++ } + rc = sscanf(line + last, "%[^;]", entry); + } + +- if (entry == NULL) { +- /* Skip */ +- } else if (rc != 1) { ++ if (rc != 1) { + cl_log(LOG_WARNING, "Could not parse (%d %d): %s", last, lpc, line + last); + } else { + cl_log(LOG_DEBUG, "Adding '%s'", entry); +diff --git a/src/sbd-md.c b/src/sbd-md.c +index 579d273..ba2c34d 100644 +--- a/src/sbd-md.c ++++ b/src/sbd-md.c +@@ -1031,7 +1031,7 @@ static int servant_check_timeout_inconsistent(struct sector_header_s *hdr) + return 0; + } + +-int servant(const char *diskname, int mode, const void* argp) ++int servant_md(const char *diskname, int mode, const void* argp) + { + struct sector_mbox_s *s_mbox = NULL; + struct sector_node_s *s_node = NULL; +@@ -1046,11 +1046,6 @@ int servant(const char *diskname, int mode, const void* argp) + char uuid[37]; + const struct servants_list_item *s = argp; + +- if (!diskname) { +- cl_log(LOG_ERR, "Empty disk name %s.", diskname); +- return -1; +- } +- + cl_log(LOG_INFO, "Servant starting for device %s", diskname); + + /* Block most of the signals */ +diff --git a/src/sbd.h b/src/sbd.h +index 386c85c..6fe07f9 100644 +--- a/src/sbd.h ++++ b/src/sbd.h +@@ -175,7 +175,7 @@ int ping_via_slots(const char *name, struct servants_list_item *servants); + int dump_headers(struct servants_list_item *servants); + unsigned long get_first_msgwait(struct servants_list_item *servants); + int messenger(const char *name, const char *msg, struct servants_list_item *servants); +-int servant(const char *diskname, int mode, const void* argp); ++int servant_md(const char *diskname, int mode, const void* argp); + #endif + + int servant_pcmk(const char *diskname, int mode, const void* argp); +-- +1.8.3.1 + diff --git a/SOURCES/0007-Fix-sbd-inquisitor-overhaul-device-list-parser.patch b/SOURCES/0007-Fix-sbd-inquisitor-overhaul-device-list-parser.patch new file mode 100644 index 0000000..c0ccf3d --- /dev/null +++ b/SOURCES/0007-Fix-sbd-inquisitor-overhaul-device-list-parser.patch @@ -0,0 +1,112 @@ +From d3be2caffb9edbb6bfe0e2658c66a1826f4e9c3a Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 15 Apr 2019 17:41:51 +0200 +Subject: [PATCH] Fix: sbd-inquisitor: overhaul device-list-parser + +for readability and robustness +--- + src/sbd-inquisitor.c | 60 ++++++++++++++++++++++++++-------------------------- + 1 file changed, 30 insertions(+), 30 deletions(-) + +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 715e978..b4b5585 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -780,56 +780,56 @@ int inquisitor(void) + int + parse_device_line(const char *line) + { +- int lpc = 0; +- int last = 0; +- int max = 0; ++ size_t lpc = 0; ++ size_t last = 0; ++ size_t max = 0; + int found = 0; ++ bool skip_space = true; ++ int space_run = 0; + +- if(line) { +- max = strlen(line); ++ if (!line) { ++ return 0; + } + +- if (max <= 0) { +- return found; +- } ++ max = strlen(line); + +- cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", max, line); +- /* Skip initial whitespace */ +- for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { +- last = lpc + 1; +- } ++ cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", (int) max, line); + +- /* Now the actual content */ + for (lpc = 0; lpc <= max; lpc++) { +- int a_space = isspace(line[lpc]); +- +- if (a_space && lpc < max && isspace(line[lpc + 1])) { +- /* fast-forward to the end of the spaces */ +- +- } else if (a_space || line[lpc] == ';' || line[lpc] == 0) { +- int rc = 1; +- char *entry = NULL; ++ if (isspace(line[lpc])) { ++ if (skip_space) { ++ last = lpc + 1; ++ } else { ++ space_run++; ++ } ++ continue; ++ } ++ skip_space = false; ++ if (line[lpc] == ';' || line[lpc] == 0) { ++ int rc = 0; ++ char *entry = calloc(1, 1 + lpc - last); + +- if (lpc > last) { +- entry = calloc(1, 1 + lpc - last); +- if (!entry) { +- fprintf(stderr, "heap allocation failed parsing device-line.\n"); +- exit(1); +- } ++ if (entry) { + rc = sscanf(line + last, "%[^;]", entry); ++ } else { ++ fprintf(stderr, "Heap allocation failed parsing device-line.\n"); ++ exit(1); + } + + if (rc != 1) { +- cl_log(LOG_WARNING, "Could not parse (%d %d): %s", last, lpc, line + last); ++ cl_log(LOG_WARNING, "Could not parse: '%s'", line + last); + } else { ++ entry[strlen(entry)-space_run] = '\0'; + cl_log(LOG_DEBUG, "Adding '%s'", entry); + recruit_servant(entry, 0); + found++; + } + + free(entry); ++ skip_space = true; + last = lpc + 1; + } ++ space_run = 0; + } + return found; + } +@@ -890,7 +890,7 @@ int main(int argc, char **argv, char **envp) + int devices = parse_device_line(value); + if(devices < 1) { + fprintf(stderr, "Invalid device line: %s\n", value); +- exit_status = -2; ++ exit_status = -2; + goto out; + } + #else +-- +1.8.3.1 + diff --git a/SOURCES/0008-Refactor-sbd-common-no-reason-for-stack-hogger-havin.patch b/SOURCES/0008-Refactor-sbd-common-no-reason-for-stack-hogger-havin.patch new file mode 100644 index 0000000..f0a6d44 --- /dev/null +++ b/SOURCES/0008-Refactor-sbd-common-no-reason-for-stack-hogger-havin.patch @@ -0,0 +1,47 @@ +From 8e94781169fc2f36eb49078de1978ceb53df6b6c Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 15 Apr 2019 17:40:26 +0200 +Subject: [PATCH] Refactor: sbd-common: no reason for stack-hogger having + retval + +--- + src/sbd-common.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/src/sbd-common.c b/src/sbd-common.c +index 3966f25..873a76e 100644 +--- a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -568,13 +568,13 @@ enum { + #define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) + #define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) + +-static unsigned char ++static void + sbd_stack_hogger(unsigned char * inbuf, int kbytes) + { + unsigned char buf[1024]; + + if(kbytes <= 0) { +- return HOG_CHAR; ++ return; + } + + if (inbuf == NULL) { +@@ -584,10 +584,10 @@ sbd_stack_hogger(unsigned char * inbuf, int kbytes) + } + + if (kbytes > 0) { +- return sbd_stack_hogger(buf, kbytes-1); +- } else { +- return buf[sizeof(buf)-1]; ++ sbd_stack_hogger(buf, kbytes-1); + } ++ ++ return; + } + + static void +-- +1.8.3.1 + diff --git a/SOURCES/0009-Sanity-sbd-inquisitor-free-timeout-action-on-bail-ou.patch b/SOURCES/0009-Sanity-sbd-inquisitor-free-timeout-action-on-bail-ou.patch new file mode 100644 index 0000000..9eae5d6 --- /dev/null +++ b/SOURCES/0009-Sanity-sbd-inquisitor-free-timeout-action-on-bail-ou.patch @@ -0,0 +1,36 @@ +From 5c80753afb4abc2b5b024f4a5f2fc78669bda70b Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 15 Apr 2019 17:39:12 +0200 +Subject: [PATCH] Sanity: sbd-inquisitor: free timeout action on bail out + +--- + src/sbd-inquisitor.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 77c6e4f..715e978 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -1088,7 +1088,8 @@ int main(int argc, char **argv, char **envp) + break; + case 'h': + usage(); +- return (0); ++ goto out; ++ break; + default: + exit_status = -2; + goto out; +@@ -1241,6 +1242,9 @@ int main(int argc, char **argv, char **envp) + } + + out: ++ if (timeout_action) { ++ free(timeout_action); ++ } + if (exit_status < 0) { + if (exit_status == -2) { + usage(); +-- +1.8.3.1 + diff --git a/SOURCES/0010-Sanity-sbd-md-prevent-unrealistic-overflow-on-sector.patch b/SOURCES/0010-Sanity-sbd-md-prevent-unrealistic-overflow-on-sector.patch new file mode 100644 index 0000000..cc8c4ee --- /dev/null +++ b/SOURCES/0010-Sanity-sbd-md-prevent-unrealistic-overflow-on-sector.patch @@ -0,0 +1,29 @@ +From f6af36a0fb05b5a37b3dfb153677e28ca5cb3fd8 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 15 Apr 2019 17:37:42 +0200 +Subject: [PATCH] Sanity: sbd-md: prevent unrealistic overflow on sector io + calc + +--- + src/sbd-md.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/sbd-md.c b/src/sbd-md.c +index 60a1873..f437c41 100644 +--- a/src/sbd-md.c ++++ b/src/sbd-md.c +@@ -162,9 +162,9 @@ sector_io(struct sbd_context *st, int sector, void *data, int rw) + + memset(&st->io, 0, sizeof(struct iocb)); + if (rw) { +- io_prep_pwrite(&st->io, st->devfd, data, sector_size, sector_size * sector); ++ io_prep_pwrite(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector); + } else { +- io_prep_pread(&st->io, st->devfd, data, sector_size, sector_size * sector); ++ io_prep_pread(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector); + } + + if (io_submit(st->ioctx, 1, ios) != 1) { +-- +1.8.3.1 + diff --git a/SOURCES/0011-Sanity-sbd-md-remove-some-left-over-code.patch b/SOURCES/0011-Sanity-sbd-md-remove-some-left-over-code.patch new file mode 100644 index 0000000..64dd8cb --- /dev/null +++ b/SOURCES/0011-Sanity-sbd-md-remove-some-left-over-code.patch @@ -0,0 +1,35 @@ +From a80fe9392fd910074eccc4733ff2cd3e1625e48e Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 15 Apr 2019 17:36:12 +0200 +Subject: [PATCH] Sanity: sbd-md: remove some left over code + +--- + src/sbd-md.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/src/sbd-md.c b/src/sbd-md.c +index c51d381..60a1873 100644 +--- a/src/sbd-md.c ++++ b/src/sbd-md.c +@@ -373,7 +373,6 @@ init_device(struct sbd_context *st) + struct sector_header_s *s_header; + struct sector_node_s *s_node; + struct sector_mbox_s *s_mbox; +- struct stat s; + char uuid[37]; + int i; + int rc = 0; +@@ -394,10 +393,6 @@ init_device(struct sbd_context *st) + uuid_generate(s_header->uuid); + uuid_unparse_lower(s_header->uuid, uuid); + +- fstat(st->devfd, &s); +- /* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n", +- s.st_size, s.st_blksize, s.st_blocks); */ +- + cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: %s)", + s_header->version, s_header->minor_version, + st->devfd, uuid); +-- +1.8.3.1 + diff --git a/SOURCES/0012-Fix-sbd-common-query-rt-budget-0-otherwise-try-movin.patch b/SOURCES/0012-Fix-sbd-common-query-rt-budget-0-otherwise-try-movin.patch new file mode 100644 index 0000000..41cf428 --- /dev/null +++ b/SOURCES/0012-Fix-sbd-common-query-rt-budget-0-otherwise-try-movin.patch @@ -0,0 +1,215 @@ +From eaeed6cca46a0223617ead834aaa576dd5ad07ff Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Fri, 31 May 2019 16:11:16 +0200 +Subject: [PATCH] Fix: sbd-common: query rt-budget > 0 otherwise try moving to + root-slice + +--- + src/sbd-common.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++ + src/sbd-inquisitor.c | 15 +++++++ + src/sbd.h | 2 + + src/sbd.sysconfig | 14 +++++++ + 4 files changed, 141 insertions(+) + +diff --git a/src/sbd-common.c b/src/sbd-common.c +index 873a76e..ebfdaa3 100644 +--- a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -662,6 +662,112 @@ static void sbd_memlock(int stackgrowK, int heapgrowK) + #endif + } + ++static int get_realtime_budget(void) ++{ ++ FILE *f; ++ char fname[PATH_MAX]; ++ int res = -1, lnum = 0; ++ char *cgroup = NULL, *namespecs = NULL; ++ ++ snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid()); ++ f = fopen(fname, "rt"); ++ if (f == NULL) { ++ cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd", ++ (intmax_t)getpid()); ++ goto exit_res; ++ } ++ while( fscanf(f, "%d:%m[^:]:%m[^\n]", &lnum, &namespecs, &cgroup) !=EOF ) { ++ if (namespecs && strstr(namespecs, "cpuacct")) { ++ free(namespecs); ++ break; ++ } ++ if (cgroup) { ++ free(cgroup); ++ cgroup = NULL; ++ } ++ if (namespecs) { ++ free(namespecs); ++ namespecs = NULL; ++ } ++ } ++ fclose(f); ++ if (cgroup == NULL) { ++ cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd", ++ (intmax_t)getpid()); ++ goto exit_res; ++ } ++ snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us", ++ cgroup); ++ f = fopen(fname, "rt"); ++ if (f == NULL) { ++ cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but " ++ "doesn't for '%s'", cgroup); ++ goto exit_res; ++ } ++ if (fscanf(f, "%d", &res) != 1) { ++ cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname); ++ } else { ++ cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res); ++ } ++ fclose(f); ++ ++exit_res: ++ if (cgroup) { ++ free(cgroup); ++ } ++ return res; ++} ++ ++/* stolen from corosync */ ++static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) { ++ FILE *f; ++ int res = -1; ++ ++ /* ++ * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now ++ * using systemd and systemd uses hardcoded path of cgroup mount point. ++ * ++ * This feature is expected to be removed as soon as systemd gets support ++ * for managing RT configuration. ++ */ ++ f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt"); ++ if (f == NULL) { ++ cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> " ++ "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED"); ++ res = 0; ++ goto exit_res; ++ } ++ fclose(f); ++ ++ if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) { ++ cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are " ++ "-> skip moving to root-slice"); ++ res = 0; ++ goto exit_res; ++ } ++ ++ f = fopen("/sys/fs/cgroup/cpu/tasks", "w"); ++ if (f == NULL) { ++ cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing"); ++ ++ goto exit_res; ++ } ++ ++ if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) { ++ cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file"); ++ goto close_and_exit_res; ++ } ++ ++close_and_exit_res: ++ if (fclose(f) != 0) { ++ cl_log(LOG_WARNING, "Can't close cgroups tasks file"); ++ goto exit_res; ++ } ++ ++exit_res: ++ return (res); ++} ++ + void + sbd_make_realtime(int priority, int stackgrowK, int heapgrowK) + { +@@ -670,6 +776,10 @@ sbd_make_realtime(int priority, int stackgrowK, int heapgrowK) + } + + #ifdef SCHED_RR ++ if (move_to_root_cgroup) { ++ sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup); ++ } ++ + { + int pcurrent = 0; + int pmin = sched_get_priority_min(SCHED_RR); +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index abde4e5..cef5cc7 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -33,6 +33,8 @@ int start_mode = 0; + char* pidfile = NULL; + bool do_flush = true; + char timeout_sysrq_char = 'b'; ++bool move_to_root_cgroup = true; ++bool enforce_moving_to_root_cgroup = false; + + int parse_device_line(const char *line); + +@@ -965,6 +967,19 @@ int main(int argc, char **argv, char **envp) + timeout_action = strdup(value); + } + ++ value = getenv("SBD_MOVE_TO_ROOT_CGROUP"); ++ if(value) { ++ move_to_root_cgroup = crm_is_true(value); ++ ++ if (move_to_root_cgroup) { ++ enforce_moving_to_root_cgroup = true; ++ } else { ++ if (strcmp(value, "auto") == 0) { ++ move_to_root_cgroup = true; ++ } ++ } ++ } ++ + while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) { + switch (c) { + case 'D': +diff --git a/src/sbd.h b/src/sbd.h +index 3b05a11..ac30ec7 100644 +--- a/src/sbd.h ++++ b/src/sbd.h +@@ -159,6 +159,8 @@ extern bool watchdogdev_is_default; + extern char* local_uname; + extern bool do_flush; + extern char timeout_sysrq_char; ++extern bool move_to_root_cgroup; ++extern bool enforce_moving_to_root_cgroup; + + /* Global, non-tunable variables: */ + extern int sector_size; +diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig +index f163f21..e1a60ed 100644 +--- a/src/sbd.sysconfig ++++ b/src/sbd.sysconfig +@@ -91,6 +91,20 @@ SBD_WATCHDOG_TIMEOUT=5 + # + SBD_TIMEOUT_ACTION=flush,reboot + ++## Type: yesno / auto ++## Default: auto ++# ++# If CPUAccounting is enabled default is not to assign any RT-budget ++# to the system.slice which prevents sbd from running RR-scheduled. ++# ++# One way to escape that issue is to move sbd-processes from the ++# slice they were originally started to root-slice. ++# Of course starting sbd in a certain slice might be intentional. ++# Thus in auto-mode sbd will check if the slice has RT-budget assigned. ++# If that is the case sbd will stay in that slice while it will ++# be moved to root-slice otherwise. ++SBD_MOVE_TO_ROOT_CGROUP=auto ++ + ## Type: string + ## Default: "" + # +-- +1.8.3.1 + diff --git a/SOURCES/0014-Fix-sbd-cluster-periodically-check-corosync-daemon-l.patch b/SOURCES/0014-Fix-sbd-cluster-periodically-check-corosync-daemon-l.patch new file mode 100644 index 0000000..9f4de96 --- /dev/null +++ b/SOURCES/0014-Fix-sbd-cluster-periodically-check-corosync-daemon-l.patch @@ -0,0 +1,123 @@ +From 1387ed890e3a9e246e9b9f780b2a7cb5379459ab Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Wed, 5 Jun 2019 11:32:49 +0200 +Subject: [PATCH] Fix: sbd-cluster: periodically check corosync-daemon liveness + +using votequorum_getinfo. +--- + configure.ac | 12 +++++++++++- + src/sbd-cluster.c | 36 ++++++++++++++++++++++++++++++++++-- + 2 files changed, 45 insertions(+), 3 deletions(-) + +diff --git a/configure.ac b/configure.ac +index fac26a8..c44e747 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -33,6 +33,7 @@ PKG_CHECK_MODULES(glib, [glib-2.0]) + dnl PKG_CHECK_MODULES(libcoroipcc, [libcoroipcc]) + + PKG_CHECK_MODULES(cmap, [libcmap], HAVE_cmap=1, HAVE_cmap=0) ++PKG_CHECK_MODULES(votequorum, [libvotequorum], HAVE_votequorum=1, HAVE_votequorum=0) + + dnl pacemaker > 1.1.8 + PKG_CHECK_MODULES(pacemaker, [pacemaker, pacemaker-cib], HAVE_pacemaker=1, HAVE_pacemaker=0) +@@ -49,7 +50,12 @@ elif test $HAVE_pacemaker = 1; then + if test $HAVE_cmap = 0; then + AC_MSG_NOTICE(No package 'cmap' found) + else +- CPPFLAGS="$CPPFLAGS $cmap_CFLAGS" ++ CPPFLAGS="$CPPFLAGS $cmap_CFLAGS" ++ fi ++ if test $HAVE_votequorum = 0; then ++ AC_MSG_NOTICE(No library 'votequorum' found) ++ else ++ CPPFLAGS="$CPPFLAGS $votequorum_CFLAGS" + fi + fi + +@@ -66,6 +72,7 @@ AC_CHECK_LIB(pe_rules, test_rule, , missing="yes") + AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes") + AC_CHECK_LIB(uuid, uuid_unparse, , missing="yes") + AC_CHECK_LIB(cmap, cmap_initialize, , HAVE_cmap=0) ++AC_CHECK_LIB(votequorum, votequorum_getinfo, , HAVE_votequorum=0) + + dnl pacemaker >= 1.1.8 + AC_CHECK_HEADERS(pacemaker/crm/cluster.h) +@@ -107,6 +114,9 @@ fi + AC_DEFINE_UNQUOTED(CHECK_TWO_NODE, $HAVE_cmap, Turn on checking for 2-node cluster) + AM_CONDITIONAL(CHECK_TWO_NODE, test "$HAVE_cmap" = "1") + ++AC_DEFINE_UNQUOTED(CHECK_VOTEQUORUM_HANDLE, $HAVE_votequorum, Turn on periodic checking of votequorum-handle) ++AM_CONDITIONAL(CHECK_VOTEQUORUM_HANDLE, test "$HAVE_votequorum" = "1") ++ + CONFIGDIR="" + AC_ARG_WITH(configdir, + [ --with-configdir=DIR +diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c +index 541212f..9fb6224 100644 +--- a/src/sbd-cluster.c ++++ b/src/sbd-cluster.c +@@ -80,6 +80,12 @@ sbd_plugin_membership_dispatch(cpg_handle_t handle, + + #if SUPPORT_COROSYNC + ++#if CHECK_VOTEQUORUM_HANDLE ++#include ++ ++static votequorum_handle_t votequorum_handle = 0; ++#endif ++ + static bool two_node = false; + static bool ever_seen_both = false; + static int cpg_membership_entries = -1; +@@ -261,12 +267,32 @@ notify_timer_cb(gpointer data) + + #endif + case pcmk_cluster_corosync: ++ do { ++#if SUPPORT_COROSYNC && CHECK_VOTEQUORUM_HANDLE ++ struct votequorum_info info; ++ ++ if (votequorum_getinfo(votequorum_handle, 0, &info) != CS_OK) { ++ ++ votequorum_finalize(votequorum_handle); ++ if (votequorum_initialize(&votequorum_handle, NULL) != CS_OK) { ++ votequorum_handle = 0; ++ break; ++ } ++ if (votequorum_getinfo(votequorum_handle, 0, &info) != CS_OK) { ++ break; ++ } ++ } ++#endif ++ notify_parent(); ++ } while (0); ++ break; ++ + #if HAVE_DECL_PCMK_CLUSTER_CMAN + case pcmk_cluster_cman: +-#endif +- /* TODO - Make a CPG call and only call notify_parent() when we get a reply */ ++ + notify_parent(); + break; ++#endif + + default: + break; +@@ -533,6 +559,12 @@ find_pacemaker_remote(void) + static void + clean_up(int rc) + { ++#if CHECK_VOTEQUORUM_HANDLE ++ votequorum_finalize(votequorum_handle); ++ votequorum_handle = 0; /* there isn't really an invalid handle value ++ * just to be back where we started ++ */ ++#endif + return; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0015-build-say-library-when-missing-cmap-not-package-to-a.patch b/SOURCES/0015-build-say-library-when-missing-cmap-not-package-to-a.patch new file mode 100644 index 0000000..231c77c --- /dev/null +++ b/SOURCES/0015-build-say-library-when-missing-cmap-not-package-to-a.patch @@ -0,0 +1,26 @@ +From 0de14256fc873aee735117955662503b773bf71c Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 11 Jun 2019 08:05:33 +0200 +Subject: [PATCH] build: say library when missing cmap not package to avoid + confusion + +--- + configure.ac | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/configure.ac b/configure.ac +index c44e747..1c55094 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -48,7 +48,7 @@ if test $HAVE_pacemaker = 0 -a $HAVE_pcmk = 0; then + elif test $HAVE_pacemaker = 1; then + CPPFLAGS="$CPPFLAGS $glib_CFLAGS $pacemaker_CFLAGS" + if test $HAVE_cmap = 0; then +- AC_MSG_NOTICE(No package 'cmap' found) ++ AC_MSG_NOTICE(No library 'cmap' found) + else + CPPFLAGS="$CPPFLAGS $cmap_CFLAGS" + fi +-- +1.8.3.1 + diff --git a/SPECS/sbd.spec b/SPECS/sbd.spec index 5e0654c..668fea5 100644 --- a/SPECS/sbd.spec +++ b/SPECS/sbd.spec @@ -18,20 +18,31 @@ %global commit 7f33d1a409d0a4e2cd69946688c48eaa8f3c5d26 %global shortcommit %(c=%{commit}; echo ${c:0:7}) %global github_owner Clusterlabs -%global buildnum 4 +%global buildnum 15 Name: sbd Summary: Storage-based death License: GPLv2+ Group: System Environment/Daemons Version: 1.4.0 -Release: %{buildnum}%{?dist}.3 +Release: %{buildnum}%{?dist} Url: https://github.com/%{github_owner}/%{name} Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz Patch0: 0001-Fix-sbd-cluster-finalize-cmap-connection-if-disconne.patch Patch1: 0002-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch Patch2: 0003-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch +Patch3: 0004-Doc-sbd.8.pod-use-the-generic-term-cluster-services-.patch +Patch4: 0005-Doc-sbd.sysconfig-watchdog-timeout-set-in-the-on-dis.patch +Patch5: 0006-Refactor-fail-earlier-on-invalid-servants.patch +Patch6: 0007-Fix-sbd-inquisitor-overhaul-device-list-parser.patch +Patch7: 0008-Refactor-sbd-common-no-reason-for-stack-hogger-havin.patch +Patch8: 0009-Sanity-sbd-inquisitor-free-timeout-action-on-bail-ou.patch +Patch9: 0010-Sanity-sbd-md-prevent-unrealistic-overflow-on-sector.patch +Patch10: 0011-Sanity-sbd-md-remove-some-left-over-code.patch +Patch11: 0012-Fix-sbd-common-query-rt-budget-0-otherwise-try-movin.patch Patch12: 0013-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch +Patch13: 0014-Fix-sbd-cluster-periodically-check-corosync-daemon-l.patch +Patch14: 0015-build-say-library-when-missing-cmap-not-package-to-a.patch Patch15: 0016-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: autoconf @@ -130,20 +141,38 @@ fi %doc COPYING %changelog -* Thu Aug 16 2019 Klaus Wenninger - 1.4.0-4.3 +* Fri Aug 16 2019 Klaus Wenninger - 1.4.0-15 - check for shutdown attribute on every cib-diff - Resolves: rhbz#1732334 + Resolves: rhbz#1713023 -* Thu Aug 1 2019 Klaus Wenninger - 1.4.0-4.2 -- rebuild using correct target +* Tue Aug 13 2019 Klaus Wenninger - 1.4.0-14 +- rebuild with updated brew-root - Resolves: rhbz#1732334 + Resolves: rhbz#1718297 + Resolves: rhbz#1713023 + Resolves: rhbz#1732368 -* Tue Jul 30 2019 Klaus Wenninger - 1.4.0-4.1 -- assume graceful pacemaker exit if leftovers are unmanaged +* Fri Aug 9 2019 Klaus Wenninger - 1.4.0-13 +- properly increment release - Resolves: rhbz#1732334 + Resolves: rhbz#1718297 + Resolves: rhbz#1713023 + Resolves: rhbz#1732368 + +* Fri Aug 9 2019 Klaus Wenninger - 1.4.0-12 +- rebuild with updated brew-root + + Resolves: rhbz#1718297 + Resolves: rhbz#1713023 + Resolves: rhbz#1732368 + +* Tue Jul 23 2019 Klaus Wenninger - 1.4.0-11 +- synchronize with rhel-8.1.0 + + Resolves: rhbz#1718297 + Resolves: rhbz#1713023 + Resolves: rhbz#1732368 * Tue Mar 26 2019 Klaus Wenninger - 1.4.0-4 - fix possible null-pointer-access just introduced