Blame SOURCES/0043-libmultipath-add-eh_deadline-multipath.conf-paramete.patch

60b218
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
60b218
From: Benjamin Marzinski <bmarzins@redhat.com>
60b218
Date: Wed, 14 Oct 2020 18:38:20 -0500
60b218
Subject: [PATCH] libmultipath: add eh_deadline multipath.conf parameter
60b218
60b218
There are times a fc rport is never lost, meaning that fast_io_fail_tmo
60b218
and dev_loss_tmo never trigger, but scsi commands still hang. This can
60b218
cause problems in cases where users have strict timing requirements, and
60b218
the easiest way to solve these issues is to set eh_deadline. Since it's
60b218
already possible to set fast_io_fail_tmo and dev_loss_tmo from
60b218
multipath.conf, and have multipath take care of setting it correctly for
60b218
the scsi devices in sysfs, it makes sense to allow users to set
60b218
eh_deadline here as well.
60b218
60b218
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
60b218
---
60b218
 libmultipath/config.c      |  2 ++
60b218
 libmultipath/config.h      |  2 ++
60b218
 libmultipath/configure.c   |  1 +
60b218
 libmultipath/dict.c        | 10 +++++++
60b218
 libmultipath/discovery.c   | 58 +++++++++++++++++++++++++++++++++-----
60b218
 libmultipath/propsel.c     | 17 +++++++++++
60b218
 libmultipath/propsel.h     |  1 +
60b218
 libmultipath/structs.h     |  7 +++++
60b218
 multipath/multipath.conf.5 | 16 +++++++++++
60b218
 9 files changed, 107 insertions(+), 7 deletions(-)
60b218
60b218
diff --git a/libmultipath/config.c b/libmultipath/config.c
60b218
index 26f8e050..a71db2d0 100644
60b218
--- a/libmultipath/config.c
60b218
+++ b/libmultipath/config.c
60b218
@@ -359,6 +359,7 @@ merge_hwe (struct hwentry * dst, struct hwentry * src)
60b218
 	merge_num(flush_on_last_del);
60b218
 	merge_num(fast_io_fail);
60b218
 	merge_num(dev_loss);
60b218
+	merge_num(eh_deadline);
60b218
 	merge_num(user_friendly_names);
60b218
 	merge_num(retain_hwhandler);
60b218
 	merge_num(detect_prio);
60b218
@@ -514,6 +515,7 @@ store_hwe (vector hwtable, struct hwentry * dhwe)
60b218
 	hwe->flush_on_last_del = dhwe->flush_on_last_del;
60b218
 	hwe->fast_io_fail = dhwe->fast_io_fail;
60b218
 	hwe->dev_loss = dhwe->dev_loss;
60b218
+	hwe->eh_deadline = dhwe->eh_deadline;
60b218
 	hwe->user_friendly_names = dhwe->user_friendly_names;
60b218
 	hwe->retain_hwhandler = dhwe->retain_hwhandler;
60b218
 	hwe->detect_prio = dhwe->detect_prio;
60b218
diff --git a/libmultipath/config.h b/libmultipath/config.h
60b218
index f38c7639..a22c1b4e 100644
60b218
--- a/libmultipath/config.h
60b218
+++ b/libmultipath/config.h
60b218
@@ -64,6 +64,7 @@ struct hwentry {
60b218
 	int flush_on_last_del;
60b218
 	int fast_io_fail;
60b218
 	unsigned int dev_loss;
60b218
+	int eh_deadline;
60b218
 	int user_friendly_names;
60b218
 	int retain_hwhandler;
60b218
 	int detect_prio;
60b218
@@ -149,6 +150,7 @@ struct config {
60b218
 	int attribute_flags;
60b218
 	int fast_io_fail;
60b218
 	unsigned int dev_loss;
60b218
+	int eh_deadline;
60b218
 	int log_checker_err;
60b218
 	int allow_queueing;
60b218
 	int find_multipaths;
60b218
diff --git a/libmultipath/configure.c b/libmultipath/configure.c
60b218
index 96c79610..b7113291 100644
60b218
--- a/libmultipath/configure.c
60b218
+++ b/libmultipath/configure.c
60b218
@@ -340,6 +340,7 @@ int setup_map(struct multipath *mpp, char *params, int params_size,
60b218
 	select_gid(conf, mpp);
60b218
 	select_fast_io_fail(conf, mpp);
60b218
 	select_dev_loss(conf, mpp);
60b218
+	select_eh_deadline(conf, mpp);
60b218
 	select_reservation_key(conf, mpp);
60b218
 	select_deferred_remove(conf, mpp);
60b218
 	select_marginal_path_err_sample_time(conf, mpp);
60b218
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
60b218
index ce8e1cda..8fd91d8c 100644
60b218
--- a/libmultipath/dict.c
60b218
+++ b/libmultipath/dict.c
60b218
@@ -911,6 +911,13 @@ declare_ovr_snprint(dev_loss, print_dev_loss)
60b218
 declare_hw_handler(dev_loss, set_dev_loss)
60b218
 declare_hw_snprint(dev_loss, print_dev_loss)
60b218
 
60b218
+declare_def_handler(eh_deadline, set_undef_off_zero)
60b218
+declare_def_snprint(eh_deadline, print_undef_off_zero)
60b218
+declare_ovr_handler(eh_deadline, set_undef_off_zero)
60b218
+declare_ovr_snprint(eh_deadline, print_undef_off_zero)
60b218
+declare_hw_handler(eh_deadline, set_undef_off_zero)
60b218
+declare_hw_snprint(eh_deadline, print_undef_off_zero)
60b218
+
60b218
 static int
60b218
 set_pgpolicy(vector strvec, void *ptr)
60b218
 {
60b218
@@ -1776,6 +1783,7 @@ init_keywords(vector keywords)
60b218
 	install_keyword("gid", &def_gid_handler, &snprint_def_gid);
60b218
 	install_keyword("fast_io_fail_tmo", &def_fast_io_fail_handler, &snprint_def_fast_io_fail);
60b218
 	install_keyword("dev_loss_tmo", &def_dev_loss_handler, &snprint_def_dev_loss);
60b218
+	install_keyword("eh_deadline", &def_eh_deadline_handler, &snprint_def_eh_deadline);
60b218
 	install_keyword("bindings_file", &def_bindings_file_handler, &snprint_def_bindings_file);
60b218
 	install_keyword("wwids_file", &def_wwids_file_handler, &snprint_def_wwids_file);
60b218
 	install_keyword("prkeys_file", &def_prkeys_file_handler, &snprint_def_prkeys_file);
60b218
@@ -1885,6 +1893,7 @@ init_keywords(vector keywords)
60b218
 	install_keyword("flush_on_last_del", &hw_flush_on_last_del_handler, &snprint_hw_flush_on_last_del);
60b218
 	install_keyword("fast_io_fail_tmo", &hw_fast_io_fail_handler, &snprint_hw_fast_io_fail);
60b218
 	install_keyword("dev_loss_tmo", &hw_dev_loss_handler, &snprint_hw_dev_loss);
60b218
+	install_keyword("eh_deadline", &hw_eh_deadline_handler, &snprint_hw_eh_deadline);
60b218
 	install_keyword("user_friendly_names", &hw_user_friendly_names_handler, &snprint_hw_user_friendly_names);
60b218
 	install_keyword("retain_attached_hw_handler", &hw_retain_hwhandler_handler, &snprint_hw_retain_hwhandler);
60b218
 	install_keyword("detect_prio", &hw_detect_prio_handler, &snprint_hw_detect_prio);
60b218
@@ -1925,6 +1934,7 @@ init_keywords(vector keywords)
60b218
 	install_keyword("flush_on_last_del", &ovr_flush_on_last_del_handler, &snprint_ovr_flush_on_last_del);
60b218
 	install_keyword("fast_io_fail_tmo", &ovr_fast_io_fail_handler, &snprint_ovr_fast_io_fail);
60b218
 	install_keyword("dev_loss_tmo", &ovr_dev_loss_handler, &snprint_ovr_dev_loss);
60b218
+	install_keyword("eh_deadline", &ovr_eh_deadline_handler, &snprint_ovr_eh_deadline);
60b218
 	install_keyword("user_friendly_names", &ovr_user_friendly_names_handler, &snprint_ovr_user_friendly_names);
60b218
 	install_keyword("retain_attached_hw_handler", &ovr_retain_hwhandler_handler, &snprint_ovr_retain_hwhandler);
60b218
 	install_keyword("detect_prio", &ovr_detect_prio_handler, &snprint_ovr_detect_prio);
60b218
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
60b218
index 01aadba9..a328aafa 100644
60b218
--- a/libmultipath/discovery.c
60b218
+++ b/libmultipath/discovery.c
60b218
@@ -577,6 +577,42 @@ sysfs_get_asymmetric_access_state(struct path *pp, char *buff, int buflen)
60b218
 	return !!preferred;
60b218
 }
60b218
 
60b218
+static int
60b218
+sysfs_set_eh_deadline(struct multipath *mpp, struct path *pp)
60b218
+{
60b218
+	struct udev_device *hostdev;
60b218
+	char host_name[HOST_NAME_LEN], value[16];
60b218
+	int ret;
60b218
+
60b218
+	if (mpp->eh_deadline == EH_DEADLINE_UNSET)
60b218
+		return 0;
60b218
+
60b218
+	sprintf(host_name, "host%d", pp->sg_id.host_no);
60b218
+	hostdev = udev_device_new_from_subsystem_sysname(udev,
60b218
+			"scsi_host", host_name);
60b218
+	if (!hostdev)
60b218
+		return 1;
60b218
+
60b218
+	if (mpp->eh_deadline == EH_DEADLINE_OFF)
60b218
+		sprintf(value, "off");
60b218
+	else if (mpp->eh_deadline == EH_DEADLINE_ZERO)
60b218
+		sprintf(value, "0");
60b218
+	else
60b218
+		snprintf(value, 16, "%u", mpp->eh_deadline);
60b218
+
60b218
+	ret = sysfs_attr_set_value(hostdev, "eh_deadline",
60b218
+				   value, strlen(value));
60b218
+	/*
60b218
+	 * not all scsi drivers support setting eh_deadline, so failing
60b218
+	 * is totally reasonable
60b218
+	 */
60b218
+	if (ret <= 0)
60b218
+		condlog(4, "%s: failed to set eh_deadline to %s, error %d", udev_device_get_sysname(hostdev), value, -ret);
60b218
+
60b218
+	udev_device_unref(hostdev);
60b218
+	return (ret <= 0);
60b218
+}
60b218
+
60b218
 static void
60b218
 sysfs_set_rport_tmo(struct multipath *mpp, struct path *pp)
60b218
 {
60b218
@@ -787,16 +823,24 @@ sysfs_set_scsi_tmo (struct multipath *mpp, unsigned int checkint)
60b218
 			mpp->alias, mpp->fast_io_fail);
60b218
 		mpp->fast_io_fail = MP_FAST_IO_FAIL_OFF;
60b218
 	}
60b218
-	if (!mpp->dev_loss && mpp->fast_io_fail == MP_FAST_IO_FAIL_UNSET)
60b218
+	if (!mpp->dev_loss && mpp->fast_io_fail == MP_FAST_IO_FAIL_UNSET &&
60b218
+	    mpp->eh_deadline == EH_DEADLINE_UNSET)
60b218
 		return 0;
60b218
 
60b218
 	vector_foreach_slot(mpp->paths, pp, i) {
60b218
-		if (pp->sg_id.proto_id == SCSI_PROTOCOL_FCP)
60b218
-			sysfs_set_rport_tmo(mpp, pp);
60b218
-		if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI)
60b218
-			sysfs_set_session_tmo(mpp, pp);
60b218
-		if (pp->sg_id.proto_id == SCSI_PROTOCOL_SAS)
60b218
-			sysfs_set_nexus_loss_tmo(mpp, pp);
60b218
+		if (pp->bus != SYSFS_BUS_SCSI)
60b218
+			continue;
60b218
+
60b218
+		if (mpp->dev_loss ||
60b218
+		    mpp->fast_io_fail != MP_FAST_IO_FAIL_UNSET) {
60b218
+			if (pp->sg_id.proto_id == SCSI_PROTOCOL_FCP)
60b218
+				sysfs_set_rport_tmo(mpp, pp);
60b218
+			else if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI)
60b218
+				sysfs_set_session_tmo(mpp, pp);
60b218
+			else if (pp->sg_id.proto_id == SCSI_PROTOCOL_SAS)
60b218
+				sysfs_set_nexus_loss_tmo(mpp, pp);
60b218
+		}
60b218
+		sysfs_set_eh_deadline(mpp, pp);
60b218
 	}
60b218
 	return 0;
60b218
 }
60b218
diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
60b218
index 725db2b1..1150cfe8 100644
60b218
--- a/libmultipath/propsel.c
60b218
+++ b/libmultipath/propsel.c
60b218
@@ -776,6 +776,23 @@ out:
60b218
 	return 0;
60b218
 }
60b218
 
60b218
+int select_eh_deadline(struct config *conf, struct multipath *mp)
60b218
+{
60b218
+	const char *origin;
60b218
+	char buff[12];
60b218
+
60b218
+	mp_set_ovr(eh_deadline);
60b218
+	mp_set_hwe(eh_deadline);
60b218
+	mp_set_conf(eh_deadline);
60b218
+	mp->eh_deadline = EH_DEADLINE_UNSET;
60b218
+	/* not changing sysfs in default case, so don't print anything */
60b218
+	return 0;
60b218
+out:
60b218
+	print_undef_off_zero(buff, 12, mp->eh_deadline);
60b218
+	condlog(3, "%s: eh_deadline = %s %s", mp->alias, buff, origin);
60b218
+	return 0;
60b218
+}
60b218
+
60b218
 int select_flush_on_last_del(struct config *conf, struct multipath *mp)
60b218
 {
60b218
 	const char *origin;
60b218
diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h
60b218
index 3d6edd8a..a68bacf0 100644
60b218
--- a/libmultipath/propsel.h
60b218
+++ b/libmultipath/propsel.h
60b218
@@ -17,6 +17,7 @@ int select_uid(struct config *conf, struct multipath *mp);
60b218
 int select_gid(struct config *conf, struct multipath *mp);
60b218
 int select_fast_io_fail(struct config *conf, struct multipath *mp);
60b218
 int select_dev_loss(struct config *conf, struct multipath *mp);
60b218
+int select_eh_deadline(struct config *conf, struct multipath *mp);
60b218
 int select_reservation_key(struct config *conf, struct multipath *mp);
60b218
 int select_retain_hwhandler (struct config *conf, struct multipath * mp);
60b218
 int select_detect_prio(struct config *conf, struct path * pp);
60b218
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
60b218
index 29209984..65542dea 100644
60b218
--- a/libmultipath/structs.h
60b218
+++ b/libmultipath/structs.h
60b218
@@ -246,6 +246,12 @@ enum fast_io_fail_states {
60b218
 	MP_FAST_IO_FAIL_ZERO = UOZ_ZERO,
60b218
 };
60b218
 
60b218
+enum eh_deadline_states {
60b218
+	EH_DEADLINE_UNSET = UOZ_UNDEF,
60b218
+	EH_DEADLINE_OFF = UOZ_OFF,
60b218
+	EH_DEADLINE_ZERO = UOZ_ZERO,
60b218
+};
60b218
+
60b218
 struct vpd_vendor_page {
60b218
 	int pg;
60b218
 	const char *name;
60b218
@@ -366,6 +372,7 @@ struct multipath {
60b218
 	int ghost_delay;
60b218
 	int ghost_delay_tick;
60b218
 	unsigned int dev_loss;
60b218
+	int eh_deadline;
60b218
 	uid_t uid;
60b218
 	gid_t gid;
60b218
 	mode_t mode;
60b218
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
60b218
index 6dc26f10..60954574 100644
60b218
--- a/multipath/multipath.conf.5
60b218
+++ b/multipath/multipath.conf.5
60b218
@@ -700,6 +700,22 @@ The default is: \fB600\fR
60b218
 .
60b218
 .
60b218
 .TP
60b218
+.B eh_deadline
60b218
+Specify the maximum number of seconds the SCSI layer will spend doing error
60b218
+handling when scsi devices fail. After this timeout the scsi layer will perform
60b218
+a full HBA reset. Setting this may be necessary in cases where the rport is
60b218
+never lost, so \fIfast_io_fail_tmo\fR and \fIdev_loss_tmo\fR will never
60b218
+trigger, but (frequently due to load) scsi commands still hang. \fBNote:\fR when
60b218
+the scsi error handler performs the HBA reset, all target paths on that HBA
60b218
+will be affected. eh_deadline should only be set in cases where all targets on
60b218
+the affected HBAs are multipathed.
60b218
+.RS
60b218
+.TP
60b218
+The default is: \fB<unset>\fR
60b218
+.RE
60b218
+.
60b218
+.
60b218
+.TP
60b218
 .B bindings_file
60b218
 The full pathname of the binding file to be used when the user_friendly_names
60b218
 option is set.
60b218
-- 
60b218
2.17.2
60b218