Blame SOURCES/0043-libmultipath-add-eh_deadline-multipath.conf-paramete.patch

b7337d
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
b7337d
From: Benjamin Marzinski <bmarzins@redhat.com>
b7337d
Date: Wed, 14 Oct 2020 18:38:20 -0500
b7337d
Subject: [PATCH] libmultipath: add eh_deadline multipath.conf parameter
b7337d
b7337d
There are times a fc rport is never lost, meaning that fast_io_fail_tmo
b7337d
and dev_loss_tmo never trigger, but scsi commands still hang. This can
b7337d
cause problems in cases where users have strict timing requirements, and
b7337d
the easiest way to solve these issues is to set eh_deadline. Since it's
b7337d
already possible to set fast_io_fail_tmo and dev_loss_tmo from
b7337d
multipath.conf, and have multipath take care of setting it correctly for
b7337d
the scsi devices in sysfs, it makes sense to allow users to set
b7337d
eh_deadline here as well.
b7337d
b7337d
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
b7337d
---
b7337d
 libmultipath/config.c      |  2 ++
b7337d
 libmultipath/config.h      |  2 ++
b7337d
 libmultipath/configure.c   |  1 +
b7337d
 libmultipath/dict.c        | 10 +++++++
b7337d
 libmultipath/discovery.c   | 58 +++++++++++++++++++++++++++++++++-----
b7337d
 libmultipath/propsel.c     | 17 +++++++++++
b7337d
 libmultipath/propsel.h     |  1 +
b7337d
 libmultipath/structs.h     |  7 +++++
b7337d
 multipath/multipath.conf.5 | 16 +++++++++++
b7337d
 9 files changed, 107 insertions(+), 7 deletions(-)
b7337d
b7337d
diff --git a/libmultipath/config.c b/libmultipath/config.c
b7337d
index 26f8e050..a71db2d0 100644
b7337d
--- a/libmultipath/config.c
b7337d
+++ b/libmultipath/config.c
b7337d
@@ -359,6 +359,7 @@ merge_hwe (struct hwentry * dst, struct hwentry * src)
b7337d
 	merge_num(flush_on_last_del);
b7337d
 	merge_num(fast_io_fail);
b7337d
 	merge_num(dev_loss);
b7337d
+	merge_num(eh_deadline);
b7337d
 	merge_num(user_friendly_names);
b7337d
 	merge_num(retain_hwhandler);
b7337d
 	merge_num(detect_prio);
b7337d
@@ -514,6 +515,7 @@ store_hwe (vector hwtable, struct hwentry * dhwe)
b7337d
 	hwe->flush_on_last_del = dhwe->flush_on_last_del;
b7337d
 	hwe->fast_io_fail = dhwe->fast_io_fail;
b7337d
 	hwe->dev_loss = dhwe->dev_loss;
b7337d
+	hwe->eh_deadline = dhwe->eh_deadline;
b7337d
 	hwe->user_friendly_names = dhwe->user_friendly_names;
b7337d
 	hwe->retain_hwhandler = dhwe->retain_hwhandler;
b7337d
 	hwe->detect_prio = dhwe->detect_prio;
b7337d
diff --git a/libmultipath/config.h b/libmultipath/config.h
b7337d
index f38c7639..a22c1b4e 100644
b7337d
--- a/libmultipath/config.h
b7337d
+++ b/libmultipath/config.h
b7337d
@@ -64,6 +64,7 @@ struct hwentry {
b7337d
 	int flush_on_last_del;
b7337d
 	int fast_io_fail;
b7337d
 	unsigned int dev_loss;
b7337d
+	int eh_deadline;
b7337d
 	int user_friendly_names;
b7337d
 	int retain_hwhandler;
b7337d
 	int detect_prio;
b7337d
@@ -149,6 +150,7 @@ struct config {
b7337d
 	int attribute_flags;
b7337d
 	int fast_io_fail;
b7337d
 	unsigned int dev_loss;
b7337d
+	int eh_deadline;
b7337d
 	int log_checker_err;
b7337d
 	int allow_queueing;
b7337d
 	int find_multipaths;
b7337d
diff --git a/libmultipath/configure.c b/libmultipath/configure.c
b7337d
index 96c79610..b7113291 100644
b7337d
--- a/libmultipath/configure.c
b7337d
+++ b/libmultipath/configure.c
b7337d
@@ -340,6 +340,7 @@ int setup_map(struct multipath *mpp, char *params, int params_size,
b7337d
 	select_gid(conf, mpp);
b7337d
 	select_fast_io_fail(conf, mpp);
b7337d
 	select_dev_loss(conf, mpp);
b7337d
+	select_eh_deadline(conf, mpp);
b7337d
 	select_reservation_key(conf, mpp);
b7337d
 	select_deferred_remove(conf, mpp);
b7337d
 	select_marginal_path_err_sample_time(conf, mpp);
b7337d
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
b7337d
index ce8e1cda..8fd91d8c 100644
b7337d
--- a/libmultipath/dict.c
b7337d
+++ b/libmultipath/dict.c
b7337d
@@ -911,6 +911,13 @@ declare_ovr_snprint(dev_loss, print_dev_loss)
b7337d
 declare_hw_handler(dev_loss, set_dev_loss)
b7337d
 declare_hw_snprint(dev_loss, print_dev_loss)
b7337d
 
b7337d
+declare_def_handler(eh_deadline, set_undef_off_zero)
b7337d
+declare_def_snprint(eh_deadline, print_undef_off_zero)
b7337d
+declare_ovr_handler(eh_deadline, set_undef_off_zero)
b7337d
+declare_ovr_snprint(eh_deadline, print_undef_off_zero)
b7337d
+declare_hw_handler(eh_deadline, set_undef_off_zero)
b7337d
+declare_hw_snprint(eh_deadline, print_undef_off_zero)
b7337d
+
b7337d
 static int
b7337d
 set_pgpolicy(vector strvec, void *ptr)
b7337d
 {
b7337d
@@ -1776,6 +1783,7 @@ init_keywords(vector keywords)
b7337d
 	install_keyword("gid", &def_gid_handler, &snprint_def_gid);
b7337d
 	install_keyword("fast_io_fail_tmo", &def_fast_io_fail_handler, &snprint_def_fast_io_fail);
b7337d
 	install_keyword("dev_loss_tmo", &def_dev_loss_handler, &snprint_def_dev_loss);
b7337d
+	install_keyword("eh_deadline", &def_eh_deadline_handler, &snprint_def_eh_deadline);
b7337d
 	install_keyword("bindings_file", &def_bindings_file_handler, &snprint_def_bindings_file);
b7337d
 	install_keyword("wwids_file", &def_wwids_file_handler, &snprint_def_wwids_file);
b7337d
 	install_keyword("prkeys_file", &def_prkeys_file_handler, &snprint_def_prkeys_file);
b7337d
@@ -1885,6 +1893,7 @@ init_keywords(vector keywords)
b7337d
 	install_keyword("flush_on_last_del", &hw_flush_on_last_del_handler, &snprint_hw_flush_on_last_del);
b7337d
 	install_keyword("fast_io_fail_tmo", &hw_fast_io_fail_handler, &snprint_hw_fast_io_fail);
b7337d
 	install_keyword("dev_loss_tmo", &hw_dev_loss_handler, &snprint_hw_dev_loss);
b7337d
+	install_keyword("eh_deadline", &hw_eh_deadline_handler, &snprint_hw_eh_deadline);
b7337d
 	install_keyword("user_friendly_names", &hw_user_friendly_names_handler, &snprint_hw_user_friendly_names);
b7337d
 	install_keyword("retain_attached_hw_handler", &hw_retain_hwhandler_handler, &snprint_hw_retain_hwhandler);
b7337d
 	install_keyword("detect_prio", &hw_detect_prio_handler, &snprint_hw_detect_prio);
b7337d
@@ -1925,6 +1934,7 @@ init_keywords(vector keywords)
b7337d
 	install_keyword("flush_on_last_del", &ovr_flush_on_last_del_handler, &snprint_ovr_flush_on_last_del);
b7337d
 	install_keyword("fast_io_fail_tmo", &ovr_fast_io_fail_handler, &snprint_ovr_fast_io_fail);
b7337d
 	install_keyword("dev_loss_tmo", &ovr_dev_loss_handler, &snprint_ovr_dev_loss);
b7337d
+	install_keyword("eh_deadline", &ovr_eh_deadline_handler, &snprint_ovr_eh_deadline);
b7337d
 	install_keyword("user_friendly_names", &ovr_user_friendly_names_handler, &snprint_ovr_user_friendly_names);
b7337d
 	install_keyword("retain_attached_hw_handler", &ovr_retain_hwhandler_handler, &snprint_ovr_retain_hwhandler);
b7337d
 	install_keyword("detect_prio", &ovr_detect_prio_handler, &snprint_ovr_detect_prio);
b7337d
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
b7337d
index 01aadba9..a328aafa 100644
b7337d
--- a/libmultipath/discovery.c
b7337d
+++ b/libmultipath/discovery.c
b7337d
@@ -577,6 +577,42 @@ sysfs_get_asymmetric_access_state(struct path *pp, char *buff, int buflen)
b7337d
 	return !!preferred;
b7337d
 }
b7337d
 
b7337d
+static int
b7337d
+sysfs_set_eh_deadline(struct multipath *mpp, struct path *pp)
b7337d
+{
b7337d
+	struct udev_device *hostdev;
b7337d
+	char host_name[HOST_NAME_LEN], value[16];
b7337d
+	int ret;
b7337d
+
b7337d
+	if (mpp->eh_deadline == EH_DEADLINE_UNSET)
b7337d
+		return 0;
b7337d
+
b7337d
+	sprintf(host_name, "host%d", pp->sg_id.host_no);
b7337d
+	hostdev = udev_device_new_from_subsystem_sysname(udev,
b7337d
+			"scsi_host", host_name);
b7337d
+	if (!hostdev)
b7337d
+		return 1;
b7337d
+
b7337d
+	if (mpp->eh_deadline == EH_DEADLINE_OFF)
b7337d
+		sprintf(value, "off");
b7337d
+	else if (mpp->eh_deadline == EH_DEADLINE_ZERO)
b7337d
+		sprintf(value, "0");
b7337d
+	else
b7337d
+		snprintf(value, 16, "%u", mpp->eh_deadline);
b7337d
+
b7337d
+	ret = sysfs_attr_set_value(hostdev, "eh_deadline",
b7337d
+				   value, strlen(value));
b7337d
+	/*
b7337d
+	 * not all scsi drivers support setting eh_deadline, so failing
b7337d
+	 * is totally reasonable
b7337d
+	 */
b7337d
+	if (ret <= 0)
b7337d
+		condlog(4, "%s: failed to set eh_deadline to %s, error %d", udev_device_get_sysname(hostdev), value, -ret);
b7337d
+
b7337d
+	udev_device_unref(hostdev);
b7337d
+	return (ret <= 0);
b7337d
+}
b7337d
+
b7337d
 static void
b7337d
 sysfs_set_rport_tmo(struct multipath *mpp, struct path *pp)
b7337d
 {
b7337d
@@ -787,16 +823,24 @@ sysfs_set_scsi_tmo (struct multipath *mpp, unsigned int checkint)
b7337d
 			mpp->alias, mpp->fast_io_fail);
b7337d
 		mpp->fast_io_fail = MP_FAST_IO_FAIL_OFF;
b7337d
 	}
b7337d
-	if (!mpp->dev_loss && mpp->fast_io_fail == MP_FAST_IO_FAIL_UNSET)
b7337d
+	if (!mpp->dev_loss && mpp->fast_io_fail == MP_FAST_IO_FAIL_UNSET &&
b7337d
+	    mpp->eh_deadline == EH_DEADLINE_UNSET)
b7337d
 		return 0;
b7337d
 
b7337d
 	vector_foreach_slot(mpp->paths, pp, i) {
b7337d
-		if (pp->sg_id.proto_id == SCSI_PROTOCOL_FCP)
b7337d
-			sysfs_set_rport_tmo(mpp, pp);
b7337d
-		if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI)
b7337d
-			sysfs_set_session_tmo(mpp, pp);
b7337d
-		if (pp->sg_id.proto_id == SCSI_PROTOCOL_SAS)
b7337d
-			sysfs_set_nexus_loss_tmo(mpp, pp);
b7337d
+		if (pp->bus != SYSFS_BUS_SCSI)
b7337d
+			continue;
b7337d
+
b7337d
+		if (mpp->dev_loss ||
b7337d
+		    mpp->fast_io_fail != MP_FAST_IO_FAIL_UNSET) {
b7337d
+			if (pp->sg_id.proto_id == SCSI_PROTOCOL_FCP)
b7337d
+				sysfs_set_rport_tmo(mpp, pp);
b7337d
+			else if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI)
b7337d
+				sysfs_set_session_tmo(mpp, pp);
b7337d
+			else if (pp->sg_id.proto_id == SCSI_PROTOCOL_SAS)
b7337d
+				sysfs_set_nexus_loss_tmo(mpp, pp);
b7337d
+		}
b7337d
+		sysfs_set_eh_deadline(mpp, pp);
b7337d
 	}
b7337d
 	return 0;
b7337d
 }
b7337d
diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
b7337d
index 725db2b1..1150cfe8 100644
b7337d
--- a/libmultipath/propsel.c
b7337d
+++ b/libmultipath/propsel.c
b7337d
@@ -776,6 +776,23 @@ out:
b7337d
 	return 0;
b7337d
 }
b7337d
 
b7337d
+int select_eh_deadline(struct config *conf, struct multipath *mp)
b7337d
+{
b7337d
+	const char *origin;
b7337d
+	char buff[12];
b7337d
+
b7337d
+	mp_set_ovr(eh_deadline);
b7337d
+	mp_set_hwe(eh_deadline);
b7337d
+	mp_set_conf(eh_deadline);
b7337d
+	mp->eh_deadline = EH_DEADLINE_UNSET;
b7337d
+	/* not changing sysfs in default case, so don't print anything */
b7337d
+	return 0;
b7337d
+out:
b7337d
+	print_undef_off_zero(buff, 12, mp->eh_deadline);
b7337d
+	condlog(3, "%s: eh_deadline = %s %s", mp->alias, buff, origin);
b7337d
+	return 0;
b7337d
+}
b7337d
+
b7337d
 int select_flush_on_last_del(struct config *conf, struct multipath *mp)
b7337d
 {
b7337d
 	const char *origin;
b7337d
diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h
b7337d
index 3d6edd8a..a68bacf0 100644
b7337d
--- a/libmultipath/propsel.h
b7337d
+++ b/libmultipath/propsel.h
b7337d
@@ -17,6 +17,7 @@ int select_uid(struct config *conf, struct multipath *mp);
b7337d
 int select_gid(struct config *conf, struct multipath *mp);
b7337d
 int select_fast_io_fail(struct config *conf, struct multipath *mp);
b7337d
 int select_dev_loss(struct config *conf, struct multipath *mp);
b7337d
+int select_eh_deadline(struct config *conf, struct multipath *mp);
b7337d
 int select_reservation_key(struct config *conf, struct multipath *mp);
b7337d
 int select_retain_hwhandler (struct config *conf, struct multipath * mp);
b7337d
 int select_detect_prio(struct config *conf, struct path * pp);
b7337d
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
b7337d
index 29209984..65542dea 100644
b7337d
--- a/libmultipath/structs.h
b7337d
+++ b/libmultipath/structs.h
b7337d
@@ -246,6 +246,12 @@ enum fast_io_fail_states {
b7337d
 	MP_FAST_IO_FAIL_ZERO = UOZ_ZERO,
b7337d
 };
b7337d
 
b7337d
+enum eh_deadline_states {
b7337d
+	EH_DEADLINE_UNSET = UOZ_UNDEF,
b7337d
+	EH_DEADLINE_OFF = UOZ_OFF,
b7337d
+	EH_DEADLINE_ZERO = UOZ_ZERO,
b7337d
+};
b7337d
+
b7337d
 struct vpd_vendor_page {
b7337d
 	int pg;
b7337d
 	const char *name;
b7337d
@@ -366,6 +372,7 @@ struct multipath {
b7337d
 	int ghost_delay;
b7337d
 	int ghost_delay_tick;
b7337d
 	unsigned int dev_loss;
b7337d
+	int eh_deadline;
b7337d
 	uid_t uid;
b7337d
 	gid_t gid;
b7337d
 	mode_t mode;
b7337d
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
b7337d
index 6dc26f10..60954574 100644
b7337d
--- a/multipath/multipath.conf.5
b7337d
+++ b/multipath/multipath.conf.5
b7337d
@@ -700,6 +700,22 @@ The default is: \fB600\fR
b7337d
 .
b7337d
 .
b7337d
 .TP
b7337d
+.B eh_deadline
b7337d
+Specify the maximum number of seconds the SCSI layer will spend doing error
b7337d
+handling when scsi devices fail. After this timeout the scsi layer will perform
b7337d
+a full HBA reset. Setting this may be necessary in cases where the rport is
b7337d
+never lost, so \fIfast_io_fail_tmo\fR and \fIdev_loss_tmo\fR will never
b7337d
+trigger, but (frequently due to load) scsi commands still hang. \fBNote:\fR when
b7337d
+the scsi error handler performs the HBA reset, all target paths on that HBA
b7337d
+will be affected. eh_deadline should only be set in cases where all targets on
b7337d
+the affected HBAs are multipathed.
b7337d
+.RS
b7337d
+.TP
b7337d
+The default is: \fB<unset>\fR
b7337d
+.RE
b7337d
+.
b7337d
+.
b7337d
+.TP
b7337d
 .B bindings_file
b7337d
 The full pathname of the binding file to be used when the user_friendly_names
b7337d
 option is set.
b7337d
-- 
b7337d
2.17.2
b7337d