Blame SOURCES/0043-libmultipath-add-eh_deadline-multipath.conf-paramete.patch

05be62
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
05be62
From: Benjamin Marzinski <bmarzins@redhat.com>
05be62
Date: Wed, 14 Oct 2020 18:38:20 -0500
05be62
Subject: [PATCH] libmultipath: add eh_deadline multipath.conf parameter
05be62
05be62
There are times a fc rport is never lost, meaning that fast_io_fail_tmo
05be62
and dev_loss_tmo never trigger, but scsi commands still hang. This can
05be62
cause problems in cases where users have strict timing requirements, and
05be62
the easiest way to solve these issues is to set eh_deadline. Since it's
05be62
already possible to set fast_io_fail_tmo and dev_loss_tmo from
05be62
multipath.conf, and have multipath take care of setting it correctly for
05be62
the scsi devices in sysfs, it makes sense to allow users to set
05be62
eh_deadline here as well.
05be62
05be62
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
05be62
---
05be62
 libmultipath/config.c      |  2 ++
05be62
 libmultipath/config.h      |  2 ++
05be62
 libmultipath/configure.c   |  1 +
05be62
 libmultipath/dict.c        | 10 +++++++
05be62
 libmultipath/discovery.c   | 58 +++++++++++++++++++++++++++++++++-----
05be62
 libmultipath/propsel.c     | 17 +++++++++++
05be62
 libmultipath/propsel.h     |  1 +
05be62
 libmultipath/structs.h     |  7 +++++
05be62
 multipath/multipath.conf.5 | 16 +++++++++++
05be62
 9 files changed, 107 insertions(+), 7 deletions(-)
05be62
05be62
diff --git a/libmultipath/config.c b/libmultipath/config.c
05be62
index 26f8e050..a71db2d0 100644
05be62
--- a/libmultipath/config.c
05be62
+++ b/libmultipath/config.c
05be62
@@ -359,6 +359,7 @@ merge_hwe (struct hwentry * dst, struct hwentry * src)
05be62
 	merge_num(flush_on_last_del);
05be62
 	merge_num(fast_io_fail);
05be62
 	merge_num(dev_loss);
05be62
+	merge_num(eh_deadline);
05be62
 	merge_num(user_friendly_names);
05be62
 	merge_num(retain_hwhandler);
05be62
 	merge_num(detect_prio);
05be62
@@ -514,6 +515,7 @@ store_hwe (vector hwtable, struct hwentry * dhwe)
05be62
 	hwe->flush_on_last_del = dhwe->flush_on_last_del;
05be62
 	hwe->fast_io_fail = dhwe->fast_io_fail;
05be62
 	hwe->dev_loss = dhwe->dev_loss;
05be62
+	hwe->eh_deadline = dhwe->eh_deadline;
05be62
 	hwe->user_friendly_names = dhwe->user_friendly_names;
05be62
 	hwe->retain_hwhandler = dhwe->retain_hwhandler;
05be62
 	hwe->detect_prio = dhwe->detect_prio;
05be62
diff --git a/libmultipath/config.h b/libmultipath/config.h
05be62
index f38c7639..a22c1b4e 100644
05be62
--- a/libmultipath/config.h
05be62
+++ b/libmultipath/config.h
05be62
@@ -64,6 +64,7 @@ struct hwentry {
05be62
 	int flush_on_last_del;
05be62
 	int fast_io_fail;
05be62
 	unsigned int dev_loss;
05be62
+	int eh_deadline;
05be62
 	int user_friendly_names;
05be62
 	int retain_hwhandler;
05be62
 	int detect_prio;
05be62
@@ -149,6 +150,7 @@ struct config {
05be62
 	int attribute_flags;
05be62
 	int fast_io_fail;
05be62
 	unsigned int dev_loss;
05be62
+	int eh_deadline;
05be62
 	int log_checker_err;
05be62
 	int allow_queueing;
05be62
 	int find_multipaths;
05be62
diff --git a/libmultipath/configure.c b/libmultipath/configure.c
05be62
index 96c79610..b7113291 100644
05be62
--- a/libmultipath/configure.c
05be62
+++ b/libmultipath/configure.c
05be62
@@ -340,6 +340,7 @@ int setup_map(struct multipath *mpp, char *params, int params_size,
05be62
 	select_gid(conf, mpp);
05be62
 	select_fast_io_fail(conf, mpp);
05be62
 	select_dev_loss(conf, mpp);
05be62
+	select_eh_deadline(conf, mpp);
05be62
 	select_reservation_key(conf, mpp);
05be62
 	select_deferred_remove(conf, mpp);
05be62
 	select_marginal_path_err_sample_time(conf, mpp);
05be62
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
05be62
index ce8e1cda..8fd91d8c 100644
05be62
--- a/libmultipath/dict.c
05be62
+++ b/libmultipath/dict.c
05be62
@@ -911,6 +911,13 @@ declare_ovr_snprint(dev_loss, print_dev_loss)
05be62
 declare_hw_handler(dev_loss, set_dev_loss)
05be62
 declare_hw_snprint(dev_loss, print_dev_loss)
05be62
 
05be62
+declare_def_handler(eh_deadline, set_undef_off_zero)
05be62
+declare_def_snprint(eh_deadline, print_undef_off_zero)
05be62
+declare_ovr_handler(eh_deadline, set_undef_off_zero)
05be62
+declare_ovr_snprint(eh_deadline, print_undef_off_zero)
05be62
+declare_hw_handler(eh_deadline, set_undef_off_zero)
05be62
+declare_hw_snprint(eh_deadline, print_undef_off_zero)
05be62
+
05be62
 static int
05be62
 set_pgpolicy(vector strvec, void *ptr)
05be62
 {
05be62
@@ -1776,6 +1783,7 @@ init_keywords(vector keywords)
05be62
 	install_keyword("gid", &def_gid_handler, &snprint_def_gid);
05be62
 	install_keyword("fast_io_fail_tmo", &def_fast_io_fail_handler, &snprint_def_fast_io_fail);
05be62
 	install_keyword("dev_loss_tmo", &def_dev_loss_handler, &snprint_def_dev_loss);
05be62
+	install_keyword("eh_deadline", &def_eh_deadline_handler, &snprint_def_eh_deadline);
05be62
 	install_keyword("bindings_file", &def_bindings_file_handler, &snprint_def_bindings_file);
05be62
 	install_keyword("wwids_file", &def_wwids_file_handler, &snprint_def_wwids_file);
05be62
 	install_keyword("prkeys_file", &def_prkeys_file_handler, &snprint_def_prkeys_file);
05be62
@@ -1885,6 +1893,7 @@ init_keywords(vector keywords)
05be62
 	install_keyword("flush_on_last_del", &hw_flush_on_last_del_handler, &snprint_hw_flush_on_last_del);
05be62
 	install_keyword("fast_io_fail_tmo", &hw_fast_io_fail_handler, &snprint_hw_fast_io_fail);
05be62
 	install_keyword("dev_loss_tmo", &hw_dev_loss_handler, &snprint_hw_dev_loss);
05be62
+	install_keyword("eh_deadline", &hw_eh_deadline_handler, &snprint_hw_eh_deadline);
05be62
 	install_keyword("user_friendly_names", &hw_user_friendly_names_handler, &snprint_hw_user_friendly_names);
05be62
 	install_keyword("retain_attached_hw_handler", &hw_retain_hwhandler_handler, &snprint_hw_retain_hwhandler);
05be62
 	install_keyword("detect_prio", &hw_detect_prio_handler, &snprint_hw_detect_prio);
05be62
@@ -1925,6 +1934,7 @@ init_keywords(vector keywords)
05be62
 	install_keyword("flush_on_last_del", &ovr_flush_on_last_del_handler, &snprint_ovr_flush_on_last_del);
05be62
 	install_keyword("fast_io_fail_tmo", &ovr_fast_io_fail_handler, &snprint_ovr_fast_io_fail);
05be62
 	install_keyword("dev_loss_tmo", &ovr_dev_loss_handler, &snprint_ovr_dev_loss);
05be62
+	install_keyword("eh_deadline", &ovr_eh_deadline_handler, &snprint_ovr_eh_deadline);
05be62
 	install_keyword("user_friendly_names", &ovr_user_friendly_names_handler, &snprint_ovr_user_friendly_names);
05be62
 	install_keyword("retain_attached_hw_handler", &ovr_retain_hwhandler_handler, &snprint_ovr_retain_hwhandler);
05be62
 	install_keyword("detect_prio", &ovr_detect_prio_handler, &snprint_ovr_detect_prio);
05be62
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
05be62
index 01aadba9..a328aafa 100644
05be62
--- a/libmultipath/discovery.c
05be62
+++ b/libmultipath/discovery.c
05be62
@@ -577,6 +577,42 @@ sysfs_get_asymmetric_access_state(struct path *pp, char *buff, int buflen)
05be62
 	return !!preferred;
05be62
 }
05be62
 
05be62
+static int
05be62
+sysfs_set_eh_deadline(struct multipath *mpp, struct path *pp)
05be62
+{
05be62
+	struct udev_device *hostdev;
05be62
+	char host_name[HOST_NAME_LEN], value[16];
05be62
+	int ret;
05be62
+
05be62
+	if (mpp->eh_deadline == EH_DEADLINE_UNSET)
05be62
+		return 0;
05be62
+
05be62
+	sprintf(host_name, "host%d", pp->sg_id.host_no);
05be62
+	hostdev = udev_device_new_from_subsystem_sysname(udev,
05be62
+			"scsi_host", host_name);
05be62
+	if (!hostdev)
05be62
+		return 1;
05be62
+
05be62
+	if (mpp->eh_deadline == EH_DEADLINE_OFF)
05be62
+		sprintf(value, "off");
05be62
+	else if (mpp->eh_deadline == EH_DEADLINE_ZERO)
05be62
+		sprintf(value, "0");
05be62
+	else
05be62
+		snprintf(value, 16, "%u", mpp->eh_deadline);
05be62
+
05be62
+	ret = sysfs_attr_set_value(hostdev, "eh_deadline",
05be62
+				   value, strlen(value));
05be62
+	/*
05be62
+	 * not all scsi drivers support setting eh_deadline, so failing
05be62
+	 * is totally reasonable
05be62
+	 */
05be62
+	if (ret <= 0)
05be62
+		condlog(4, "%s: failed to set eh_deadline to %s, error %d", udev_device_get_sysname(hostdev), value, -ret);
05be62
+
05be62
+	udev_device_unref(hostdev);
05be62
+	return (ret <= 0);
05be62
+}
05be62
+
05be62
 static void
05be62
 sysfs_set_rport_tmo(struct multipath *mpp, struct path *pp)
05be62
 {
05be62
@@ -787,16 +823,24 @@ sysfs_set_scsi_tmo (struct multipath *mpp, unsigned int checkint)
05be62
 			mpp->alias, mpp->fast_io_fail);
05be62
 		mpp->fast_io_fail = MP_FAST_IO_FAIL_OFF;
05be62
 	}
05be62
-	if (!mpp->dev_loss && mpp->fast_io_fail == MP_FAST_IO_FAIL_UNSET)
05be62
+	if (!mpp->dev_loss && mpp->fast_io_fail == MP_FAST_IO_FAIL_UNSET &&
05be62
+	    mpp->eh_deadline == EH_DEADLINE_UNSET)
05be62
 		return 0;
05be62
 
05be62
 	vector_foreach_slot(mpp->paths, pp, i) {
05be62
-		if (pp->sg_id.proto_id == SCSI_PROTOCOL_FCP)
05be62
-			sysfs_set_rport_tmo(mpp, pp);
05be62
-		if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI)
05be62
-			sysfs_set_session_tmo(mpp, pp);
05be62
-		if (pp->sg_id.proto_id == SCSI_PROTOCOL_SAS)
05be62
-			sysfs_set_nexus_loss_tmo(mpp, pp);
05be62
+		if (pp->bus != SYSFS_BUS_SCSI)
05be62
+			continue;
05be62
+
05be62
+		if (mpp->dev_loss ||
05be62
+		    mpp->fast_io_fail != MP_FAST_IO_FAIL_UNSET) {
05be62
+			if (pp->sg_id.proto_id == SCSI_PROTOCOL_FCP)
05be62
+				sysfs_set_rport_tmo(mpp, pp);
05be62
+			else if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI)
05be62
+				sysfs_set_session_tmo(mpp, pp);
05be62
+			else if (pp->sg_id.proto_id == SCSI_PROTOCOL_SAS)
05be62
+				sysfs_set_nexus_loss_tmo(mpp, pp);
05be62
+		}
05be62
+		sysfs_set_eh_deadline(mpp, pp);
05be62
 	}
05be62
 	return 0;
05be62
 }
05be62
diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
05be62
index 725db2b1..1150cfe8 100644
05be62
--- a/libmultipath/propsel.c
05be62
+++ b/libmultipath/propsel.c
05be62
@@ -776,6 +776,23 @@ out:
05be62
 	return 0;
05be62
 }
05be62
 
05be62
+int select_eh_deadline(struct config *conf, struct multipath *mp)
05be62
+{
05be62
+	const char *origin;
05be62
+	char buff[12];
05be62
+
05be62
+	mp_set_ovr(eh_deadline);
05be62
+	mp_set_hwe(eh_deadline);
05be62
+	mp_set_conf(eh_deadline);
05be62
+	mp->eh_deadline = EH_DEADLINE_UNSET;
05be62
+	/* not changing sysfs in default case, so don't print anything */
05be62
+	return 0;
05be62
+out:
05be62
+	print_undef_off_zero(buff, 12, mp->eh_deadline);
05be62
+	condlog(3, "%s: eh_deadline = %s %s", mp->alias, buff, origin);
05be62
+	return 0;
05be62
+}
05be62
+
05be62
 int select_flush_on_last_del(struct config *conf, struct multipath *mp)
05be62
 {
05be62
 	const char *origin;
05be62
diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h
05be62
index 3d6edd8a..a68bacf0 100644
05be62
--- a/libmultipath/propsel.h
05be62
+++ b/libmultipath/propsel.h
05be62
@@ -17,6 +17,7 @@ int select_uid(struct config *conf, struct multipath *mp);
05be62
 int select_gid(struct config *conf, struct multipath *mp);
05be62
 int select_fast_io_fail(struct config *conf, struct multipath *mp);
05be62
 int select_dev_loss(struct config *conf, struct multipath *mp);
05be62
+int select_eh_deadline(struct config *conf, struct multipath *mp);
05be62
 int select_reservation_key(struct config *conf, struct multipath *mp);
05be62
 int select_retain_hwhandler (struct config *conf, struct multipath * mp);
05be62
 int select_detect_prio(struct config *conf, struct path * pp);
05be62
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
05be62
index 29209984..65542dea 100644
05be62
--- a/libmultipath/structs.h
05be62
+++ b/libmultipath/structs.h
05be62
@@ -246,6 +246,12 @@ enum fast_io_fail_states {
05be62
 	MP_FAST_IO_FAIL_ZERO = UOZ_ZERO,
05be62
 };
05be62
 
05be62
+enum eh_deadline_states {
05be62
+	EH_DEADLINE_UNSET = UOZ_UNDEF,
05be62
+	EH_DEADLINE_OFF = UOZ_OFF,
05be62
+	EH_DEADLINE_ZERO = UOZ_ZERO,
05be62
+};
05be62
+
05be62
 struct vpd_vendor_page {
05be62
 	int pg;
05be62
 	const char *name;
05be62
@@ -366,6 +372,7 @@ struct multipath {
05be62
 	int ghost_delay;
05be62
 	int ghost_delay_tick;
05be62
 	unsigned int dev_loss;
05be62
+	int eh_deadline;
05be62
 	uid_t uid;
05be62
 	gid_t gid;
05be62
 	mode_t mode;
05be62
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
05be62
index 6dc26f10..60954574 100644
05be62
--- a/multipath/multipath.conf.5
05be62
+++ b/multipath/multipath.conf.5
05be62
@@ -700,6 +700,22 @@ The default is: \fB600\fR
05be62
 .
05be62
 .
05be62
 .TP
05be62
+.B eh_deadline
05be62
+Specify the maximum number of seconds the SCSI layer will spend doing error
05be62
+handling when scsi devices fail. After this timeout the scsi layer will perform
05be62
+a full HBA reset. Setting this may be necessary in cases where the rport is
05be62
+never lost, so \fIfast_io_fail_tmo\fR and \fIdev_loss_tmo\fR will never
05be62
+trigger, but (frequently due to load) scsi commands still hang. \fBNote:\fR when
05be62
+the scsi error handler performs the HBA reset, all target paths on that HBA
05be62
+will be affected. eh_deadline should only be set in cases where all targets on
05be62
+the affected HBAs are multipathed.
05be62
+.RS
05be62
+.TP
05be62
+The default is: \fB<unset>\fR
05be62
+.RE
05be62
+.
05be62
+.
05be62
+.TP
05be62
 .B bindings_file
05be62
 The full pathname of the binding file to be used when the user_friendly_names
05be62
 option is set.
05be62
-- 
05be62
2.17.2
05be62