526b4e
---
526b4e
 libmultipath/checkers.c    |   19 +++-
526b4e
 libmultipath/checkers.h    |    3 
526b4e
 libmultipath/discovery.c   |  183 +++++++++++++++++++++++++++++++++++++++------
526b4e
 libmultipath/discovery.h   |    2 
526b4e
 libmultipath/hwtable.c     |   10 ++
526b4e
 libmultipath/structs.h     |    1 
526b4e
 libmultipath/uevent.c      |    2 
526b4e
 multipath/multipath.conf.5 |    3 
526b4e
 multipathd/main.c          |   27 ------
526b4e
 9 files changed, 194 insertions(+), 56 deletions(-)
526b4e
526b4e
Index: multipath-tools-130222/libmultipath/discovery.c
526b4e
===================================================================
526b4e
--- multipath-tools-130222.orig/libmultipath/discovery.c
526b4e
+++ multipath-tools-130222/libmultipath/discovery.c
526b4e
@@ -13,6 +13,7 @@
526b4e
 #include <libgen.h>
526b4e
 #include <libudev.h>
526b4e
 #include <libdevmapper.h>
526b4e
+#include <ctype.h>
526b4e
 
526b4e
 #include "checkers.h"
526b4e
 #include "vector.h"
526b4e
@@ -881,6 +882,46 @@ scsi_sysfs_pathinfo (struct path * pp)
526b4e
 }
526b4e
 
526b4e
 static int
526b4e
+nvme_sysfs_pathinfo (struct path * pp)
526b4e
+{
526b4e
+	/* Fill pp from sysfs; returns 0 on success, 1 on failure. */
526b4e
+	struct udev_device *parent;
526b4e
+	const char *name, *model, *serial, *rev;
526b4e
+
526b4e
+	/* sysname must parse as "nvme%dn%d": host_no, then scsi_id */
526b4e
+	name = udev_device_get_sysname(pp->udev);
526b4e
+	if (!name)
526b4e
+		return 1;
526b4e
+	if (sscanf(name, "nvme%dn%d",
526b4e
+		   &pp->sg_id.host_no, &pp->sg_id.scsi_id) != 2)
526b4e
+		return 1;
526b4e
+	pp->sg_id.channel = 0;
526b4e
+	pp->sg_id.lun = 0;
526b4e
+
526b4e
+	parent = udev_device_get_parent(pp->udev);
526b4e
+	if (!parent)
526b4e
+		return 1;
526b4e
+
526b4e
+	/* attribute lookups may yield NULL, which "%s" must never see */
526b4e
+	model = udev_device_get_sysattr_value(parent, "model");
526b4e
+	serial = udev_device_get_sysattr_value(parent, "serial");
526b4e
+	rev = udev_device_get_sysattr_value(parent, "firmware_rev");
526b4e
+
526b4e
+	snprintf(pp->vendor_id, SCSI_VENDOR_SIZE, "NVME");
526b4e
+	snprintf(pp->product_id, SCSI_PRODUCT_SIZE, "%s", model ? model : "");
526b4e
+	snprintf(pp->serial, SERIAL_SIZE, "%s", serial ? serial : "");
526b4e
+	snprintf(pp->rev, SCSI_REV_SIZE, "%s", rev ? rev : "");
526b4e
+
526b4e
+	condlog(3, "%s: vendor = %s", pp->dev, pp->vendor_id);
526b4e
+	condlog(3, "%s: product = %s", pp->dev, pp->product_id);
526b4e
+	condlog(3, "%s: serial = %s", pp->dev, pp->serial);
526b4e
+	condlog(3, "%s: rev = %s", pp->dev, pp->rev);
526b4e
+
526b4e
+	pp->hwe = find_hwe(conf->hwtable, pp->vendor_id, pp->product_id, NULL);
526b4e
+	return 0;
526b4e
+}
526b4e
+
526b4e
+static int
526b4e
 rbd_sysfs_pathinfo (struct path * pp)
526b4e
 {
526b4e
 	sprintf(pp->vendor_id, "Ceph");
526b4e
@@ -1040,14 +1081,20 @@ path_offline (struct path * pp)
526b4e
 {
526b4e
 	struct udev_device * parent;
526b4e
 	char buff[SCSI_STATE_SIZE];
526b4e
+	const char *subsys_type;
526b4e
 
526b4e
-	if (pp->bus != SYSFS_BUS_SCSI)
526b4e
+	if (pp->bus == SYSFS_BUS_SCSI)
526b4e
+		subsys_type = "scsi";
526b4e
+	else if (pp->bus == SYSFS_BUS_NVME)
526b4e
+		subsys_type = "nvme";
526b4e
+	else
526b4e
 		return PATH_UP;
526b4e
 
526b4e
 	parent = pp->udev;
526b4e
 	while (parent) {
526b4e
 		const char *subsys = udev_device_get_subsystem(parent);
526b4e
-		if (subsys && !strncmp(subsys, "scsi", 4))
526b4e
+		if (subsys && !strncmp(subsys, subsys_type,
526b4e
+		    		       strlen(subsys_type)))
526b4e
 			break;
526b4e
 		parent = udev_device_get_parent(parent);
526b4e
 	}
526b4e
@@ -1063,15 +1110,30 @@ path_offline (struct path * pp)
526b4e
 
526b4e
 	condlog(3, "%s: path state = %s", pp->dev, buff);
526b4e
 
526b4e
-	if (!strncmp(buff, "offline", 7)) {
526b4e
-		pp->offline = 1;
526b4e
-		return PATH_DOWN;
526b4e
+	if (pp->bus == SYSFS_BUS_SCSI) {
526b4e
+		if (!strncmp(buff, "offline", 7)) {
526b4e
+			pp->offline = 1;
526b4e
+			return PATH_DOWN;
526b4e
+		}
526b4e
+		pp->offline = 0;
526b4e
+		if (!strncmp(buff, "blocked", 7) ||
526b4e
+		    !strncmp(buff, "quiesce", 7))
526b4e
+			return PATH_PENDING;
526b4e
+		else if (!strncmp(buff, "running", 7))
526b4e
+			return PATH_UP;
526b4e
+	}
526b4e
+	else if (pp->bus == SYSFS_BUS_NVME) {
526b4e
+		if (!strncmp(buff, "dead", 4)) {
526b4e
+			pp->offline = 1;
526b4e
+			return PATH_DOWN;
526b4e
+		}
526b4e
+		pp->offline = 0;
526b4e
+		if (!strncmp(buff, "new", 3) ||
526b4e
+		    !strncmp(buff, "deleting", 8))
526b4e
+			return PATH_PENDING;
526b4e
+		else if (!strncmp(buff, "live", 4))
526b4e
+			return PATH_UP;
526b4e
 	}
526b4e
-	pp->offline = 0;
526b4e
-	if (!strncmp(buff, "blocked", 7) || !strncmp(buff, "quiesce", 7))
526b4e
-		return PATH_PENDING;
526b4e
-	else if (!strncmp(buff, "running", 7))
526b4e
-		return PATH_UP;
526b4e
 
526b4e
 	return PATH_DOWN;
526b4e
 }
526b4e
@@ -1091,6 +1153,8 @@ sysfs_pathinfo(struct path * pp)
526b4e
 		pp->bus = SYSFS_BUS_SCSI;
526b4e
 	if (!strncmp(pp->dev,"rbd", 3))
526b4e
 		pp->bus = SYSFS_BUS_RBD;
526b4e
+	if (!strncmp(pp->dev,"nvme", 4))
526b4e
+		pp->bus = SYSFS_BUS_NVME;
526b4e
 
526b4e
 	if (pp->bus == SYSFS_BUS_UNDEF)
526b4e
 		return 0;
526b4e
@@ -1106,6 +1170,9 @@ sysfs_pathinfo(struct path * pp)
526b4e
 	} else if (pp->bus == SYSFS_BUS_RBD) {
526b4e
 		if (rbd_sysfs_pathinfo(pp))
526b4e
 			return 1;
526b4e
+	} else if (pp->bus == SYSFS_BUS_NVME) {
526b4e
+		if (nvme_sysfs_pathinfo(pp))
526b4e
+			return 1;
526b4e
 	}
526b4e
 	return 0;
526b4e
 }
526b4e
@@ -1132,7 +1199,7 @@ cciss_ioctl_pathinfo (struct path * pp,
526b4e
 }
526b4e
 
526b4e
 int
526b4e
-get_state (struct path * pp, int daemon)
526b4e
+get_state (struct path * pp, int daemon, int oldstate)
526b4e
 {
526b4e
 	struct checker * c = &pp->checker;
526b4e
 	int state;
526b4e
@@ -1171,8 +1238,9 @@ get_state (struct path * pp, int daemon)
526b4e
 	    (pp->bus != SYSFS_BUS_SCSI ||
526b4e
 	     sysfs_get_timeout(pp, &(c->timeout))))
526b4e
 		c->timeout = DEF_TIMEOUT;
526b4e
-	state = checker_check(c);
526b4e
-	condlog(3, "%s: state = %s", pp->dev, checker_state_name(state));
526b4e
+	state = checker_check(c, oldstate);
526b4e
+	condlog(3, "%s: %s state = %s", pp->dev,
526b4e
+		checker_name(c), checker_state_name(state));
526b4e
 	if (state != PATH_UP && state != PATH_GHOST &&
526b4e
 	    strlen(checker_message(c)))
526b4e
 		condlog(3, "%s: checker msg is \"%s\"",
526b4e
@@ -1256,6 +1324,82 @@ free_dev:
526b4e
 	return ret;
526b4e
 }
526b4e
 
526b4e
+/*
526b4e
+ * Strip the run of "00" character pairs (hex encoding of a 0 byte)
526b4e
+ * that immediately precedes end in the string of length *len at start.
526b4e
+ * The rest of the string from end onwards, plus one trailing byte (the
526b4e
+ * NUL when *len is the string length), is moved down over the removed
526b4e
+ * pairs and *len shrinks accordingly. Returns the new position of end.
526b4e
+ */
526b4e
+static char *
526b4e
+skip_zeroes_backward(char* start, int *len, char *end)
526b4e
+{
526b4e
+	char *dst = end;
526b4e
+	int cut;
526b4e
+
526b4e
+	while (dst - start >= 2 && dst[-1] == '0' && dst[-2] == '0')
526b4e
+		dst -= 2;
526b4e
+
526b4e
+	cut = end - dst;
526b4e
+	if (cut != 0) {
526b4e
+		memmove(dst, end, (start + *len) - end + 1);
526b4e
+		*len -= cut;
526b4e
+	}
526b4e
+	return dst;
526b4e
+}
526b4e
+
526b4e
+/*
526b4e
+ * Fix for NVME wwids looking like this:
526b4e
+ * nvme.0000-3163653363666438366239656630386200-4c696e75780000000000000000000000000000000000000000000000000000000000000000000000-00000002
526b4e
+ * which are encountered in some combinations of Linux NVME host and target.
526b4e
+ * The '00' are hex-encoded 0-bytes which are forbidden in the serial (SN)
526b4e
+ * and model (MN) fields. Discard them.
526b4e
+ * If value has this form and the stripped result (plus NUL) fits in size
526b4e
+ * bytes, copies it to pp->wwid and returns its length. Otherwise returns 0.
526b4e
+ */
526b4e
+static int
526b4e
+fix_broken_nvme_wwid(struct path *pp, const char *value, int size)
526b4e
+{
526b4e
+	static const char _nvme[] = "nvme.";
526b4e
+	size_t vlen = strlen(value);
526b4e
+	int len, i;
526b4e
+	char mangled[256];
526b4e
+	char *p;
526b4e
+
526b4e
+	/* too short for "nvme.%04x-" (10 chars) or too long for mangled */
526b4e
+	if (vlen < 10 || vlen >= sizeof(mangled))
526b4e
+		return 0;
526b4e
+	len = vlen;
526b4e
+
526b4e
+	/* Check that value starts with "nvme.%04x-" */
526b4e
+	if (memcmp(value, _nvme, sizeof(_nvme) - 1) || value[9] != '-')
526b4e
+		return 0;
526b4e
+	for (i = 5; i < 9; i++)
526b4e
+		if (!isxdigit((unsigned char)value[i]))
526b4e
+			return 0;
526b4e
+
526b4e
+	memcpy(mangled, value, len + 1);
526b4e
+
526b4e
+	/* search end of "model" part and strip trailing '00' */
526b4e
+	p = memrchr(mangled, '-', len);
526b4e
+	if (p == NULL)
526b4e
+		return 0;
526b4e
+	p = skip_zeroes_backward(mangled, &len, p);
526b4e
+
526b4e
+	/* search end of "serial" part */
526b4e
+	p = memrchr(mangled, '-', p - mangled);
526b4e
+	if (p == NULL || memrchr(mangled, '-', p - mangled) != mangled + 9)
526b4e
+		/* We expect exactly 3 '-' in the value */
526b4e
+		return 0;
526b4e
+	p = skip_zeroes_backward(mangled, &len, p);
526b4e
+
526b4e
+	if (len >= size)
526b4e
+		return 0;
526b4e
+	memcpy(pp->wwid, mangled, len + 1);
526b4e
+	condlog(2, "%s: over-long WWID shortened to %s", pp->dev, pp->wwid);
526b4e
+	return len;
526b4e
+}
526b4e
+
526b4e
 int
526b4e
 get_uid (struct path * pp, struct udev_device *udev)
526b4e
 {
526b4e
@@ -1287,14 +1431,10 @@ get_uid (struct path * pp, struct udev_d
526b4e
 		     conf->cmd == CMD_VALID_PATH)
526b4e
 			value = getenv(pp->uid_attribute);
526b4e
 		if (value && strlen(value)) {
526b4e
-			size_t len = WWID_SIZE;
526b4e
-
526b4e
-			if (strlen(value) + 1 > WWID_SIZE) {
526b4e
+			size_t len = strlcpy(pp->wwid, value, WWID_SIZE);
526b4e
+			if (len > WWID_SIZE &&
526b4e
+			    !fix_broken_nvme_wwid(pp, value, WWID_SIZE))
526b4e
 				condlog(0, "%s: wwid overflow", pp->dev);
526b4e
-			} else {
526b4e
-				len = strlen(value);
526b4e
-			}
526b4e
-			strncpy(pp->wwid, value, len);
526b4e
 			condlog(4, "%s: got wwid of '%s'", pp->dev, pp->wwid);
526b4e
 			pp->missing_udev_info = INFO_OK;
526b4e
 			pp->tick = 0;
526b4e
@@ -1381,7 +1521,8 @@ pathinfo (struct path *pp, vector hwtabl
526b4e
 
526b4e
 	if (mask & DI_CHECKER) {
526b4e
 		if (path_state == PATH_UP) {
526b4e
-			pp->chkrstate = pp->state = get_state(pp, 0);
526b4e
+			pp->chkrstate = pp->state = get_state(pp, 0,
526b4e
+							      path_state);
526b4e
 			if (pp->state == PATH_UNCHECKED ||
526b4e
 			    pp->state == PATH_WILD)
526b4e
 				goto blank;
526b4e
Index: multipath-tools-130222/libmultipath/hwtable.c
526b4e
===================================================================
526b4e
--- multipath-tools-130222.orig/libmultipath/hwtable.c
526b4e
+++ multipath-tools-130222/libmultipath/hwtable.c
526b4e
@@ -1185,7 +1185,15 @@ static struct hwentry default_hw[] = {
526b4e
 		.checker_name  = RBD,
526b4e
 		.deferred_remove = DEFERRED_REMOVE_ON,
526b4e
 	},
526b4e
-
526b4e
+	/*
526b4e
+	 *  Generic NVMe devices
526b4e
+	 */
526b4e
+	{
526b4e
+		.vendor        = "NVME",
526b4e
+		.product       = ".*",
526b4e
+		.uid_attribute = "ID_WWN",
526b4e
+		.checker_name  = NONE,
526b4e
+	},
526b4e
 	/*
526b4e
 	 * EOL
526b4e
 	 */
526b4e
Index: multipath-tools-130222/libmultipath/structs.h
526b4e
===================================================================
526b4e
--- multipath-tools-130222.orig/libmultipath/structs.h
526b4e
+++ multipath-tools-130222/libmultipath/structs.h
526b4e
@@ -54,6 +54,7 @@ enum sysfs_buses {
526b4e
 	SYSFS_BUS_CCW,
526b4e
 	SYSFS_BUS_CCISS,
526b4e
 	SYSFS_BUS_RBD,
526b4e
+	SYSFS_BUS_NVME,
526b4e
 };
526b4e
 
526b4e
 enum pathstates {
526b4e
Index: multipath-tools-130222/libmultipath/checkers.c
526b4e
===================================================================
526b4e
--- multipath-tools-130222.orig/libmultipath/checkers.c
526b4e
+++ multipath-tools-130222/libmultipath/checkers.c
526b4e
@@ -101,6 +101,8 @@ struct checker * add_checker (char * nam
526b4e
 	if (!c)
526b4e
 		return NULL;
526b4e
 	snprintf(c->name, CHECKER_NAME_LEN, "%s", name);
526b4e
+	if (!strncmp(c->name, NONE, 4))
526b4e
+		goto done;
526b4e
 	snprintf(libname, LIB_CHECKER_NAMELEN, "%s/libcheck%s.so",
526b4e
 		 conf->multipath_dir, name);
526b4e
 	if (stat(libname,&stbuf) < 0) {
526b4e
@@ -144,7 +146,7 @@ struct checker * add_checker (char * nam
526b4e
 		condlog(0, "A dynamic linking error occurred: (%s)", errstr);
526b4e
 	if (!c->repair)
526b4e
 		goto out;
526b4e
-
526b4e
+done:
526b4e
 	c->fd = 0;
526b4e
 	c->sync = 1;
526b4e
 	list_add(&c->node, &checkers);
526b4e
@@ -194,14 +196,16 @@ int checker_init (struct checker * c, vo
526b4e
 	if (!c)
526b4e
 		return 1;
526b4e
 	c->mpcontext = mpctxt_addr;
526b4e
-	return c->init(c);
526b4e
+	if (c->init)
526b4e
+		return c->init(c);
526b4e
+	return 0;
526b4e
 }
526b4e
 
526b4e
 void checker_put (struct checker * dst)
526b4e
 {
526b4e
 	struct checker * src;
526b4e
 
526b4e
-	if (!dst)
526b4e
+	if (!dst || !strlen(dst->name))
526b4e
 		return;
526b4e
 	src = checker_lookup(dst->name);
526b4e
 	if (dst->free)
526b4e
@@ -221,10 +225,11 @@ void checker_repair (struct checker * c)
526b4e
 		return;
526b4e
 	}
526b4e
 
526b4e
-	c->repair(c);
526b4e
+	if (c->repair)
526b4e
+		c->repair(c);
526b4e
 }
526b4e
 
526b4e
-int checker_check (struct checker * c)
526b4e
+int checker_check (struct checker * c, int path_state)
526b4e
 {
526b4e
 	int r;
526b4e
 
526b4e
@@ -236,6 +241,8 @@ int checker_check (struct checker * c)
526b4e
 		MSG(c, "checker disabled");
526b4e
 		return PATH_UNCHECKED;
526b4e
 	}
526b4e
+	if (!strncmp(c->name, NONE, 4))
526b4e
+		return path_state;
526b4e
 	if (c->fd <= 0) {
526b4e
 		MSG(c, "no usable fd");
526b4e
 		return PATH_WILD;
526b4e
@@ -249,6 +256,8 @@ int checker_selected (struct checker * c
526b4e
 {
526b4e
 	if (!c)
526b4e
 		return 0;
526b4e
+	if (!strncmp(c->name, NONE, 4))
526b4e
+		return 1;
526b4e
 	return (c->check) ? 1 : 0;
526b4e
 }
526b4e
 
526b4e
Index: multipath-tools-130222/libmultipath/checkers.h
526b4e
===================================================================
526b4e
--- multipath-tools-130222.orig/libmultipath/checkers.h
526b4e
+++ multipath-tools-130222/libmultipath/checkers.h
526b4e
@@ -75,6 +75,7 @@ enum path_check_state {
526b4e
 #define EMC_CLARIION "emc_clariion"
526b4e
 #define READSECTOR0  "readsector0"
526b4e
 #define CCISS_TUR    "cciss_tur"
526b4e
+#define NONE         "none"
526b4e
 #define RBD          "rbd"
526b4e
 
526b4e
 #define DEFAULT_CHECKER DIRECTIO
526b4e
@@ -129,7 +130,7 @@ void checker_set_fd (struct checker *, i
526b4e
 void checker_enable (struct checker *);
526b4e
 void checker_disable (struct checker *);
526b4e
 void checker_repair (struct checker *);
526b4e
-int checker_check (struct checker *);
526b4e
+int checker_check (struct checker *, int);
526b4e
 int checker_selected (struct checker *);
526b4e
 char * checker_name (struct checker *);
526b4e
 char * checker_message (struct checker *);
526b4e
Index: multipath-tools-130222/libmultipath/discovery.h
526b4e
===================================================================
526b4e
--- multipath-tools-130222.orig/libmultipath/discovery.h
526b4e
+++ multipath-tools-130222/libmultipath/discovery.h
526b4e
@@ -35,7 +35,7 @@ int path_discovery (vector pathvec, stru
526b4e
 
526b4e
 int do_tur (char *);
526b4e
 int path_offline (struct path *);
526b4e
-int get_state (struct path * pp, int daemon);
526b4e
+int get_state (struct path * pp, int daemon, int state);
526b4e
 int pathinfo (struct path *, vector hwtable, int mask);
526b4e
 int store_pathinfo (vector pathvec, vector hwtable,
526b4e
 		    struct udev_device *udevice, int flag,
526b4e
Index: multipath-tools-130222/libmultipath/uevent.c
526b4e
===================================================================
526b4e
--- multipath-tools-130222.orig/libmultipath/uevent.c
526b4e
+++ multipath-tools-130222/libmultipath/uevent.c
526b4e
@@ -447,7 +447,7 @@ int uevent_listen(struct udev *udev)
526b4e
 		goto out;
526b4e
 	}
526b4e
 	err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
526b4e
-							      NULL);
526b4e
+							      "disk");
526b4e
 	if (err)
526b4e
 		condlog(2, "failed to create filter : %s", strerror(-err));
526b4e
 	err = udev_monitor_enable_receiving(monitor);
526b4e
Index: multipath-tools-130222/multipath/multipath.conf.5
526b4e
===================================================================
526b4e
--- multipath-tools-130222.orig/multipath/multipath.conf.5
526b4e
+++ multipath-tools-130222/multipath/multipath.conf.5
526b4e
@@ -284,6 +284,9 @@ Check the path state for LSI/Engenio/Net
526b4e
 .B directio
526b4e
 Read the first sector with direct I/O.
526b4e
 .TP
526b4e
+.B none
526b4e
+Do not check the device; fall back to the values retrieved from sysfs.
526b4e
+.TP
526b4e
 .B rbd
526b4e
 Check if the path is in the Ceph blacklist.
526b4e
 .TP
526b4e
Index: multipath-tools-130222/multipathd/main.c
526b4e
===================================================================
526b4e
--- multipath-tools-130222.orig/multipathd/main.c
526b4e
+++ multipath-tools-130222/multipathd/main.c
526b4e
@@ -908,28 +908,6 @@ out:
526b4e
 	return r;
526b4e
 }
526b4e
 
526b4e
-static int
526b4e
-uev_discard(char * devpath)
526b4e
-{
526b4e
-	char *tmp;
526b4e
-	char a[11], b[11];
526b4e
-
526b4e
-	/*
526b4e
-	 * keep only block devices, discard partitions
526b4e
-	 */
526b4e
-	tmp = strstr(devpath, "/block/");
526b4e
-	if (tmp == NULL){
526b4e
-		condlog(4, "no /block/ in '%s'", devpath);
526b4e
-		return 1;
526b4e
-	}
526b4e
-	if (sscanf(tmp, "/block/%10s", a) != 1 ||
526b4e
-	    sscanf(tmp, "/block/%10[^/]/%10s", a, b) == 2) {
526b4e
-		condlog(4, "discard event on %s", devpath);
526b4e
-		return 1;
526b4e
-	}
526b4e
-	return 0;
526b4e
-}
526b4e
-
526b4e
 int
526b4e
 uev_trigger (struct uevent * uev, void * trigger_data)
526b4e
 {
526b4e
@@ -938,9 +916,6 @@ uev_trigger (struct uevent * uev, void *
526b4e
 
526b4e
 	vecs = (struct vectors *)trigger_data;
526b4e
 
526b4e
-	if (uev_discard(uev->devpath))
526b4e
-		return 0;
526b4e
-
526b4e
 	pthread_cleanup_push(cleanup_lock, &vecs->lock);
526b4e
 	lock(vecs->lock);
526b4e
 	pthread_testcancel();
526b4e
@@ -1358,7 +1333,7 @@ check_path (struct vectors * vecs, struc
526b4e
 
526b4e
 	newstate = path_offline(pp);
526b4e
 	if (newstate == PATH_UP)
526b4e
-		newstate = get_state(pp, 1);
526b4e
+		newstate = get_state(pp, 1, newstate);
526b4e
 	else
526b4e
 		checker_clear_message(&pp->checker);
526b4e