Blame SOURCES/0258-RHBZ-1634183-ANA-prioritizer.patch

a385ba
---
a385ba
 Makefile.inc                         |    1 
a385ba
 libmultipath/Makefile                |    7 
a385ba
 libmultipath/hwtable.c               |    1 
a385ba
 libmultipath/nvme-ioctl.c            |  869 ++++++++++++++++++++
a385ba
 libmultipath/nvme-ioctl.h            |  139 +++
a385ba
 libmultipath/nvme-lib.c              |   49 +
a385ba
 libmultipath/nvme-lib.h              |   39 
a385ba
 libmultipath/nvme/argconfig.h        |   99 ++
a385ba
 libmultipath/nvme/json.h             |   87 ++
a385ba
 libmultipath/nvme/linux/nvme.h       | 1450 +++++++++++++++++++++++++++++++++++
a385ba
 libmultipath/nvme/linux/nvme_ioctl.h |   67 +
a385ba
 libmultipath/nvme/nvme.h             |  163 +++
a385ba
 libmultipath/nvme/plugin.h           |   36 
a385ba
 libmultipath/prio.h                  |    1 
a385ba
 libmultipath/prioritizers/Makefile   |    4 
a385ba
 libmultipath/prioritizers/ana.c      |  236 +++++
a385ba
 libmultipath/propsel.c               |   10 
a385ba
 libmultipath/util.h                  |    2 
a385ba
 multipath/multipath.conf.5           |    3 
a385ba
 19 files changed, 3258 insertions(+), 5 deletions(-)
a385ba
a385ba
Index: multipath-tools-130222/libmultipath/nvme/argconfig.h
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/nvme/argconfig.h
a385ba
@@ -0,0 +1,99 @@
a385ba
+////////////////////////////////////////////////////////////////////////
a385ba
+//
a385ba
+// Copyright 2014 PMC-Sierra, Inc.
a385ba
+//
a385ba
+// This program is free software; you can redistribute it and/or
a385ba
+// modify it under the terms of the GNU General Public License
a385ba
+// as published by the Free Software Foundation; either version 2
a385ba
+// of the License, or (at your option) any later version.
a385ba
+//
a385ba
+// This program is distributed in the hope that it will be useful,
a385ba
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
a385ba
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
a385ba
+// GNU General Public License for more details.
a385ba
+//
a385ba
+// You should have received a copy of the GNU General Public License
a385ba
+// along with this program; if not, write to the Free Software
a385ba
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
a385ba
+//
a385ba
+////////////////////////////////////////////////////////////////////////
a385ba
+
a385ba
+////////////////////////////////////////////////////////////////////////
a385ba
+//
a385ba
+//   Author: Logan Gunthorpe <logang@deltatee.com>
a385ba
+//           Logan Gunthorpe
a385ba
+//
a385ba
+//   Date:   Oct 23 2014
a385ba
+//
a385ba
+//   Description:
a385ba
+//     Header file for argconfig.c
a385ba
+//
a385ba
+////////////////////////////////////////////////////////////////////////
a385ba
+
a385ba
+#ifndef argconfig_H
a385ba
+#define argconfig_H
a385ba
+
a385ba
+#include <string.h>
a385ba
+#include <getopt.h>
a385ba
+#include <stdarg.h>
a385ba
+
a385ba
+enum argconfig_types {
a385ba
+	CFG_NONE,
a385ba
+	CFG_STRING,
a385ba
+	CFG_INT,
a385ba
+	CFG_SIZE,
a385ba
+	CFG_LONG,
a385ba
+	CFG_LONG_SUFFIX,
a385ba
+	CFG_DOUBLE,
a385ba
+	CFG_BOOL,
a385ba
+	CFG_BYTE,
a385ba
+	CFG_SHORT,
a385ba
+	CFG_POSITIVE,
a385ba
+	CFG_INCREMENT,
a385ba
+	CFG_SUBOPTS,
a385ba
+	CFG_FILE_A,
a385ba
+	CFG_FILE_W,
a385ba
+	CFG_FILE_R,
a385ba
+	CFG_FILE_AP,
a385ba
+	CFG_FILE_WP,
a385ba
+	CFG_FILE_RP,
a385ba
+};
a385ba
+
a385ba
+struct argconfig_commandline_options {
a385ba
+	const char *option;
a385ba
+	const char short_option;
a385ba
+	const char *meta;
a385ba
+	enum argconfig_types config_type;
a385ba
+	void *default_value;
a385ba
+	int argument_type;
a385ba
+	const char *help;
a385ba
+};
a385ba
+
a385ba
+#define CFG_MAX_SUBOPTS 500
a385ba
+#define MAX_HELP_FUNC 20
a385ba
+
a385ba
+#ifdef __cplusplus
a385ba
+extern "C" {
a385ba
+#endif
a385ba
+
a385ba
+typedef void argconfig_help_func(void);
a385ba
+void argconfig_append_usage(const char *str);
a385ba
+void argconfig_print_help(const char *program_desc,
a385ba
+			  const struct argconfig_commandline_options *options);
a385ba
+int argconfig_parse(int argc, char *argv[], const char *program_desc,
a385ba
+		    const struct argconfig_commandline_options *options,
a385ba
+		    void *config_out, size_t config_size);
a385ba
+int argconfig_parse_subopt_string(char *string, char **options,
a385ba
+				  size_t max_options);
a385ba
+unsigned argconfig_parse_comma_sep_array(char *string, int *ret,
a385ba
+					 unsigned max_length);
a385ba
+unsigned argconfig_parse_comma_sep_array_long(char *string,
a385ba
+					      unsigned long long *ret,
a385ba
+					      unsigned max_length);
a385ba
+void argconfig_register_help_func(argconfig_help_func * f);
a385ba
+
a385ba
+void print_word_wrapped(const char *s, int indent, int start);
a385ba
+#ifdef __cplusplus
a385ba
+}
a385ba
+#endif
a385ba
+#endif
a385ba
Index: multipath-tools-130222/libmultipath/nvme/json.h
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/nvme/json.h
a385ba
@@ -0,0 +1,87 @@
a385ba
+#ifndef __JSON__H
a385ba
+#define __JSON__H
a385ba
+
a385ba
+struct json_object;
a385ba
+struct json_array;
a385ba
+struct json_pair;
a385ba
+
a385ba
+#define JSON_TYPE_STRING 0
a385ba
+#define JSON_TYPE_INTEGER 1
a385ba
+#define JSON_TYPE_FLOAT 2
a385ba
+#define JSON_TYPE_OBJECT 3
a385ba
+#define JSON_TYPE_ARRAY 4
a385ba
+#define JSON_TYPE_UINT 5
a385ba
+#define JSON_PARENT_TYPE_PAIR 0
a385ba
+#define JSON_PARENT_TYPE_ARRAY 1
a385ba
+struct json_value {
a385ba
+	int type;
a385ba
+	union {
a385ba
+		long long integer_number;
a385ba
+		unsigned long long uint_number;
a385ba
+		long double float_number;
a385ba
+		char *string;
a385ba
+		struct json_object *object;
a385ba
+		struct json_array *array;
a385ba
+	};
a385ba
+	int parent_type;
a385ba
+	union {
a385ba
+		struct json_pair *parent_pair;
a385ba
+		struct json_array *parent_array;
a385ba
+	};
a385ba
+};
a385ba
+
a385ba
+struct json_array {
a385ba
+	struct json_value **values;
a385ba
+	int value_cnt;
a385ba
+	struct json_value *parent;
a385ba
+};
a385ba
+
a385ba
+struct json_object {
a385ba
+	struct json_pair **pairs;
a385ba
+	int pair_cnt;
a385ba
+	struct json_value *parent;
a385ba
+};
a385ba
+
a385ba
+struct json_pair {
a385ba
+	char *name;
a385ba
+	struct json_value *value;
a385ba
+	struct json_object *parent;
a385ba
+};
a385ba
+
a385ba
+struct json_object *json_create_object(void);
a385ba
+struct json_array *json_create_array(void);
a385ba
+
a385ba
+void json_free_object(struct json_object *obj);
a385ba
+
a385ba
+int json_object_add_value_type(struct json_object *obj, const char *name, int type, ...);
a385ba
+#define json_object_add_value_int(obj, name, val) \
a385ba
+	json_object_add_value_type((obj), name, JSON_TYPE_INTEGER, (long long) (val))
a385ba
+#define json_object_add_value_uint(obj, name, val) \
a385ba
+	json_object_add_value_type((obj), name, JSON_TYPE_UINT, (unsigned long long) (val))
a385ba
+#define json_object_add_value_float(obj, name, val) \
a385ba
+	json_object_add_value_type((obj), name, JSON_TYPE_FLOAT, (val))
a385ba
+#define json_object_add_value_string(obj, name, val) \
a385ba
+	json_object_add_value_type((obj), name, JSON_TYPE_STRING, (val))
a385ba
+#define json_object_add_value_object(obj, name, val) \
a385ba
+	json_object_add_value_type((obj), name, JSON_TYPE_OBJECT, (val))
a385ba
+#define json_object_add_value_array(obj, name, val) \
a385ba
+	json_object_add_value_type((obj), name, JSON_TYPE_ARRAY, (val))
a385ba
+int json_array_add_value_type(struct json_array *array, int type, ...);
a385ba
+#define json_array_add_value_int(obj, val) /* widened like the object variant */ \
a385ba
+	json_array_add_value_type((obj), JSON_TYPE_INTEGER, (long long) (val))
a385ba
+#define json_array_add_value_uint(obj, val) /* widened like the object variant */ \
a385ba
+	json_array_add_value_type((obj), JSON_TYPE_UINT, (unsigned long long) (val))
a385ba
+#define json_array_add_value_float(obj, val) \
a385ba
+	json_array_add_value_type((obj), JSON_TYPE_FLOAT, (val))
a385ba
+#define json_array_add_value_string(obj, val) \
a385ba
+	json_array_add_value_type((obj), JSON_TYPE_STRING, (val))
a385ba
+#define json_array_add_value_object(obj, val) \
a385ba
+	json_array_add_value_type((obj), JSON_TYPE_OBJECT, (val))
a385ba
+#define json_array_add_value_array(obj, val) \
a385ba
+	json_array_add_value_type((obj), JSON_TYPE_ARRAY, (val))
a385ba
+
a385ba
+#define json_array_last_value_object(obj) /* object of the last element; obj is evaluated twice */ \
a385ba
+	((obj)->values[(obj)->value_cnt - 1]->object)
a385ba
+
a385ba
+void json_print_object(struct json_object *obj, void *);
a385ba
+#endif
a385ba
Index: multipath-tools-130222/libmultipath/nvme/nvme.h
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/nvme/nvme.h
a385ba
@@ -0,0 +1,163 @@
a385ba
+/*
a385ba
+ * Definitions for the NVM Express interface
a385ba
+ * Copyright (c) 2011-2014, Intel Corporation.
a385ba
+ *
a385ba
+ * This program is free software; you can redistribute it and/or modify it
a385ba
+ * under the terms and conditions of the GNU General Public License,
a385ba
+ * version 2, as published by the Free Software Foundation.
a385ba
+ *
a385ba
+ * This program is distributed in the hope it will be useful, but WITHOUT
a385ba
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
a385ba
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
a385ba
+ * more details.
a385ba
+ */
a385ba
+
a385ba
+#ifndef _NVME_H
a385ba
+#define _NVME_H
a385ba
+
a385ba
+#include <stdbool.h>
a385ba
+#include <stdint.h>
a385ba
+#include <endian.h>
a385ba
+#include "plugin.h"
a385ba
+#include "json.h"
a385ba
+
a385ba
+#define unlikely(x) (x)	/* plain pass-through; parenthesized so it stays one operand */
a385ba
+
a385ba
+#ifdef LIBUUID
a385ba
+#include <uuid/uuid.h>
a385ba
+#else
a385ba
+typedef struct {
a385ba
+	uint8_t b[16];
a385ba
+} uuid_t;
a385ba
+#endif
a385ba
+
a385ba
+#include "linux/nvme.h"
a385ba
+
a385ba
+struct nvme_effects_log_page {
a385ba
+	__le32 acs[256];
a385ba
+	__le32 iocs[256];
a385ba
+	__u8   resv[2048];
a385ba
+};
a385ba
+
a385ba
+struct nvme_error_log_page {
a385ba
+	__u64	error_count;
a385ba
+	__u16	sqid;
a385ba
+	__u16	cmdid;
a385ba
+	__u16	status_field;
a385ba
+	__u16	parm_error_location;
a385ba
+	__u64	lba;
a385ba
+	__u32	nsid;
a385ba
+	__u8	vs;
a385ba
+	__u8	resv[3];
a385ba
+	__u64	cs;
a385ba
+	__u8	resv2[24];
a385ba
+};
a385ba
+
a385ba
+struct nvme_firmware_log_page {
a385ba
+	__u8	afi;
a385ba
+	__u8	resv[7];
a385ba
+	__u64	frs[7];
a385ba
+	__u8	resv2[448];
a385ba
+};
a385ba
+
a385ba
+/* idle and active power scales occupy the top 2 bits (7:6) of the field */
a385ba
+#define POWER_SCALE(s) ((s) >> 6)
a385ba
+
a385ba
+struct nvme_host_mem_buffer {
a385ba
+	__u32			hsize;
a385ba
+	__u32			hmdlal;
a385ba
+	__u32			hmdlau;
a385ba
+	__u32			hmdlec;
a385ba
+	__u8			rsvd16[4080];
a385ba
+};
a385ba
+
a385ba
+struct nvme_auto_pst {
a385ba
+	__u32	data;
a385ba
+	__u32	rsvd32;
a385ba
+};
a385ba
+
a385ba
+struct nvme_timestamp {
a385ba
+	__u8 timestamp[6];
a385ba
+	__u8 attr;
a385ba
+	__u8 rsvd;
a385ba
+};
a385ba
+
a385ba
+struct nvme_controller_list {
a385ba
+	__le16 num;
a385ba
+	__le16 identifier[];
a385ba
+};
a385ba
+
a385ba
+struct nvme_bar_cap {
a385ba
+	__u16	mqes;
a385ba
+	__u8	ams_cqr;
a385ba
+	__u8	to;
a385ba
+	__u16	bps_css_nssrs_dstrd;
a385ba
+	__u8	mpsmax_mpsmin;
a385ba
+	__u8	reserved;
a385ba
+};
a385ba
+
a385ba
+#ifdef __CHECKER__
a385ba
+#define __force       __attribute__((force))
a385ba
+#else
a385ba
+#define __force
a385ba
+#endif
a385ba
+
a385ba
+#define cpu_to_le16(x) \
a385ba
+	((__force __le16)htole16(x))
a385ba
+#define cpu_to_le32(x) \
a385ba
+	((__force __le32)htole32(x))
a385ba
+#define cpu_to_le64(x) \
a385ba
+	((__force __le64)htole64(x))
a385ba
+
a385ba
+#define le16_to_cpu(x) \
a385ba
+	le16toh((__force __u16)(x))
a385ba
+#define le32_to_cpu(x) \
a385ba
+	le32toh((__force __u32)(x))
a385ba
+#define le64_to_cpu(x) \
a385ba
+	le64toh((__force __u64)(x))
a385ba
+
a385ba
+#define MAX_LIST_ITEMS 256
a385ba
+struct list_item {
a385ba
+	char                node[1024];
a385ba
+	struct nvme_id_ctrl ctrl;
a385ba
+	int                 nsid;
a385ba
+	struct nvme_id_ns   ns;
a385ba
+	unsigned            block;
a385ba
+};
a385ba
+
a385ba
+struct ctrl_list_item {
a385ba
+	char *name;
a385ba
+	char *address;
a385ba
+	char *transport;
a385ba
+	char *state;
a385ba
+	char *ana_state;
a385ba
+};
a385ba
+
a385ba
+struct subsys_list_item {
a385ba
+	char *name;
a385ba
+	char *subsysnqn;
a385ba
+	int nctrls;
a385ba
+	struct ctrl_list_item *ctrls;
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NORMAL,
a385ba
+	JSON,
a385ba
+	BINARY,
a385ba
+};
a385ba
+
a385ba
+void register_extension(struct plugin *plugin);
a385ba
+
a385ba
+#include "argconfig.h"
a385ba
+int parse_and_open(int argc, char **argv, const char *desc,
a385ba
+	const struct argconfig_commandline_options *clo, void *cfg, size_t size);
a385ba
+
a385ba
+extern const char *devicename;
a385ba
+
a385ba
+int __id_ctrl(int argc, char **argv, struct command *cmd, struct plugin *plugin, void (*vs)(__u8 *vs, struct json_object *root));
a385ba
+int	validate_output_format(char *format);
a385ba
+
a385ba
+struct subsys_list_item *get_subsys_list(int *subcnt, char *subsysnqn, __u32 nsid);
a385ba
+void free_subsys_list(struct subsys_list_item *slist, int n);
a385ba
+char *nvme_char_from_block(char *block);
a385ba
+#endif /* _NVME_H */
a385ba
Index: multipath-tools-130222/libmultipath/nvme/plugin.h
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/nvme/plugin.h
a385ba
@@ -0,0 +1,36 @@
a385ba
+#ifndef PLUGIN_H
a385ba
+#define PLUGIN_H
a385ba
+
a385ba
+#include <stdbool.h>
a385ba
+
a385ba
+struct program {
a385ba
+	const char *name;
a385ba
+	const char *version;
a385ba
+	const char *usage;
a385ba
+	const char *desc;
a385ba
+	const char *more;
a385ba
+	struct command **commands;
a385ba
+	struct plugin *extensions;
a385ba
+};
a385ba
+
a385ba
+struct plugin {
a385ba
+	const char *name;
a385ba
+	const char *desc;
a385ba
+	struct command **commands;
a385ba
+	struct program *parent;
a385ba
+	struct plugin *next;
a385ba
+	struct plugin *tail;
a385ba
+};
a385ba
+
a385ba
+struct command {
a385ba
+	char *name;
a385ba
+	char *help;
a385ba
+	int (*fn)(int argc, char **argv, struct command *command, struct plugin *plugin);
a385ba
+	char *alias;
a385ba
+};
a385ba
+
a385ba
+void usage(struct plugin *plugin);
a385ba
+void general_help(struct plugin *plugin);
a385ba
+int handle_plugin(int argc, char **argv, struct plugin *plugin);
a385ba
+
a385ba
+#endif
a385ba
Index: multipath-tools-130222/libmultipath/nvme/linux/nvme.h
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/nvme/linux/nvme.h
a385ba
@@ -0,0 +1,1450 @@
a385ba
+/*
a385ba
+ * Definitions for the NVM Express interface
a385ba
+ * Copyright (c) 2011-2014, Intel Corporation.
a385ba
+ *
a385ba
+ * This program is free software; you can redistribute it and/or modify it
a385ba
+ * under the terms and conditions of the GNU General Public License,
a385ba
+ * version 2, as published by the Free Software Foundation.
a385ba
+ *
a385ba
+ * This program is distributed in the hope it will be useful, but WITHOUT
a385ba
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
a385ba
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
a385ba
+ * more details.
a385ba
+ */
a385ba
+
a385ba
+#ifndef _LINUX_NVME_H
a385ba
+#define _LINUX_NVME_H
a385ba
+
a385ba
+#include <linux/types.h>
a385ba
+#include <linux/uuid.h>
a385ba
+
a385ba
+/* NQN fields in commands are specified with one fixed size */
a385ba
+#define NVMF_NQN_FIELD_LEN	256
a385ba
+
a385ba
+/* However, the maximum length of a qualified name is a different size */
a385ba
+#define NVMF_NQN_SIZE		223
a385ba
+
a385ba
+#define NVMF_TRSVCID_SIZE	32
a385ba
+#define NVMF_TRADDR_SIZE	256
a385ba
+#define NVMF_TSAS_SIZE		256
a385ba
+
a385ba
+#define NVME_DISC_SUBSYS_NAME	"nqn.2014-08.org.nvmexpress.discovery"
a385ba
+
a385ba
+#define NVME_RDMA_IP_PORT	4420
a385ba
+
a385ba
+#define NVME_NSID_ALL		0xffffffff
a385ba
+
a385ba
+enum nvme_subsys_type {
a385ba
+	NVME_NQN_DISC	= 1,		/* Discovery type target subsystem */
a385ba
+	NVME_NQN_NVME	= 2,		/* NVME type target subsystem */
a385ba
+};
a385ba
+
a385ba
+/* Address Family codes for Discovery Log Page entry ADRFAM field */
a385ba
+enum {
a385ba
+	NVMF_ADDR_FAMILY_PCI	= 0,	/* PCIe */
a385ba
+	NVMF_ADDR_FAMILY_IP4	= 1,	/* IP4 */
a385ba
+	NVMF_ADDR_FAMILY_IP6	= 2,	/* IP6 */
a385ba
+	NVMF_ADDR_FAMILY_IB	= 3,	/* InfiniBand */
a385ba
+	NVMF_ADDR_FAMILY_FC	= 4,	/* Fibre Channel */
a385ba
+};
a385ba
+
a385ba
+/* Transport Type codes for Discovery Log Page entry TRTYPE field */
a385ba
+enum {
a385ba
+	NVMF_TRTYPE_RDMA	= 1,	/* RDMA */
a385ba
+	NVMF_TRTYPE_FC		= 2,	/* Fibre Channel */
a385ba
+	NVMF_TRTYPE_TCP		= 3,	/* TCP */
a385ba
+	NVMF_TRTYPE_LOOP	= 254,	/* Reserved for host usage */
a385ba
+	NVMF_TRTYPE_MAX,
a385ba
+};
a385ba
+
a385ba
+/* Transport Requirements codes for Discovery Log Page entry TREQ field */
a385ba
+enum {
a385ba
+	NVMF_TREQ_NOT_SPECIFIED	= 0,		/* Not specified */
a385ba
+	NVMF_TREQ_REQUIRED	= 1,		/* Required */
a385ba
+	NVMF_TREQ_NOT_REQUIRED	= 2,		/* Not Required */
a385ba
+	NVMF_TREQ_DISABLE_SQFLOW = (1 << 2),	/* SQ flow control disable supported */
a385ba
+};
a385ba
+
a385ba
+/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
a385ba
+ * RDMA_QPTYPE field
a385ba
+ */
a385ba
+enum {
a385ba
+	NVMF_RDMA_QPTYPE_CONNECTED	= 1, /* Reliable Connected */
a385ba
+	NVMF_RDMA_QPTYPE_DATAGRAM	= 2, /* Reliable Datagram */
a385ba
+};
a385ba
+
a385ba
+/* RDMA Provider Type codes for Discovery Log Page entry TSAS
a385ba
+ * RDMA_PRTYPE field
a385ba
+ */
a385ba
+enum {
a385ba
+	NVMF_RDMA_PRTYPE_NOT_SPECIFIED	= 1, /* No Provider Specified */
a385ba
+	NVMF_RDMA_PRTYPE_IB		= 2, /* InfiniBand */
a385ba
+	NVMF_RDMA_PRTYPE_ROCE		= 3, /* InfiniBand RoCE */
a385ba
+	NVMF_RDMA_PRTYPE_ROCEV2		= 4, /* InfiniBand RoCEV2 */
a385ba
+	NVMF_RDMA_PRTYPE_IWARP		= 5, /* IWARP */
a385ba
+};
a385ba
+
a385ba
+/* RDMA Connection Management Service Type codes for Discovery Log Page
a385ba
+ * entry TSAS RDMA_CMS field
a385ba
+ */
a385ba
+enum {
a385ba
+	NVMF_RDMA_CMS_RDMA_CM	= 1, /* Sockets based endpoint addressing */
a385ba
+};
a385ba
+
a385ba
+/* TCP port security type for Discovery Log Page entry TSAS
a385ba
+ */
a385ba
+enum {
a385ba
+	NVMF_TCP_SECTYPE_NONE	= 0, /* No Security */
a385ba
+	NVMF_TCP_SECTYPE_TLS	= 1, /* Transport Layer Security */
a385ba
+};
a385ba
+
a385ba
+#define NVME_AQ_DEPTH		32
a385ba
+#define NVME_NR_AEN_COMMANDS	1
a385ba
+#define NVME_AQ_BLK_MQ_DEPTH	(NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
a385ba
+
a385ba
+/*
a385ba
+ * Subtract one to leave an empty queue entry for 'Full Queue' condition. See
a385ba
+ * NVM-Express 1.2 specification, section 4.1.2.
a385ba
+ */
a385ba
+#define NVME_AQ_MQ_TAG_DEPTH	(NVME_AQ_BLK_MQ_DEPTH - 1)
a385ba
+
a385ba
+enum {
a385ba
+	NVME_REG_CAP	= 0x0000,	/* Controller Capabilities */
a385ba
+	NVME_REG_VS	= 0x0008,	/* Version */
a385ba
+	NVME_REG_INTMS	= 0x000c,	/* Interrupt Mask Set */
a385ba
+	NVME_REG_INTMC	= 0x0010,	/* Interrupt Mask Clear */
a385ba
+	NVME_REG_CC	= 0x0014,	/* Controller Configuration */
a385ba
+	NVME_REG_CSTS	= 0x001c,	/* Controller Status */
a385ba
+	NVME_REG_NSSR	= 0x0020,	/* NVM Subsystem Reset */
a385ba
+	NVME_REG_AQA	= 0x0024,	/* Admin Queue Attributes */
a385ba
+	NVME_REG_ASQ	= 0x0028,	/* Admin SQ Base Address */
a385ba
+	NVME_REG_ACQ	= 0x0030,	/* Admin CQ Base Address */
a385ba
+	NVME_REG_CMBLOC = 0x0038,	/* Controller Memory Buffer Location */
a385ba
+	NVME_REG_CMBSZ	= 0x003c,	/* Controller Memory Buffer Size */
a385ba
+	NVME_REG_BPINFO	= 0x0040,	/* Boot Partition Information */
a385ba
+	NVME_REG_BPRSEL	= 0x0044,	/* Boot Partition Read Select */
a385ba
+	NVME_REG_BPMBL	= 0x0048,	/* Boot Partition Memory Buffer Location */
a385ba
+	NVME_REG_DBS	= 0x1000,	/* SQ 0 Tail Doorbell */
a385ba
+};
a385ba
+
a385ba
+#define NVME_CAP_MQES(cap)	((cap) & 0xffff)
a385ba
+#define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
a385ba
+#define NVME_CAP_STRIDE(cap)	(((cap) >> 32) & 0xf)
a385ba
+#define NVME_CAP_NSSRC(cap)	(((cap) >> 36) & 0x1)
a385ba
+#define NVME_CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)
a385ba
+#define NVME_CAP_MPSMAX(cap)	(((cap) >> 52) & 0xf)
a385ba
+
a385ba
+#define NVME_CMB_BIR(cmbloc)	((cmbloc) & 0x7)
a385ba
+#define NVME_CMB_OFST(cmbloc)	(((cmbloc) >> 12) & 0xfffff)
a385ba
+#define NVME_CMB_SZ(cmbsz)	(((cmbsz) >> 12) & 0xfffff)
a385ba
+#define NVME_CMB_SZU(cmbsz)	(((cmbsz) >> 8) & 0xf)
a385ba
+
a385ba
+#define NVME_CMB_WDS(cmbsz)	((cmbsz) & 0x10)
a385ba
+#define NVME_CMB_RDS(cmbsz)	((cmbsz) & 0x8)
a385ba
+#define NVME_CMB_LISTS(cmbsz)	((cmbsz) & 0x4)
a385ba
+#define NVME_CMB_CQS(cmbsz)	((cmbsz) & 0x2)
a385ba
+#define NVME_CMB_SQS(cmbsz)	((cmbsz) & 0x1)
a385ba
+
a385ba
+/*
a385ba
+ * Submission and Completion Queue Entry Sizes for the NVM command set.
a385ba
+ * (In bytes and specified as a power of two (2^n)).
a385ba
+ */
a385ba
+#define NVME_NVM_IOSQES		6
a385ba
+#define NVME_NVM_IOCQES		4
a385ba
+
a385ba
+enum {
a385ba
+	NVME_CC_ENABLE		= 1 << 0,
a385ba
+	NVME_CC_CSS_NVM		= 0 << 4,
a385ba
+	NVME_CC_EN_SHIFT	= 0,
a385ba
+	NVME_CC_CSS_SHIFT	= 4,
a385ba
+	NVME_CC_MPS_SHIFT	= 7,
a385ba
+	NVME_CC_AMS_SHIFT	= 11,
a385ba
+	NVME_CC_SHN_SHIFT	= 14,
a385ba
+	NVME_CC_IOSQES_SHIFT	= 16,
a385ba
+	NVME_CC_IOCQES_SHIFT	= 20,
a385ba
+	NVME_CC_AMS_RR		= 0 << NVME_CC_AMS_SHIFT,
a385ba
+	NVME_CC_AMS_WRRU	= 1 << NVME_CC_AMS_SHIFT,
a385ba
+	NVME_CC_AMS_VS		= 7 << NVME_CC_AMS_SHIFT,
a385ba
+	NVME_CC_SHN_NONE	= 0 << NVME_CC_SHN_SHIFT,
a385ba
+	NVME_CC_SHN_NORMAL	= 1 << NVME_CC_SHN_SHIFT,
a385ba
+	NVME_CC_SHN_ABRUPT	= 2 << NVME_CC_SHN_SHIFT,
a385ba
+	NVME_CC_SHN_MASK	= 3 << NVME_CC_SHN_SHIFT,
a385ba
+	NVME_CC_IOSQES		= NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
a385ba
+	NVME_CC_IOCQES		= NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
a385ba
+	NVME_CSTS_RDY		= 1 << 0,
a385ba
+	NVME_CSTS_CFS		= 1 << 1,
a385ba
+	NVME_CSTS_NSSRO		= 1 << 4,
a385ba
+	NVME_CSTS_PP		= 1 << 5,
a385ba
+	NVME_CSTS_SHST_NORMAL	= 0 << 2,
a385ba
+	NVME_CSTS_SHST_OCCUR	= 1 << 2,
a385ba
+	NVME_CSTS_SHST_CMPLT	= 2 << 2,
a385ba
+	NVME_CSTS_SHST_MASK	= 3 << 2,
a385ba
+};
a385ba
+
a385ba
+struct nvme_id_power_state {
a385ba
+	__le16			max_power;	/* centiwatts */
a385ba
+	__u8			rsvd2;
a385ba
+	__u8			flags;
a385ba
+	__le32			entry_lat;	/* microseconds */
a385ba
+	__le32			exit_lat;	/* microseconds */
a385ba
+	__u8			read_tput;
a385ba
+	__u8			read_lat;
a385ba
+	__u8			write_tput;
a385ba
+	__u8			write_lat;
a385ba
+	__le16			idle_power;
a385ba
+	__u8			idle_scale;
a385ba
+	__u8			rsvd19;
a385ba
+	__le16			active_power;
a385ba
+	__u8			active_work_scale;
a385ba
+	__u8			rsvd23[9];
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_PS_FLAGS_MAX_POWER_SCALE	= 1 << 0,
a385ba
+	NVME_PS_FLAGS_NON_OP_STATE	= 1 << 1,
a385ba
+};
a385ba
+
a385ba
+struct nvme_id_ctrl {
a385ba
+	__le16			vid;
a385ba
+	__le16			ssvid;
a385ba
+	char			sn[20];
a385ba
+	char			mn[40];
a385ba
+	char			fr[8];
a385ba
+	__u8			rab;
a385ba
+	__u8			ieee[3];
a385ba
+	__u8			cmic;
a385ba
+	__u8			mdts;
a385ba
+	__le16			cntlid;
a385ba
+	__le32			ver;
a385ba
+	__le32			rtd3r;
a385ba
+	__le32			rtd3e;
a385ba
+	__le32			oaes;
a385ba
+	__le32			ctratt;
a385ba
+	__le16			rrls;
a385ba
+	__u8			rsvd102[154];
a385ba
+	__le16			oacs;
a385ba
+	__u8			acl;
a385ba
+	__u8			aerl;
a385ba
+	__u8			frmw;
a385ba
+	__u8			lpa;
a385ba
+	__u8			elpe;
a385ba
+	__u8			npss;
a385ba
+	__u8			avscc;
a385ba
+	__u8			apsta;
a385ba
+	__le16			wctemp;
a385ba
+	__le16			cctemp;
a385ba
+	__le16			mtfa;
a385ba
+	__le32			hmpre;
a385ba
+	__le32			hmmin;
a385ba
+	__u8			tnvmcap[16];
a385ba
+	__u8			unvmcap[16];
a385ba
+	__le32			rpmbs;
a385ba
+	__le16			edstt;
a385ba
+	__u8			dsto;
a385ba
+	__u8			fwug;
a385ba
+	__le16			kas;
a385ba
+	__le16			hctma;
a385ba
+	__le16			mntmt;
a385ba
+	__le16			mxtmt;
a385ba
+	__le32			sanicap;
a385ba
+	__le32			hmminds;
a385ba
+	__le16			hmmaxd;
a385ba
+	__le16			nsetidmax;
a385ba
+	__u8			rsvd340[2];
a385ba
+	__u8			anatt;
a385ba
+	__u8			anacap;
a385ba
+	__le32			anagrpmax;
a385ba
+	__le32			nanagrpid;
a385ba
+	__u8			rsvd352[160];
a385ba
+	__u8			sqes;
a385ba
+	__u8			cqes;
a385ba
+	__le16			maxcmd;
a385ba
+	__le32			nn;
a385ba
+	__le16			oncs;
a385ba
+	__le16			fuses;
a385ba
+	__u8			fna;
a385ba
+	__u8			vwc;
a385ba
+	__le16			awun;
a385ba
+	__le16			awupf;
a385ba
+	__u8			nvscc;
a385ba
+	__u8			nwpc;
a385ba
+	__le16			acwu;
a385ba
+	__u8			rsvd534[2];
a385ba
+	__le32			sgls;
a385ba
+	__le32			mnan;
a385ba
+	__u8			rsvd544[224];
a385ba
+	char			subnqn[256];
a385ba
+	__u8			rsvd1024[768];
a385ba
+	__le32			ioccsz;
a385ba
+	__le32			iorcsz;
a385ba
+	__le16			icdoff;
a385ba
+	__u8			ctrattr;
a385ba
+	__u8			msdbd;
a385ba
+	__u8			rsvd1804[244];
a385ba
+	struct nvme_id_power_state	psd[32];
a385ba
+	__u8			vs[1024];
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_CTRL_ONCS_COMPARE			= 1 << 0,
a385ba
+	NVME_CTRL_ONCS_WRITE_UNCORRECTABLE	= 1 << 1,
a385ba
+	NVME_CTRL_ONCS_DSM			= 1 << 2,
a385ba
+	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
a385ba
+	NVME_CTRL_ONCS_TIMESTAMP		= 1 << 6,
a385ba
+	NVME_CTRL_VWC_PRESENT			= 1 << 0,
a385ba
+	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
a385ba
+	NVME_CTRL_OACS_DIRECTIVES		= 1 << 5,
a385ba
+	NVME_CTRL_OACS_DBBUF_SUPP		= 1 << 8,
a385ba
+	NVME_CTRL_LPA_CMD_EFFECTS_LOG		= 1 << 1,
a385ba
+	NVME_CTRL_CTRATT_128_ID			= 1 << 0,
a385ba
+	NVME_CTRL_CTRATT_NON_OP_PSP		= 1 << 1,
a385ba
+	NVME_CTRL_CTRATT_NVM_SETS		= 1 << 2,
a385ba
+	NVME_CTRL_CTRATT_READ_RECV_LVLS		= 1 << 3,
a385ba
+	NVME_CTRL_CTRATT_ENDURANCE_GROUPS	= 1 << 4,
a385ba
+	NVME_CTRL_CTRATT_PREDICTABLE_LAT	= 1 << 5,
a385ba
+};
a385ba
+
a385ba
+struct nvme_lbaf {
a385ba
+	__le16			ms;
a385ba
+	__u8			ds;
a385ba
+	__u8			rp;
a385ba
+};
a385ba
+
a385ba
+struct nvme_id_ns {
a385ba
+	__le64			nsze;
a385ba
+	__le64			ncap;
a385ba
+	__le64			nuse;
a385ba
+	__u8			nsfeat;
a385ba
+	__u8			nlbaf;
a385ba
+	__u8			flbas;
a385ba
+	__u8			mc;
a385ba
+	__u8			dpc;
a385ba
+	__u8			dps;
a385ba
+	__u8			nmic;
a385ba
+	__u8			rescap;
a385ba
+	__u8			fpi;
a385ba
+	__u8			dlfeat;
a385ba
+	__le16			nawun;
a385ba
+	__le16			nawupf;
a385ba
+	__le16			nacwu;
a385ba
+	__le16			nabsn;
a385ba
+	__le16			nabo;
a385ba
+	__le16			nabspf;
a385ba
+	__le16			noiob;
a385ba
+	__u8			nvmcap[16];
a385ba
+	__u8			rsvd64[28];
a385ba
+	__le32			anagrpid;
a385ba
+	__u8			rsvd96[3];
a385ba
+	__u8			nsattr;
a385ba
+	__le16			nvmsetid;
a385ba
+	__le16			endgid;
a385ba
+	__u8			nguid[16];
a385ba
+	__u8			eui64[8];
a385ba
+	struct nvme_lbaf	lbaf[16];
a385ba
+	__u8			rsvd192[192];
a385ba
+	__u8			vs[3712];
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_ID_CNS_NS			= 0x00,
a385ba
+	NVME_ID_CNS_CTRL		= 0x01,
a385ba
+	NVME_ID_CNS_NS_ACTIVE_LIST	= 0x02,
a385ba
+	NVME_ID_CNS_NS_DESC_LIST	= 0x03,
a385ba
+	NVME_ID_CNS_NVMSET_LIST		= 0x04,
a385ba
+	NVME_ID_CNS_NS_PRESENT_LIST	= 0x10,
a385ba
+	NVME_ID_CNS_NS_PRESENT		= 0x11,
a385ba
+	NVME_ID_CNS_CTRL_NS_LIST	= 0x12,
a385ba
+	NVME_ID_CNS_CTRL_LIST		= 0x13,
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_DIR_IDENTIFY		= 0x00,
a385ba
+	NVME_DIR_STREAMS		= 0x01,
a385ba
+	NVME_DIR_SND_ID_OP_ENABLE	= 0x01,
a385ba
+	NVME_DIR_SND_ST_OP_REL_ID	= 0x01,
a385ba
+	NVME_DIR_SND_ST_OP_REL_RSC	= 0x02,
a385ba
+	NVME_DIR_RCV_ID_OP_PARAM	= 0x01,
a385ba
+	NVME_DIR_RCV_ST_OP_PARAM	= 0x01,
a385ba
+	NVME_DIR_RCV_ST_OP_STATUS	= 0x02,
a385ba
+	NVME_DIR_RCV_ST_OP_RESOURCE	= 0x03,
a385ba
+	NVME_DIR_ENDIR			= 0x01,
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_NS_FEAT_THIN	= 1 << 0,
a385ba
+	NVME_NS_FLBAS_LBA_MASK	= 0xf,
a385ba
+	NVME_NS_FLBAS_META_EXT	= 0x10,
a385ba
+	NVME_LBAF_RP_BEST	= 0,
a385ba
+	NVME_LBAF_RP_BETTER	= 1,
a385ba
+	NVME_LBAF_RP_GOOD	= 2,
a385ba
+	NVME_LBAF_RP_DEGRADED	= 3,
a385ba
+	NVME_NS_DPC_PI_LAST	= 1 << 4,
a385ba
+	NVME_NS_DPC_PI_FIRST	= 1 << 3,
a385ba
+	NVME_NS_DPC_PI_TYPE3	= 1 << 2,
a385ba
+	NVME_NS_DPC_PI_TYPE2	= 1 << 1,
a385ba
+	NVME_NS_DPC_PI_TYPE1	= 1 << 0,
a385ba
+	NVME_NS_DPS_PI_FIRST	= 1 << 3,
a385ba
+	NVME_NS_DPS_PI_MASK	= 0x7,
a385ba
+	NVME_NS_DPS_PI_TYPE1	= 1,
a385ba
+	NVME_NS_DPS_PI_TYPE2	= 2,
a385ba
+	NVME_NS_DPS_PI_TYPE3	= 3,
a385ba
+};
a385ba
+
a385ba
+struct nvme_ns_id_desc {
a385ba
+	__u8 nidt;
a385ba
+	__u8 nidl;
a385ba
+	__le16 reserved;
a385ba
+};
a385ba
+
a385ba
+#define NVME_NIDT_EUI64_LEN	8
a385ba
+#define NVME_NIDT_NGUID_LEN	16
a385ba
+#define NVME_NIDT_UUID_LEN	16
a385ba
+
a385ba
+enum {
a385ba
+	NVME_NIDT_EUI64		= 0x01,
a385ba
+	NVME_NIDT_NGUID		= 0x02,
a385ba
+	NVME_NIDT_UUID		= 0x03,
a385ba
+};
a385ba
+
a385ba
+#define NVME_MAX_NVMSET		31
a385ba
+
a385ba
+struct nvme_nvmset_attr_entry {
a385ba
+	__le16			id;
a385ba
+	__le16			endurance_group_id;
a385ba
+	__u8			rsvd4[4];
a385ba
+	__le32			random_4k_read_typical;
a385ba
+	__le32			opt_write_size;
a385ba
+	__u8			total_nvmset_cap[16];
a385ba
+	__u8			unalloc_nvmset_cap[16];
a385ba
+	__u8			rsvd48[80];
a385ba
+};
a385ba
+
a385ba
+struct nvme_id_nvmset {
a385ba
+	__u8				nid;
a385ba
+	__u8				rsvd1[127];
a385ba
+	struct nvme_nvmset_attr_entry	ent[NVME_MAX_NVMSET];
a385ba
+};
a385ba
+
a385ba
+/* Derived from 1.3a Figure 101: Get Log Page - Telemetry
a385ba
+ * Host-Initiated Log (Log Identifier 07h)
a385ba
+ */
a385ba
+struct nvme_telemetry_log_page_hdr {
a385ba
+	__u8    lpi; /* Log page identifier (byte 0) */
a385ba
+	__u8    rsvd[4]; /* bytes 1..4 */
a385ba
+	__u8    iee_oui[3]; /* bytes 5..7 */
a385ba
+	__u16   dalb1; /* Data area 1 last block */
a385ba
+	__u16   dalb2; /* Data area 2 last block */
a385ba
+	__u16   dalb3; /* Data area 3 last block */
a385ba
+	__u8    rsvd1[368]; /* bytes 14..381 by field sizes; TODO verify against the spec */
a385ba
+	__u8    ctrlavail; /* Controller initiated data available? */
a385ba
+	__u8    ctrldgn; /* Controller initiated telemetry Data Gen # */
a385ba
+	__u8    rsnident[128]; /* bytes 384..511; the fixed header is 512 bytes */
a385ba
+	/* The data area size is not known up front: fetch the header
a385ba
+	 * first, parse dalb1..dalb3 to determine how much space the
a385ba
+	 * log needs, then allocate the array below. Or just do a
a385ba
+	 * secondary non-struct allocation.
a385ba
+	 */
a385ba
+	__u8    telemetry_dataarea[0];
a385ba
+};
a385ba
+
a385ba
+struct nvme_endurance_group_log {
a385ba
+	__u32	rsvd0;
a385ba
+	__u8	avl_spare_threshold;
a385ba
+	__u8	percent_used;
a385ba
+	__u8	rsvd6[26];
a385ba
+	__u8	endurance_estimate[16];
a385ba
+	__u8	data_units_read[16];
a385ba
+	__u8	data_units_written[16];
a385ba
+	__u8	media_units_written[16];
a385ba
+	__u8	rsvd96[416];
a385ba
+};
a385ba
+
a385ba
+struct nvme_smart_log {
a385ba
+	__u8			critical_warning;
a385ba
+	__u8			temperature[2];
a385ba
+	__u8			avail_spare;
a385ba
+	__u8			spare_thresh;
a385ba
+	__u8			percent_used;
a385ba
+	__u8			rsvd6[26];
a385ba
+	__u8			data_units_read[16];
a385ba
+	__u8			data_units_written[16];
a385ba
+	__u8			host_reads[16];
a385ba
+	__u8			host_writes[16];
a385ba
+	__u8			ctrl_busy_time[16];
a385ba
+	__u8			power_cycles[16];
a385ba
+	__u8			power_on_hours[16];
a385ba
+	__u8			unsafe_shutdowns[16];
a385ba
+	__u8			media_errors[16];
a385ba
+	__u8			num_err_log_entries[16];
a385ba
+	__le32			warning_temp_time;
a385ba
+	__le32			critical_comp_time;
a385ba
+	__le16			temp_sensor[8];
a385ba
+	__le32			thm_temp1_trans_count;
a385ba
+	__le32			thm_temp2_trans_count;
a385ba
+	__le32			thm_temp1_total_time;
a385ba
+	__le32			thm_temp2_total_time;
a385ba
+	__u8			rsvd232[280];
a385ba
+};
a385ba
+
a385ba
+struct nvme_self_test_res {
a385ba
+	__u8 			device_self_test_status;
a385ba
+	__u8			segment_num;
a385ba
+	__u8			valid_diagnostic_info;
a385ba
+	__u8			rsvd;
a385ba
+	__le64			power_on_hours;
a385ba
+	__le32			nsid;
a385ba
+	__le64			failing_lba;
a385ba
+	__u8			status_code_type;
a385ba
+	__u8			status_code;
a385ba
+	__u8			vendor_specific[2];
a385ba
+} __attribute__((packed));
a385ba
+
a385ba
+struct nvme_self_test_log {
a385ba
+	__u8                      crnt_dev_selftest_oprn;
a385ba
+	__u8                      crnt_dev_selftest_compln;
a385ba
+	__u8                      rsvd[2];
a385ba
+	struct nvme_self_test_res result[20];
a385ba
+} __attribute__((packed));
a385ba
+
a385ba
+struct nvme_fw_slot_info_log {
a385ba
+	__u8			afi;
a385ba
+	__u8			rsvd1[7];
a385ba
+	__le64			frs[7];
a385ba
+	__u8			rsvd64[448];
a385ba
+};
a385ba
+
a385ba
+/* NVMe Namespace Write Protect State */
a385ba
+enum {
a385ba
+	NVME_NS_NO_WRITE_PROTECT = 0,
a385ba
+	NVME_NS_WRITE_PROTECT,
a385ba
+	NVME_NS_WRITE_PROTECT_POWER_CYCLE,
a385ba
+	NVME_NS_WRITE_PROTECT_PERMANENT,
a385ba
+};
a385ba
+
a385ba
+#define NVME_MAX_CHANGED_NAMESPACES     1024
a385ba
+
a385ba
+struct nvme_changed_ns_list_log {
a385ba
+	__le32			log[NVME_MAX_CHANGED_NAMESPACES];
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_CMD_EFFECTS_CSUPP		= 1 << 0,
a385ba
+	NVME_CMD_EFFECTS_LBCC		= 1 << 1,
a385ba
+	NVME_CMD_EFFECTS_NCC		= 1 << 2,
a385ba
+	NVME_CMD_EFFECTS_NIC		= 1 << 3,
a385ba
+	NVME_CMD_EFFECTS_CCC		= 1 << 4,
a385ba
+	NVME_CMD_EFFECTS_CSE_MASK	= 3 << 16,
a385ba
+};
a385ba
+
a385ba
+struct nvme_effects_log {
a385ba
+	__le32 acs[256];
a385ba
+	__le32 iocs[256];
a385ba
+	__u8   resv[2048];
a385ba
+};
a385ba
+
a385ba
+enum nvme_ana_state {
a385ba
+	NVME_ANA_OPTIMIZED		= 0x01,
a385ba
+	NVME_ANA_NONOPTIMIZED		= 0x02,
a385ba
+	NVME_ANA_INACCESSIBLE		= 0x03,
a385ba
+	NVME_ANA_PERSISTENT_LOSS	= 0x04,
a385ba
+	NVME_ANA_CHANGE			= 0x0f,
a385ba
+};
a385ba
+
a385ba
+struct nvme_ana_group_desc {
a385ba
+	__le32  grpid;
a385ba
+	__le32  nnsids;
a385ba
+	__le64  chgcnt;
a385ba
+	__u8    state;
a385ba
+	__u8    rsvd17[15];
a385ba
+	__le32  nsids[];
a385ba
+};
a385ba
+
a385ba
+/* flag for the log specific field of the ANA log */
a385ba
+#define NVME_ANA_LOG_RGO   (1 << 0)
a385ba
+
a385ba
+struct nvme_ana_rsp_hdr {
a385ba
+	__le64  chgcnt;
a385ba
+	__le16  ngrps;
a385ba
+	__le16  rsvd10[3];
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_SMART_CRIT_SPARE		= 1 << 0,
a385ba
+	NVME_SMART_CRIT_TEMPERATURE	= 1 << 1,
a385ba
+	NVME_SMART_CRIT_RELIABILITY	= 1 << 2,
a385ba
+	NVME_SMART_CRIT_MEDIA		= 1 << 3,
a385ba
+	NVME_SMART_CRIT_VOLATILE_MEMORY	= 1 << 4,
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_AER_ERROR			= 0,
a385ba
+	NVME_AER_SMART			= 1,
a385ba
+	NVME_AER_CSS			= 6,
a385ba
+	NVME_AER_VS			= 7,
a385ba
+	NVME_AER_NOTICE_NS_CHANGED	= 0x0002,
a385ba
+	NVME_AER_NOTICE_ANA		= 0x0003,
a385ba
+	NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
a385ba
+};
a385ba
+
a385ba
+struct nvme_lba_range_type {
a385ba
+	__u8			type;
a385ba
+	__u8			attributes;
a385ba
+	__u8			rsvd2[14];
a385ba
+	__u64			slba;
a385ba
+	__u64			nlb;
a385ba
+	__u8			guid[16];
a385ba
+	__u8			rsvd48[16];
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_LBART_TYPE_FS	= 0x01,
a385ba
+	NVME_LBART_TYPE_RAID	= 0x02,
a385ba
+	NVME_LBART_TYPE_CACHE	= 0x03,
a385ba
+	NVME_LBART_TYPE_SWAP	= 0x04,
a385ba
+
a385ba
+	NVME_LBART_ATTRIB_TEMP	= 1 << 0,
a385ba
+	NVME_LBART_ATTRIB_HIDE	= 1 << 1,
a385ba
+};
a385ba
+
a385ba
+struct nvme_plm_config {
a385ba
+	__u16	enable_event;
a385ba
+	__u8	rsvd2[30];
a385ba
+	__u64	dtwin_reads_thresh;
a385ba
+	__u64	dtwin_writes_thresh;
a385ba
+	__u64	dtwin_time_thresh;
a385ba
+	__u8	rsvd56[456];
a385ba
+};
a385ba
+
a385ba
+struct nvme_reservation_status {
a385ba
+	__le32	gen;
a385ba
+	__u8	rtype;
a385ba
+	__u8	regctl[2];
a385ba
+	__u8	resv5[2];
a385ba
+	__u8	ptpls;
a385ba
+	__u8	resv10[13];
a385ba
+	struct {
a385ba
+		__le16	cntlid;
a385ba
+		__u8	rcsts;
a385ba
+		__u8	resv3[5];
a385ba
+		__le64	hostid;
a385ba
+		__le64	rkey;
a385ba
+	} regctl_ds[];
a385ba
+};
a385ba
+
a385ba
+struct nvme_reservation_status_ext {
a385ba
+	__le32	gen;
a385ba
+	__u8	rtype;
a385ba
+	__u8	regctl[2];
a385ba
+	__u8	resv5[2];
a385ba
+	__u8	ptpls;
a385ba
+	__u8	resv10[14];
a385ba
+	__u8	resv24[40];
a385ba
+	struct {
a385ba
+		__le16	cntlid;
a385ba
+		__u8	rcsts;
a385ba
+		__u8	resv3[5];
a385ba
+		__le64	rkey;
a385ba
+		__u8	hostid[16];
a385ba
+		__u8	resv32[32];
a385ba
+	} regctl_eds[];
a385ba
+};
a385ba
+
a385ba
+enum nvme_async_event_type {
a385ba
+	NVME_AER_TYPE_ERROR	= 0,
a385ba
+	NVME_AER_TYPE_SMART	= 1,
a385ba
+	NVME_AER_TYPE_NOTICE	= 2,
a385ba
+};
a385ba
+
a385ba
+/* I/O commands */
a385ba
+
a385ba
+enum nvme_opcode {
a385ba
+	nvme_cmd_flush		= 0x00,
a385ba
+	nvme_cmd_write		= 0x01,
a385ba
+	nvme_cmd_read		= 0x02,
a385ba
+	nvme_cmd_write_uncor	= 0x04,
a385ba
+	nvme_cmd_compare	= 0x05,
a385ba
+	nvme_cmd_write_zeroes	= 0x08,
a385ba
+	nvme_cmd_dsm		= 0x09,
a385ba
+	nvme_cmd_resv_register	= 0x0d,
a385ba
+	nvme_cmd_resv_report	= 0x0e,
a385ba
+	nvme_cmd_resv_acquire	= 0x11,
a385ba
+	nvme_cmd_resv_release	= 0x15,
a385ba
+};
a385ba
+
a385ba
+/*
a385ba
+ * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier
a385ba
+ *
a385ba
+ * @NVME_SGL_FMT_ADDRESS:     absolute address of the data block
a385ba
+ * @NVME_SGL_FMT_OFFSET:      relative offset of the in-capsule data block
a385ba
+ * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA
a385ba
+ * @NVME_SGL_FMT_INVALIDATE:  RDMA transport specific remote invalidation
a385ba
+ *                            request subtype
a385ba
+ */
a385ba
+enum {
a385ba
+	NVME_SGL_FMT_ADDRESS		= 0x00,
a385ba
+	NVME_SGL_FMT_OFFSET		= 0x01,
a385ba
+	NVME_SGL_FMT_TRANSPORT_A	= 0x0A,
a385ba
+	NVME_SGL_FMT_INVALIDATE		= 0x0f,
a385ba
+};
a385ba
+
a385ba
+/*
a385ba
+ * Descriptor type - upper 4 bits of nvme_(keyed_)sgl_desc identifier
a385ba
+ *
a385ba
+ * For struct nvme_sgl_desc:
a385ba
+ *   @NVME_SGL_FMT_DATA_DESC:		data block descriptor
a385ba
+ *   @NVME_SGL_FMT_SEG_DESC:		sgl segment descriptor
a385ba
+ *   @NVME_SGL_FMT_LAST_SEG_DESC:	last sgl segment descriptor
a385ba
+ *
a385ba
+ * For struct nvme_keyed_sgl_desc:
a385ba
+ *   @NVME_KEY_SGL_FMT_DATA_DESC:	keyed data block descriptor
a385ba
+ *
a385ba
+ * Transport-specific SGL types:
a385ba
+ *   @NVME_TRANSPORT_SGL_DATA_DESC:	Transport SGL data block descriptor
a385ba
+ */
a385ba
+enum {
a385ba
+	NVME_SGL_FMT_DATA_DESC		= 0x00,
a385ba
+	NVME_SGL_FMT_SEG_DESC		= 0x02,
a385ba
+	NVME_SGL_FMT_LAST_SEG_DESC	= 0x03,
a385ba
+	NVME_KEY_SGL_FMT_DATA_DESC	= 0x04,
a385ba
+	NVME_TRANSPORT_SGL_DATA_DESC	= 0x05,
a385ba
+};
a385ba
+
a385ba
+struct nvme_sgl_desc {
a385ba
+	__le64	addr;
a385ba
+	__le32	length;
a385ba
+	__u8	rsvd[3];
a385ba
+	__u8	type;
a385ba
+};
a385ba
+
a385ba
+struct nvme_keyed_sgl_desc {
a385ba
+	__le64	addr;
a385ba
+	__u8	length[3];
a385ba
+	__u8	key[4];
a385ba
+	__u8	type;
a385ba
+};
a385ba
+
a385ba
+union nvme_data_ptr {
a385ba
+	struct {
a385ba
+		__le64	prp1;
a385ba
+		__le64	prp2;
a385ba
+	};
a385ba
+	struct nvme_sgl_desc	sgl;
a385ba
+	struct nvme_keyed_sgl_desc ksgl;
a385ba
+};
a385ba
+
a385ba
+/*
a385ba
+ * Lowest two bits of our flags field (FUSE field in the spec):
a385ba
+ *
a385ba
+ * @NVME_CMD_FUSE_FIRST:   Fused Operation, first command
a385ba
+ * @NVME_CMD_FUSE_SECOND:  Fused Operation, second command
a385ba
+ *
a385ba
+ * Highest two bits in our flags field (PSDT field in the spec):
a385ba
+ *
a385ba
+ * @NVME_CMD_SGL_METABUF:	Use SGLs for this transfer,
a385ba
+ *	If used, MPTR contains addr of single physical buffer (byte aligned).
a385ba
+ * @NVME_CMD_SGL_METASEG:	Use SGLs for this transfer,
a385ba
+ *	If used, MPTR contains an address of an SGL segment containing
a385ba
+ *	exactly 1 SGL descriptor (qword aligned).
a385ba
+ */
a385ba
+enum {
a385ba
+	NVME_CMD_FUSE_FIRST	= (1 << 0),
a385ba
+	NVME_CMD_FUSE_SECOND	= (1 << 1),
a385ba
+
a385ba
+	NVME_CMD_SGL_METABUF	= (1 << 6),
a385ba
+	NVME_CMD_SGL_METASEG	= (1 << 7),
a385ba
+	NVME_CMD_SGL_ALL	= NVME_CMD_SGL_METABUF | NVME_CMD_SGL_METASEG,
a385ba
+};
a385ba
+
a385ba
+struct nvme_common_command {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__le32			nsid;
a385ba
+	__le32			cdw2[2];
a385ba
+	__le64			metadata;
a385ba
+	union nvme_data_ptr	dptr;
a385ba
+	__le32			cdw10[6];
a385ba
+};
a385ba
+
a385ba
+struct nvme_rw_command {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__le32			nsid;
a385ba
+	__u64			rsvd2;
a385ba
+	__le64			metadata;
a385ba
+	union nvme_data_ptr	dptr;
a385ba
+	__le64			slba;
a385ba
+	__le16			length;
a385ba
+	__le16			control;
a385ba
+	__le32			dsmgmt;
a385ba
+	__le32			reftag;
a385ba
+	__le16			apptag;
a385ba
+	__le16			appmask;
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_RW_LR			= 1 << 15,
a385ba
+	NVME_RW_FUA			= 1 << 14,
a385ba
+	NVME_RW_DEAC			= 1 << 9,
a385ba
+	NVME_RW_DSM_FREQ_UNSPEC		= 0,
a385ba
+	NVME_RW_DSM_FREQ_TYPICAL	= 1,
a385ba
+	NVME_RW_DSM_FREQ_RARE		= 2,
a385ba
+	NVME_RW_DSM_FREQ_READS		= 3,
a385ba
+	NVME_RW_DSM_FREQ_WRITES		= 4,
a385ba
+	NVME_RW_DSM_FREQ_RW		= 5,
a385ba
+	NVME_RW_DSM_FREQ_ONCE		= 6,
a385ba
+	NVME_RW_DSM_FREQ_PREFETCH	= 7,
a385ba
+	NVME_RW_DSM_FREQ_TEMP		= 8,
a385ba
+	NVME_RW_DSM_LATENCY_NONE	= 0 << 4,
a385ba
+	NVME_RW_DSM_LATENCY_IDLE	= 1 << 4,
a385ba
+	NVME_RW_DSM_LATENCY_NORM	= 2 << 4,
a385ba
+	NVME_RW_DSM_LATENCY_LOW		= 3 << 4,
a385ba
+	NVME_RW_DSM_SEQ_REQ		= 1 << 6,
a385ba
+	NVME_RW_DSM_COMPRESSED		= 1 << 7,
a385ba
+	NVME_RW_PRINFO_PRCHK_REF	= 1 << 10,
a385ba
+	NVME_RW_PRINFO_PRCHK_APP	= 1 << 11,
a385ba
+	NVME_RW_PRINFO_PRCHK_GUARD	= 1 << 12,
a385ba
+	NVME_RW_PRINFO_PRACT		= 1 << 13,
a385ba
+	NVME_RW_DTYPE_STREAMS		= 1 << 4,
a385ba
+};
a385ba
+
a385ba
+struct nvme_dsm_cmd {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__le32			nsid;
a385ba
+	__u64			rsvd2[2];
a385ba
+	union nvme_data_ptr	dptr;
a385ba
+	__le32			nr;
a385ba
+	__le32			attributes;
a385ba
+	__u32			rsvd12[4];
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_DSMGMT_IDR		= 1 << 0,
a385ba
+	NVME_DSMGMT_IDW		= 1 << 1,
a385ba
+	NVME_DSMGMT_AD		= 1 << 2,
a385ba
+};
a385ba
+
a385ba
+#define NVME_DSM_MAX_RANGES	256
a385ba
+
a385ba
+struct nvme_dsm_range {
a385ba
+	__le32			cattr;
a385ba
+	__le32			nlb;
a385ba
+	__le64			slba;
a385ba
+};
a385ba
+
a385ba
+struct nvme_write_zeroes_cmd {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__le32			nsid;
a385ba
+	__u64			rsvd2;
a385ba
+	__le64			metadata;
a385ba
+	union nvme_data_ptr	dptr;
a385ba
+	__le64			slba;
a385ba
+	__le16			length;
a385ba
+	__le16			control;
a385ba
+	__le32			dsmgmt;
a385ba
+	__le32			reftag;
a385ba
+	__le16			apptag;
a385ba
+	__le16			appmask;
a385ba
+};
a385ba
+
a385ba
+/* Features */
a385ba
+
a385ba
+struct nvme_feat_auto_pst {
a385ba
+	__le64 entries[32];
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_HOST_MEM_ENABLE	= (1 << 0),
a385ba
+	NVME_HOST_MEM_RETURN	= (1 << 1),
a385ba
+};
a385ba
+
a385ba
+/* Admin commands */
a385ba
+
a385ba
+enum nvme_admin_opcode {
a385ba
+	nvme_admin_delete_sq		= 0x00,
a385ba
+	nvme_admin_create_sq		= 0x01,
a385ba
+	nvme_admin_get_log_page		= 0x02,
a385ba
+	nvme_admin_delete_cq		= 0x04,
a385ba
+	nvme_admin_create_cq		= 0x05,
a385ba
+	nvme_admin_identify		= 0x06,
a385ba
+	nvme_admin_abort_cmd		= 0x08,
a385ba
+	nvme_admin_set_features		= 0x09,
a385ba
+	nvme_admin_get_features		= 0x0a,
a385ba
+	nvme_admin_async_event		= 0x0c,
a385ba
+	nvme_admin_ns_mgmt		= 0x0d,
a385ba
+	nvme_admin_activate_fw		= 0x10,
a385ba
+	nvme_admin_download_fw		= 0x11,
a385ba
+	nvme_admin_dev_self_test	= 0x14,
a385ba
+	nvme_admin_ns_attach		= 0x15,
a385ba
+	nvme_admin_keep_alive		= 0x18,
a385ba
+	nvme_admin_directive_send	= 0x19,
a385ba
+	nvme_admin_directive_recv	= 0x1a,
a385ba
+	nvme_admin_virtual_mgmt		= 0x1c,
a385ba
+	nvme_admin_nvme_mi_send		= 0x1d,
a385ba
+	nvme_admin_nvme_mi_recv		= 0x1e,
a385ba
+	nvme_admin_dbbuf		= 0x7C,
a385ba
+	nvme_admin_format_nvm		= 0x80,
a385ba
+	nvme_admin_security_send	= 0x81,
a385ba
+	nvme_admin_security_recv	= 0x82,
a385ba
+	nvme_admin_sanitize_nvm		= 0x84,
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_QUEUE_PHYS_CONTIG	= (1 << 0),
a385ba
+	NVME_CQ_IRQ_ENABLED	= (1 << 1),
a385ba
+	NVME_SQ_PRIO_URGENT	= (0 << 1),
a385ba
+	NVME_SQ_PRIO_HIGH	= (1 << 1),
a385ba
+	NVME_SQ_PRIO_MEDIUM	= (2 << 1),
a385ba
+	NVME_SQ_PRIO_LOW	= (3 << 1),
a385ba
+	NVME_FEAT_ARBITRATION	= 0x01,
a385ba
+	NVME_FEAT_POWER_MGMT	= 0x02,
a385ba
+	NVME_FEAT_LBA_RANGE	= 0x03,
a385ba
+	NVME_FEAT_TEMP_THRESH	= 0x04,
a385ba
+	NVME_FEAT_ERR_RECOVERY	= 0x05,
a385ba
+	NVME_FEAT_VOLATILE_WC	= 0x06,
a385ba
+	NVME_FEAT_NUM_QUEUES	= 0x07,
a385ba
+	NVME_FEAT_IRQ_COALESCE	= 0x08,
a385ba
+	NVME_FEAT_IRQ_CONFIG	= 0x09,
a385ba
+	NVME_FEAT_WRITE_ATOMIC	= 0x0a,
a385ba
+	NVME_FEAT_ASYNC_EVENT	= 0x0b,
a385ba
+	NVME_FEAT_AUTO_PST	= 0x0c,
a385ba
+	NVME_FEAT_HOST_MEM_BUF	= 0x0d,
a385ba
+	NVME_FEAT_TIMESTAMP	= 0x0e,
a385ba
+	NVME_FEAT_KATO		= 0x0f,
a385ba
+	NVME_FEAT_HCTM		= 0X10,
a385ba
+	NVME_FEAT_NOPSC		= 0X11,
a385ba
+	NVME_FEAT_RRL		= 0x12,
a385ba
+	NVME_FEAT_PLM_CONFIG	= 0x13,
a385ba
+	NVME_FEAT_PLM_WINDOW	= 0x14,
a385ba
+	NVME_FEAT_SW_PROGRESS	= 0x80,
a385ba
+	NVME_FEAT_HOST_ID	= 0x81,
a385ba
+	NVME_FEAT_RESV_MASK	= 0x82,
a385ba
+	NVME_FEAT_RESV_PERSIST	= 0x83,
a385ba
+	NVME_FEAT_WRITE_PROTECT	= 0x84,
a385ba
+	NVME_LOG_ERROR		= 0x01,
a385ba
+	NVME_LOG_SMART		= 0x02,
a385ba
+	NVME_LOG_FW_SLOT	= 0x03,
a385ba
+	NVME_LOG_CHANGED_NS	= 0x04,
a385ba
+	NVME_LOG_CMD_EFFECTS	= 0x05,
a385ba
+	NVME_LOG_DEVICE_SELF_TEST = 0x06,
a385ba
+	NVME_LOG_TELEMETRY_HOST = 0x07,
a385ba
+	NVME_LOG_TELEMETRY_CTRL = 0x08,
a385ba
+	NVME_LOG_ENDURANCE_GROUP = 0x09,
a385ba
+	NVME_LOG_ANA		= 0x0c,
a385ba
+	NVME_LOG_DISC		= 0x70,
a385ba
+	NVME_LOG_RESERVATION	= 0x80,
a385ba
+	NVME_LOG_SANITIZE	= 0x81,
a385ba
+	NVME_FWACT_REPL		= (0 << 3),
a385ba
+	NVME_FWACT_REPL_ACTV	= (1 << 3),
a385ba
+	NVME_FWACT_ACTV		= (2 << 3),
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	NVME_NO_LOG_LSP       = 0x0,
a385ba
+	NVME_NO_LOG_LPO       = 0x0,
a385ba
+	NVME_LOG_ANA_LSP_RGO  = 0x1,
a385ba
+	NVME_TELEM_LSP_CREATE = 0x1,
a385ba
+};
a385ba
+
a385ba
+/* Sanitize and Sanitize Monitor/Log */
a385ba
+enum {
a385ba
+	/* Sanitize */
a385ba
+	NVME_SANITIZE_NO_DEALLOC	= 0x00000200,
a385ba
+	NVME_SANITIZE_OIPBP		= 0x00000100,
a385ba
+	NVME_SANITIZE_OWPASS_SHIFT	= 0x00000004,
a385ba
+	NVME_SANITIZE_AUSE		= 0x00000008,
a385ba
+	NVME_SANITIZE_ACT_CRYPTO_ERASE	= 0x00000004,
a385ba
+	NVME_SANITIZE_ACT_OVERWRITE	= 0x00000003,
a385ba
+	NVME_SANITIZE_ACT_BLOCK_ERASE	= 0x00000002,
a385ba
+	NVME_SANITIZE_ACT_EXIT		= 0x00000001,
a385ba
+
a385ba
+	/* Sanitize Monitor/Log */
a385ba
+	NVME_SANITIZE_LOG_DATA_LEN		= 0x0014,
a385ba
+	NVME_SANITIZE_LOG_GLOBAL_DATA_ERASED	= 0x0100,
a385ba
+	NVME_SANITIZE_LOG_NUM_CMPLTED_PASS_MASK	= 0x00F8,
a385ba
+	NVME_SANITIZE_LOG_STATUS_MASK		= 0x0007,
a385ba
+	NVME_SANITIZE_LOG_NEVER_SANITIZED	= 0x0000,
a385ba
+	NVME_SANITIZE_LOG_COMPLETED_SUCCESS	= 0x0001,
a385ba
+	NVME_SANITIZE_LOG_IN_PROGESS		= 0x0002,
a385ba
+	NVME_SANITIZE_LOG_COMPLETED_FAILED	= 0x0003,
a385ba
+};
a385ba
+
a385ba
+enum {
a385ba
+	/* Self-test log Validation bits */
a385ba
+	NVME_SELF_TEST_VALID_NSID	= 1 << 0,
a385ba
+	NVME_SELF_TEST_VALID_FLBA	= 1 << 1,
a385ba
+	NVME_SELF_TEST_VALID_SCT	= 1 << 2,
a385ba
+	NVME_SELF_TEST_VALID_SC		= 1 << 3,
a385ba
+	NVME_SELF_TEST_REPORTS		= 20,
a385ba
+};
a385ba
+
a385ba
+struct nvme_identify {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__le32			nsid;
a385ba
+	__u64			rsvd2[2];
a385ba
+	union nvme_data_ptr	dptr;
a385ba
+	__u8			cns;
a385ba
+	__u8			rsvd3;
a385ba
+	__le16			ctrlid;
a385ba
+	__u32			rsvd11[5];
a385ba
+};
a385ba
+
a385ba
+#define NVME_IDENTIFY_DATA_SIZE 4096
a385ba
+
a385ba
+struct nvme_features {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__le32			nsid;
a385ba
+	__u64			rsvd2[2];
a385ba
+	union nvme_data_ptr	dptr;
a385ba
+	__le32			fid;
a385ba
+	__le32			dword11;
a385ba
+	__le32                  dword12;
a385ba
+	__le32                  dword13;
a385ba
+	__le32                  dword14;
a385ba
+	__le32                  dword15;
a385ba
+};
a385ba
+
a385ba
+struct nvme_host_mem_buf_desc {
a385ba
+	__le64			addr;
a385ba
+	__le32			size;
a385ba
+	__u32			rsvd;
a385ba
+};
a385ba
+
a385ba
+struct nvme_create_cq {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__u32			rsvd1[5];
a385ba
+	__le64			prp1;
a385ba
+	__u64			rsvd8;
a385ba
+	__le16			cqid;
a385ba
+	__le16			qsize;
a385ba
+	__le16			cq_flags;
a385ba
+	__le16			irq_vector;
a385ba
+	__u32			rsvd12[4];
a385ba
+};
a385ba
+
a385ba
+struct nvme_create_sq {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__u32			rsvd1[5];
a385ba
+	__le64			prp1;
a385ba
+	__u64			rsvd8;
a385ba
+	__le16			sqid;
a385ba
+	__le16			qsize;
a385ba
+	__le16			sq_flags;
a385ba
+	__le16			cqid;
a385ba
+	__u32			rsvd12[4];
a385ba
+};
a385ba
+
a385ba
+struct nvme_delete_queue {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__u32			rsvd1[9];
a385ba
+	__le16			qid;
a385ba
+	__u16			rsvd10;
a385ba
+	__u32			rsvd11[5];
a385ba
+};
a385ba
+
a385ba
+struct nvme_abort_cmd {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__u32			rsvd1[9];
a385ba
+	__le16			sqid;
a385ba
+	__u16			cid;
a385ba
+	__u32			rsvd11[5];
a385ba
+};
a385ba
+
a385ba
+struct nvme_download_firmware {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__u32			rsvd1[5];
a385ba
+	union nvme_data_ptr	dptr;
a385ba
+	__le32			numd;
a385ba
+	__le32			offset;
a385ba
+	__u32			rsvd12[4];
a385ba
+};
a385ba
+
a385ba
+struct nvme_format_cmd {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__le32			nsid;
a385ba
+	__u64			rsvd2[4];
a385ba
+	__le32			cdw10;
a385ba
+	__u32			rsvd11[5];
a385ba
+};
a385ba
+
a385ba
+struct nvme_get_log_page_command {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__le32			nsid;
a385ba
+	__u64			rsvd2[2];
a385ba
+	union nvme_data_ptr	dptr;
a385ba
+	__u8			lid;
a385ba
+	__u8			lsp;
a385ba
+	__le16			numdl;
a385ba
+	__le16			numdu;
a385ba
+	__u16			rsvd11;
a385ba
+	__le32			lpol;
a385ba
+	__le32			lpou;
a385ba
+	__u32			rsvd14[2];
a385ba
+};
a385ba
+
a385ba
+struct nvme_directive_cmd {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__le32			nsid;
a385ba
+	__u64			rsvd2[2];
a385ba
+	union nvme_data_ptr	dptr;
a385ba
+	__le32			numd;
a385ba
+	__u8			doper;
a385ba
+	__u8			dtype;
a385ba
+	__le16			dspec;
a385ba
+	__u8			endir;
a385ba
+	__u8			tdtype;
a385ba
+	__u16			rsvd15;
a385ba
+
a385ba
+	__u32			rsvd16[3];
a385ba
+};
a385ba
+
a385ba
+/* Sanitize Log Page */
a385ba
+struct nvme_sanitize_log_page {
a385ba
+	__le16			progress;
a385ba
+	__le16			status;
a385ba
+	__le32			cdw10_info;
a385ba
+	__le32			est_ovrwrt_time;
a385ba
+	__le32			est_blk_erase_time;
a385ba
+	__le32			est_crypto_erase_time;
a385ba
+};
a385ba
+
a385ba
+/*
a385ba
+ * Fabrics subcommands.
a385ba
+ */
a385ba
+enum nvmf_fabrics_opcode {
a385ba
+	nvme_fabrics_command		= 0x7f,
a385ba
+};
a385ba
+
a385ba
+enum nvmf_capsule_command {
a385ba
+	nvme_fabrics_type_property_set	= 0x00,
a385ba
+	nvme_fabrics_type_connect	= 0x01,
a385ba
+	nvme_fabrics_type_property_get	= 0x04,
a385ba
+};
a385ba
+
a385ba
+struct nvmf_common_command {
a385ba
+	__u8	opcode;
a385ba
+	__u8	resv1;
a385ba
+	__u16	command_id;
a385ba
+	__u8	fctype;
a385ba
+	__u8	resv2[35];
a385ba
+	__u8	ts[24];
a385ba
+};
a385ba
+
a385ba
+/*
a385ba
+ * The legal cntlid range an NVMe Target will provide.
a385ba
+ * Note that cntlid of value 0 is considered illegal in the fabrics world.
a385ba
+ * Devices based on earlier specs did not have the subsystem concept;
a385ba
+ * therefore, those devices had their cntlid value set to 0 as a result.
a385ba
+ */
a385ba
+#define NVME_CNTLID_MIN		1
a385ba
+#define NVME_CNTLID_MAX		0xffef
a385ba
+#define NVME_CNTLID_DYNAMIC	0xffff
a385ba
+
a385ba
+#define MAX_DISC_LOGS	255
a385ba
+
a385ba
+/* Discovery log page entry */
a385ba
+struct nvmf_disc_rsp_page_entry {
a385ba
+	__u8		trtype;
a385ba
+	__u8		adrfam;
a385ba
+	__u8		subtype;
a385ba
+	__u8		treq;
a385ba
+	__le16		portid;
a385ba
+	__le16		cntlid;
a385ba
+	__le16		asqsz;
a385ba
+	__u8		resv8[22];
a385ba
+	char		trsvcid[NVMF_TRSVCID_SIZE];
a385ba
+	__u8		resv64[192];
a385ba
+	char		subnqn[NVMF_NQN_FIELD_LEN];
a385ba
+	char		traddr[NVMF_TRADDR_SIZE];
a385ba
+	union tsas {
a385ba
+		char		common[NVMF_TSAS_SIZE];
a385ba
+		struct rdma {
a385ba
+			__u8	qptype;
a385ba
+			__u8	prtype;
a385ba
+			__u8	cms;
a385ba
+			__u8	resv3[5];
a385ba
+			__u16	pkey;
a385ba
+			__u8	resv10[246];
a385ba
+		} rdma;
a385ba
+		struct tcp {
a385ba
+			__u8	sectype;
a385ba
+		} tcp;
a385ba
+	} tsas;
a385ba
+};
a385ba
+
a385ba
+/* Discovery log page header */
a385ba
+struct nvmf_disc_rsp_page_hdr {
a385ba
+	__le64		genctr;
a385ba
+	__le64		numrec;
a385ba
+	__le16		recfmt;
a385ba
+	__u8		resv14[1006];
a385ba
+	struct nvmf_disc_rsp_page_entry entries[0];
a385ba
+};
a385ba
+
a385ba
+struct nvmf_connect_command {
a385ba
+	__u8		opcode;
a385ba
+	__u8		resv1;
a385ba
+	__u16		command_id;
a385ba
+	__u8		fctype;
a385ba
+	__u8		resv2[19];
a385ba
+	union nvme_data_ptr dptr;
a385ba
+	__le16		recfmt;
a385ba
+	__le16		qid;
a385ba
+	__le16		sqsize;
a385ba
+	__u8		cattr;
a385ba
+	__u8		resv3;
a385ba
+	__le32		kato;
a385ba
+	__u8		resv4[12];
a385ba
+};
a385ba
+
a385ba
+struct nvmf_connect_data {
a385ba
+	uuid_t		hostid;
a385ba
+	__le16		cntlid;
a385ba
+	char		resv4[238];
a385ba
+	char		subsysnqn[NVMF_NQN_FIELD_LEN];
a385ba
+	char		hostnqn[NVMF_NQN_FIELD_LEN];
a385ba
+	char		resv5[256];
a385ba
+};
a385ba
+
a385ba
+struct nvmf_property_set_command {
a385ba
+	__u8		opcode;
a385ba
+	__u8		resv1;
a385ba
+	__u16		command_id;
a385ba
+	__u8		fctype;
a385ba
+	__u8		resv2[35];
a385ba
+	__u8		attrib;
a385ba
+	__u8		resv3[3];
a385ba
+	__le32		offset;
a385ba
+	__le64		value;
a385ba
+	__u8		resv4[8];
a385ba
+};
a385ba
+
a385ba
+struct nvmf_property_get_command {
a385ba
+	__u8		opcode;
a385ba
+	__u8		resv1;
a385ba
+	__u16		command_id;
a385ba
+	__u8		fctype;
a385ba
+	__u8		resv2[35];
a385ba
+	__u8		attrib;
a385ba
+	__u8		resv3[3];
a385ba
+	__le32		offset;
a385ba
+	__u8		resv4[16];
a385ba
+};
a385ba
+
a385ba
+struct nvme_dbbuf {
a385ba
+	__u8			opcode;
a385ba
+	__u8			flags;
a385ba
+	__u16			command_id;
a385ba
+	__u32			rsvd1[5];
a385ba
+	__le64			prp1;
a385ba
+	__le64			prp2;
a385ba
+	__u32			rsvd12[6];
a385ba
+};
a385ba
+
a385ba
+struct streams_directive_params {
a385ba
+	__le16	msl;
a385ba
+	__le16	nssa;
a385ba
+	__le16	nsso;
a385ba
+	__u8	rsvd[10];
a385ba
+	__le32	sws;
a385ba
+	__le16	sgs;
a385ba
+	__le16	nsa;
a385ba
+	__le16	nso;
a385ba
+	__u8	rsvd2[6];
a385ba
+};
a385ba
+
a385ba
+struct nvme_command {
a385ba
+	union {
a385ba
+		struct nvme_common_command common;
a385ba
+		struct nvme_rw_command rw;
a385ba
+		struct nvme_identify identify;
a385ba
+		struct nvme_features features;
a385ba
+		struct nvme_create_cq create_cq;
a385ba
+		struct nvme_create_sq create_sq;
a385ba
+		struct nvme_delete_queue delete_queue;
a385ba
+		struct nvme_download_firmware dlfw;
a385ba
+		struct nvme_format_cmd format;
a385ba
+		struct nvme_dsm_cmd dsm;
a385ba
+		struct nvme_write_zeroes_cmd write_zeroes;
a385ba
+		struct nvme_abort_cmd abort;
a385ba
+		struct nvme_get_log_page_command get_log_page;
a385ba
+		struct nvmf_common_command fabrics;
a385ba
+		struct nvmf_connect_command connect;
a385ba
+		struct nvmf_property_set_command prop_set;
a385ba
+		struct nvmf_property_get_command prop_get;
a385ba
+		struct nvme_dbbuf dbbuf;
a385ba
+		struct nvme_directive_cmd directive;
a385ba
+	};
a385ba
+};
a385ba
+
a385ba
+static inline bool nvme_is_write(struct nvme_command *cmd)
a385ba
+{
a385ba
+	/*
a385ba
+	 * What a mess...
a385ba
+	 *
a385ba
+	 * Why can't we simply have a Fabrics In and Fabrics out command?
a385ba
+	 */
a385ba
+	if (unlikely(cmd->common.opcode == nvme_fabrics_command))
a385ba
+		return cmd->fabrics.fctype & 1;
a385ba
+	return cmd->common.opcode & 1;
a385ba
+}
a385ba
+
a385ba
+enum {
a385ba
+	/*
a385ba
+	 * Generic Command Status:
a385ba
+	 */
a385ba
+	NVME_SC_SUCCESS			= 0x0,
a385ba
+	NVME_SC_INVALID_OPCODE		= 0x1,
a385ba
+	NVME_SC_INVALID_FIELD		= 0x2,
a385ba
+	NVME_SC_CMDID_CONFLICT		= 0x3,
a385ba
+	NVME_SC_DATA_XFER_ERROR		= 0x4,
a385ba
+	NVME_SC_POWER_LOSS		= 0x5,
a385ba
+	NVME_SC_INTERNAL		= 0x6,
a385ba
+	NVME_SC_ABORT_REQ		= 0x7,
a385ba
+	NVME_SC_ABORT_QUEUE		= 0x8,
a385ba
+	NVME_SC_FUSED_FAIL		= 0x9,
a385ba
+	NVME_SC_FUSED_MISSING		= 0xa,
a385ba
+	NVME_SC_INVALID_NS		= 0xb,
a385ba
+	NVME_SC_CMD_SEQ_ERROR		= 0xc,
a385ba
+	NVME_SC_SGL_INVALID_LAST	= 0xd,
a385ba
+	NVME_SC_SGL_INVALID_COUNT	= 0xe,
a385ba
+	NVME_SC_SGL_INVALID_DATA	= 0xf,
a385ba
+	NVME_SC_SGL_INVALID_METADATA	= 0x10,
a385ba
+	NVME_SC_SGL_INVALID_TYPE	= 0x11,
a385ba
+
a385ba
+	NVME_SC_SGL_INVALID_OFFSET	= 0x16,
a385ba
+	NVME_SC_SGL_INVALID_SUBTYPE	= 0x17,
a385ba
+
a385ba
+	NVME_SC_SANITIZE_FAILED		= 0x1C,
a385ba
+	NVME_SC_SANITIZE_IN_PROGRESS	= 0x1D,
a385ba
+
a385ba
+	NVME_SC_NS_WRITE_PROTECTED	= 0x20,
a385ba
+
a385ba
+	NVME_SC_LBA_RANGE		= 0x80,
a385ba
+	NVME_SC_CAP_EXCEEDED		= 0x81,
a385ba
+	NVME_SC_NS_NOT_READY		= 0x82,
a385ba
+	NVME_SC_RESERVATION_CONFLICT	= 0x83,
a385ba
+
a385ba
+	/*
a385ba
+	 * Command Specific Status:
a385ba
+	 */
a385ba
+	NVME_SC_CQ_INVALID		= 0x100,
a385ba
+	NVME_SC_QID_INVALID		= 0x101,
a385ba
+	NVME_SC_QUEUE_SIZE		= 0x102,
a385ba
+	NVME_SC_ABORT_LIMIT		= 0x103,
a385ba
+	NVME_SC_ABORT_MISSING		= 0x104,
a385ba
+	NVME_SC_ASYNC_LIMIT		= 0x105,
a385ba
+	NVME_SC_FIRMWARE_SLOT		= 0x106,
a385ba
+	NVME_SC_FIRMWARE_IMAGE		= 0x107,
a385ba
+	NVME_SC_INVALID_VECTOR		= 0x108,
a385ba
+	NVME_SC_INVALID_LOG_PAGE	= 0x109,
a385ba
+	NVME_SC_INVALID_FORMAT		= 0x10a,
a385ba
+	NVME_SC_FW_NEEDS_CONV_RESET	= 0x10b,
a385ba
+	NVME_SC_INVALID_QUEUE		= 0x10c,
a385ba
+	NVME_SC_FEATURE_NOT_SAVEABLE	= 0x10d,
a385ba
+	NVME_SC_FEATURE_NOT_CHANGEABLE	= 0x10e,
a385ba
+	NVME_SC_FEATURE_NOT_PER_NS	= 0x10f,
a385ba
+	NVME_SC_FW_NEEDS_SUBSYS_RESET	= 0x110,
a385ba
+	NVME_SC_FW_NEEDS_RESET		= 0x111,
a385ba
+	NVME_SC_FW_NEEDS_MAX_TIME	= 0x112,
a385ba
+	NVME_SC_FW_ACIVATE_PROHIBITED	= 0x113,
a385ba
+	NVME_SC_OVERLAPPING_RANGE	= 0x114,
a385ba
+	NVME_SC_NS_INSUFFICENT_CAP	= 0x115,
a385ba
+	NVME_SC_NS_ID_UNAVAILABLE	= 0x116,
a385ba
+	NVME_SC_NS_ALREADY_ATTACHED	= 0x118,
a385ba
+	NVME_SC_NS_IS_PRIVATE		= 0x119,
a385ba
+	NVME_SC_NS_NOT_ATTACHED		= 0x11a,
a385ba
+	NVME_SC_THIN_PROV_NOT_SUPP	= 0x11b,
a385ba
+	NVME_SC_CTRL_LIST_INVALID	= 0x11c,
a385ba
+	NVME_SC_BP_WRITE_PROHIBITED	= 0x11e,
a385ba
+
a385ba
+	/*
a385ba
+	 * I/O Command Set Specific - NVM commands:
a385ba
+	 */
a385ba
+	NVME_SC_BAD_ATTRIBUTES		= 0x180,
a385ba
+	NVME_SC_INVALID_PI		= 0x181,
a385ba
+	NVME_SC_READ_ONLY		= 0x182,
a385ba
+	NVME_SC_ONCS_NOT_SUPPORTED	= 0x183,
a385ba
+
a385ba
+	/*
a385ba
+	 * I/O Command Set Specific - Fabrics commands:
a385ba
+	 */
a385ba
+	NVME_SC_CONNECT_FORMAT		= 0x180,
a385ba
+	NVME_SC_CONNECT_CTRL_BUSY	= 0x181,
a385ba
+	NVME_SC_CONNECT_INVALID_PARAM	= 0x182,
a385ba
+	NVME_SC_CONNECT_RESTART_DISC	= 0x183,
a385ba
+	NVME_SC_CONNECT_INVALID_HOST	= 0x184,
a385ba
+
a385ba
+	NVME_SC_DISCOVERY_RESTART	= 0x190,
a385ba
+	NVME_SC_AUTH_REQUIRED		= 0x191,
a385ba
+
a385ba
+	/*
a385ba
+	 * Media and Data Integrity Errors:
a385ba
+	 */
a385ba
+	NVME_SC_WRITE_FAULT		= 0x280,
a385ba
+	NVME_SC_READ_ERROR		= 0x281,
a385ba
+	NVME_SC_GUARD_CHECK		= 0x282,
a385ba
+	NVME_SC_APPTAG_CHECK		= 0x283,
a385ba
+	NVME_SC_REFTAG_CHECK		= 0x284,
a385ba
+	NVME_SC_COMPARE_FAILED		= 0x285,
a385ba
+	NVME_SC_ACCESS_DENIED		= 0x286,
a385ba
+	NVME_SC_UNWRITTEN_BLOCK		= 0x287,
a385ba
+
a385ba
+	/*
a385ba
+	 * Path-related Errors:
a385ba
+	 */
a385ba
+	NVME_SC_ANA_PERSISTENT_LOSS	= 0x301,
a385ba
+	NVME_SC_ANA_INACCESSIBLE	= 0x302,
a385ba
+	NVME_SC_ANA_TRANSITION		= 0x303,
a385ba
+
a385ba
+	NVME_SC_DNR			= 0x4000,
a385ba
+};
a385ba
+
a385ba
+struct nvme_completion {
a385ba
+	/*
a385ba
+	 * Used by Admin and Fabrics commands to return data:
a385ba
+	 */
a385ba
+	union nvme_result {
a385ba
+		__le16	u16;
a385ba
+		__le32	u32;
a385ba
+		__le64	u64;
a385ba
+	} result;
a385ba
+	__le16	sq_head;	/* how much of this queue may be reclaimed */
a385ba
+	__le16	sq_id;		/* submission queue that generated this entry */
a385ba
+	__u16	command_id;	/* of the command which completed */
a385ba
+	__le16	status;		/* did the command fail, and if so, why? */
a385ba
+};
a385ba
+
a385ba
+#define NVME_VS(major, minor, tertiary) \
a385ba
+	(((major) << 16) | ((minor) << 8) | (tertiary))
a385ba
+
a385ba
+#define NVME_MAJOR(ver)		((ver) >> 16)
a385ba
+#define NVME_MINOR(ver)		(((ver) >> 8) & 0xff)
a385ba
+#define NVME_TERTIARY(ver)	((ver) & 0xff)
a385ba
+
a385ba
+#endif /* _LINUX_NVME_H */
a385ba
Index: multipath-tools-130222/libmultipath/nvme/linux/nvme_ioctl.h
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/nvme/linux/nvme_ioctl.h
a385ba
@@ -0,0 +1,67 @@
a385ba
+/*
a385ba
+ * Definitions for the NVM Express ioctl interface
a385ba
+ * Copyright (c) 2011-2014, Intel Corporation.
a385ba
+ *
a385ba
+ * This program is free software; you can redistribute it and/or modify it
a385ba
+ * under the terms and conditions of the GNU General Public License,
a385ba
+ * version 2, as published by the Free Software Foundation.
a385ba
+ *
a385ba
+ * This program is distributed in the hope it will be useful, but WITHOUT
a385ba
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
a385ba
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
a385ba
+ * more details.
a385ba
+ */
a385ba
+
a385ba
+#ifndef _UAPI_LINUX_NVME_IOCTL_H
a385ba
+#define _UAPI_LINUX_NVME_IOCTL_H
a385ba
+
a385ba
+#include <linux/types.h>
a385ba
+#include <sys/ioctl.h>
a385ba
+
a385ba
+struct nvme_user_io {
a385ba
+	__u8	opcode;
a385ba
+	__u8	flags;
a385ba
+	__u16	control;
a385ba
+	__u16	nblocks;
a385ba
+	__u16	rsvd;
a385ba
+	__u64	metadata;
a385ba
+	__u64	addr;
a385ba
+	__u64	slba;
a385ba
+	__u32	dsmgmt;
a385ba
+	__u32	reftag;
a385ba
+	__u16	apptag;
a385ba
+	__u16	appmask;
a385ba
+};
a385ba
+
a385ba
+struct nvme_passthru_cmd {
a385ba
+	__u8	opcode;
a385ba
+	__u8	flags;
a385ba
+	__u16	rsvd1;
a385ba
+	__u32	nsid;
a385ba
+	__u32	cdw2;
a385ba
+	__u32	cdw3;
a385ba
+	__u64	metadata;
a385ba
+	__u64	addr;
a385ba
+	__u32	metadata_len;
a385ba
+	__u32	data_len;
a385ba
+	__u32	cdw10;
a385ba
+	__u32	cdw11;
a385ba
+	__u32	cdw12;
a385ba
+	__u32	cdw13;
a385ba
+	__u32	cdw14;
a385ba
+	__u32	cdw15;
a385ba
+	__u32	timeout_ms;
a385ba
+	__u32	result;
a385ba
+};
a385ba
+
a385ba
+#define nvme_admin_cmd nvme_passthru_cmd
a385ba
+
a385ba
+#define NVME_IOCTL_ID		_IO('N', 0x40)
a385ba
+#define NVME_IOCTL_ADMIN_CMD	_IOWR('N', 0x41, struct nvme_admin_cmd)
a385ba
+#define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x42, struct nvme_user_io)
a385ba
+#define NVME_IOCTL_IO_CMD	_IOWR('N', 0x43, struct nvme_passthru_cmd)
a385ba
+#define NVME_IOCTL_RESET	_IO('N', 0x44)
a385ba
+#define NVME_IOCTL_SUBSYS_RESET	_IO('N', 0x45)
a385ba
+#define NVME_IOCTL_RESCAN	_IO('N', 0x46)
a385ba
+
a385ba
+#endif /* _UAPI_LINUX_NVME_IOCTL_H */
a385ba
Index: multipath-tools-130222/Makefile.inc
a385ba
===================================================================
a385ba
--- multipath-tools-130222.orig/Makefile.inc
a385ba
+++ multipath-tools-130222/Makefile.inc
a385ba
@@ -37,6 +37,7 @@ mpathpersistdir = $(TOPDIR)/libmpathpers
a385ba
 includedir  = $(prefix)/usr/include
a385ba
 mpathcmddir = $(TOPDIR)/libmpathcmd
a385ba
 libdmmpdir     = $(TOPDIR)/libdmmp
a385ba
+nvmedir     = $(TOPDIR)/libmultipath/nvme
a385ba
 pkgconfdir     = $(prefix)/usr/$(LIB)/pkgconfig
a385ba
 
a385ba
 GZIP            = /bin/gzip -9 -c
a385ba
Index: multipath-tools-130222/libmultipath/Makefile
a385ba
===================================================================
a385ba
--- multipath-tools-130222.orig/libmultipath/Makefile
a385ba
+++ multipath-tools-130222/libmultipath/Makefile
a385ba
@@ -8,7 +8,7 @@ SONAME=0
a385ba
 DEVLIB = libmultipath.so
a385ba
 LIBS = $(DEVLIB).$(SONAME)
a385ba
 LIBDEPS = -lpthread -ldl -ldevmapper -ludev -L$(mpathcmddir) -lmpathcmd -laio
a385ba
-CFLAGS += -fPIC -I$(mpathcmddir) -I$(mpathpersistdir)
a385ba
+CFLAGS += -fPIC -I$(mpathcmddir) -I$(mpathpersistdir) -I$(nvmedir)
a385ba
 
a385ba
 OBJS = memory.o parser.o vector.o devmapper.o \
a385ba
        hwtable.o blacklist.o util.o dmparser.o config.o \
a385ba
@@ -17,7 +17,7 @@ OBJS = memory.o parser.o vector.o devmap
a385ba
        switchgroup.o uxsock.o print.o alias.o log_pthread.o \
a385ba
        log.o configure.o structs_vec.o sysfs.o prio.o checkers.o \
a385ba
        lock.o waiter.o file.o wwids.o prioritizers/alua_rtpg.o prkey.o \
a385ba
-       io_err_stat.o
a385ba
+       io_err_stat.o nvme-lib.o
a385ba
 
a385ba
 LIBDM_API_FLUSH = $(shell grep -Ecs '^[a-z]*[[:space:]]+dm_task_no_flush' /usr/include/libdevmapper.h)
a385ba
 
a385ba
@@ -46,6 +46,9 @@ endif
a385ba
 
a385ba
 all: $(LIBS)
a385ba
 
a385ba
+nvme-lib.o: nvme-lib.c nvme-ioctl.c nvme-ioctl.h
a385ba
+	$(CC) $(CFLAGS) -Wno-unused-function -c -o $@ $<
a385ba
+
a385ba
 $(LIBS): $(OBJS)
a385ba
 	$(CC) $(LDFLAGS) $(SHARED_FLAGS) -Wl,-soname=$@ $(CFLAGS) -o $@ $(OBJS) $(LIBDEPS)
a385ba
 	ln -sf $@ $(DEVLIB)
a385ba
Index: multipath-tools-130222/libmultipath/nvme-ioctl.c
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/nvme-ioctl.c
a385ba
@@ -0,0 +1,869 @@
a385ba
+#include <sys/ioctl.h>
a385ba
+#include <sys/stat.h>
a385ba
+#include <string.h>
a385ba
+#include <errno.h>
a385ba
+#include <unistd.h>
a385ba
+
a385ba
+#include <errno.h>
a385ba
+#include <getopt.h>
a385ba
+#include <fcntl.h>
a385ba
+#include <inttypes.h>
a385ba
+#include <locale.h>
a385ba
+#include <stdio.h>
a385ba
+#include <stdlib.h>
a385ba
+#include <string.h>
a385ba
+#include <unistd.h>
a385ba
+#include <math.h>
a385ba
+
a385ba
+#include "nvme-ioctl.h"
a385ba
+
a385ba
+/* Verify that fd is a character device before a reset/rescan ioctl is sent. */
a385ba
+/* Returns 0 if so, the errno from fstat() on failure, ENOTBLK otherwise. */
a385ba
+static int nvme_verify_chr(int fd)
a385ba
+{
a385ba
+	/* Automatic storage keeps the function reentrant (was a shared static). */
a385ba
+	struct stat nvme_stat;
a385ba
+
a385ba
+	if (fstat(fd, &nvme_stat) < 0) {
a385ba
+		/* perror() may overwrite errno, so capture it before reporting. */
a385ba
+		int saved_errno = errno;
a385ba
+
a385ba
+		perror("fstat");
a385ba
+		return saved_errno;
a385ba
+	}
a385ba
+	if (!S_ISCHR(nvme_stat.st_mode)) {
a385ba
+		fprintf(stderr,
a385ba
+			"Error: requesting reset on non-controller handle\n");
a385ba
+		return ENOTBLK;
a385ba
+	}
a385ba
+	return 0;
a385ba
+}
a385ba
+
a385ba
+/* Issue NVME_IOCTL_SUBSYS_RESET on fd once nvme_verify_chr() accepts it. */
a385ba
+static int nvme_subsystem_reset(int fd)
a385ba
+{
a385ba
+	int err = nvme_verify_chr(fd);
a385ba
+
a385ba
+	/* A non-zero verification result is handed straight back to the caller. */
a385ba
+	return err ? err : ioctl(fd, NVME_IOCTL_SUBSYS_RESET);
a385ba
+}
a385ba
+
a385ba
+/* Issue NVME_IOCTL_RESET on fd once nvme_verify_chr() accepts it. */
a385ba
+static int nvme_reset_controller(int fd)
a385ba
+{
a385ba
+	int err = nvme_verify_chr(fd);
a385ba
+
a385ba
+	/* A non-zero verification result is handed straight back to the caller. */
a385ba
+	return err ? err : ioctl(fd, NVME_IOCTL_RESET);
a385ba
+}
a385ba
+
a385ba
+/* Issue NVME_IOCTL_RESCAN on fd once nvme_verify_chr() accepts it. */
a385ba
+static int nvme_ns_rescan(int fd)
a385ba
+{
a385ba
+	int err = nvme_verify_chr(fd);
a385ba
+
a385ba
+	/* A non-zero verification result is handed straight back to the caller. */
a385ba
+	return err ? err : ioctl(fd, NVME_IOCTL_RESCAN);
a385ba
+}
a385ba
+
a385ba
+/* Return the result of NVME_IOCTL_ID for the block device behind fd. */
a385ba
+/* On failure returns -errno: the fstat() error, or -ENOTBLK for a non-block fd. */
a385ba
+static int nvme_get_nsid(int fd)
a385ba
+{
a385ba
+	/* Automatic storage keeps the function reentrant (was a shared static). */
a385ba
+	struct stat nvme_stat;
a385ba
+
a385ba
+	if (fstat(fd, &nvme_stat) < 0)
a385ba
+		return -errno;
a385ba
+
a385ba
+	if (!S_ISBLK(nvme_stat.st_mode)) {
a385ba
+		fprintf(stderr,
a385ba
+			"Error: requesting namespace-id from non-block device\n");
a385ba
+		/* Set errno after fprintf() so the report cannot disturb it. */
a385ba
+		errno = ENOTBLK;
a385ba
+		return -errno;
a385ba
+	}
a385ba
+	return ioctl(fd, NVME_IOCTL_ID);
a385ba
+}
a385ba
+
a385ba
+/* Hand a prepared passthru command to the driver via the given ioctl request. */
a385ba
+static int nvme_submit_passthru(int fd, unsigned long ioctl_cmd,
a385ba
+			 struct nvme_passthru_cmd *cmd)
a385ba
+{
a385ba
+	int status = ioctl(fd, ioctl_cmd, cmd);
a385ba
+
a385ba
+	return status;
a385ba
+}
a385ba
+
a385ba
+/* Submit cmd with the NVME_IOCTL_ADMIN_CMD request. */
a385ba
+static int nvme_submit_admin_passthru(int fd, struct nvme_passthru_cmd *cmd)
a385ba
+{
a385ba
+	return nvme_submit_passthru(fd, NVME_IOCTL_ADMIN_CMD, cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit cmd with the NVME_IOCTL_IO_CMD request. */
a385ba
+static int nvme_submit_io_passthru(int fd, struct nvme_passthru_cmd *cmd)
a385ba
+{
a385ba
+	return nvme_submit_passthru(fd, NVME_IOCTL_IO_CMD, cmd);
a385ba
+}
a385ba
+
a385ba
+/* Build a passthru command from individual fields and submit it. */
a385ba
+/* On success (return 0) the command result is stored in *result if non-NULL. */
a385ba
+static int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode,
a385ba
+		  __u8 flags, __u16 rsvd,
a385ba
+		  __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10, __u32 cdw11,
a385ba
+		  __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32 cdw15,
a385ba
+		  __u32 data_len, void *data, __u32 metadata_len,
a385ba
+		  void *metadata, __u32 timeout_ms, __u32 *result)
a385ba
+{
a385ba
+	struct nvme_passthru_cmd cmd = { 0 };
a385ba
+	int status;
a385ba
+
a385ba
+	cmd.opcode = opcode;
a385ba
+	cmd.flags = flags;
a385ba
+	cmd.rsvd1 = rsvd;
a385ba
+	cmd.nsid = nsid;
a385ba
+	cmd.cdw2 = cdw2;
a385ba
+	cmd.cdw3 = cdw3;
a385ba
+	cmd.metadata = (__u64)(uintptr_t) metadata;
a385ba
+	cmd.addr = (__u64)(uintptr_t) data;
a385ba
+	cmd.metadata_len = metadata_len;
a385ba
+	cmd.data_len = data_len;
a385ba
+	cmd.cdw10 = cdw10;
a385ba
+	cmd.cdw11 = cdw11;
a385ba
+	cmd.cdw12 = cdw12;
a385ba
+	cmd.cdw13 = cdw13;
a385ba
+	cmd.cdw14 = cdw14;
a385ba
+	cmd.cdw15 = cdw15;
a385ba
+	cmd.timeout_ms = timeout_ms;
a385ba
+
a385ba
+	status = nvme_submit_passthru(fd, ioctl_cmd, &cmd);
a385ba
+	if (status)
a385ba
+		return status;
a385ba
+
a385ba
+	if (result)
a385ba
+		*result = cmd.result;
a385ba
+	return 0;
a385ba
+}
a385ba
+
a385ba
+/* Fill a struct nvme_user_io and submit it with NVME_IOCTL_SUBMIT_IO. */
a385ba
+static int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
a385ba
+	    __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data,
a385ba
+	    void *metadata)
a385ba
+{
a385ba
+	struct nvme_user_io io = { 0 };
a385ba
+
a385ba
+	/* flags and rsvd stay zero, as does everything not assigned below. */
a385ba
+	io.opcode = opcode;
a385ba
+	io.control = control;
a385ba
+	io.nblocks = nblocks;
a385ba
+	io.metadata = (__u64)(uintptr_t) metadata;
a385ba
+	io.addr = (__u64)(uintptr_t) data;
a385ba
+	io.slba = slba;
a385ba
+	io.dsmgmt = dsmgmt;
a385ba
+	io.reftag = reftag;
a385ba
+	io.appmask = appmask;
a385ba
+	io.apptag = apptag;
a385ba
+
a385ba
+	return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
a385ba
+}
a385ba
+
a385ba
+/* nvme_io() with the nvme_cmd_read opcode. */
a385ba
+static int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
a385ba
+	      __u32 reftag, __u16 apptag, __u16 appmask, void *data,
a385ba
+	      void *metadata)
a385ba
+{
a385ba
+	const __u8 opcode = nvme_cmd_read;
a385ba
+
a385ba
+	return nvme_io(fd, opcode, slba, nblocks, control, dsmgmt,
a385ba
+		       reftag, apptag, appmask, data, metadata);
a385ba
+}
a385ba
+
a385ba
+/* nvme_io() with the nvme_cmd_write opcode. */
a385ba
+static int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
a385ba
+	       __u32 reftag, __u16 apptag, __u16 appmask, void *data,
a385ba
+	       void *metadata)
a385ba
+{
a385ba
+	const __u8 opcode = nvme_cmd_write;
a385ba
+
a385ba
+	return nvme_io(fd, opcode, slba, nblocks, control, dsmgmt,
a385ba
+		       reftag, apptag, appmask, data, metadata);
a385ba
+}
a385ba
+
a385ba
+/* nvme_io() with the nvme_cmd_compare opcode. */
a385ba
+static int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
a385ba
+		 __u32 reftag, __u16 apptag, __u16 appmask, void *data,
a385ba
+		 void *metadata)
a385ba
+{
a385ba
+	const __u8 opcode = nvme_cmd_compare;
a385ba
+
a385ba
+	return nvme_io(fd, opcode, slba, nblocks, control, dsmgmt,
a385ba
+		       reftag, apptag, appmask, data, metadata);
a385ba
+}
a385ba
+
a385ba
+/* nvme_passthru() over NVME_IOCTL_IO_CMD; the command result is discarded. */
a385ba
+static int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd,
a385ba
+		     __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10,
a385ba
+		     __u32 cdw11, __u32 cdw12, __u32 cdw13, __u32 cdw14,
a385ba
+		     __u32 cdw15, __u32 data_len, void *data,
a385ba
+		     __u32 metadata_len, void *metadata, __u32 timeout_ms)
a385ba
+{
a385ba
+	__u32 *const no_result = NULL;
a385ba
+
a385ba
+	return nvme_passthru(fd, NVME_IOCTL_IO_CMD, opcode, flags, rsvd, nsid,
a385ba
+			     cdw2, cdw3, cdw10, cdw11, cdw12, cdw13, cdw14,
a385ba
+			     cdw15, data_len, data, metadata_len, metadata,
a385ba
+			     timeout_ms, no_result);
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_cmd_write_zeroes I/O command for nlb blocks starting at slba. */
a385ba
+/* Returns the result of the NVME_IOCTL_IO_CMD ioctl. */
a385ba
+static int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb,
a385ba
+		     __u16 control, __u32 reftag, __u16 apptag, __u16 appmask)
a385ba
+{
a385ba
+	struct nvme_passthru_cmd cmd = {
a385ba
+		.opcode		= nvme_cmd_write_zeroes,
a385ba
+		.nsid		= nsid,
a385ba
+		.cdw10		= slba & 0xffffffff,
a385ba
+		.cdw11		= slba >> 32,
a385ba
+		/* Widen before shifting: a __u16 promotes to int, and moving */
a385ba
+		/* bit 15 into the sign bit of an int is undefined behaviour. */
a385ba
+		.cdw12		= nlb | ((__u32)control << 16),
a385ba
+		.cdw14		= reftag,
a385ba
+		.cdw15		= apptag | ((__u32)appmask << 16),
a385ba
+	};
a385ba
+
a385ba
+	return nvme_submit_io_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_cmd_write_uncor I/O command for nlb blocks starting at slba. */
a385ba
+static int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb)
a385ba
+{
a385ba
+	struct nvme_passthru_cmd cmd = { 0 };
a385ba
+
a385ba
+	cmd.opcode = nvme_cmd_write_uncor;
a385ba
+	cmd.nsid = nsid;
a385ba
+	/* The 64-bit start LBA is split across cdw10 (low) and cdw11 (high). */
a385ba
+	cmd.cdw10 = slba & 0xffffffff;
a385ba
+	cmd.cdw11 = slba >> 32;
a385ba
+	cmd.cdw12 = nlb;
a385ba
+
a385ba
+	return nvme_submit_io_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_cmd_flush I/O command for the given namespace id. */
a385ba
+static int nvme_flush(int fd, __u32 nsid)
a385ba
+{
a385ba
+	struct nvme_passthru_cmd cmd = { 0 };
a385ba
+
a385ba
+	cmd.opcode = nvme_cmd_flush;
a385ba
+	cmd.nsid = nsid;
a385ba
+
a385ba
+	return nvme_submit_io_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_cmd_dsm I/O command carrying nr_ranges entries from dsm. */
a385ba
+static int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm,
a385ba
+	     __u16 nr_ranges)
a385ba
+{
a385ba
+	struct nvme_passthru_cmd cmd = { 0 };
a385ba
+
a385ba
+	cmd.opcode = nvme_cmd_dsm;
a385ba
+	cmd.nsid = nsid;
a385ba
+	cmd.addr = (__u64)(uintptr_t) dsm;
a385ba
+	cmd.data_len = nr_ranges * sizeof(*dsm);
a385ba
+	/* cdw10 carries the range count minus one. */
a385ba
+	cmd.cdw10 = nr_ranges - 1;
a385ba
+	cmd.cdw11 = cdw11;
a385ba
+
a385ba
+	return nvme_submit_io_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Allocate and fill an array of nr_ranges DSM range entries. */
a385ba
+/* Returns the malloc()ed array, or NULL after reporting an allocation failure. */
a385ba
+static struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs, __u32 *llbas,
a385ba
+					    __u64 *slbas, __u16 nr_ranges)
a385ba
+{
a385ba
+	struct nvme_dsm_range *ranges;
a385ba
+	struct nvme_dsm_range *entry;
a385ba
+	int idx;
a385ba
+
a385ba
+	ranges = malloc(nr_ranges * sizeof(*ranges));
a385ba
+	if (ranges == NULL) {
a385ba
+		fprintf(stderr, "malloc: %s\n", strerror(errno));
a385ba
+		return NULL;
a385ba
+	}
a385ba
+
a385ba
+	/* Each field is converted with the cpu_to_le helpers before storing. */
a385ba
+	for (idx = 0; idx < nr_ranges; idx++) {
a385ba
+		entry = &ranges[idx];
a385ba
+		entry->cattr = cpu_to_le32(ctx_attrs[idx]);
a385ba
+		entry->nlb = cpu_to_le32(llbas[idx]);
a385ba
+		entry->slba = cpu_to_le64(slbas[idx]);
a385ba
+	}
a385ba
+	return ranges;
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_cmd_resv_acquire command with crkey/nrkey as its payload. */
a385ba
+static int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa,
a385ba
+		      bool iekey, __u64 crkey, __u64 nrkey)
a385ba
+{
a385ba
+	__le64 payload[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) };
a385ba
+	struct nvme_passthru_cmd cmd = { 0 };
a385ba
+	__u32 cdw10 = racqa & 0x7;
a385ba
+
a385ba
+	/* cdw10: action in bits 2:0, iekey in bit 3, rtype from bit 8 up. */
a385ba
+	if (iekey)
a385ba
+		cdw10 |= 1 << 3;
a385ba
+	cdw10 |= rtype << 8;
a385ba
+
a385ba
+	cmd.opcode = nvme_cmd_resv_acquire;
a385ba
+	cmd.nsid = nsid;
a385ba
+	cmd.cdw10 = cdw10;
a385ba
+	cmd.addr = (__u64)(uintptr_t) (payload);
a385ba
+	cmd.data_len = sizeof(payload);
a385ba
+
a385ba
+	return nvme_submit_io_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_cmd_resv_register command with crkey/nrkey as its payload. */
a385ba
+/* Returns the result of the NVME_IOCTL_IO_CMD ioctl. */
a385ba
+static int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl,
a385ba
+		       bool iekey, __u64 crkey, __u64 nrkey)
a385ba
+{
a385ba
+	__le64 payload[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) };
a385ba
+	/* cptpl lands in bits 31:30; shift it as __u32 because a promoted */
a385ba
+	/* int overflows (undefined behaviour) for any cptpl value >= 2. */
a385ba
+	__u32 cdw10 = (rrega & 0x7) | (iekey ? 1 << 3 : 0) |
a385ba
+		      ((__u32)cptpl << 30);
a385ba
+
a385ba
+	struct nvme_passthru_cmd cmd = {
a385ba
+		.opcode		= nvme_cmd_resv_register,
a385ba
+		.nsid		= nsid,
a385ba
+		.cdw10		= cdw10,
a385ba
+		.addr		= (__u64)(uintptr_t) (payload),
a385ba
+		.data_len	= sizeof(payload),
a385ba
+	};
a385ba
+
a385ba
+	return nvme_submit_io_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_cmd_resv_release command with crkey as its payload. */
a385ba
+static int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela,
a385ba
+		      bool iekey, __u64 crkey)
a385ba
+{
a385ba
+	__le64 payload[1] = { cpu_to_le64(crkey) };
a385ba
+	struct nvme_passthru_cmd cmd = { 0 };
a385ba
+	__u32 cdw10 = rrela & 0x7;
a385ba
+
a385ba
+	/* cdw10: action in bits 2:0, iekey in bit 3, rtype from bit 8 up. */
a385ba
+	if (iekey)
a385ba
+		cdw10 |= 1 << 3;
a385ba
+	cdw10 |= rtype << 8;
a385ba
+
a385ba
+	cmd.opcode = nvme_cmd_resv_release;
a385ba
+	cmd.nsid = nsid;
a385ba
+	cmd.cdw10 = cdw10;
a385ba
+	cmd.addr = (__u64)(uintptr_t) (payload);
a385ba
+	cmd.data_len = sizeof(payload);
a385ba
+
a385ba
+	return nvme_submit_io_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_cmd_resv_report command that reads into data. */
a385ba
+static int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data)
a385ba
+{
a385ba
+	/* numd counts dwords minus one, so the byte length is (numd + 1) * 4. */
a385ba
+	const __u32 byte_len = (numd + 1) << 2;
a385ba
+	struct nvme_passthru_cmd cmd = { 0 };
a385ba
+
a385ba
+	cmd.opcode = nvme_cmd_resv_report;
a385ba
+	cmd.nsid = nsid;
a385ba
+	cmd.cdw10 = numd;
a385ba
+	cmd.cdw11 = cdw11;
a385ba
+	cmd.addr = (__u64)(uintptr_t) data;
a385ba
+	cmd.data_len = byte_len;
a385ba
+
a385ba
+	return nvme_submit_io_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_admin_identify command with caller-supplied cdw10 and cdw11. */
a385ba
+/* data receives NVME_IDENTIFY_DATA_SIZE bytes. */
a385ba
+static int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = { 0 };
a385ba
+
a385ba
+	cmd.opcode = nvme_admin_identify;
a385ba
+	cmd.nsid = nsid;
a385ba
+	cmd.addr = (__u64)(uintptr_t) data;
a385ba
+	cmd.data_len = NVME_IDENTIFY_DATA_SIZE;
a385ba
+	cmd.cdw10 = cdw10;
a385ba
+	cmd.cdw11 = cdw11;
a385ba
+
a385ba
+	return nvme_submit_admin_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* nvme_identify13() with cdw11 fixed to zero. */
a385ba
+static int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data)
a385ba
+{
a385ba
+	const __u32 cdw11 = 0;
a385ba
+
a385ba
+	return nvme_identify13(fd, nsid, cdw10, cdw11, data);
a385ba
+}
a385ba
+
a385ba
+/* nvme_identify() with nsid 0 and cdw10 set to 1. */
a385ba
+static int nvme_identify_ctrl(int fd, void *data)
a385ba
+{
a385ba
+	const __u32 nsid = 0;
a385ba
+	const __u32 cdw10 = 1;
a385ba
+
a385ba
+	return nvme_identify(fd, nsid, cdw10, data);
a385ba
+}
a385ba
+
a385ba
+/* Identify a namespace; 'present' selects NVME_ID_CNS_NS_PRESENT over NVME_ID_CNS_NS. */
a385ba
+static int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data)
a385ba
+{
a385ba
+	int cns;
a385ba
+
a385ba
+	if (present)
a385ba
+		cns = NVME_ID_CNS_NS_PRESENT;
a385ba
+	else
a385ba
+		cns = NVME_ID_CNS_NS;
a385ba
+
a385ba
+	return nvme_identify(fd, nsid, cns, data);
a385ba
+}
a385ba
+
a385ba
+/* Identify a namespace list; 'all' selects the present list over the active list. */
a385ba
+static int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data)
a385ba
+{
a385ba
+	int cns;
a385ba
+
a385ba
+	if (all)
a385ba
+		cns = NVME_ID_CNS_NS_PRESENT_LIST;
a385ba
+	else
a385ba
+		cns = NVME_ID_CNS_NS_ACTIVE_LIST;
a385ba
+
a385ba
+	return nvme_identify(fd, nsid, cns, data);
a385ba
+}
a385ba
+
a385ba
+/* Identify a controller list, starting from controller id cntid. */
a385ba
+/* A non-zero nsid selects NVME_ID_CNS_CTRL_NS_LIST, else NVME_ID_CNS_CTRL_LIST. */
a385ba
+static int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data)
a385ba
+{
a385ba
+	int cns = nsid ? NVME_ID_CNS_CTRL_NS_LIST : NVME_ID_CNS_CTRL_LIST;
a385ba
+
a385ba
+	/* Widen cntid first: as a promoted int, ids >= 0x8000 would shift */
a385ba
+	/* into the sign bit, which is undefined behaviour. */
a385ba
+	return nvme_identify(fd, nsid, ((__u32)cntid << 16) | cns, data);
a385ba
+}
a385ba
+
a385ba
+/* nvme_identify() with cdw10 set to NVME_ID_CNS_NS_DESC_LIST. */
a385ba
+static int nvme_identify_ns_descs(int fd, __u32 nsid, void *data)
a385ba
+{
a385ba
+	const __u32 cdw10 = NVME_ID_CNS_NS_DESC_LIST;
a385ba
+
a385ba
+	return nvme_identify(fd, nsid, cdw10, data);
a385ba
+}
a385ba
+
a385ba
+/* nvme_identify13() with NVME_ID_CNS_NVMSET_LIST in cdw10 and nvmset_id in cdw11. */
a385ba
+static int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data)
a385ba
+{
a385ba
+	const __u32 nsid = 0;
a385ba
+	const __u32 cdw10 = NVME_ID_CNS_NVMSET_LIST;
a385ba
+
a385ba
+	return nvme_identify13(fd, nsid, cdw10, nvmset_id, data);
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_admin_get_log_page command that reads data_len bytes into data. */
a385ba
+/* lsp, lpo, lsi and rae are packed into cdw10..cdw13 as shown below. */
a385ba
+/* Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl. */
a385ba
+static int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo,
a385ba
+                 __u16 lsi, bool rae, __u32 data_len, void *data)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = {
a385ba
+		.opcode		= nvme_admin_get_log_page,
a385ba
+		.nsid		= nsid,
a385ba
+		.addr		= (__u64)(uintptr_t) data,
a385ba
+		.data_len	= data_len,
a385ba
+	};
a385ba
+	/* Dword count minus one, split into upper and lower 16-bit halves. */
a385ba
+	__u32 numd = (data_len >> 2) - 1;
a385ba
+	__u16 numdu = numd >> 16, numdl = numd & 0xffff;
a385ba
+
a385ba
+	/* The 16-bit fields are widened to __u32 before shifting by 16: as */
a385ba
+	/* promoted ints, values with bit 15 set would overflow (undefined). */
a385ba
+	cmd.cdw10 = log_id | ((__u32)numdl << 16) | (rae ? 1 << 15 : 0);
a385ba
+	if (lsp)
a385ba
+		cmd.cdw10 |= lsp << 8;
a385ba
+
a385ba
+	cmd.cdw11 = numdu | ((__u32)lsi << 16);
a385ba
+	/* The 64-bit offset is split across cdw12 (low) and cdw13 (high). */
a385ba
+	cmd.cdw12 = lpo;
a385ba
+	cmd.cdw13 = (lpo >> 32);
a385ba
+
a385ba
+	return nvme_submit_admin_passthru(fd, &cmd);
a385ba
+
a385ba
+}
a385ba
+
a385ba
+/* Read a log page of data_len bytes into data, in chunks of at most 4096 bytes. */
a385ba
+/* Stops at and returns the first non-zero nvme_get_log13() result; 0 otherwise. */
a385ba
+static int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae,
a385ba
+		 __u32 data_len, void *data)
a385ba
+{
a385ba
+	unsigned char *cursor = data;
a385ba
+	__u32 done = 0;
a385ba
+
a385ba
+	/* 4k is the smallest possible transfer unit, so limiting each */
a385ba
+	/* transfer to 4k avoids checking the controller's MDTS value. */
a385ba
+	do {
a385ba
+		__u32 chunk = data_len - done;
a385ba
+		int ret;
a385ba
+
a385ba
+		if (chunk > 4096)
a385ba
+			chunk = 4096;
a385ba
+
a385ba
+		/* 'done' doubles as the log page offset of this chunk. */
a385ba
+		ret = nvme_get_log13(fd, nsid, log_id, NVME_NO_LOG_LSP,
a385ba
+				     done, 0, rae, chunk, cursor);
a385ba
+		if (ret)
a385ba
+			return ret;
a385ba
+
a385ba
+		done += chunk;
a385ba
+		cursor += chunk;
a385ba
+	} while (done < data_len);
a385ba
+
a385ba
+	return 0;
a385ba
+}
a385ba
+
a385ba
+/* Read one piece of a telemetry log page into lp. */
a385ba
+/* ctrl_init selects the controller log; otherwise the host log is read, */
a385ba
+/* with NVME_TELEM_LSP_CREATE as the lsp value when generate_report is set. */
a385ba
+static int nvme_get_telemetry_log(int fd, void *lp, int generate_report,
a385ba
+			   int ctrl_init, size_t log_page_size, __u64 offset)
a385ba
+{
a385ba
+	__u8 log_id = NVME_LOG_TELEMETRY_HOST;
a385ba
+	__u8 lsp = NVME_NO_LOG_LSP;
a385ba
+
a385ba
+	if (ctrl_init)
a385ba
+		log_id = NVME_LOG_TELEMETRY_CTRL;
a385ba
+	else if (generate_report)
a385ba
+		lsp = NVME_TELEM_LSP_CREATE;
a385ba
+
a385ba
+	return nvme_get_log13(fd, NVME_NSID_ALL, log_id, lsp, offset,
a385ba
+			      0, 1, log_page_size, lp);
a385ba
+}
a385ba
+
a385ba
+/* Read the NVME_LOG_FW_SLOT log page into *fw_log (rae set). */
a385ba
+static int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log)
a385ba
+{
a385ba
+	const __u32 len = sizeof(*fw_log);
a385ba
+
a385ba
+	return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_FW_SLOT, true,
a385ba
+			len, fw_log);
a385ba
+}
a385ba
+
a385ba
+/* Read the NVME_LOG_CHANGED_NS log page into changed_ns_list_log->log (rae set). */
a385ba
+static int nvme_changed_ns_list_log(int fd, struct nvme_changed_ns_list_log *changed_ns_list_log)
a385ba
+{
a385ba
+	const __u32 len = sizeof(changed_ns_list_log->log);
a385ba
+
a385ba
+	return nvme_get_log(fd, 0, NVME_LOG_CHANGED_NS, true,
a385ba
+			len, changed_ns_list_log->log);
a385ba
+}
a385ba
+
a385ba
+/* Read 'entries' records of the NVME_LOG_ERROR log page into err_log. */
a385ba
+static int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log)
a385ba
+{
a385ba
+	const __u32 len = entries * sizeof(*err_log);
a385ba
+
a385ba
+	return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_ERROR, false,
a385ba
+			len, err_log);
a385ba
+}
a385ba
+
a385ba
+/* Read the NVME_LOG_ENDURANCE_GROUP log page, passing group_id as the lsi value. */
a385ba
+static int nvme_endurance_log(int fd, __u16 group_id, struct nvme_endurance_group_log *endurance_log)
a385ba
+{
a385ba
+	const __u32 len = sizeof(*endurance_log);
a385ba
+
a385ba
+	return nvme_get_log13(fd, 0, NVME_LOG_ENDURANCE_GROUP, 0, 0, group_id, 0,
a385ba
+			len, endurance_log);
a385ba
+}
a385ba
+
a385ba
+/* Read the NVME_LOG_SMART log page for nsid into *smart_log. */
a385ba
+static int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log)
a385ba
+{
a385ba
+	const __u32 len = sizeof(*smart_log);
a385ba
+
a385ba
+	return nvme_get_log(fd, nsid, NVME_LOG_SMART, false,
a385ba
+			len, smart_log);
a385ba
+}
a385ba
+
a385ba
+/* Read the NVME_LOG_ANA log page in a single transfer; rgo is passed as the lsp value. */
a385ba
+static int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo)
a385ba
+{
a385ba
+	const __u64 log_page_offset = 0;
a385ba
+	const __u16 lsi = 0;
a385ba
+
a385ba
+	return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_ANA, rgo,
a385ba
+			log_page_offset, lsi, true, ana_log_len, ana_log);
a385ba
+}
a385ba
+
a385ba
+/* Read the NVME_LOG_DEVICE_SELF_TEST log page into *self_test_log. */
a385ba
+static int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log)
a385ba
+{
a385ba
+	const __u32 len = sizeof(*self_test_log);
a385ba
+
a385ba
+	return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_DEVICE_SELF_TEST, false,
a385ba
+		len, self_test_log);
a385ba
+}
a385ba
+
a385ba
+/* Read the NVME_LOG_CMD_EFFECTS log page into *effects_log. */
a385ba
+static int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log)
a385ba
+{
a385ba
+	const __u32 len = sizeof(*effects_log);
a385ba
+
a385ba
+	return nvme_get_log(fd, 0, NVME_LOG_CMD_EFFECTS, false,
a385ba
+			len, effects_log);
a385ba
+}
a385ba
+
a385ba
+/* Read 'size' bytes of the NVME_LOG_DISC log page into log. */
a385ba
+static int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size)
a385ba
+{
a385ba
+	const __u32 nsid = 0;
a385ba
+
a385ba
+	return nvme_get_log(fd, nsid, NVME_LOG_DISC, false, size, log);
a385ba
+}
a385ba
+
a385ba
+/* Read the NVME_LOG_SANITIZE log page into *sanitize_log. */
a385ba
+static int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log)
a385ba
+{
a385ba
+	const __u32 len = sizeof(*sanitize_log);
a385ba
+
a385ba
+	return nvme_get_log(fd, 0, NVME_LOG_SANITIZE, false,
a385ba
+			len, sanitize_log);
a385ba
+}
a385ba
+
a385ba
+/* Shared body of the get/set feature admin commands; opcode selects which one. */
a385ba
+/* On success (return 0) the command result is stored in *result if non-NULL. */
a385ba
+static int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10, __u32 cdw11,
a385ba
+		 __u32 cdw12, __u32 data_len, void *data, __u32 *result)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = { 0 };
a385ba
+	int status;
a385ba
+
a385ba
+	cmd.opcode = opcode;
a385ba
+	cmd.nsid = nsid;
a385ba
+	cmd.cdw10 = cdw10;
a385ba
+	cmd.cdw11 = cdw11;
a385ba
+	cmd.cdw12 = cdw12;
a385ba
+	cmd.addr = (__u64)(uintptr_t) data;
a385ba
+	cmd.data_len = data_len;
a385ba
+
a385ba
+	status = nvme_submit_admin_passthru(fd, &cmd);
a385ba
+	if (status)
a385ba
+		return status;
a385ba
+
a385ba
+	if (result)
a385ba
+		*result = cmd.result;
a385ba
+	return 0;
a385ba
+}
a385ba
+
a385ba
+/* Issue nvme_admin_set_features for feature id fid; 'value' travels in cdw11. */
a385ba
+static int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12,
a385ba
+		     bool save, __u32 data_len, void *data, __u32 *result)
a385ba
+{
a385ba
+	__u32 cdw10 = fid;
a385ba
+
a385ba
+	/* The save request is bit 31 of cdw10. */
a385ba
+	if (save)
0fe00a
+		cdw10 |= 0x80000000;
a385ba
+
a385ba
+	return nvme_feature(fd, nvme_admin_set_features, nsid, cdw10, value,
a385ba
+			    cdw12, data_len, data, result);
a385ba
+}
a385ba
+
a385ba
+/* Issue a fabrics property get or set command, selected by fctype. */
a385ba
+/* Returns -EINVAL (errno set) for a NULL value or an unknown fctype. */
a385ba
+/* A successful get stores cmd.result in *value. */
a385ba
+static int nvme_property(int fd, __u8 fctype, __le32 off, __le64 *value, __u8 attrib)
a385ba
+{
a385ba
+	const bool is_get = fctype == nvme_fabrics_type_property_get;
a385ba
+	const bool is_set = !is_get && fctype == nvme_fabrics_type_property_set;
a385ba
+	struct nvme_admin_cmd cmd = { 0 };
a385ba
+	int status;
a385ba
+
a385ba
+	if (!value || (!is_get && !is_set)) {
a385ba
+		errno = EINVAL;
a385ba
+		return -errno;
a385ba
+	}
a385ba
+
a385ba
+	cmd.opcode = nvme_fabrics_command;
a385ba
+	cmd.cdw10 = attrib;
a385ba
+	cmd.cdw11 = off;
a385ba
+	/* The nsid field carries the fabrics command type. */
a385ba
+	cmd.nsid = fctype;
a385ba
+	if (is_set)
a385ba
+		cmd.cdw12 = *value;
a385ba
+
a385ba
+	status = nvme_submit_admin_passthru(fd, &cmd);
a385ba
+	if (status == 0 && is_get)
a385ba
+		*value = cpu_to_le64(cmd.result);
a385ba
+	return status;
a385ba
+}
a385ba
+
a385ba
+/* Read the property at 'offset' into *value and report its width via *advance. */
a385ba
+/* *advance is 8 for NVME_REG_CAP/ASQ/ACQ and 4 otherwise; it is set even */
a385ba
+/* when value is NULL, in which case -EINVAL is returned. */
a385ba
+static int get_property_helper(int fd, int offset, void *value, int *advance)
a385ba
+{
a385ba
+	const bool wide = offset == NVME_REG_CAP || offset == NVME_REG_ASQ ||
a385ba
+			  offset == NVME_REG_ACQ;
a385ba
+	__le64 raw;
a385ba
+	int status;
a385ba
+
a385ba
+	*advance = wide ? 8 : 4;
a385ba
+	if (!value)
a385ba
+		return -EINVAL;
a385ba
+
a385ba
+	status = nvme_property(fd, nvme_fabrics_type_property_get,
a385ba
+			cpu_to_le32(offset), &raw, wide);
a385ba
+	if (status)
a385ba
+		return status;
a385ba
+
a385ba
+	/* Store with the width that matches the register. */
a385ba
+	if (wide)
a385ba
+		*((uint64_t *)value) = le64_to_cpu(raw);
a385ba
+	else
a385ba
+		*((uint32_t *)value) = le32_to_cpu(raw);
a385ba
+	return 0;
a385ba
+}
a385ba
+
a385ba
+/* Read a single property into *value; the register width is not reported. */
a385ba
+static int nvme_get_property(int fd, int offset, uint64_t *value)
a385ba
+{
a385ba
+	int width_unused;
a385ba
+	int status = get_property_helper(fd, offset, value, &width_unused);
a385ba
+
a385ba
+	return status;
a385ba
+}
a385ba
+
a385ba
+/* Allocate a page-sized buffer in *pbar, fill it with 0xff, then read every */
a385ba
+/* property from NVME_REG_CAP through NVME_REG_CMBSZ into it at its offset. */
a385ba
+/* Returns 0 if at least one read succeeded, -EINVAL if none did, and */
a385ba
+/* -ENOMEM if the allocation failed. */
a385ba
+static int nvme_get_properties(int fd, void **pbar)
a385ba
+{
a385ba
+	const int size = getpagesize();
a385ba
+	int offset, advance;
a385ba
+	int ret = -EINVAL;
a385ba
+	char *bar = malloc(size);
a385ba
+
a385ba
+	*pbar = bar;
a385ba
+	if (bar == NULL) {
a385ba
+		fprintf(stderr, "malloc: %s\n", strerror(errno));
a385ba
+		return -ENOMEM;
a385ba
+	}
a385ba
+
a385ba
+	memset(bar, 0xff, size);
a385ba
+	/* The helper sets 'advance' on every call, including failed ones. */
a385ba
+	for (offset = NVME_REG_CAP; offset <= NVME_REG_CMBSZ; offset += advance) {
a385ba
+		if (get_property_helper(fd, offset, bar + offset, &advance) == 0)
a385ba
+			ret = 0;
a385ba
+	}
a385ba
+
a385ba
+	return ret;
a385ba
+}
a385ba
+
a385ba
+/* Write 'value' to the property at 'offset' with a fabrics property set command. */
a385ba
+/* Returns whatever nvme_property() returns. */
a385ba
+static int nvme_set_property(int fd, int offset, int value)
a385ba
+{
a385ba
+	__le64 val = cpu_to_le64(value);
a385ba
+	__le32 off = cpu_to_le32(offset);
a385ba
+	bool is64bit;
a385ba
+
a385ba
+	/* Classify using the host-order offset: the register constants are */
a385ba
+	/* host-order, so testing the converted 'off' never matches on a */
a385ba
+	/* big-endian host. */
a385ba
+	switch (offset) {
a385ba
+	case NVME_REG_CAP:
a385ba
+	case NVME_REG_ASQ:
a385ba
+	case NVME_REG_ACQ:
a385ba
+		is64bit = true;
a385ba
+		break;
a385ba
+	default:
a385ba
+		is64bit = false;
a385ba
+	}
a385ba
+
a385ba
+	return nvme_property(fd, nvme_fabrics_type_property_set,
a385ba
+			off, &val, is64bit ? 1: 0);
a385ba
+}
a385ba
+
a385ba
+/* Issue nvme_admin_get_features for feature id fid with select field sel. */
a385ba
+static int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel, __u32 cdw11,
a385ba
+		     __u32 data_len, void *data, __u32 *result)
a385ba
+{
a385ba
+	const __u32 cdw12 = 0;
a385ba
+	__u32 cdw10 = fid;
a385ba
+
a385ba
+	/* sel sits directly above the 8-bit feature id. */
a385ba
+	cdw10 |= sel << 8;
a385ba
+
a385ba
+	return nvme_feature(fd, nvme_admin_get_features, nsid, cdw10, cdw11,
a385ba
+			    cdw12, data_len, data, result);
a385ba
+}
a385ba
+
a385ba
+/* Submit an nvme_admin_format_nvm command with the given timeout. */
a385ba
+static int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi,
a385ba
+		__u8 pil, __u8 ms, __u32 timeout)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = { 0 };
a385ba
+	__u32 cdw10 = lbaf;
a385ba
+
a385ba
+	/* Pack the remaining format options above the LBA format index. */
a385ba
+	cdw10 |= ms << 4;
a385ba
+	cdw10 |= pi << 5;
a385ba
+	cdw10 |= pil << 8;
a385ba
+	cdw10 |= ses << 9;
a385ba
+
a385ba
+	cmd.opcode = nvme_admin_format_nvm;
a385ba
+	cmd.nsid = nsid;
a385ba
+	cmd.cdw10 = cdw10;
a385ba
+	cmd.timeout_ms = timeout;
a385ba
+
a385ba
+	return nvme_submit_admin_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_ns_mgmt with cdw10 = 0 and a struct nvme_id_ns payload. */
a385ba
+/* On success (return 0) the command result is stored in *result if non-NULL. */
a385ba
+static int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas,
a385ba
+		   __u8 dps, __u8 nmic, __u32 *result)
a385ba
+{
a385ba
+	struct nvme_id_ns ns = {
a385ba
+		.nsze		= cpu_to_le64(nsze),
a385ba
+		.ncap		= cpu_to_le64(ncap),
a385ba
+		.flbas		= flbas,
a385ba
+		.dps		= dps,
a385ba
+		.nmic		= nmic,
a385ba
+	};
a385ba
+	struct nvme_admin_cmd cmd = { 0 };
a385ba
+	int status;
a385ba
+
a385ba
+	cmd.opcode = nvme_admin_ns_mgmt;
a385ba
+	cmd.addr = (__u64)(uintptr_t) ((void *)&ns);
a385ba
+	cmd.data_len = 0x1000;
a385ba
+
a385ba
+	status = nvme_submit_admin_passthru(fd, &cmd);
a385ba
+	if (status)
a385ba
+		return status;
a385ba
+
a385ba
+	if (result)
a385ba
+		*result = cmd.result;
a385ba
+	return 0;
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_ns_mgmt with cdw10 = 1 for the given namespace id. */
a385ba
+static int nvme_ns_delete(int fd, __u32 nsid)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = { 0 };
a385ba
+
a385ba
+	cmd.opcode = nvme_admin_ns_mgmt;
a385ba
+	cmd.nsid = nsid;
a385ba
+	cmd.cdw10 = 1;
a385ba
+
a385ba
+	return nvme_submit_admin_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_ns_attach for nsid with a list of num_ctrls controller ids. */
a385ba
+/* 'attach' selects cdw10 = 0, otherwise cdw10 = 1 is sent. */
a385ba
+/* Returns -EINVAL (errno set) when the list does not fit the 4 KiB payload, */
a385ba
+/* otherwise the result of the NVME_IOCTL_ADMIN_CMD ioctl. */
a385ba
+static int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist,
a385ba
+		       bool attach)
a385ba
+{
a385ba
+	int i;
a385ba
+	__u8 buf[0x1000];
a385ba
+	/* One 16-bit slot holds the count; the rest hold identifiers. */
a385ba
+	const size_t max_ctrls = sizeof(buf) / sizeof(__le16) - 1;
a385ba
+	struct nvme_controller_list *cntlist =
a385ba
+					(struct nvme_controller_list *)buf;
a385ba
+	struct nvme_admin_cmd cmd = {
a385ba
+		.opcode		= nvme_admin_ns_attach,
a385ba
+		.nsid		= nsid,
a385ba
+		.addr		= (__u64)(uintptr_t) cntlist,
a385ba
+		.cdw10		= attach ? 0 : 1,
a385ba
+		.data_len	= 0x1000,
a385ba
+	};
a385ba
+
a385ba
+	/* Writing more identifiers than fit would run past the stack buffer. */
a385ba
+	if (num_ctrls > max_ctrls) {
a385ba
+		errno = EINVAL;
a385ba
+		return -errno;
a385ba
+	}
a385ba
+
a385ba
+	memset(buf, 0, sizeof(buf));
a385ba
+	cntlist->num = cpu_to_le16(num_ctrls);
a385ba
+	for (i = 0; i < num_ctrls; i++)
a385ba
+		cntlist->identifier[i] = cpu_to_le16(ctrlist[i]);
a385ba
+
a385ba
+	return nvme_submit_admin_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* nvme_ns_attachment() with attach set to true. */
a385ba
+static int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist)
a385ba
+{
a385ba
+	const bool attach = true;
a385ba
+
a385ba
+	return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, attach);
a385ba
+}
a385ba
+
a385ba
+/* nvme_ns_attachment() with attach set to false. */
a385ba
+static int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist)
a385ba
+{
a385ba
+	const bool attach = false;
a385ba
+
a385ba
+	return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, attach);
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_download_fw for data_len bytes of data at byte 'offset'. */
a385ba
+static int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = { 0 };
a385ba
+
a385ba
+	cmd.opcode = nvme_admin_download_fw;
a385ba
+	cmd.addr = (__u64)(uintptr_t) data;
a385ba
+	cmd.data_len = data_len;
a385ba
+	/* cdw10 is the dword count minus one; cdw11 is the offset in dwords. */
a385ba
+	cmd.cdw10 = (data_len >> 2) - 1;
a385ba
+	cmd.cdw11 = offset >> 2;
a385ba
+
a385ba
+	return nvme_submit_admin_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_activate_fw with slot, action and bpid packed into cdw10. */
a385ba
+/* Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl. */
a385ba
+static int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = {
a385ba
+		.opcode		= nvme_admin_activate_fw,
a385ba
+		/* bpid occupies bit 31: shift it as __u32, since shifting */
a385ba
+		/* a promoted int into the sign bit is undefined behaviour. */
a385ba
+		.cdw10		= ((__u32)bpid << 31) | (action << 3) | slot,
a385ba
+	};
a385ba
+
a385ba
+	return nvme_submit_admin_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_security_send with data_len bytes taken from data. */
a385ba
+/* On success (return 0) the command result is stored in *result if non-NULL. */
a385ba
+static int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
a385ba
+		  __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = {
a385ba
+		.opcode		= nvme_admin_security_send,
a385ba
+		.addr		= (__u64)(uintptr_t) data,
a385ba
+		.data_len	= data_len,
a385ba
+		.nsid		= nsid,
a385ba
+		/* secp fills bits 31:24: shift it as __u32, because a */
a385ba
+		/* promoted int overflows (undefined) for secp >= 0x80. */
a385ba
+		.cdw10		= (__u32)secp << 24 | spsp << 8 | nssf,
a385ba
+		.cdw11		= tl,
a385ba
+	};
a385ba
+	int err;
a385ba
+
a385ba
+	err = nvme_submit_admin_passthru(fd, &cmd);
a385ba
+	if (!err && result)
a385ba
+		*result = cmd.result;
a385ba
+	return err;
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_security_recv, reading up to data_len bytes into data. */
a385ba
+/* On success (return 0) the command result is stored in *result if non-NULL. */
a385ba
+static int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
a385ba
+		  __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = {
a385ba
+		.opcode		= nvme_admin_security_recv,
a385ba
+		.nsid		= nsid,
a385ba
+		/* secp fills bits 31:24: shift it as __u32, because a */
a385ba
+		/* promoted int overflows (undefined) for secp >= 0x80. */
a385ba
+		.cdw10		= (__u32)secp << 24 | spsp << 8 | nssf,
a385ba
+		.cdw11		= al,
a385ba
+		.addr		= (__u64)(uintptr_t) data,
a385ba
+		.data_len	= data_len,
a385ba
+	};
a385ba
+	int err;
a385ba
+
a385ba
+	err = nvme_submit_admin_passthru(fd, &cmd);
a385ba
+	if (!err && result)
a385ba
+		*result = cmd.result;
a385ba
+	return err;
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_directive_send with data_len bytes taken from data. */
a385ba
+/* On success (return 0) the command result is stored in *result if non-NULL. */
a385ba
+static int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
a385ba
+                  __u32 data_len, __u32 dw12, void *data, __u32 *result)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = {
a385ba
+		.opcode		= nvme_admin_directive_send,
a385ba
+		.addr		= (__u64)(uintptr_t) data,
a385ba
+		.data_len	= data_len,
a385ba
+		.nsid		= nsid,
a385ba
+		/* Dword count minus one, or zero when no data is sent. */
a385ba
+		.cdw10		= data_len ? (data_len >> 2) - 1 : 0,
a385ba
+		/* dspec fills bits 31:16: shift it as __u32, because a */
a385ba
+		/* promoted int overflows (undefined) for dspec >= 0x8000. */
a385ba
+		.cdw11		= (__u32)dspec << 16 | dtype << 8 | doper,
a385ba
+		.cdw12		= dw12,
a385ba
+	};
a385ba
+	int err;
a385ba
+
a385ba
+	err = nvme_submit_admin_passthru(fd, &cmd);
a385ba
+	if (!err && result)
a385ba
+		*result = cmd.result;
a385ba
+	return err;
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_directive_recv, reading up to data_len bytes into data. */
a385ba
+/* On success (return 0) the command result is stored in *result if non-NULL. */
a385ba
+static int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
a385ba
+                  __u32 data_len, __u32 dw12, void *data, __u32 *result)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = {
a385ba
+		.opcode		= nvme_admin_directive_recv,
a385ba
+		.addr		= (__u64)(uintptr_t) data,
a385ba
+		.data_len	= data_len,
a385ba
+		.nsid		= nsid,
a385ba
+		/* Dword count minus one, or zero when no data is read. */
a385ba
+		.cdw10		= data_len ? (data_len >> 2) - 1 : 0,
a385ba
+		/* dspec fills bits 31:16: shift it as __u32, because a */
a385ba
+		/* promoted int overflows (undefined) for dspec >= 0x8000. */
a385ba
+		.cdw11		= (__u32)dspec << 16 | dtype << 8 | doper,
a385ba
+		.cdw12		= dw12,
a385ba
+	};
a385ba
+	int err;
a385ba
+
a385ba
+	err = nvme_submit_admin_passthru(fd, &cmd);
a385ba
+	if (!err && result)
a385ba
+		*result = cmd.result;
a385ba
+	return err;
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_sanitize_nvm; ovrpat is passed in cdw11. */
a385ba
+static int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp,
a385ba
+		  __u8 no_dealloc, __u32 ovrpat)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = { 0 };
a385ba
+	__u32 cdw10 = sanact;
a385ba
+
a385ba
+	/* Pack the sanitize options above the action code. */
a385ba
+	cdw10 |= ause << 3;
a385ba
+	cdw10 |= owpass << NVME_SANITIZE_OWPASS_SHIFT;
a385ba
+	cdw10 |= oipbp << 8;
a385ba
+	cdw10 |= no_dealloc << 9;
a385ba
+
a385ba
+	cmd.opcode = nvme_admin_sanitize_nvm;
a385ba
+	cmd.cdw10 = cdw10;
a385ba
+	cmd.cdw11 = ovrpat;
a385ba
+
a385ba
+	return nvme_submit_admin_passthru(fd, &cmd);
a385ba
+}
a385ba
+
a385ba
+/* Submit nvme_admin_dev_self_test for nsid with a caller-built cdw10. */
a385ba
+static int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10)
a385ba
+{
a385ba
+	struct nvme_admin_cmd cmd = { 0 };
a385ba
+
a385ba
+	cmd.opcode = nvme_admin_dev_self_test;
a385ba
+	cmd.nsid = nsid;
a385ba
+	cmd.cdw10 = cdw10;
a385ba
+
a385ba
+	return nvme_submit_admin_passthru(fd, &cmd);
a385ba
+}
a385ba
Index: multipath-tools-130222/libmultipath/nvme-ioctl.h
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/nvme-ioctl.h
a385ba
@@ -0,0 +1,139 @@
a385ba
+#ifndef _NVME_LIB_H
a385ba
+#define _NVME_LIB_H
a385ba
+
a385ba
+#include <linux/types.h>
a385ba
+#include <stdbool.h>
a385ba
+#include "linux/nvme_ioctl.h"
a385ba
+#include "nvme.h"
a385ba
+
a385ba
+static int nvme_get_nsid(int fd);
a385ba
+
a385ba
+/* Generic passthrough */
a385ba
+static int nvme_submit_passthru(int fd, unsigned long ioctl_cmd,
a385ba
+			 struct nvme_passthru_cmd *cmd);
a385ba
+
a385ba
+static int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode, __u8 flags,
a385ba
+		  __u16 rsvd, __u32 nsid, __u32 cdw2, __u32 cdw3,
a385ba
+		  __u32 cdw10, __u32 cdw11, __u32 cdw12,
a385ba
+		  __u32 cdw13, __u32 cdw14, __u32 cdw15,
a385ba
+		  __u32 data_len, void *data, __u32 metadata_len,
a385ba
+		  void *metadata, __u32 timeout_ms, __u32 *result);
a385ba
+
a385ba
+/* NVME_SUBMIT_IO */
a385ba
+static int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
a385ba
+	      __u32 dsmgmt, __u32 reftag, __u16 apptag,
a385ba
+	      __u16 appmask, void *data, void *metadata);
a385ba
+
a385ba
+static int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control,
a385ba
+	      __u32 dsmgmt, __u32 reftag, __u16 apptag,
a385ba
+	      __u16 appmask, void *data, void *metadata);
a385ba
+
a385ba
+static int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control,
a385ba
+	       __u32 dsmgmt, __u32 reftag, __u16 apptag,
a385ba
+	       __u16 appmask, void *data, void *metadata);
a385ba
+
a385ba
+static int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control,
a385ba
+		 __u32 dsmgmt, __u32 reftag, __u16 apptag,
a385ba
+		 __u16 appmask, void *data, void *metadata);
a385ba
+
a385ba
+/* NVME_IO_CMD */
a385ba
+static int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd,
a385ba
+		     __u32 nsid, __u32 cdw2, __u32 cdw3,
a385ba
+		     __u32 cdw10, __u32 cdw11, __u32 cdw12,
a385ba
+		     __u32 cdw13, __u32 cdw14, __u32 cdw15,
a385ba
+		     __u32 data_len, void *data, __u32 metadata_len,
a385ba
+		     void *metadata, __u32 timeout);
a385ba
+
a385ba
+static int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb,
a385ba
+		     __u16 control, __u32 reftag, __u16 apptag, __u16 appmask);
a385ba
+
a385ba
+static int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb);
a385ba
+
a385ba
+static int nvme_flush(int fd, __u32 nsid);
a385ba
+
a385ba
+static int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm,
a385ba
+	     __u16 nr_ranges);
a385ba
+static struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs,
a385ba
+					    __u32 *llbas, __u64 *slbas,
a385ba
+					    __u16 nr_ranges);
a385ba
+
a385ba
+static int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa,
a385ba
+		      bool iekey, __u64 crkey, __u64 nrkey);
a385ba
+static int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl,
a385ba
+		       bool iekey, __u64 crkey, __u64 nrkey);
a385ba
+static int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela,
a385ba
+		      bool iekey, __u64 crkey);
a385ba
+static int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data);
a385ba
+
a385ba
+static int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data);
a385ba
+static int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data);
a385ba
+static int nvme_identify_ctrl(int fd, void *data);
a385ba
+static int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data);
a385ba
+static int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data);
a385ba
+static int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data);
a385ba
+static int nvme_identify_ns_descs(int fd, __u32 nsid, void *data);
a385ba
+static int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data);
a385ba
+static int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo,
a385ba
+		   __u16 group_id, bool rae, __u32 data_len, void *data);
a385ba
+static int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae,
a385ba
+		 __u32 data_len, void *data);
a385ba
+
a385ba
+
a385ba
+static int nvme_get_telemetry_log(int fd, void *lp, int generate_report,
a385ba
+			   int ctrl_gen, size_t log_page_size, __u64 offset);
a385ba
+static int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log);
a385ba
+static int nvme_changed_ns_list_log(int fd,
a385ba
+		struct nvme_changed_ns_list_log *changed_ns_list_log);
a385ba
+static int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log);
a385ba
+static int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log);
a385ba
+static int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo);
a385ba
+static int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log);
a385ba
+static int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size);
a385ba
+static int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log);
a385ba
+static int nvme_endurance_log(int fd, __u16 group_id,
a385ba
+		       struct nvme_endurance_group_log *endurance_log);
a385ba
+
a385ba
+static int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10,
a385ba
+		 __u32 cdw11, __u32 cdw12, __u32 data_len, void *data,
a385ba
+		 __u32 *result);
a385ba
+static int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12,
a385ba
+		     bool save, __u32 data_len, void *data, __u32 *result);
a385ba
+static int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel,
a385ba
+		     __u32 cdw11, __u32 data_len, void *data, __u32 *result);
a385ba
+
a385ba
+static int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi,
a385ba
+		__u8 pil, __u8 ms, __u32 timeout);
a385ba
+
a385ba
+static int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas,
a385ba
+		   __u8 dps, __u8 nmic, __u32 *result);
a385ba
+static int nvme_ns_delete(int fd, __u32 nsid);
a385ba
+
a385ba
+static int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls,
a385ba
+		       __u16 *ctrlist, bool attach);
a385ba
+static int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist);
a385ba
+static int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist);
a385ba
+
a385ba
+static int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data);
a385ba
+static int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid);
a385ba
+
a385ba
+static int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
a385ba
+		  __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result);
a385ba
+static int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
a385ba
+		  __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result);
a385ba
+
a385ba
+static int nvme_subsystem_reset(int fd);
a385ba
+static int nvme_reset_controller(int fd);
a385ba
+static int nvme_ns_rescan(int fd);
a385ba
+
a385ba
+static int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
a385ba
+		  __u32 data_len, __u32 dw12, void *data, __u32 *result);
a385ba
+static int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
a385ba
+		  __u32 data_len, __u32 dw12, void *data, __u32 *result);
a385ba
+static int nvme_get_properties(int fd, void **pbar);
a385ba
+static int nvme_set_property(int fd, int offset, int value);
a385ba
+static int nvme_get_property(int fd, int offset, uint64_t *value);
a385ba
+static int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp,
a385ba
+		  __u8 no_dealloc, __u32 ovrpat);
a385ba
+static int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10);
a385ba
+static int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log);
a385ba
+#endif				/* _NVME_LIB_H */
a385ba
Index: multipath-tools-130222/libmultipath/nvme-lib.c
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/nvme-lib.c
a385ba
@@ -0,0 +1,49 @@
a385ba
+#include <sys/types.h>
a385ba
+/* avoid inclusion of standard API */
a385ba
+#define _NVME_LIB_C 1
a385ba
+#include "nvme-lib.h"
a385ba
+#include "nvme-ioctl.c"
a385ba
+#include "debug.h"
a385ba
+
a385ba
+int log_nvme_errcode(int err, const char *dev, const char *msg)
a385ba
+{
a385ba
+	if (err < 0)		/* failure reported through errno */
a385ba
+		condlog(3, "%s: %s: %s", dev, msg, strerror(errno));
a385ba
+	else if (err != 0)	/* positive value: NVMe status code */
a385ba
+		condlog(3, "%s: %s: NVMe status %d", dev, msg, err);
a385ba
+	return err;		/* zero logs nothing; err is handed back unchanged */
a385ba
+}
a385ba
+
a385ba
+int libmp_nvme_get_nsid(int fd)
a385ba
+{
a385ba
+	return nvme_get_nsid(fd);	/* exported name for the static helper pulled in from nvme-ioctl.c */
a385ba
+}
a385ba
+
a385ba
+int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl)
a385ba
+{
a385ba
+	return nvme_identify_ctrl(fd, ctrl);	/* typed, exported wrapper; the static helper takes void * */
a385ba
+}
a385ba
+
a385ba
+int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present,
a385ba
+			   struct nvme_id_ns *ns)
a385ba
+{
a385ba
+	return nvme_identify_ns(fd, nsid, present, ns);	/* typed, exported wrapper; the static helper takes void * */
a385ba
+}
a385ba
+
a385ba
+int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo)
a385ba
+{
a385ba
+	return nvme_ana_log(fd, ana_log, ana_log_len, rgo);	/* exported name for the static helper; arguments pass through unchanged */
a385ba
+}
a385ba
+
a385ba
+int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl)
a385ba
+{
a385ba
+	/* 1: ANA supported, 0: not supported, < 0: controller could not be identified. */
a385ba
+	struct nvme_id_ctrl c;
a385ba
+	int rc = nvme_identify_ctrl(fd, &c);
a385ba
+
a385ba
+	if (rc != 0)	/* rc > 0 is an NVMe status: c holds no valid data then */
a385ba
+		return rc < 0 ? rc : -1;
a385ba
+	if (ctrl)
a385ba
+		*ctrl = c;
a385ba
+	return c.cmic & (1 << 3) ? 1 : 0;	/* CMIC bit 3 is the ANA flag tested here */
a385ba
+}
a385ba
Index: multipath-tools-130222/libmultipath/nvme-lib.h
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/nvme-lib.h
a385ba
@@ -0,0 +1,39 @@
a385ba
+#ifndef NVME_LIB_H
a385ba
+#define NVME_LIB_H
a385ba
+
a385ba
+#include "nvme.h"
a385ba
+
a385ba
+int log_nvme_errcode(int err, const char *dev, const char *msg);
a385ba
+int libmp_nvme_get_nsid(int fd);
a385ba
+int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl);
a385ba
+int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present,
a385ba
+			   struct nvme_id_ns *ns);
a385ba
+int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo);
a385ba
+/*
a385ba
+ * Identify controller, and return true if ANA is supported
a385ba
+ * ctrl will be filled in if controller is identified, even w/o ANA
a385ba
+ * ctrl may be NULL
a385ba
+ */
a385ba
+int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl);
a385ba
+
a385ba
+#ifndef _NVME_LIB_C
a385ba
+/*
a385ba
+ * In all files except nvme-lib.c, the nvme functions can be called
a385ba
+ * by their usual name.
a385ba
+ */
a385ba
+#define nvme_get_nsid libmp_nvme_get_nsid
a385ba
+#define nvme_identify_ctrl libmp_nvme_identify_ctrl
a385ba
+#define nvme_identify_ns libmp_nvme_identify_ns
a385ba
+#define nvme_ana_log libmp_nvme_ana_log
a385ba
+/*
a385ba
+ * Undefine these to avoid clashes with libmultipath's byteorder.h
a385ba
+ */
a385ba
+#undef cpu_to_le16
a385ba
+#undef cpu_to_le32
a385ba
+#undef cpu_to_le64
a385ba
+#undef le16_to_cpu
a385ba
+#undef le32_to_cpu
a385ba
+#undef le64_to_cpu
a385ba
+#endif
a385ba
+
a385ba
+#endif /* NVME_LIB_H */
a385ba
Index: multipath-tools-130222/libmultipath/prio.h
a385ba
===================================================================
a385ba
--- multipath-tools-130222.orig/libmultipath/prio.h
a385ba
+++ multipath-tools-130222/libmultipath/prio.h
a385ba
@@ -29,6 +29,7 @@ struct path;
a385ba
 #define PRIO_RDAC "rdac"
a385ba
 #define PRIO_DATACORE "datacore"
a385ba
 #define PRIO_WEIGHTED_PATH "weightedpath"
a385ba
+#define PRIO_ANA "ana"
a385ba
 
a385ba
 /*
a385ba
  * Value used to mark the fact prio was not defined
a385ba
Index: multipath-tools-130222/libmultipath/prioritizers/Makefile
a385ba
===================================================================
a385ba
--- multipath-tools-130222.orig/libmultipath/prioritizers/Makefile
a385ba
+++ multipath-tools-130222/libmultipath/prioritizers/Makefile
a385ba
@@ -2,6 +2,7 @@
a385ba
 #
a385ba
 # Copyright (C) 2007 Christophe Varoqui, <christophe.varoqui@opensvc.com>
a385ba
 #
a385ba
+TOPDIR = ../..
a385ba
 include ../../Makefile.inc
a385ba
 
a385ba
 LIBS = \
a385ba
@@ -15,9 +16,10 @@ LIBS = \
a385ba
 	libpriodatacore.so \
a385ba
 	libpriohds.so \
a385ba
 	libprioweightedpath.so \
a385ba
+	libprioana.so \
a385ba
 	libprioiet.so
a385ba
 
a385ba
-CFLAGS += -fPIC -I..
a385ba
+CFLAGS += -fPIC -I.. -I$(nvmedir)
a385ba
 
a385ba
 all: $(LIBS)
a385ba
 
a385ba
Index: multipath-tools-130222/libmultipath/prioritizers/ana.c
a385ba
===================================================================
a385ba
--- /dev/null
a385ba
+++ multipath-tools-130222/libmultipath/prioritizers/ana.c
a385ba
@@ -0,0 +1,236 @@
a385ba
+/*
a385ba
+ * (C) Copyright HUAWEI Technology Corp. 2017   All Rights Reserved.
a385ba
+ *
a385ba
+ * ana.c
a385ba
+ * Version 1.00
a385ba
+ *
a385ba
+ * Tool to make use of an NVMe feature called Asymmetric Namespace Access.
a385ba
+ * It determines the ANA state of a device and returns a priority value for it.
a385ba
+ *
a385ba
+ * Author(s): Cheng Jike <chengjike.cheng@huawei.com>
a385ba
+ *            Li Jie <lijie34@huawei.com>
a385ba
+ *
a385ba
+ * This file is released under the GPL version 2, or any later version.
a385ba
+ */
a385ba
+#include <stdio.h>
a385ba
+#include <sys/ioctl.h>
a385ba
+#include <sys/stat.h>
a385ba
+#include <sys/types.h>
a385ba
+#include <stdbool.h>
a385ba
+#include <libudev.h>
a385ba
+
a385ba
+#include "debug.h"
a385ba
+#include "nvme-lib.h"
a385ba
+#include "prio.h"
a385ba
+#include "util.h"
a385ba
+#include "structs.h"
a385ba
+#include "def_func.h"
a385ba
+
a385ba
+enum {	/* error codes: returned negated, and used as ana_errmsg[] indices (hence start at 1) */
a385ba
+	ANA_ERR_GETCTRL_FAILED		= 1,
a385ba
+	ANA_ERR_NOT_NVME,
a385ba
+	ANA_ERR_NOT_SUPPORTED,
a385ba
+	ANA_ERR_GETANAS_OVERFLOW,
a385ba
+	ANA_ERR_GETANAS_NOTFOUND,
a385ba
+	ANA_ERR_GETANALOG_FAILED,
a385ba
+	ANA_ERR_GETNSID_FAILED,
a385ba
+	ANA_ERR_GETNS_FAILED,
a385ba
+	ANA_ERR_NO_MEMORY,
a385ba
+	ANA_ERR_NO_INFORMATION,
a385ba
+};
a385ba
+
a385ba
+static const char *ana_errmsg[] = {	/* indexed by ANA_ERR_* value; slot 0 has no entry and is NULL */
a385ba
+	[ANA_ERR_GETCTRL_FAILED]	= "couldn't get ctrl info",
a385ba
+	[ANA_ERR_NOT_NVME]		= "not an NVMe device",
a385ba
+	[ANA_ERR_NOT_SUPPORTED]		= "ANA not supported",
a385ba
+	[ANA_ERR_GETANAS_OVERFLOW]	= "buffer overflow in ANA log",
a385ba
+	[ANA_ERR_GETANAS_NOTFOUND]	= "NSID or ANAGRPID not found",
a385ba
+	[ANA_ERR_GETANALOG_FAILED]	= "couldn't get ana log",
a385ba
+	[ANA_ERR_GETNSID_FAILED]	= "couldn't get NSID",
a385ba
+	[ANA_ERR_GETNS_FAILED]		= "couldn't get namespace info",
a385ba
+	[ANA_ERR_NO_MEMORY]		= "out of memory",
a385ba
+	[ANA_ERR_NO_INFORMATION]	= "invalid fd",
a385ba
+};
a385ba
+
a385ba
+static const char *anas_string[] = {	/* indexed by NVME_ANA_* state value; unlisted indices are NULL */
a385ba
+	[NVME_ANA_OPTIMIZED]			= "ANA Optimized State",
a385ba
+	[NVME_ANA_NONOPTIMIZED]			= "ANA Non-Optimized State",
a385ba
+	[NVME_ANA_INACCESSIBLE]			= "ANA Inaccessible State",
a385ba
+	[NVME_ANA_PERSISTENT_LOSS]		= "ANA Persistent Loss State",
a385ba
+	[NVME_ANA_CHANGE]			= "ANA Change state",
a385ba
+};
a385ba
+
a385ba
+static const char *aas_print_string(int rc)
a385ba
+{
a385ba
+	/* Only the low byte selects the state; it is never negative after masking. */
a385ba
+	unsigned int state = rc & 0xff;
a385ba
+	const char *name = state < ARRAY_SIZE(anas_string) ?
a385ba
+		anas_string[state] : NULL;
a385ba
+
a385ba
+	return name ? name : "invalid ANA state";
a385ba
+}
a385ba
+
a385ba
+static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log,
a385ba
+			 size_t ana_log_len)
a385ba
+{
a385ba
+	/* Returns the ANA state of nsid/anagrpid found in ana_log, or -ANA_ERR_*. */
a385ba
+	void *base = ana_log;
a385ba
+	struct nvme_ana_rsp_hdr *hdr = base;
a385ba
+	struct nvme_ana_group_desc *ana_desc;
a385ba
+	size_t offset = sizeof(struct nvme_ana_rsp_hdr);
a385ba
+	__u32 nr_nsids;
a385ba
+	unsigned int i, j;	/* unsigned: both are compared against unsigned counts */
a385ba
+
a385ba
+	for (i = 0; i < le16_to_cpu(hdr->ngrps); i++) {
a385ba
+		ana_desc = base + offset;
a385ba
+
a385ba
+		offset += sizeof(*ana_desc);
a385ba
+		if (offset > ana_log_len)
a385ba
+			return -ANA_ERR_GETANAS_OVERFLOW;
a385ba
+
a385ba
+		/* Compare by division: nr_nsids * 4 could wrap a 32-bit size_t. */
a385ba
+		nr_nsids = le32_to_cpu(ana_desc->nnsids);
a385ba
+		if (nr_nsids > (ana_log_len - offset) / sizeof(__le32))
a385ba
+			return -ANA_ERR_GETANAS_OVERFLOW;
a385ba
+		offset += nr_nsids * sizeof(__le32);
a385ba
+
a385ba
+		/* A listed NSID matches first; otherwise a non-zero anagrpid may match. */
a385ba
+		for (j = 0; j < nr_nsids; j++) {
a385ba
+			if (nsid == le32_to_cpu(ana_desc->nsids[j]))
a385ba
+				return ana_desc->state;
a385ba
+		}
a385ba
+
a385ba
+		if (anagrpid != 0 && anagrpid == le32_to_cpu(ana_desc->grpid))
a385ba
+			return ana_desc->state;
a385ba
+
a385ba
+	}
a385ba
+	return -ANA_ERR_GETANAS_NOTFOUND;
a385ba
+}
a385ba
+
a385ba
+int get_ana_info(struct path * pp, unsigned int timeout)
a385ba
+{
a385ba
+	/* Returns pp's ANA state (>= 0) or -ANA_ERR_*; timeout is currently unused. */
a385ba
+	int	rc;
a385ba
+	__u32 nsid;
a385ba
+	struct nvme_id_ctrl ctrl;
a385ba
+	struct nvme_id_ns ns;
a385ba
+	void *ana_log;
a385ba
+	size_t ana_log_len;
a385ba
+	bool is_anagrpid_const;
a385ba
+
a385ba
+	rc = nvme_id_ctrl_ana(pp->fd, &ctrl);
a385ba
+	if (rc < 0) {
a385ba
+		log_nvme_errcode(rc, pp->dev, "nvme_identify_ctrl");
a385ba
+		return -ANA_ERR_GETCTRL_FAILED;
a385ba
+	} else if (rc == 0)
a385ba
+		return -ANA_ERR_NOT_SUPPORTED;
a385ba
+
a385ba
+	rc = nvme_get_nsid(pp->fd);	/* stay signed: a negative error stored in __u32 is never <= 0 */
a385ba
+	if (rc <= 0) {
a385ba
+		log_nvme_errcode(rc, pp->dev, "nvme_get_nsid");
a385ba
+		return -ANA_ERR_GETNSID_FAILED;
a385ba
+	}
a385ba
+	nsid = rc;
a385ba
+	is_anagrpid_const = ctrl.anacap & (1 << 6);
a385ba
+
a385ba
+	/*
a385ba
+	 * Code copied from nvme-cli/nvme.c. We don't need to allocate an
a385ba
+	 * [nanagrpid*mnan] array of NSIDs because each NSID can occur at most
a385ba
+	 * in one ANA group.
a385ba
+	 */
a385ba
+	ana_log_len = sizeof(struct nvme_ana_rsp_hdr) +
a385ba
+		le32_to_cpu(ctrl.nanagrpid) * sizeof(struct nvme_ana_group_desc);
a385ba
+
a385ba
+	if (is_anagrpid_const) {
a385ba
+		rc = nvme_identify_ns(pp->fd, nsid, 0, &ns);
a385ba
+		if (rc) {
a385ba
+			log_nvme_errcode(rc, pp->dev, "nvme_identify_ns");
a385ba
+			return -ANA_ERR_GETNS_FAILED;
a385ba
+		}
a385ba
+	} else
a385ba
+		ana_log_len += le32_to_cpu(ctrl.mnan) * sizeof(__le32);
a385ba
+
a385ba
+	ana_log = malloc(ana_log_len);
a385ba
+	if (!ana_log)
a385ba
+		return -ANA_ERR_NO_MEMORY;
a385ba
+	pthread_cleanup_push(free, ana_log);
a385ba
+	rc = nvme_ana_log(pp->fd, ana_log, ana_log_len,
a385ba
+			  is_anagrpid_const ? NVME_ANA_LOG_RGO : 0);
a385ba
+	if (rc) {
a385ba
+		log_nvme_errcode(rc, pp->dev, "nvme_ana_log");
a385ba
+		rc = -ANA_ERR_GETANALOG_FAILED;
a385ba
+	} else
a385ba
+		rc = get_ana_state(nsid,
a385ba
+				   is_anagrpid_const ? le32_to_cpu(ns.anagrpid) : 0,
a385ba
+				   ana_log, ana_log_len);
a385ba
+	pthread_cleanup_pop(1);
a385ba
+	if (rc >= 0)
a385ba
+		condlog(3, "%s: ana state = %02x [%s]", pp->dev, rc,
a385ba
+			aas_print_string(rc));
a385ba
+	return rc;
a385ba
+}
a385ba
+
a385ba
+/*
a385ba
+ * Priorities modeled roughly after the ALUA model (alua.c/sysfs.c)
a385ba
+ * Reference: ANA Base Protocol (NVMe TP 4004a, 11/13/2018).
a385ba
+ *
a385ba
+ * Differences:
a385ba
+ *
a385ba
+ * - The ANA base spec defines no implicit or explicit (STPG) state management.
a385ba
+ *   If a state is encountered that doesn't allow normal I/O (all except
a385ba
+ *   OPTIMIZED and NON_OPTIMIZED), we can't do anything but either wait for a
a385ba
+ *   Access State Change Notice (can't do that in multipathd as we don't receive
a385ba
+ *   those), or retry commands in regular time intervals until ANATT is expired
a385ba
+ *   (not implemented). Mapping UNAVAILABLE state to ALUA STANDBY is the best we
a385ba
+ *   can currently do.
a385ba
+ *
a385ba
+ *   FIXME: Waiting for ANATT could be implemented with a "delayed failback"
a385ba
+ *   mechanism. The current "failback" method can't be used, as it would
a385ba
+ *   affect failback to every state, and here only failback to UNAVAILABLE
a385ba
+ *   should be delayed.
a385ba
+ *
a385ba
+ * - PERSISTENT_LOSS state is even below ALUA's UNAVAILABLE state.
a385ba
+ *   FIXME: According to the ANA TP, accessing paths in PERSISTENT_LOSS state
a385ba
+ *   in any way makes no sense (e.g. §8.19.6 - paths in this state shouldn't
a385ba
+ *   even be checked under "all paths down" conditions). Device mapper can,
a385ba
+ *   and will, select a PG for IO if it has non-failed paths, even if the
a385ba
+ *   PG has priority 0. We could avoid that only with an "ANA path checker".
a385ba
+ *
a385ba
+ * - ALUA has no CHANGE state. The ANA TP §8.18.3 / §8.19.4 suggests
a385ba
+ *   that CHANGE state should be treated in roughly the same way as
a385ba
+ *   INACCESSIBLE. Therefore we assign the same prio to it.
a385ba
+ *
a385ba
+ * - ALUA's LBA-dependent state has no ANA equivalent.
a385ba
+ */
a385ba
+
a385ba
+int getprio(struct path *pp, char *args)
a385ba
+{
a385ba
+	/*
a385ba
+	 * With a negative fd the lookup is skipped and an error code is used.
a385ba
+	 */
a385ba
+	int state = pp->fd < 0 ? -ANA_ERR_NO_INFORMATION :
a385ba
+		get_ana_info(pp, get_prio_timeout(60000));
a385ba
+
a385ba
+	/* Each known ANA state maps to a fixed priority value. */
a385ba
+	if (state == NVME_ANA_OPTIMIZED)
a385ba
+		return 50;
a385ba
+	if (state == NVME_ANA_NONOPTIMIZED)
a385ba
+		return 10;
a385ba
+	if (state == NVME_ANA_INACCESSIBLE || state == NVME_ANA_CHANGE)
a385ba
+		return 1;
a385ba
+	if (state == NVME_ANA_PERSISTENT_LOSS)
a385ba
+		return 0;
a385ba
+
a385ba
+	/*
a385ba
+	 * Anything else is a failure: either a negative error code that
a385ba
+	 * indexes ana_errmsg[], or a value that is not known at all.
a385ba
+	 */
a385ba
+	if (state >= 0 || -state >= ARRAY_SIZE(ana_errmsg))
a385ba
+		condlog(1, "%s: invalid ANA rc code %d", pp->dev, state);
a385ba
+	else
a385ba
+		condlog(2, "%s: ANA error: %s", pp->dev, ana_errmsg[-state]);
a385ba
+	return -1;
a385ba
+}
a385ba
+
a385ba
+declare_nop_prio(initprio)
a385ba
+declare_nop_prio(freeprio)
a385ba
Index: multipath-tools-130222/libmultipath/util.h
a385ba
===================================================================
a385ba
--- multipath-tools-130222.orig/libmultipath/util.h
a385ba
+++ multipath-tools-130222/libmultipath/util.h
a385ba
@@ -18,6 +18,8 @@ int parse_prkey(char *ptr, uint64_t *prk
a385ba
 int parse_prkey_flags(char *ptr, uint64_t *prkey, uint8_t *flags);
a385ba
 int safe_write(int fd, const void *buf, size_t count);
a385ba
 
a385ba
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
a385ba
+
a385ba
 #define safe_sprintf(var, format, args...)	\
a385ba
 	snprintf(var, sizeof(var), format, ##args) >= sizeof(var)
a385ba
 #define safe_snprintf(var, size, format, args...)      \
a385ba
Index: multipath-tools-130222/multipath/multipath.conf.5
a385ba
===================================================================
a385ba
--- multipath-tools-130222.orig/multipath/multipath.conf.5
a385ba
+++ multipath-tools-130222/multipath/multipath.conf.5
a385ba
@@ -196,6 +196,9 @@ Generate the path priority for LSI/Engen
a385ba
 Generate the path priority for Compaq/HP controller in
a385ba
 active/standby mode.
a385ba
 .TP
a385ba
+.B ana
a385ba
+Generate the path priority based on the NVMe ANA settings.
a385ba
+.TP
a385ba
 .B hds
a385ba
 Generate the path priority for Hitachi HDS Modular storage arrays.
a385ba
 .TP
a385ba
Index: multipath-tools-130222/libmultipath/propsel.c
a385ba
===================================================================
a385ba
--- multipath-tools-130222.orig/libmultipath/propsel.c
a385ba
+++ multipath-tools-130222/libmultipath/propsel.c
a385ba
@@ -5,6 +5,7 @@
a385ba
  */
a385ba
 #include <stdio.h>
a385ba
 
a385ba
+#include "nvme-lib.h"
a385ba
 #include "checkers.h"
a385ba
 #include "memory.h"
a385ba
 #include "vector.h"
a385ba
@@ -489,8 +490,13 @@ select_getuid (struct path * pp)
a385ba
 void
a385ba
 detect_prio(struct path * pp)
a385ba
 {
a385ba
-	if (detect_alua(pp))
a385ba
-		prio_get(&pp->prio, PRIO_ALUA, DEFAULT_PRIO_ARGS);
a385ba
+	if (pp->bus == SYSFS_BUS_NVME) {
a385ba
+		if (nvme_id_ctrl_ana(pp->fd, NULL) == 1)
a385ba
+			prio_get(&pp->prio, PRIO_ANA, DEFAULT_PRIO_ARGS);
a385ba
+	} else if (pp->bus == SYSFS_BUS_SCSI) {
a385ba
+		if (detect_alua(pp))
a385ba
+			prio_get(&pp->prio, PRIO_ALUA, DEFAULT_PRIO_ARGS);
a385ba
+	}
a385ba
 }
a385ba
 
a385ba
 extern int
a385ba
Index: multipath-tools-130222/libmultipath/hwtable.c
a385ba
===================================================================
a385ba
--- multipath-tools-130222.orig/libmultipath/hwtable.c
a385ba
+++ multipath-tools-130222/libmultipath/hwtable.c
a385ba
@@ -1178,6 +1178,7 @@ static struct hwentry default_hw[] = {
a385ba
 		.vendor        = "NVME",
a385ba
 		.product       = ".*",
a385ba
 		.uid_attribute = "ID_WWN",
a385ba
+		.detect_prio   = DETECT_PRIO_ON,
a385ba
 		.checker_name  = NONE,
a385ba
 	},
a385ba
 	/*