Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index d7f48c0..8056955 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -4,6 +4,7 @@
tristate "NVMe Target support"
depends on BLOCK
depends on CONFIGFS_FS
+ select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
select SGL_ALLOC
help
This enabled target side support for the NVMe protocol, that is
@@ -15,6 +16,18 @@
To configure the NVMe target you probably want to use the nvmetcli
tool from http://git.infradead.org/users/hch/nvmetcli.git.
+config NVME_TARGET_PASSTHRU
+ bool "NVMe Target Passthrough support"
+ depends on NVME_TARGET
+ depends on NVME_CORE=y || NVME_CORE=NVME_TARGET
+ help
+ This enables target side NVMe passthru controller support for the
+ NVMe Over Fabrics protocol. It allows for hosts to manage and
+ directly access an actual NVMe controller residing on the target
+ side, including executing Vendor Unique Commands.
+
+ If unsure, say N.
+
config NVME_TARGET_LOOP
tristate "NVMe loopback device support"
depends on NVME_TARGET
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index 2b33836..ebf91fc 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -11,6 +11,7 @@
nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
discovery.o io-cmd-file.o io-cmd-bdev.o
+nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
nvme-loop-y += loop.o
nvmet-rdma-y += rdma.o
nvmet-fc-y += fc.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 831a062..6a8274c 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/rculist.h>
+#include <linux/part_stat.h>
#include <generated/utsrelease.h>
#include <asm/unaligned.h>
@@ -24,6 +25,16 @@
return len;
}
+static u32 nvmet_feat_data_len(struct nvmet_req *req, u32 cdw10)
+{
+ switch (cdw10 & 0xff) {
+ case NVME_FEAT_HOST_ID:
+ return sizeof(req->sq->ctrl->hostid);
+ default:
+ return 0;
+ }
+}
+
u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
{
return le64_to_cpu(cmd->get_log_page.lpo);
@@ -31,7 +42,7 @@
static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
{
- nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
+ nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->transfer_len));
}
static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
@@ -102,11 +113,10 @@
u64 data_units_read = 0, data_units_written = 0;
struct nvmet_ns *ns;
struct nvmet_ctrl *ctrl;
+ unsigned long idx;
ctrl = req->sq->ctrl;
-
- rcu_read_lock();
- list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+ xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
/* we don't have the right data for file backed ns */
if (!ns->bdev)
continue;
@@ -116,9 +126,7 @@
host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]);
data_units_written += DIV_ROUND_UP(
part_stat_read(ns->bdev->bd_part, sectors[WRITE]), 1000);
-
}
- rcu_read_unlock();
put_unaligned_le64(host_reads, &slog->host_reads[0]);
put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
@@ -134,7 +142,7 @@
u16 status = NVME_SC_INTERNAL;
unsigned long flags;
- if (req->data_len != sizeof(*log))
+ if (req->transfer_len != sizeof(*log))
goto out;
log = kzalloc(sizeof(*log), GFP_KERNEL);
@@ -196,7 +204,7 @@
u16 status = NVME_SC_INTERNAL;
size_t len;
- if (req->data_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
+ if (req->transfer_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
goto out;
mutex_lock(&ctrl->lock);
@@ -206,7 +214,7 @@
len = ctrl->nr_changed_ns * sizeof(__le32);
status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len);
if (!status)
- status = nvmet_zero_sgl(req, len, req->data_len - len);
+ status = nvmet_zero_sgl(req, len, req->transfer_len - len);
ctrl->nr_changed_ns = 0;
nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR);
mutex_unlock(&ctrl->lock);
@@ -219,14 +227,13 @@
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
+ unsigned long idx;
u32 count = 0;
if (!(req->cmd->get_log_page.lsp & NVME_ANA_LOG_RGO)) {
- rcu_read_lock();
- list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
+ xa_for_each(&ctrl->subsys->namespaces, idx, ns)
if (ns->anagrpid == grpid)
desc->nsids[count++] = cpu_to_le32(ns->nsid);
- rcu_read_unlock();
}
desc->grpid = cpu_to_le32(grpid);
@@ -282,12 +289,56 @@
nvmet_req_complete(req, status);
}
+static void nvmet_execute_get_log_page(struct nvmet_req *req)
+{
+ if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd)))
+ return;
+
+ switch (req->cmd->get_log_page.lid) {
+ case NVME_LOG_ERROR:
+ return nvmet_execute_get_log_page_error(req);
+ case NVME_LOG_SMART:
+ return nvmet_execute_get_log_page_smart(req);
+ case NVME_LOG_FW_SLOT:
+ /*
+ * We only support a single firmware slot which always is
+ * active, so we can zero out the whole firmware slot log and
+ * still claim to fully implement this mandatory log page.
+ */
+ return nvmet_execute_get_log_page_noop(req);
+ case NVME_LOG_CHANGED_NS:
+ return nvmet_execute_get_log_changed_ns(req);
+ case NVME_LOG_CMD_EFFECTS:
+ return nvmet_execute_get_log_cmd_effects_ns(req);
+ case NVME_LOG_ANA:
+ return nvmet_execute_get_log_page_ana(req);
+ }
+ pr_debug("unhandled lid %d on qid %d\n",
+ req->cmd->get_log_page.lid, req->sq->qid);
+ req->error_loc = offsetof(struct nvme_get_log_page_command, lid);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+}
+
+static void nvmet_id_set_model_number(struct nvme_id_ctrl *id,
+ struct nvmet_subsys *subsys)
+{
+ const char *model = NVMET_DEFAULT_CTRL_MODEL;
+ struct nvmet_subsys_model *subsys_model;
+
+ rcu_read_lock();
+ subsys_model = rcu_dereference(subsys->model);
+ if (subsys_model)
+ model = subsys_model->number;
+ memcpy_and_pad(id->mn, sizeof(id->mn), model, strlen(model), ' ');
+ rcu_read_unlock();
+}
+
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
+ u32 cmd_capsule_size;
u16 status = 0;
- const char model[] = "Linux";
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
@@ -302,7 +353,7 @@
memset(id->sn, ' ', sizeof(id->sn));
bin2hex(id->sn, &ctrl->subsys->serial,
min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
- memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
+ nvmet_id_set_model_number(id, ctrl->subsys);
memcpy_and_pad(id->fr, sizeof(id->fr),
UTS_RELEASE, strlen(UTS_RELEASE), ' ');
@@ -316,8 +367,12 @@
/* we support multiple ports, multiples hosts and ANA: */
id->cmic = (1 << 0) | (1 << 1) | (1 << 3);
- /* no limit on data transfer sizes for now */
- id->mdts = 0;
+ /* Limit MDTS according to transport capability */
+ if (ctrl->ops->get_mdts)
+ id->mdts = ctrl->ops->get_mdts(ctrl);
+ else
+ id->mdts = 0;
+
id->cntlid = cpu_to_le16(ctrl->cntlid);
id->ver = cpu_to_le32(ctrl->subsys->ver);
@@ -368,16 +423,22 @@
id->awupf = 0;
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
- if (ctrl->ops->has_keyed_sgls)
+ if (ctrl->ops->flags & NVMF_KEYED_SGLS)
id->sgls |= cpu_to_le32(1 << 2);
if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20);
strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
- /* Max command capsule size is sqe + single page of in-capsule data */
- id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
- req->port->inline_data_size) / 16);
+ /*
+ * Max command capsule size is sqe + in-capsule data size.
+ * Disable in-capsule data for Metadata capable controllers.
+ */
+ cmd_capsule_size = sizeof(struct nvme_command);
+ if (!ctrl->pi_support)
+ cmd_capsule_size += req->port->inline_data_size;
+ id->ioccsz = cpu_to_le32(cmd_capsule_size / 16);
+
/* Max response capsule size is cqe */
id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
@@ -407,7 +468,7 @@
static void nvmet_execute_identify_ns(struct nvmet_req *req)
{
- struct nvmet_ns *ns;
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ns *id;
u16 status = 0;
@@ -424,16 +485,21 @@
}
/* return an all zeroed buffer if we can't find an active namespace */
- ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
- if (!ns)
+ req->ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid);
+ if (!req->ns) {
+ status = 0;
goto done;
+ }
+
+ nvmet_ns_revalidate(req->ns);
/*
* nuse = ncap = nsze isn't always true, but we have no way to find
* that out from the underlying device.
*/
- id->ncap = id->nsze = cpu_to_le64(ns->size >> ns->blksize_shift);
- switch (req->port->ana_state[ns->anagrpid]) {
+ id->ncap = id->nsze =
+ cpu_to_le64(req->ns->size >> req->ns->blksize_shift);
+ switch (req->port->ana_state[req->ns->anagrpid]) {
case NVME_ANA_INACCESSIBLE:
case NVME_ANA_PERSISTENT_LOSS:
break;
@@ -442,8 +508,8 @@
break;
}
- if (ns->bdev)
- nvmet_bdev_set_limits(ns->bdev, id);
+ if (req->ns->bdev)
+ nvmet_bdev_set_limits(req->ns->bdev, id);
/*
* We just provide a single LBA format that matches what the
@@ -457,17 +523,28 @@
* controllers, but also with any other user of the block device.
*/
id->nmic = (1 << 0);
- id->anagrpid = cpu_to_le32(ns->anagrpid);
+ id->anagrpid = cpu_to_le32(req->ns->anagrpid);
- memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid));
+ memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid));
- id->lbaf[0].ds = ns->blksize_shift;
+ id->lbaf[0].ds = req->ns->blksize_shift;
- if (ns->readonly)
+ if (ctrl->pi_support && nvmet_ns_has_pi(req->ns)) {
+ id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST |
+ NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 |
+ NVME_NS_DPC_PI_TYPE3;
+ id->mc = NVME_MC_EXTENDED_LBA;
+ id->dps = req->ns->pi_type;
+ id->flbas = NVME_NS_FLBAS_META_EXT;
+ id->lbaf[0].ms = cpu_to_le16(req->ns->metadata_size);
+ }
+
+ if (req->ns->readonly)
id->nsattr |= (1 << 0);
- nvmet_put_namespace(ns);
done:
- status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+ if (!status)
+ status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+
kfree(id);
out:
nvmet_req_complete(req, status);
@@ -478,6 +555,7 @@
static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
+ unsigned long idx;
u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
__le32 *list;
u16 status = 0;
@@ -489,15 +567,13 @@
goto out;
}
- rcu_read_lock();
- list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+ xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
if (ns->nsid <= min_nsid)
continue;
list[i++] = cpu_to_le32(ns->nsid);
if (i == buf_size / sizeof(__le32))
break;
}
- rcu_read_unlock();
status = nvmet_copy_to_sgl(req, 0, list, buf_size);
@@ -565,6 +641,28 @@
nvmet_req_complete(req, status);
}
+static void nvmet_execute_identify(struct nvmet_req *req)
+{
+ if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
+ return;
+
+ switch (req->cmd->identify.cns) {
+ case NVME_ID_CNS_NS:
+ return nvmet_execute_identify_ns(req);
+ case NVME_ID_CNS_CTRL:
+ return nvmet_execute_identify_ctrl(req);
+ case NVME_ID_CNS_NS_ACTIVE_LIST:
+ return nvmet_execute_identify_nslist(req);
+ case NVME_ID_CNS_NS_DESC_LIST:
+ return nvmet_execute_identify_desclist(req);
+ }
+
+ pr_debug("unhandled identify cns %d on qid %d\n",
+ req->cmd->identify.cns, req->sq->qid);
+ req->error_loc = offsetof(struct nvme_identify, cns);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+}
+
/*
* A "minimum viable" abort implementation: the command is mandatory in the
* spec, but we are not required to do any useful work. We couldn't really
@@ -574,6 +672,8 @@
*/
static void nvmet_execute_abort(struct nvmet_req *req)
{
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
nvmet_set_result(req, 1);
nvmet_req_complete(req, 0);
}
@@ -630,7 +730,9 @@
{
u32 val32 = le32_to_cpu(req->cmd->common.cdw11);
+ nvmet_stop_keep_alive_timer(req->sq->ctrl);
req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
+ nvmet_start_keep_alive_timer(req->sq->ctrl);
nvmet_set_result(req, req->sq->ctrl->kato);
@@ -652,14 +754,26 @@
return 0;
}
-static void nvmet_execute_set_features(struct nvmet_req *req)
+void nvmet_execute_set_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11);
u16 status = 0;
+ u16 nsqr;
+ u16 ncqr;
+
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
switch (cdw10 & 0xff) {
case NVME_FEAT_NUM_QUEUES:
+ ncqr = (cdw11 >> 16) & 0xffff;
+ nsqr = cdw11 & 0xffff;
+ if (ncqr == 0xffff || nsqr == 0xffff) {
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
nvmet_set_result(req,
(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
break;
@@ -715,12 +829,15 @@
nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled));
}
-static void nvmet_execute_get_features(struct nvmet_req *req)
+void nvmet_execute_get_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
+ if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10)))
+ return;
+
switch (cdw10 & 0xff) {
/*
* These features are mandatory in the spec, but we don't
@@ -785,6 +902,9 @@
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
+
mutex_lock(&ctrl->lock);
if (ctrl->nr_async_event_cmds >= NVMET_ASYNC_EVENTS) {
mutex_unlock(&ctrl->lock);
@@ -801,6 +921,9 @@
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
+
pr_debug("ctrl %d update keep-alive timer for %d secs\n",
ctrl->cntlid, ctrl->kato);
@@ -813,81 +936,43 @@
struct nvme_command *cmd = req->cmd;
u16 ret;
+ if (nvme_is_fabrics(cmd))
+ return nvmet_parse_fabrics_cmd(req);
+ if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
+ return nvmet_parse_discovery_cmd(req);
+
ret = nvmet_check_ctrl_status(req, cmd);
if (unlikely(ret))
return ret;
+ if (nvmet_req_passthru_ctrl(req))
+ return nvmet_parse_passthru_admin_cmd(req);
+
switch (cmd->common.opcode) {
case nvme_admin_get_log_page:
- req->data_len = nvmet_get_log_page_len(cmd);
-
- switch (cmd->get_log_page.lid) {
- case NVME_LOG_ERROR:
- req->execute = nvmet_execute_get_log_page_error;
- return 0;
- case NVME_LOG_SMART:
- req->execute = nvmet_execute_get_log_page_smart;
- return 0;
- case NVME_LOG_FW_SLOT:
- /*
- * We only support a single firmware slot which always
- * is active, so we can zero out the whole firmware slot
- * log and still claim to fully implement this mandatory
- * log page.
- */
- req->execute = nvmet_execute_get_log_page_noop;
- return 0;
- case NVME_LOG_CHANGED_NS:
- req->execute = nvmet_execute_get_log_changed_ns;
- return 0;
- case NVME_LOG_CMD_EFFECTS:
- req->execute = nvmet_execute_get_log_cmd_effects_ns;
- return 0;
- case NVME_LOG_ANA:
- req->execute = nvmet_execute_get_log_page_ana;
- return 0;
- }
- break;
+ req->execute = nvmet_execute_get_log_page;
+ return 0;
case nvme_admin_identify:
- req->data_len = NVME_IDENTIFY_DATA_SIZE;
- switch (cmd->identify.cns) {
- case NVME_ID_CNS_NS:
- req->execute = nvmet_execute_identify_ns;
- return 0;
- case NVME_ID_CNS_CTRL:
- req->execute = nvmet_execute_identify_ctrl;
- return 0;
- case NVME_ID_CNS_NS_ACTIVE_LIST:
- req->execute = nvmet_execute_identify_nslist;
- return 0;
- case NVME_ID_CNS_NS_DESC_LIST:
- req->execute = nvmet_execute_identify_desclist;
- return 0;
- }
- break;
+ req->execute = nvmet_execute_identify;
+ return 0;
case nvme_admin_abort_cmd:
req->execute = nvmet_execute_abort;
- req->data_len = 0;
return 0;
case nvme_admin_set_features:
req->execute = nvmet_execute_set_features;
- req->data_len = 0;
return 0;
case nvme_admin_get_features:
req->execute = nvmet_execute_get_features;
- req->data_len = 0;
return 0;
case nvme_admin_async_event:
req->execute = nvmet_execute_async_event;
- req->data_len = 0;
return 0;
case nvme_admin_keep_alive:
req->execute = nvmet_execute_keep_alive;
- req->data_len = 0;
return 0;
}
- pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+ pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode,
req->sq->qid);
req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 98613a4..9aed5cc 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -20,61 +20,71 @@
static LIST_HEAD(nvmet_ports_list);
struct list_head *nvmet_ports = &nvmet_ports_list;
-static const struct nvmet_transport_name {
+struct nvmet_type_name_map {
u8 type;
const char *name;
-} nvmet_transport_names[] = {
+};
+
+static struct nvmet_type_name_map nvmet_transport[] = {
{ NVMF_TRTYPE_RDMA, "rdma" },
{ NVMF_TRTYPE_FC, "fc" },
{ NVMF_TRTYPE_TCP, "tcp" },
{ NVMF_TRTYPE_LOOP, "loop" },
};
+static const struct nvmet_type_name_map nvmet_addr_family[] = {
+ { NVMF_ADDR_FAMILY_PCI, "pcie" },
+ { NVMF_ADDR_FAMILY_IP4, "ipv4" },
+ { NVMF_ADDR_FAMILY_IP6, "ipv6" },
+ { NVMF_ADDR_FAMILY_IB, "ib" },
+ { NVMF_ADDR_FAMILY_FC, "fc" },
+ { NVMF_ADDR_FAMILY_LOOP, "loop" },
+};
+
+static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller)
+{
+ if (p->enabled)
+ pr_err("Disable port '%u' before changing attribute in %s\n",
+ le16_to_cpu(p->disc_addr.portid), caller);
+ return p->enabled;
+}
+
/*
* nvmet_port Generic ConfigFS definitions.
* Used in any place in the ConfigFS tree that refers to an address.
*/
-static ssize_t nvmet_addr_adrfam_show(struct config_item *item,
- char *page)
+static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page)
{
- switch (to_nvmet_port(item)->disc_addr.adrfam) {
- case NVMF_ADDR_FAMILY_IP4:
- return sprintf(page, "ipv4\n");
- case NVMF_ADDR_FAMILY_IP6:
- return sprintf(page, "ipv6\n");
- case NVMF_ADDR_FAMILY_IB:
- return sprintf(page, "ib\n");
- case NVMF_ADDR_FAMILY_FC:
- return sprintf(page, "fc\n");
- default:
- return sprintf(page, "\n");
+ u8 adrfam = to_nvmet_port(item)->disc_addr.adrfam;
+ int i;
+
+ for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) {
+ if (nvmet_addr_family[i].type == adrfam)
+ return sprintf(page, "%s\n", nvmet_addr_family[i].name);
}
+
+ return sprintf(page, "\n");
}
static ssize_t nvmet_addr_adrfam_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
+ int i;
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
+
+ for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) {
+ if (sysfs_streq(page, nvmet_addr_family[i].name))
+ goto found;
}
- if (sysfs_streq(page, "ipv4")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP4;
- } else if (sysfs_streq(page, "ipv6")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP6;
- } else if (sysfs_streq(page, "ib")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IB;
- } else if (sysfs_streq(page, "fc")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_FC;
- } else {
- pr_err("Invalid value '%s' for adrfam\n", page);
- return -EINVAL;
- }
+ pr_err("Invalid value '%s' for adrfam\n", page);
+ return -EINVAL;
+found:
+ port->disc_addr.adrfam = nvmet_addr_family[i].type;
return count;
}
@@ -100,11 +110,9 @@
return -EINVAL;
}
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
+
port->disc_addr.portid = cpu_to_le16(portid);
return count;
}
@@ -130,11 +138,8 @@
return -EINVAL;
}
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
if (sscanf(page, "%s\n", port->disc_addr.traddr) != 1)
return -EINVAL;
@@ -143,20 +148,24 @@
CONFIGFS_ATTR(nvmet_, addr_traddr);
-static ssize_t nvmet_addr_treq_show(struct config_item *item,
- char *page)
+static const struct nvmet_type_name_map nvmet_addr_treq[] = {
+ { NVMF_TREQ_NOT_SPECIFIED, "not specified" },
+ { NVMF_TREQ_REQUIRED, "required" },
+ { NVMF_TREQ_NOT_REQUIRED, "not required" },
+};
+
+static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page)
{
- switch (to_nvmet_port(item)->disc_addr.treq &
- NVME_TREQ_SECURE_CHANNEL_MASK) {
- case NVMF_TREQ_NOT_SPECIFIED:
- return sprintf(page, "not specified\n");
- case NVMF_TREQ_REQUIRED:
- return sprintf(page, "required\n");
- case NVMF_TREQ_NOT_REQUIRED:
- return sprintf(page, "not required\n");
- default:
- return sprintf(page, "\n");
+ u8 treq = to_nvmet_port(item)->disc_addr.treq &
+ NVME_TREQ_SECURE_CHANNEL_MASK;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
+ if (treq == nvmet_addr_treq[i].type)
+ return sprintf(page, "%s\n", nvmet_addr_treq[i].name);
}
+
+ return sprintf(page, "\n");
}
static ssize_t nvmet_addr_treq_store(struct config_item *item,
@@ -164,25 +173,22 @@
{
struct nvmet_port *port = to_nvmet_port(item);
u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK;
+ int i;
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
+ if (sysfs_streq(page, nvmet_addr_treq[i].name))
+ goto found;
}
- if (sysfs_streq(page, "not specified")) {
- treq |= NVMF_TREQ_NOT_SPECIFIED;
- } else if (sysfs_streq(page, "required")) {
- treq |= NVMF_TREQ_REQUIRED;
- } else if (sysfs_streq(page, "not required")) {
- treq |= NVMF_TREQ_NOT_REQUIRED;
- } else {
- pr_err("Invalid value '%s' for treq\n", page);
- return -EINVAL;
- }
+ pr_err("Invalid value '%s' for treq\n", page);
+ return -EINVAL;
+
+found:
+ treq |= nvmet_addr_treq[i].type;
port->disc_addr.treq = treq;
-
return count;
}
@@ -206,11 +212,8 @@
pr_err("Invalid value '%s' for trsvcid\n", page);
return -EINVAL;
}
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
if (sscanf(page, "%s\n", port->disc_addr.trsvcid) != 1)
return -EINVAL;
@@ -233,11 +236,8 @@
struct nvmet_port *port = to_nvmet_port(item);
int ret;
- if (port->enabled) {
- pr_err("Cannot modify inline_data_size while port enabled\n");
- pr_err("Disable the port before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
ret = kstrtoint(page, 0, &port->inline_data_size);
if (ret) {
pr_err("Invalid value '%s' for inline_data_size\n", page);
@@ -248,16 +248,45 @@
CONFIGFS_ATTR(nvmet_, param_inline_data_size);
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static ssize_t nvmet_param_pi_enable_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+
+ return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable);
+}
+
+static ssize_t nvmet_param_pi_enable_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+ bool val;
+
+ if (strtobool(page, &val))
+ return -EINVAL;
+
+ if (port->enabled) {
+ pr_err("Disable port before setting pi_enable value.\n");
+ return -EACCES;
+ }
+
+ port->pi_enable = val;
+ return count;
+}
+
+CONFIGFS_ATTR(nvmet_, param_pi_enable);
+#endif
+
static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page)
{
struct nvmet_port *port = to_nvmet_port(item);
int i;
- for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
- if (port->disc_addr.trtype != nvmet_transport_names[i].type)
- continue;
- return sprintf(page, "%s\n", nvmet_transport_names[i].name);
+ for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) {
+ if (port->disc_addr.trtype == nvmet_transport[i].type)
+ return sprintf(page, "%s\n", nvmet_transport[i].name);
}
return sprintf(page, "\n");
@@ -276,22 +305,20 @@
struct nvmet_port *port = to_nvmet_port(item);
int i;
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
- for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
- if (sysfs_streq(page, nvmet_transport_names[i].name))
+ for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) {
+ if (sysfs_streq(page, nvmet_transport[i].name))
goto found;
}
pr_err("Invalid value '%s' for trtype\n", page);
return -EINVAL;
+
found:
memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
- port->disc_addr.trtype = nvmet_transport_names[i].type;
+ port->disc_addr.trtype = nvmet_transport[i].type;
if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA)
nvmet_port_init_tsas_rdma(port);
return count;
@@ -327,7 +354,7 @@
kfree(ns->device_path);
ret = -ENOMEM;
- ns->device_path = kstrndup(page, len, GFP_KERNEL);
+ ns->device_path = kmemdup_nul(page, len, GFP_KERNEL);
if (!ns->device_path)
goto out_unlock;
@@ -395,14 +422,12 @@
struct nvmet_subsys *subsys = ns->subsys;
int ret = 0;
-
mutex_lock(&subsys->lock);
if (ns->enabled) {
ret = -EBUSY;
goto out_unlock;
}
-
if (uuid_parse(page, &ns->uuid))
ret = -EINVAL;
@@ -545,6 +570,31 @@
CONFIGFS_ATTR(nvmet_ns_, buffered_io);
+static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ bool val;
+
+ if (strtobool(page, &val))
+ return -EINVAL;
+
+ if (!val)
+ return -EINVAL;
+
+ mutex_lock(&ns->subsys->lock);
+ if (!ns->enabled) {
+ pr_err("enable ns before revalidate.\n");
+ mutex_unlock(&ns->subsys->lock);
+ return -EINVAL;
+ }
+ nvmet_ns_revalidate(ns);
+ mutex_unlock(&ns->subsys->lock);
+ return count;
+}
+
+CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);
+
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
@@ -552,6 +602,7 @@
&nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
+ &nvmet_ns_attr_revalidate_size,
#ifdef CONFIG_PCI_P2PDMA
&nvmet_ns_attr_p2pmem,
#endif
@@ -615,6 +666,103 @@
.ct_owner = THIS_MODULE,
};
+#ifdef CONFIG_NVME_TARGET_PASSTHRU
+
+static ssize_t nvmet_passthru_device_path_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+
+ return snprintf(page, PAGE_SIZE, "%s\n", subsys->passthru_ctrl_path);
+}
+
+static ssize_t nvmet_passthru_device_path_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+ size_t len;
+ int ret;
+
+ mutex_lock(&subsys->lock);
+
+ ret = -EBUSY;
+ if (subsys->passthru_ctrl)
+ goto out_unlock;
+
+ ret = -EINVAL;
+ len = strcspn(page, "\n");
+ if (!len)
+ goto out_unlock;
+
+ kfree(subsys->passthru_ctrl_path);
+ ret = -ENOMEM;
+ subsys->passthru_ctrl_path = kstrndup(page, len, GFP_KERNEL);
+ if (!subsys->passthru_ctrl_path)
+ goto out_unlock;
+
+ mutex_unlock(&subsys->lock);
+
+ return count;
+out_unlock:
+ mutex_unlock(&subsys->lock);
+ return ret;
+}
+CONFIGFS_ATTR(nvmet_passthru_, device_path);
+
+static ssize_t nvmet_passthru_enable_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+
+ return sprintf(page, "%d\n", subsys->passthru_ctrl ? 1 : 0);
+}
+
+static ssize_t nvmet_passthru_enable_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+ bool enable;
+ int ret = 0;
+
+ if (strtobool(page, &enable))
+ return -EINVAL;
+
+ if (enable)
+ ret = nvmet_passthru_ctrl_enable(subsys);
+ else
+ nvmet_passthru_ctrl_disable(subsys);
+
+ return ret ? ret : count;
+}
+CONFIGFS_ATTR(nvmet_passthru_, enable);
+
+static struct configfs_attribute *nvmet_passthru_attrs[] = {
+ &nvmet_passthru_attr_device_path,
+ &nvmet_passthru_attr_enable,
+ NULL,
+};
+
+static const struct config_item_type nvmet_passthru_type = {
+ .ct_attrs = nvmet_passthru_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static void nvmet_add_passthru_group(struct nvmet_subsys *subsys)
+{
+ config_group_init_type_name(&subsys->passthru_group,
+ "passthru", &nvmet_passthru_type);
+ configfs_add_default_group(&subsys->passthru_group,
+ &subsys->group);
+}
+
+#else /* CONFIG_NVME_TARGET_PASSTHRU */
+
+static void nvmet_add_passthru_group(struct nvmet_subsys *subsys)
+{
+}
+
+#endif /* CONFIG_NVME_TARGET_PASSTHRU */
+
static int nvmet_port_subsys_allow_link(struct config_item *parent,
struct config_item *target)
{
@@ -811,14 +959,14 @@
struct nvmet_subsys *subsys = to_subsys(item);
if (NVME_TERTIARY(subsys->ver))
- return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
- (int)NVME_MAJOR(subsys->ver),
- (int)NVME_MINOR(subsys->ver),
- (int)NVME_TERTIARY(subsys->ver));
- else
- return snprintf(page, PAGE_SIZE, "%d.%d\n",
- (int)NVME_MAJOR(subsys->ver),
- (int)NVME_MINOR(subsys->ver));
+ return snprintf(page, PAGE_SIZE, "%llu.%llu.%llu\n",
+ NVME_MAJOR(subsys->ver),
+ NVME_MINOR(subsys->ver),
+ NVME_TERTIARY(subsys->ver));
+
+ return snprintf(page, PAGE_SIZE, "%llu.%llu\n",
+ NVME_MAJOR(subsys->ver),
+ NVME_MINOR(subsys->ver));
}
static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
@@ -828,6 +976,9 @@
int major, minor, tertiary = 0;
int ret;
+ /* passthru subsystems use the underlying controller's version */
+ if (nvmet_passthru_ctrl(subsys))
+ return -EINVAL;
ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
if (ret != 2 && ret != 3)
@@ -852,20 +1003,177 @@
static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item,
const char *page, size_t count)
{
- struct nvmet_subsys *subsys = to_subsys(item);
+ u64 serial;
+
+ if (sscanf(page, "%llx\n", &serial) != 1)
+ return -EINVAL;
down_write(&nvmet_config_sem);
- sscanf(page, "%llx\n", &subsys->serial);
+ to_subsys(item)->serial = serial;
up_write(&nvmet_config_sem);
return count;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_serial);
+static ssize_t nvmet_subsys_attr_cntlid_min_show(struct config_item *item,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_min);
+}
+
+static ssize_t nvmet_subsys_attr_cntlid_min_store(struct config_item *item,
+ const char *page, size_t cnt)
+{
+ u16 cntlid_min;
+
+ if (sscanf(page, "%hu\n", &cntlid_min) != 1)
+ return -EINVAL;
+
+ if (cntlid_min == 0)
+ return -EINVAL;
+
+ down_write(&nvmet_config_sem);
+ if (cntlid_min >= to_subsys(item)->cntlid_max)
+ goto out_unlock;
+ to_subsys(item)->cntlid_min = cntlid_min;
+ up_write(&nvmet_config_sem);
+ return cnt;
+
+out_unlock:
+ up_write(&nvmet_config_sem);
+ return -EINVAL;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_min);
+
+static ssize_t nvmet_subsys_attr_cntlid_max_show(struct config_item *item,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_max);
+}
+
+static ssize_t nvmet_subsys_attr_cntlid_max_store(struct config_item *item,
+ const char *page, size_t cnt)
+{
+ u16 cntlid_max;
+
+ if (sscanf(page, "%hu\n", &cntlid_max) != 1)
+ return -EINVAL;
+
+ if (cntlid_max == 0)
+ return -EINVAL;
+
+ down_write(&nvmet_config_sem);
+ if (cntlid_max <= to_subsys(item)->cntlid_min)
+ goto out_unlock;
+ to_subsys(item)->cntlid_max = cntlid_max;
+ up_write(&nvmet_config_sem);
+ return cnt;
+
+out_unlock:
+ up_write(&nvmet_config_sem);
+ return -EINVAL;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_max);
+
+static ssize_t nvmet_subsys_attr_model_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ struct nvmet_subsys_model *subsys_model;
+ char *model = NVMET_DEFAULT_CTRL_MODEL;
+ int ret;
+
+ rcu_read_lock();
+ subsys_model = rcu_dereference(subsys->model);
+ if (subsys_model)
+ model = subsys_model->number;
+ ret = snprintf(page, PAGE_SIZE, "%s\n", model);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/* See Section 1.5 of NVMe 1.4 */
+static bool nvmet_is_ascii(const char c)
+{
+ return c >= 0x20 && c <= 0x7e;
+}
+
+static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ struct nvmet_subsys_model *new_model;
+ char *new_model_number;
+ int pos = 0, len;
+
+ len = strcspn(page, "\n");
+ if (!len)
+ return -EINVAL;
+
+ for (pos = 0; pos < len; pos++) {
+ if (!nvmet_is_ascii(page[pos]))
+ return -EINVAL;
+ }
+
+ new_model_number = kmemdup_nul(page, len, GFP_KERNEL);
+ if (!new_model_number)
+ return -ENOMEM;
+
+ new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL);
+ if (!new_model) {
+ kfree(new_model_number);
+ return -ENOMEM;
+ }
+ memcpy(new_model->number, new_model_number, len);
+
+ down_write(&nvmet_config_sem);
+ mutex_lock(&subsys->lock);
+ new_model = rcu_replace_pointer(subsys->model, new_model,
+ mutex_is_locked(&subsys->lock));
+ mutex_unlock(&subsys->lock);
+ up_write(&nvmet_config_sem);
+
+ kfree_rcu(new_model, rcuhead);
+ kfree(new_model_number);
+
+ return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_model);
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support);
+}
+
+static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ bool pi_enable;
+
+ if (strtobool(page, &pi_enable))
+ return -EINVAL;
+
+ subsys->pi_support = pi_enable;
+ return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable);
+#endif
+
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
&nvmet_subsys_attr_attr_version,
&nvmet_subsys_attr_attr_serial,
+ &nvmet_subsys_attr_attr_cntlid_min,
+ &nvmet_subsys_attr_attr_cntlid_max,
+ &nvmet_subsys_attr_attr_model,
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ &nvmet_subsys_attr_attr_pi_enable,
+#endif
NULL,
};
@@ -915,6 +1223,8 @@
configfs_add_default_group(&subsys->allowed_hosts_group,
&subsys->group);
+ nvmet_add_passthru_group(subsys);
+
return &subsys->group;
}
@@ -970,12 +1280,19 @@
NULL,
};
-static void nvmet_referral_release(struct config_item *item)
+static void nvmet_referral_notify(struct config_group *group,
+ struct config_item *item)
{
struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent);
struct nvmet_port *port = to_nvmet_port(item);
nvmet_referral_disable(parent, port);
+}
+
+static void nvmet_referral_release(struct config_item *item)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+
kfree(port);
}
@@ -1006,6 +1323,7 @@
static struct configfs_group_operations nvmet_referral_group_ops = {
.make_group = nvmet_referral_make,
+ .disconnect_notify = nvmet_referral_notify,
};
static const struct config_item_type nvmet_referrals_type = {
@@ -1013,10 +1331,7 @@
.ct_group_ops = &nvmet_referral_group_ops,
};
-static struct {
- enum nvme_ana_state state;
- const char *name;
-} nvmet_ana_state_names[] = {
+static struct nvmet_type_name_map nvmet_ana_state[] = {
{ NVME_ANA_OPTIMIZED, "optimized" },
{ NVME_ANA_NONOPTIMIZED, "non-optimized" },
{ NVME_ANA_INACCESSIBLE, "inaccessible" },
@@ -1031,10 +1346,9 @@
enum nvme_ana_state state = grp->port->ana_state[grp->grpid];
int i;
- for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
- if (state != nvmet_ana_state_names[i].state)
- continue;
- return sprintf(page, "%s\n", nvmet_ana_state_names[i].name);
+ for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) {
+ if (state == nvmet_ana_state[i].type)
+ return sprintf(page, "%s\n", nvmet_ana_state[i].name);
}
return sprintf(page, "\n");
@@ -1044,10 +1358,11 @@
const char *page, size_t count)
{
struct nvmet_ana_group *grp = to_ana_group(item);
+ enum nvme_ana_state *ana_state = grp->port->ana_state;
int i;
- for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
- if (sysfs_streq(page, nvmet_ana_state_names[i].name))
+ for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) {
+ if (sysfs_streq(page, nvmet_ana_state[i].name))
goto found;
}
@@ -1056,10 +1371,9 @@
found:
down_write(&nvmet_ana_sem);
- grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state;
+ ana_state[grp->grpid] = (enum nvme_ana_state) nvmet_ana_state[i].type;
nvmet_ana_chgcnt++;
up_write(&nvmet_ana_sem);
-
nvmet_port_send_ana_event(grp->port);
return count;
}
@@ -1148,6 +1462,8 @@
{
struct nvmet_port *port = to_nvmet_port(item);
+ /* Let inflight controllers teardown complete */
+ flush_scheduled_work();
list_del(&port->global_entry);
kfree(port->ana_state);
@@ -1161,6 +1477,9 @@
&nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype,
&nvmet_attr_param_inline_data_size,
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ &nvmet_attr_param_pi_enable,
+#endif
NULL,
};
@@ -1210,6 +1529,7 @@
port->inline_data_size = -1; /* < 0 == let the transport choose */
port->disc_addr.portid = cpu_to_le16(portid);
+ port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX;
port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW;
config_group_init_type_name(&port->group, name, &nvmet_port_type);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index ee81d94..9a8fa2e 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -73,7 +73,7 @@
status = NVME_SC_ACCESS_DENIED;
break;
case -EIO:
- /* FALLTHRU */
+ fallthrough;
default:
req->error_loc = offsetof(struct nvme_common_command, opcode);
status = NVME_SC_INTERNAL | NVME_SC_DNR;
@@ -115,13 +115,14 @@
static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
- struct nvmet_ns *ns;
+ unsigned long nsid = 0;
+ struct nvmet_ns *cur;
+ unsigned long idx;
- if (list_empty(&subsys->namespaces))
- return 0;
+ xa_for_each(&subsys->namespaces, idx, cur)
+ nsid = cur->nsid;
- ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
- return ns->nsid;
+ return nsid;
}
static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
@@ -129,39 +130,30 @@
return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}
-static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
+static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
{
+ u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
struct nvmet_req *req;
- while (1) {
- mutex_lock(&ctrl->lock);
- if (!ctrl->nr_async_event_cmds) {
- mutex_unlock(&ctrl->lock);
- return;
- }
-
+ mutex_lock(&ctrl->lock);
+ while (ctrl->nr_async_event_cmds) {
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
+ nvmet_req_complete(req, status);
+ mutex_lock(&ctrl->lock);
}
+ mutex_unlock(&ctrl->lock);
}
-static void nvmet_async_event_work(struct work_struct *work)
+static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
{
- struct nvmet_ctrl *ctrl =
- container_of(work, struct nvmet_ctrl, async_event_work);
struct nvmet_async_event *aen;
struct nvmet_req *req;
- while (1) {
- mutex_lock(&ctrl->lock);
- aen = list_first_entry_or_null(&ctrl->async_events,
- struct nvmet_async_event, entry);
- if (!aen || !ctrl->nr_async_event_cmds) {
- mutex_unlock(&ctrl->lock);
- return;
- }
-
+ mutex_lock(&ctrl->lock);
+ while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
+ aen = list_first_entry(&ctrl->async_events,
+ struct nvmet_async_event, entry);
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
nvmet_set_result(req, nvmet_async_event_result(aen));
@@ -169,8 +161,31 @@
kfree(aen);
mutex_unlock(&ctrl->lock);
+ trace_nvmet_async_event(ctrl, req->cqe->result.u32);
nvmet_req_complete(req, 0);
+ mutex_lock(&ctrl->lock);
}
+ mutex_unlock(&ctrl->lock);
+}
+
+static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
+{
+ struct nvmet_async_event *aen, *tmp;
+
+ mutex_lock(&ctrl->lock); /* ctrl->async_events is walked under ctrl->lock */
+ list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) { /* _safe variant: entries are unlinked while iterating */
+ list_del(&aen->entry);
+ kfree(aen); /* the queued event is discarded; no request is completed for it here */
+ }
+ mutex_unlock(&ctrl->lock);
+}
+
+static void nvmet_async_event_work(struct work_struct *work)
+{
+ struct nvmet_ctrl *ctrl =
+ container_of(work, struct nvmet_ctrl, async_event_work);
+
+ nvmet_async_events_process(ctrl);
}
void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
@@ -318,12 +333,21 @@
if (!try_module_get(ops->owner))
return -EINVAL;
- ret = ops->add_port(port);
- if (ret) {
- module_put(ops->owner);
- return ret;
+ /*
+ * If the user requested PI support and the transport isn't pi capable,
+ * don't enable the port.
+ */
+ if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
+ pr_err("T10-PI is not supported by transport type %d\n",
+ port->disc_addr.trtype);
+ ret = -EINVAL;
+ goto out_put;
}
+ ret = ops->add_port(port);
+ if (ret)
+ goto out_put;
+
/* If the transport didn't set inline_data_size, then disable it. */
if (port->inline_data_size < 0)
port->inline_data_size = 0;
@@ -331,6 +355,10 @@
port->enabled = true;
port->tr_ops = ops;
return 0;
+
+out_put:
+ module_put(ops->owner);
+ return ret;
}
void nvmet_disable_port(struct nvmet_port *port)
@@ -351,10 +379,10 @@
{
struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvmet_ctrl, ka_work);
- bool cmd_seen = ctrl->cmd_seen;
+ bool reset_tbkas = ctrl->reset_tbkas;
- ctrl->cmd_seen = false;
- if (cmd_seen) {
+ ctrl->reset_tbkas = false;
+ if (reset_tbkas) {
pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
ctrl->cntlid);
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
@@ -367,7 +395,7 @@
nvmet_ctrl_fatal_error(ctrl);
}
-static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
+void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
if (unlikely(ctrl->kato == 0))
return;
@@ -379,7 +407,7 @@
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}
-static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
+void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
if (unlikely(ctrl->kato == 0))
return;
@@ -389,28 +417,13 @@
cancel_delayed_work_sync(&ctrl->ka_work);
}
-static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
- __le32 nsid)
-{
- struct nvmet_ns *ns;
-
- list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
- if (ns->nsid == le32_to_cpu(nsid))
- return ns;
- }
-
- return NULL;
-}
-
struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
struct nvmet_ns *ns;
- rcu_read_lock();
- ns = __nvmet_find_namespace(ctrl, nsid);
+ ns = xa_load(&ctrl->subsys->namespaces, le32_to_cpu(nsid));
if (ns)
percpu_ref_get(&ns->ref);
- rcu_read_unlock();
return ns;
}
@@ -446,7 +459,7 @@
return -EINVAL;
}
- if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
+ if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) {
pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
ns->device_path);
return -EINVAL;
@@ -516,6 +529,19 @@
ns->nsid);
}
+void nvmet_ns_revalidate(struct nvmet_ns *ns)
+{
+ loff_t oldsize = ns->size; /* snapshot so a size change can be detected below */
+
+ if (ns->bdev) /* a set ns->bdev selects the bdev helper, otherwise the file helper runs */
+ nvmet_bdev_ns_revalidate(ns); /* presumably refreshes ns->size - confirm against the helper */
+ else
+ nvmet_file_ns_revalidate(ns);
+
+ if (oldsize != ns->size) /* only announce a namespace change when the size actually differs */
+ nvmet_ns_changed(ns->subsys, ns->nsid);
+}
+
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
@@ -524,6 +550,12 @@
mutex_lock(&subsys->lock);
ret = 0;
+
+ if (nvmet_passthru_ctrl(subsys)) {
+ pr_info("cannot enable both passthru and regular namespaces for a single subsystem");
+ goto out_unlock;
+ }
+
if (ns->enabled)
goto out_unlock;
@@ -552,24 +584,10 @@
if (ns->nsid > subsys->max_nsid)
subsys->max_nsid = ns->nsid;
- /*
- * The namespaces list needs to be sorted to simplify the implementation
- * of the Identify Namepace List subcommand.
- */
- if (list_empty(&subsys->namespaces)) {
- list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
- } else {
- struct nvmet_ns *old;
+ ret = xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL);
+ if (ret)
+ goto out_restore_subsys_maxnsid;
- list_for_each_entry_rcu(old, &subsys->namespaces, dev_link,
- lockdep_is_held(&subsys->lock)) {
- BUG_ON(ns->nsid == old->nsid);
- if (ns->nsid < old->nsid)
- break;
- }
-
- list_add_tail_rcu(&ns->dev_link, &old->dev_link);
- }
subsys->nr_namespaces++;
nvmet_ns_changed(subsys, ns->nsid);
@@ -578,6 +596,10 @@
out_unlock:
mutex_unlock(&subsys->lock);
return ret;
+
+out_restore_subsys_maxnsid:
+ subsys->max_nsid = nvmet_max_nsid(subsys);
+ percpu_ref_exit(&ns->ref);
out_dev_put:
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
@@ -596,7 +618,7 @@
goto out_unlock;
ns->enabled = false;
- list_del_rcu(&ns->dev_link);
+ xa_erase(&ns->subsys->namespaces, ns->nsid);
if (ns->nsid == subsys->max_nsid)
subsys->max_nsid = nvmet_max_nsid(subsys);
@@ -647,7 +669,6 @@
if (!ns)
return NULL;
- INIT_LIST_HEAD(&ns->dev_link);
init_completion(&ns->disable_done);
ns->nsid = nsid;
@@ -736,8 +757,6 @@
{
cq->qid = qid;
cq->size = size;
-
- ctrl->cqs[qid] = cq;
}
void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
@@ -759,19 +778,28 @@
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
+ struct nvmet_ctrl *ctrl = sq->ctrl;
+
/*
* If this is the admin queue, complete all AERs so that our
* queue doesn't have outstanding requests on it.
*/
- if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
- nvmet_async_events_free(sq->ctrl);
+ if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
+ nvmet_async_events_failall(ctrl);
percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
percpu_ref_exit(&sq->ref);
- if (sq->ctrl) {
- nvmet_ctrl_put(sq->ctrl);
+ if (ctrl) {
+ /*
+ * The teardown flow may take some time, and the host may not
+ * send us keep-alive during this period, hence reset the
+ * traffic based keep-alive timer so we don't trigger a
+ * controller teardown as a result of a keep-alive expiration.
+ */
+ ctrl->reset_tbkas = true;
+ nvmet_ctrl_put(ctrl);
sq->ctrl = NULL; /* allows reusing the queue later */
}
}
@@ -838,6 +866,9 @@
if (unlikely(ret))
return ret;
+ if (nvmet_req_passthru_ctrl(req))
+ return nvmet_parse_passthru_io_cmd(req);
+
req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
if (unlikely(!req->ns)) {
req->error_loc = offsetof(struct nvme_common_command, nsid);
@@ -870,8 +901,11 @@
req->sq = sq;
req->ops = ops;
req->sg = NULL;
+ req->metadata_sg = NULL;
req->sg_cnt = 0;
+ req->metadata_sg_cnt = 0;
req->transfer_len = 0;
+ req->metadata_len = 0;
req->cqe->status = 0;
req->cqe->sq_head = 0;
req->ns = NULL;
@@ -897,14 +931,10 @@
}
if (unlikely(!req->sq->ctrl))
- /* will return an error for any Non-connect command: */
+ /* will return an error for any non-connect command: */
status = nvmet_parse_connect_cmd(req);
else if (likely(req->sq->qid != 0))
status = nvmet_parse_io_cmd(req);
- else if (nvme_is_fabrics(req->cmd))
- status = nvmet_parse_fabrics_cmd(req);
- else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
- status = nvmet_parse_discovery_cmd(req);
else
status = nvmet_parse_admin_cmd(req);
@@ -919,7 +949,7 @@
}
if (sq->ctrl)
- sq->ctrl->cmd_seen = true;
+ sq->ctrl->reset_tbkas = true;
return true;
@@ -937,60 +967,112 @@
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);
-void nvmet_req_execute(struct nvmet_req *req)
+bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
- if (unlikely(req->data_len != req->transfer_len)) {
+ if (unlikely(len != req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
- } else
- req->execute(req);
-}
-EXPORT_SYMBOL_GPL(nvmet_req_execute);
-
-int nvmet_req_alloc_sgl(struct nvmet_req *req)
-{
- struct pci_dev *p2p_dev = NULL;
-
- if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
- if (req->sq->ctrl && req->ns)
- p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
- req->ns->nsid);
-
- req->p2p_dev = NULL;
- if (req->sq->qid && p2p_dev) {
- req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
- req->transfer_len);
- if (req->sg) {
- req->p2p_dev = p2p_dev;
- return 0;
- }
- }
-
- /*
- * If no P2P memory was available we fallback to using
- * regular memory
- */
+ return false;
}
- req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
+ return true;
+}
+EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);
+
+bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
+{
+ if (unlikely(data_len > req->transfer_len)) { /* data_len may be smaller than transfer_len, but must not exceed it */
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
+ nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
+ return false; /* the request has already been completed with an error status */
+ }
+
+ return true;
+}
+
+static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
+{
+ return req->transfer_len - req->metadata_len; /* total transfer length minus the metadata portion */
+}
+
+/*
+ * Allocate the data SGL and, when req->metadata_len is non-zero, the metadata
+ * SGL from p2p_dev's peer-to-peer memory. req->p2p_dev is set only once every
+ * allocation has succeeded. Returns 0 on success or -ENOMEM on failure.
+ */
+static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev,
+ struct nvmet_req *req)
+{
+ req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
+ nvmet_data_transfer_len(req));
if (!req->sg)
- return -ENOMEM;
+ goto out_err;
+
+ if (req->metadata_len) {
+ req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev,
+ &req->metadata_sg_cnt, req->metadata_len);
+ if (!req->metadata_sg)
+ goto out_free_sg;
+ }
+
+ req->p2p_dev = p2p_dev;
return 0;
+out_free_sg:
+ /* req->p2p_dev is not set on this path; free to the device we allocated from */
+ pci_p2pmem_free_sgl(p2p_dev, req->sg);
+out_err:
+ return -ENOMEM;
}
-EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
-void nvmet_req_free_sgl(struct nvmet_req *req)
+static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req)
{
- if (req->p2p_dev)
+ if (!IS_ENABLED(CONFIG_PCI_P2PDMA) ||
+ !req->sq->ctrl || !req->sq->qid || !req->ns)
+ return NULL;
+ return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid);
+}
+
+int nvmet_req_alloc_sgls(struct nvmet_req *req)
+{
+ struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req);
+
+ if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req)) /* try p2p memory first when a device was found */
+ return 0;
+
+ req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
+ &req->sg_cnt); /* no p2p device, or p2p allocation failed: use sgl_alloc() */
+ if (unlikely(!req->sg))
+ goto out;
+
+ if (req->metadata_len) { /* metadata gets its own scatterlist, sized by metadata_len */
+ req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
+ &req->metadata_sg_cnt);
+ if (unlikely(!req->metadata_sg))
+ goto out_free;
+ }
+
+ return 0;
+out_free:
+ sgl_free(req->sg); /* undo the data SGL when the metadata SGL could not be allocated */
+out:
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);
+
+void nvmet_req_free_sgls(struct nvmet_req *req)
+{
+ if (req->p2p_dev) {
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
- else
+ if (req->metadata_sg)
+ pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
+ req->p2p_dev = NULL;
+ } else {
sgl_free(req->sg);
+ if (req->metadata_sg)
+ sgl_free(req->metadata_sg);
+ }
req->sg = NULL;
+ req->metadata_sg = NULL;
req->sg_cnt = 0;
+ req->metadata_sg_cnt = 0;
}
-EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
+EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);
static inline bool nvmet_cc_en(u32 cc)
{
@@ -1187,14 +1269,14 @@
struct nvmet_req *req)
{
struct nvmet_ns *ns;
+ unsigned long idx;
if (!req->p2p_client)
return;
ctrl->p2p_client = get_device(req->p2p_client);
- list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link,
- lockdep_is_held(&ctrl->subsys->lock))
+ xa_for_each(&ctrl->subsys->namespaces, idx, ns)
nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}
@@ -1277,20 +1359,17 @@
if (!ctrl->changed_ns_list)
goto out_free_ctrl;
- ctrl->cqs = kcalloc(subsys->max_qid + 1,
- sizeof(struct nvmet_cq *),
- GFP_KERNEL);
- if (!ctrl->cqs)
- goto out_free_changed_ns_list;
-
ctrl->sqs = kcalloc(subsys->max_qid + 1,
sizeof(struct nvmet_sq *),
GFP_KERNEL);
if (!ctrl->sqs)
- goto out_free_cqs;
+ goto out_free_changed_ns_list;
+
+ if (subsys->cntlid_min > subsys->cntlid_max)
+ goto out_free_sqs;
ret = ida_simple_get(&cntlid_ida,
- NVME_CNTLID_MIN, NVME_CNTLID_MAX,
+ subsys->cntlid_min, subsys->cntlid_max,
GFP_KERNEL);
if (ret < 0) {
status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
@@ -1325,8 +1404,6 @@
out_free_sqs:
kfree(ctrl->sqs);
-out_free_cqs:
- kfree(ctrl->cqs);
out_free_changed_ns_list:
kfree(ctrl->changed_ns_list);
out_free_ctrl:
@@ -1354,8 +1431,8 @@
ida_simple_remove(&cntlid_ida, ctrl->cntlid);
+ nvmet_async_events_free(ctrl);
kfree(ctrl->sqs);
- kfree(ctrl->cqs);
kfree(ctrl->changed_ns_list);
kfree(ctrl);
@@ -1415,7 +1492,7 @@
if (!subsys)
return ERR_PTR(-ENOMEM);
- subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
+ subsys->ver = NVMET_DEFAULT_VS;
/* generate a random serial number as our controllers are ephemeral: */
get_random_bytes(&subsys->serial, sizeof(subsys->serial));
@@ -1438,11 +1515,12 @@
kfree(subsys);
return ERR_PTR(-ENOMEM);
}
-
+ subsys->cntlid_min = NVME_CNTLID_MIN;
+ subsys->cntlid_max = NVME_CNTLID_MAX;
kref_init(&subsys->ref);
mutex_init(&subsys->lock);
- INIT_LIST_HEAD(&subsys->namespaces);
+ xa_init(&subsys->namespaces);
INIT_LIST_HEAD(&subsys->ctrls);
INIT_LIST_HEAD(&subsys->hosts);
@@ -1454,9 +1532,13 @@
struct nvmet_subsys *subsys =
container_of(ref, struct nvmet_subsys, ref);
- WARN_ON_ONCE(!list_empty(&subsys->namespaces));
+ WARN_ON_ONCE(!xa_empty(&subsys->namespaces));
+
+ xa_destroy(&subsys->namespaces);
+ nvmet_passthru_subsys_free(subsys);
kfree(subsys->subsysnqn);
+ kfree_rcu(subsys->model, rcuhead);
kfree(subsys);
}
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 3764a89..5b8ee82 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -157,7 +157,7 @@
return entries;
}
-static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
+static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
{
const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -171,8 +171,20 @@
u16 status = 0;
void *buffer;
+ if (!nvmet_check_transfer_len(req, data_len))
+ return;
+
+ if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
+ req->error_loc =
+ offsetof(struct nvme_get_log_page_command, lid);
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ goto out;
+ }
+
/* Spec requires dword aligned offsets */
if (offset & 0x3) {
+ req->error_loc =
+ offsetof(struct nvme_get_log_page_command, lpo);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto out;
}
@@ -227,20 +239,35 @@
nvmet_req_complete(req, status);
}
-static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
+static void nvmet_execute_disc_identify(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
+ const char model[] = "Linux";
u16 status = 0;
+ if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
+ return;
+
+ if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
+ req->error_loc = offsetof(struct nvme_identify, cns);
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ goto out;
+ }
+
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
status = NVME_SC_INTERNAL;
goto out;
}
+ memset(id->sn, ' ', sizeof(id->sn));
+ bin2hex(id->sn, &ctrl->subsys->serial,
+ min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
memset(id->fr, ' ', sizeof(id->fr));
- strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr));
+ memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
+ memcpy_and_pad(id->fr, sizeof(id->fr),
+ UTS_RELEASE, strlen(UTS_RELEASE), ' ');
/* no limit on data transfer sizes for now */
id->mdts = 0;
@@ -252,7 +279,7 @@
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
- if (ctrl->ops->has_keyed_sgls)
+ if (ctrl->ops->flags & NVMF_KEYED_SGLS)
id->sgls |= cpu_to_le32(1 << 2);
if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20);
@@ -273,6 +300,9 @@
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat;
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
+
switch (cdw10 & 0xff) {
case NVME_FEAT_KATO:
stat = nvmet_set_feat_kato(req);
@@ -296,6 +326,9 @@
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat = 0;
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
+
switch (cdw10 & 0xff) {
case NVME_FEAT_KATO:
nvmet_get_feat_kato(req);
@@ -328,47 +361,22 @@
switch (cmd->common.opcode) {
case nvme_admin_set_features:
req->execute = nvmet_execute_disc_set_features;
- req->data_len = 0;
return 0;
case nvme_admin_get_features:
req->execute = nvmet_execute_disc_get_features;
- req->data_len = 0;
return 0;
case nvme_admin_async_event:
req->execute = nvmet_execute_async_event;
- req->data_len = 0;
return 0;
case nvme_admin_keep_alive:
req->execute = nvmet_execute_keep_alive;
- req->data_len = 0;
return 0;
case nvme_admin_get_log_page:
- req->data_len = nvmet_get_log_page_len(cmd);
-
- switch (cmd->get_log_page.lid) {
- case NVME_LOG_DISC:
- req->execute = nvmet_execute_get_disc_log_page;
- return 0;
- default:
- pr_err("unsupported get_log_page lid %d\n",
- cmd->get_log_page.lid);
- req->error_loc =
- offsetof(struct nvme_get_log_page_command, lid);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
- }
+ req->execute = nvmet_execute_disc_get_log_page;
+ return 0;
case nvme_admin_identify:
- req->data_len = NVME_IDENTIFY_DATA_SIZE;
- switch (cmd->identify.cns) {
- case NVME_ID_CNS_CTRL:
- req->execute =
- nvmet_execute_identify_disc_ctrl;
- return 0;
- default:
- pr_err("unsupported identify cns %d\n",
- cmd->identify.cns);
- req->error_loc = offsetof(struct nvme_identify, cns);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
- }
+ req->execute = nvmet_execute_disc_identify;
+ return 0;
default:
pr_err("unhandled cmd %d\n", cmd->common.opcode);
req->error_loc = offsetof(struct nvme_common_command, opcode);
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 5e47395..e62d3d0 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -12,6 +12,9 @@
u64 val = le64_to_cpu(req->cmd->prop_set.value);
u16 status = 0;
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
+
if (req->cmd->prop_set.attrib & 1) {
req->error_loc =
offsetof(struct nvmf_property_set_command, attrib);
@@ -38,6 +41,9 @@
u16 status = 0;
u64 val = 0;
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
+
if (req->cmd->prop_get.attrib & 1) {
switch (le32_to_cpu(req->cmd->prop_get.offset)) {
case NVME_REG_CAP:
@@ -82,11 +88,9 @@
switch (cmd->fabrics.fctype) {
case nvme_fabrics_type_property_set:
- req->data_len = 0;
req->execute = nvmet_execute_prop_set;
break;
case nvme_fabrics_type_property_get:
- req->data_len = 0;
req->execute = nvmet_execute_prop_get;
break;
default:
@@ -153,6 +157,9 @@
struct nvmet_ctrl *ctrl = NULL;
u16 status = 0;
+ if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
+ return;
+
d = kmalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
status = NVME_SC_INTERNAL;
@@ -191,6 +198,8 @@
goto out;
}
+ ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
+
uuid_copy(&ctrl->hostid, &d->hostid);
status = nvmet_install_queue(ctrl, req);
@@ -199,8 +208,9 @@
goto out;
}
- pr_info("creating controller %d for subsystem %s for NQN %s.\n",
- ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
+ pr_info("creating controller %d for subsystem %s for NQN %s%s.\n",
+ ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
+ ctrl->pi_support ? " T10-PI is enabled" : "");
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
out:
@@ -217,6 +227,9 @@
u16 qid = le16_to_cpu(c->qid);
u16 status = 0;
+ if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
+ return;
+
d = kmalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
status = NVME_SC_INTERNAL;
@@ -287,7 +300,6 @@
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
- req->data_len = sizeof(struct nvmf_connect_data);
if (cmd->connect.qid == 0)
req->execute = nvmet_execute_admin_connect;
else
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 9b07e8c..640031c 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -14,6 +14,7 @@
#include "nvmet.h"
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
+#include "../host/fc.h"
/* *************************** Data Structures/Defines ****************** */
@@ -21,23 +22,21 @@
#define NVMET_LS_CTX_COUNT 256
-/* for this implementation, assume small single frame rqst/rsp */
-#define NVME_FC_MAX_LS_BUFFER_SIZE 2048
-
struct nvmet_fc_tgtport;
struct nvmet_fc_tgt_assoc;
-struct nvmet_fc_ls_iod {
- struct nvmefc_tgt_ls_req *lsreq;
+struct nvmet_fc_ls_iod { /* for an LS RQST RCV */
+ struct nvmefc_ls_rsp *lsrsp;
struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */
- struct list_head ls_list; /* tgtport->ls_list */
+ struct list_head ls_rcv_list; /* tgtport->ls_rcv_list */
struct nvmet_fc_tgtport *tgtport;
struct nvmet_fc_tgt_assoc *assoc;
+ void *hosthandle;
- u8 *rqstbuf;
- u8 *rspbuf;
+ union nvmefc_ls_requests *rqstbuf;
+ union nvmefc_ls_responses *rspbuf;
u16 rqstdatalen;
dma_addr_t rspdma;
@@ -46,6 +45,18 @@
struct work_struct work;
} __aligned(sizeof(unsigned long long));
+struct nvmet_fc_ls_req_op { /* for an LS RQST XMT */
+ struct nvmefc_ls_req ls_req;
+
+ struct nvmet_fc_tgtport *tgtport;
+ void *hosthandle;
+
+ int ls_error;
+ struct list_head lsreq_list; /* tgtport->ls_req_list */
+ bool req_queued;
+};
+
+
/* desired maximum for a single sequence - if sg list allows it */
#define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024)
@@ -83,7 +94,6 @@
};
struct nvmet_fc_tgtport {
-
struct nvmet_fc_target_port fc_target_port;
struct list_head tgt_list; /* nvmet_fc_target_list */
@@ -92,9 +102,11 @@
struct nvmet_fc_ls_iod *iod;
spinlock_t lock;
- struct list_head ls_list;
+ struct list_head ls_rcv_list;
+ struct list_head ls_req_list;
struct list_head ls_busylist;
struct list_head assoc_list;
+ struct list_head host_list;
struct ida assoc_cnt;
struct nvmet_fc_port_entry *pe;
struct kref ref;
@@ -136,10 +148,21 @@
struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */
} __aligned(sizeof(unsigned long long));
+struct nvmet_fc_hostport {
+ struct nvmet_fc_tgtport *tgtport;
+ void *hosthandle;
+ struct list_head host_list;
+ struct kref ref;
+ u8 invalid;
+};
+
struct nvmet_fc_tgt_assoc {
u64 association_id;
u32 a_id;
+ atomic_t terminating;
struct nvmet_fc_tgtport *tgtport;
+ struct nvmet_fc_hostport *hostport;
+ struct nvmet_fc_ls_iod *rcv_disconn;
struct list_head a_list;
struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1];
struct kref ref;
@@ -227,6 +250,8 @@
static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_fcp_iod *fod);
static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc);
+static void nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_ls_iod *iod);
/* *********************** FC-NVME DMA Handling **************************** */
@@ -318,6 +343,188 @@
}
+/* ********************** FC-NVME LS XMT Handling ************************* */
+
+
+static void
+__nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
+{
+ struct nvmet_fc_tgtport *tgtport = lsop->tgtport;
+ struct nvmefc_ls_req *lsreq = &lsop->ls_req;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+
+ if (!lsop->req_queued) { /* not on the list (never queued or already finished): nothing to undo */
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ return;
+ }
+
+ list_del(&lsop->lsreq_list); /* unlink from tgtport->ls_req_list */
+
+ lsop->req_queued = false;
+
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma,
+ (lsreq->rqstlen + lsreq->rsplen),
+ DMA_BIDIRECTIONAL); /* a single mapping covers request plus response */
+
+ nvmet_fc_tgtport_put(tgtport); /* pairs with nvmet_fc_tgtport_get() in __nvmet_fc_send_ls_req() */
+}
+
+static int
+__nvmet_fc_send_ls_req(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_ls_req_op *lsop,
+ void (*done)(struct nvmefc_ls_req *req, int status))
+{
+ struct nvmefc_ls_req *lsreq = &lsop->ls_req;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!tgtport->ops->ls_req) /* the lldd provides no LS request hook */
+ return -EOPNOTSUPP;
+
+ if (!nvmet_fc_tgtport_get(tgtport)) /* reference is dropped in __nvmet_fc_finish_ls_req() or on the error paths below */
+ return -ESHUTDOWN;
+
+ lsreq->done = done;
+ lsop->req_queued = false;
+ INIT_LIST_HEAD(&lsop->lsreq_list);
+
+ lsreq->rqstdma = fc_dma_map_single(tgtport->dev, lsreq->rqstaddr,
+ lsreq->rqstlen + lsreq->rsplen,
+ DMA_BIDIRECTIONAL); /* request and response are mapped as one region */
+ if (fc_dma_mapping_error(tgtport->dev, lsreq->rqstdma)) {
+ ret = -EFAULT;
+ goto out_puttgtport;
+ }
+ lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; /* response area directly follows the request in that mapping */
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+
+ list_add_tail(&lsop->lsreq_list, &tgtport->ls_req_list);
+
+ lsop->req_queued = true; /* set under the lock, before ->ls_req() is invoked */
+
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ ret = tgtport->ops->ls_req(&tgtport->fc_target_port, lsop->hosthandle,
+ lsreq);
+ if (ret)
+ goto out_unlink;
+
+ return 0;
+
+out_unlink:
+ lsop->ls_error = ret; /* keep the lldd's error code on the op */
+ spin_lock_irqsave(&tgtport->lock, flags);
+ lsop->req_queued = false;
+ list_del(&lsop->lsreq_list);
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma,
+ (lsreq->rqstlen + lsreq->rsplen),
+ DMA_BIDIRECTIONAL);
+out_puttgtport:
+ nvmet_fc_tgtport_put(tgtport);
+
+ return ret;
+}
+
+static int
+nvmet_fc_send_ls_req_async(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_ls_req_op *lsop,
+ void (*done)(struct nvmefc_ls_req *req, int status))
+{
+ /* don't wait for completion */
+
+ return __nvmet_fc_send_ls_req(tgtport, lsop, done);
+}
+
+static void
+nvmet_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
+{
+ struct nvmet_fc_ls_req_op *lsop =
+ container_of(lsreq, struct nvmet_fc_ls_req_op, ls_req);
+
+ __nvmet_fc_finish_ls_req(lsop);
+
+ /* fc-nvme target doesn't care about success or failure of cmd */
+
+ kfree(lsop);
+}
+
+/*
+ * This routine sends a FC-NVME LS to disconnect (aka terminate)
+ * the FC-NVME Association. Terminating the association also
+ * terminates the FC-NVME connections (per queue, both admin and io
+ * queues) that are part of the association. E.g. things are torn
+ * down, and the related FC-NVME Association ID and Connection IDs
+ * become invalid.
+ *
+ * The behavior of the fc-nvme target is such that its
+ * understanding of the association and connections will implicitly
+ * be torn down. The action is implicit as it may be due to a loss of
+ * connectivity with the fc-nvme host, so the target may never get a
+ * response even if it tried. As such, the action of this routine
+ * is to asynchronously send the LS, ignore any results of the LS, and
+ * continue on with terminating the association. If the fc-nvme host
+ * is present and receives the LS, it too can tear down.
+ */
+static void
+nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc)
+{
+ struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
+ struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
+ struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
+ struct nvmet_fc_ls_req_op *lsop;
+ struct nvmefc_ls_req *lsreq;
+ int ret;
+
+ /*
+ * If ls_req is NULL or no hosthandle, it's an older lldd and no
+ * message is normal. Otherwise, send unless the hostport has
+ * already been invalidated by the lldd.
+ */
+ if (!tgtport->ops->ls_req || !assoc->hostport ||
+ assoc->hostport->invalid)
+ return;
+
+ lsop = kzalloc((sizeof(*lsop) +
+ sizeof(*discon_rqst) + sizeof(*discon_acc) +
+ tgtport->ops->lsrqst_priv_sz), GFP_KERNEL);
+ if (!lsop) {
+ dev_info(tgtport->dev,
+ "{%d:%d} send Disconnect Association failed: ENOMEM\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
+ return;
+ }
+
+ discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1];
+ discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
+ lsreq = &lsop->ls_req;
+ if (tgtport->ops->lsrqst_priv_sz)
+ lsreq->private = (void *)&discon_acc[1];
+ else
+ lsreq->private = NULL;
+
+ lsop->tgtport = tgtport;
+ lsop->hosthandle = assoc->hostport->hosthandle;
+
+ nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
+ assoc->association_id);
+
+ ret = nvmet_fc_send_ls_req_async(tgtport, lsop,
+ nvmet_fc_disconnect_assoc_done);
+ if (ret) {
+ dev_info(tgtport->dev,
+ "{%d:%d} XMT Disconnect Association failed: %d\n",
+ tgtport->fc_target_port.port_num, assoc->a_id, ret);
+ kfree(lsop);
+ }
+}
+
+
/* *********************** FC-NVME Port Management ************************ */
@@ -337,17 +544,18 @@
for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) {
INIT_WORK(&iod->work, nvmet_fc_handle_ls_rqst_work);
iod->tgtport = tgtport;
- list_add_tail(&iod->ls_list, &tgtport->ls_list);
+ list_add_tail(&iod->ls_rcv_list, &tgtport->ls_rcv_list);
- iod->rqstbuf = kcalloc(2, NVME_FC_MAX_LS_BUFFER_SIZE,
- GFP_KERNEL);
+ iod->rqstbuf = kzalloc(sizeof(union nvmefc_ls_requests) +
+ sizeof(union nvmefc_ls_responses),
+ GFP_KERNEL);
if (!iod->rqstbuf)
goto out_fail;
- iod->rspbuf = iod->rqstbuf + NVME_FC_MAX_LS_BUFFER_SIZE;
+ iod->rspbuf = (union nvmefc_ls_responses *)&iod->rqstbuf[1];
iod->rspdma = fc_dma_map_single(tgtport->dev, iod->rspbuf,
- NVME_FC_MAX_LS_BUFFER_SIZE,
+ sizeof(*iod->rspbuf),
DMA_TO_DEVICE);
if (fc_dma_mapping_error(tgtport->dev, iod->rspdma))
goto out_fail;
@@ -357,12 +565,12 @@
out_fail:
kfree(iod->rqstbuf);
- list_del(&iod->ls_list);
+ list_del(&iod->ls_rcv_list);
for (iod--, i--; i >= 0; iod--, i--) {
fc_dma_unmap_single(tgtport->dev, iod->rspdma,
- NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE);
+ sizeof(*iod->rspbuf), DMA_TO_DEVICE);
kfree(iod->rqstbuf);
- list_del(&iod->ls_list);
+ list_del(&iod->ls_rcv_list);
}
kfree(iod);
@@ -378,10 +586,10 @@
for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) {
fc_dma_unmap_single(tgtport->dev,
- iod->rspdma, NVME_FC_MAX_LS_BUFFER_SIZE,
+ iod->rspdma, sizeof(*iod->rspbuf),
DMA_TO_DEVICE);
kfree(iod->rqstbuf);
- list_del(&iod->ls_list);
+ list_del(&iod->ls_rcv_list);
}
kfree(tgtport->iod);
}
@@ -393,10 +601,10 @@
unsigned long flags;
spin_lock_irqsave(&tgtport->lock, flags);
- iod = list_first_entry_or_null(&tgtport->ls_list,
- struct nvmet_fc_ls_iod, ls_list);
+ iod = list_first_entry_or_null(&tgtport->ls_rcv_list,
+ struct nvmet_fc_ls_iod, ls_rcv_list);
if (iod)
- list_move_tail(&iod->ls_list, &tgtport->ls_busylist);
+ list_move_tail(&iod->ls_rcv_list, &tgtport->ls_busylist);
spin_unlock_irqrestore(&tgtport->lock, flags);
return iod;
}
@@ -409,7 +617,7 @@
unsigned long flags;
spin_lock_irqsave(&tgtport->lock, flags);
- list_move(&iod->ls_list, &tgtport->ls_list);
+ list_move(&iod->ls_rcv_list, &tgtport->ls_rcv_list);
spin_unlock_irqrestore(&tgtport->lock, flags);
}
@@ -678,31 +886,33 @@
struct nvmet_fc_fcp_iod *fod = queue->fod;
struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr;
unsigned long flags;
- int i, writedataactive;
+ int i;
bool disconnect;
disconnect = atomic_xchg(&queue->connected, 0);
+ /* if not connected, nothing to do */
+ if (!disconnect)
+ return;
+
spin_lock_irqsave(&queue->qlock, flags);
- /* about outstanding io's */
+ /* abort outstanding io's */
for (i = 0; i < queue->sqsize; fod++, i++) {
if (fod->active) {
spin_lock(&fod->flock);
fod->abort = true;
- writedataactive = fod->writedataactive;
- spin_unlock(&fod->flock);
/*
* only call lldd abort routine if waiting for
* writedata. other outstanding ops should finish
* on their own.
*/
- if (writedataactive) {
- spin_lock(&fod->flock);
+ if (fod->writedataactive) {
fod->aborted = true;
spin_unlock(&fod->flock);
tgtport->ops->fcp_abort(
&tgtport->fc_target_port, fod->fcpreq);
- }
+ } else
+ spin_unlock(&fod->flock);
}
}
@@ -742,8 +952,7 @@
flush_workqueue(queue->work_q);
- if (disconnect)
- nvmet_sq_destroy(&queue->nvme_sq);
+ nvmet_sq_destroy(&queue->nvme_sq);
nvmet_fc_tgt_q_put(queue);
}
@@ -778,6 +987,102 @@
}
+/*
+ * kref release callback for a hostport: unlink it from the targetport's
+ * host_list, call the LLDD's host_release() callback when one is provided
+ * and the hostport has been marked invalid, then free the hostport and
+ * drop the targetport reference.
+ */
static void
+nvmet_fc_hostport_free(struct kref *ref)
+{
+ struct nvmet_fc_hostport *hostport =
+ container_of(ref, struct nvmet_fc_hostport, ref);
+ struct nvmet_fc_tgtport *tgtport = hostport->tgtport;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_del(&hostport->host_list);
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ /* the LLDD is only notified for handles it has invalidated */
+ if (tgtport->ops->host_release && hostport->invalid)
+ tgtport->ops->host_release(hostport->hosthandle);
+ kfree(hostport);
+ nvmet_fc_tgtport_put(tgtport);
+}
+
+/*
+ * Drop a reference on hostport; nvmet_fc_hostport_free() is the release
+ * function passed to kref_put().
+ */
+static void
+nvmet_fc_hostport_put(struct nvmet_fc_hostport *hostport)
+{
+ kref_put(&hostport->ref, nvmet_fc_hostport_free);
+}
+
+/*
+ * Try to take a reference on hostport. Returns the result of
+ * kref_get_unless_zero(): non-zero if the reference was taken, 0 if the
+ * refcount had already dropped to zero.
+ */
+static int
+nvmet_fc_hostport_get(struct nvmet_fc_hostport *hostport)
+{
+ return kref_get_unless_zero(&hostport->ref);
+}
+
+/*
+ * Release the reference an association holds on its hostport.
+ */
+static void
+nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport)
+{
+	/*
+	 * No hostport, or one without a hosthandle (LLDD does not
+	 * implement host handles): there is nothing to release.
+	 */
+	if (hostport && hostport->hosthandle)
+		nvmet_fc_hostport_put(hostport);
+}
+
+/*
+ * Find a valid (not invalidated) hostport for @hosthandle on @tgtport and
+ * take a reference on it. Entries whose refcount already hit zero are
+ * passed over. Caller must hold tgtport->lock.
+ * Returns the referenced hostport, or NULL if none matched.
+ */
+static struct nvmet_fc_hostport *
+nvmet_fc_match_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
+{
+	struct nvmet_fc_hostport *cur;
+
+	list_for_each_entry(cur, &tgtport->host_list, host_list) {
+		if (cur->hosthandle != hosthandle || cur->invalid)
+			continue;
+		if (nvmet_fc_hostport_get(cur))
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return a referenced hostport for @hosthandle, reusing an existing valid
+ * entry on the targetport when there is one and adding a new entry
+ * otherwise.
+ *
+ * Returns NULL when @hosthandle is NULL, ERR_PTR(-EINVAL) when no
+ * targetport reference could be taken, and ERR_PTR(-ENOMEM) when the
+ * allocation failed and no existing entry matched.
+ */
+static struct nvmet_fc_hostport *
+nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
+{
+	struct nvmet_fc_hostport *fresh, *existing;
+	unsigned long flags;
+
+	/* if LLDD not implemented, leave as NULL */
+	if (!hosthandle)
+		return NULL;
+
+	/* this targetport reference belongs to a newly created hostport */
+	if (!nvmet_fc_tgtport_get(tgtport))
+		return ERR_PTR(-EINVAL);
+
+	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
+	if (!fresh) {
+		/* no memory: an existing entry can still satisfy the request */
+		spin_lock_irqsave(&tgtport->lock, flags);
+		existing = nvmet_fc_match_hostport(tgtport, hosthandle);
+		spin_unlock_irqrestore(&tgtport->lock, flags);
+		/* nothing was created - give the reference back */
+		nvmet_fc_tgtport_put(tgtport);
+		if (!existing)
+			return ERR_PTR(-ENOMEM);
+		return existing;
+	}
+
+	fresh->tgtport = tgtport;
+	fresh->hosthandle = hosthandle;
+	INIT_LIST_HEAD(&fresh->host_list);
+	kref_init(&fresh->ref);
+
+	spin_lock_irqsave(&tgtport->lock, flags);
+	existing = nvmet_fc_match_hostport(tgtport, hosthandle);
+	if (!existing) {
+		list_add_tail(&fresh->host_list, &tgtport->host_list);
+		spin_unlock_irqrestore(&tgtport->lock, flags);
+		return fresh;
+	}
+
+	/* an entry already exists: discard the new one and its reference */
+	kfree(fresh);
+	nvmet_fc_tgtport_put(tgtport);
+	spin_unlock_irqrestore(&tgtport->lock, flags);
+
+	return existing;
+}
+
+static void
nvmet_fc_delete_assoc(struct work_struct *work)
{
struct nvmet_fc_tgt_assoc *assoc =
@@ -788,7 +1093,7 @@
}
static struct nvmet_fc_tgt_assoc *
-nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
+nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
{
struct nvmet_fc_tgt_assoc *assoc, *tmpassoc;
unsigned long flags;
@@ -805,13 +1110,18 @@
goto out_free_assoc;
if (!nvmet_fc_tgtport_get(tgtport))
- goto out_ida_put;
+ goto out_ida;
+
+ assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle);
+ if (IS_ERR(assoc->hostport))
+ goto out_put;
assoc->tgtport = tgtport;
assoc->a_id = idx;
INIT_LIST_HEAD(&assoc->a_list);
kref_init(&assoc->ref);
INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc);
+ atomic_set(&assoc->terminating, 0);
while (needrandom) {
get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID);
@@ -819,11 +1129,12 @@
spin_lock_irqsave(&tgtport->lock, flags);
needrandom = false;
- list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list)
+ list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) {
if (ran == tmpassoc->association_id) {
needrandom = true;
break;
}
+ }
if (!needrandom) {
assoc->association_id = ran;
list_add_tail(&assoc->a_list, &tgtport->assoc_list);
@@ -833,7 +1144,9 @@
return assoc;
-out_ida_put:
+out_put:
+ nvmet_fc_tgtport_put(tgtport);
+out_ida:
ida_simple_remove(&tgtport->assoc_cnt, idx);
out_free_assoc:
kfree(assoc);
@@ -846,12 +1159,24 @@
struct nvmet_fc_tgt_assoc *assoc =
container_of(ref, struct nvmet_fc_tgt_assoc, ref);
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
+ struct nvmet_fc_ls_iod *oldls;
unsigned long flags;
+ /* Send Disconnect now that all i/o has completed */
+ nvmet_fc_xmt_disconnect_assoc(assoc);
+
+ nvmet_fc_free_hostport(assoc->hostport);
spin_lock_irqsave(&tgtport->lock, flags);
list_del(&assoc->a_list);
+ oldls = assoc->rcv_disconn;
spin_unlock_irqrestore(&tgtport->lock, flags);
+ /* if pending Rcv Disconnect Association LS, send rsp now */
+ if (oldls)
+ nvmet_fc_xmt_ls_rsp(tgtport, oldls);
ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id);
+ dev_info(tgtport->dev,
+ "{%d:%d} Association freed\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
kfree(assoc);
nvmet_fc_tgtport_put(tgtport);
}
@@ -874,7 +1199,13 @@
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
struct nvmet_fc_tgt_queue *queue;
unsigned long flags;
- int i;
+ int i, terminating;
+
+ terminating = atomic_xchg(&assoc->terminating, 1);
+
+ /* if already terminating, do nothing */
+ if (terminating)
+ return;
spin_lock_irqsave(&tgtport->lock, flags);
for (i = NVMET_NR_QUEUES; i >= 0; i--) {
@@ -890,6 +1221,10 @@
}
spin_unlock_irqrestore(&tgtport->lock, flags);
+ dev_info(tgtport->dev,
+ "{%d:%d} Association deleted\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
+
nvmet_fc_tgt_a_put(assoc);
}
@@ -905,7 +1240,8 @@
list_for_each_entry(assoc, &tgtport->assoc_list, a_list) {
if (association_id == assoc->association_id) {
ret = assoc;
- nvmet_fc_tgt_a_get(assoc);
+ if (!nvmet_fc_tgt_a_get(assoc))
+ ret = NULL;
break;
}
}
@@ -1048,16 +1384,21 @@
newrec->fc_target_port.node_name = pinfo->node_name;
newrec->fc_target_port.port_name = pinfo->port_name;
- newrec->fc_target_port.private = &newrec[1];
+ if (template->target_priv_sz)
+ newrec->fc_target_port.private = &newrec[1];
+ else
+ newrec->fc_target_port.private = NULL;
newrec->fc_target_port.port_id = pinfo->port_id;
newrec->fc_target_port.port_num = idx;
INIT_LIST_HEAD(&newrec->tgt_list);
newrec->dev = dev;
newrec->ops = template;
spin_lock_init(&newrec->lock);
- INIT_LIST_HEAD(&newrec->ls_list);
+ INIT_LIST_HEAD(&newrec->ls_rcv_list);
+ INIT_LIST_HEAD(&newrec->ls_req_list);
INIT_LIST_HEAD(&newrec->ls_busylist);
INIT_LIST_HEAD(&newrec->assoc_list);
+ INIT_LIST_HEAD(&newrec->host_list);
kref_init(&newrec->ref);
ida_init(&newrec->assoc_cnt);
newrec->max_sg_cnt = template->max_sgl_segments;
@@ -1141,11 +1482,72 @@
if (!nvmet_fc_tgt_a_get(assoc))
continue;
if (!schedule_work(&assoc->del_work))
+ /* already deleting - release local reference */
nvmet_fc_tgt_a_put(assoc);
}
spin_unlock_irqrestore(&tgtport->lock, flags);
}
+/**
+ * nvmet_fc_invalidate_host - transport entry point called by an LLDD
+ * to remove references to a hosthandle for LS's.
+ *
+ * The nvmet-fc layer ensures that any references to the hosthandle
+ * on the targetport are forgotten (set to NULL). The LLDD will
+ * typically call this when a login with a remote host port has been
+ * lost, thus LS's for the remote host port are no longer possible.
+ *
+ * If an LS request is outstanding to the targetport/hosthandle (or
+ * issued concurrently with the call to invalidate the host), the
+ * LLDD is responsible for terminating/aborting the LS and completing
+ * the LS request. It is recommended that these terminations/aborts
+ * occur after calling to invalidate the host handle to avoid additional
+ * retries by the nvmet-fc transport. The nvmet-fc transport may
+ * continue to reference host handle while it cleans up outstanding
+ * NVME associations. The nvmet-fc transport will call the
+ * ops->host_release() callback to notify the LLDD that all references
+ * are complete and the related host handle can be recovered.
+ * Note: if there are no references, the callback may be called before
+ * the invalidate host call returns.
+ *
+ * @target_port: pointer to the (registered) target port that a prior
+ * LS was received on and which supplied the transport the
+ * hosthandle.
+ * @hosthandle: the handle (pointer) that represents the host port
+ * that no longer has connectivity and that LS's should
+ * no longer be directed to.
+ */
+void
+nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port,
+ void *hosthandle)
+{
+ struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port);
+ struct nvmet_fc_tgt_assoc *assoc, *next;
+ unsigned long flags;
+ bool noassoc = true;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_for_each_entry_safe(assoc, next,
+ &tgtport->assoc_list, a_list) {
+ /* only associations tied to this hosthandle are affected */
+ if (!assoc->hostport ||
+ assoc->hostport->hosthandle != hosthandle)
+ continue;
+ /* skip associations whose reference can no longer be taken */
+ if (!nvmet_fc_tgt_a_get(assoc))
+ continue;
+ /*
+ * Mark the hostport invalid: the Disconnect Association LS is
+ * then not sent, and host_release() is called once the hostport
+ * is freed.
+ */
+ assoc->hostport->invalid = 1;
+ noassoc = false;
+ if (!schedule_work(&assoc->del_work))
+ /* already deleting - release local reference */
+ nvmet_fc_tgt_a_put(assoc);
+ }
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ /* if there's nothing to wait for - call the callback */
+ if (noassoc && tgtport->ops->host_release)
+ tgtport->ops->host_release(hosthandle);
+}
+EXPORT_SYMBOL_GPL(nvmet_fc_invalidate_host);
+
/*
* nvmet layer has called to terminate an association
*/
@@ -1181,6 +1583,7 @@
if (found_ctrl) {
if (!schedule_work(&assoc->del_work))
+ /* already deleting - release local reference */
nvmet_fc_tgt_a_put(assoc);
return;
}
@@ -1211,6 +1614,13 @@
/* terminate any outstanding associations */
__nvmet_fc_free_assocs(tgtport);
+ /*
+ * should terminate LS's as well. However, LS's will be generated
+ * at the tail end of association termination, so they likely don't
+ * exist yet. And even if they did, it's worthwhile to just let
+ * them finish and targetport ref counting will clean things up.
+ */
+
nvmet_fc_tgtport_put(tgtport);
return 0;
@@ -1218,113 +1628,15 @@
EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport);
-/* *********************** FC-NVME LS Handling **************************** */
+/* ********************** FC-NVME LS RCV Handling ************************* */
static void
-nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd)
-{
- struct fcnvme_ls_acc_hdr *acc = buf;
-
- acc->w0.ls_cmd = ls_cmd;
- acc->desc_list_len = desc_len;
- acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST);
- acc->rqst.desc_len =
- fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst));
- acc->rqst.w0.ls_cmd = rqst_ls_cmd;
-}
-
-static int
-nvmet_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd,
- u8 reason, u8 explanation, u8 vendor)
-{
- struct fcnvme_ls_rjt *rjt = buf;
-
- nvmet_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST,
- fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)),
- ls_cmd);
- rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT);
- rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt));
- rjt->rjt.reason_code = reason;
- rjt->rjt.reason_explanation = explanation;
- rjt->rjt.vendor = vendor;
-
- return sizeof(struct fcnvme_ls_rjt);
-}
-
-/* Validation Error indexes into the string table below */
-enum {
- VERR_NO_ERROR = 0,
- VERR_CR_ASSOC_LEN = 1,
- VERR_CR_ASSOC_RQST_LEN = 2,
- VERR_CR_ASSOC_CMD = 3,
- VERR_CR_ASSOC_CMD_LEN = 4,
- VERR_ERSP_RATIO = 5,
- VERR_ASSOC_ALLOC_FAIL = 6,
- VERR_QUEUE_ALLOC_FAIL = 7,
- VERR_CR_CONN_LEN = 8,
- VERR_CR_CONN_RQST_LEN = 9,
- VERR_ASSOC_ID = 10,
- VERR_ASSOC_ID_LEN = 11,
- VERR_NO_ASSOC = 12,
- VERR_CONN_ID = 13,
- VERR_CONN_ID_LEN = 14,
- VERR_NO_CONN = 15,
- VERR_CR_CONN_CMD = 16,
- VERR_CR_CONN_CMD_LEN = 17,
- VERR_DISCONN_LEN = 18,
- VERR_DISCONN_RQST_LEN = 19,
- VERR_DISCONN_CMD = 20,
- VERR_DISCONN_CMD_LEN = 21,
- VERR_DISCONN_SCOPE = 22,
- VERR_RS_LEN = 23,
- VERR_RS_RQST_LEN = 24,
- VERR_RS_CMD = 25,
- VERR_RS_CMD_LEN = 26,
- VERR_RS_RCTL = 27,
- VERR_RS_RO = 28,
-};
-
-static char *validation_errors[] = {
- "OK",
- "Bad CR_ASSOC Length",
- "Bad CR_ASSOC Rqst Length",
- "Not CR_ASSOC Cmd",
- "Bad CR_ASSOC Cmd Length",
- "Bad Ersp Ratio",
- "Association Allocation Failed",
- "Queue Allocation Failed",
- "Bad CR_CONN Length",
- "Bad CR_CONN Rqst Length",
- "Not Association ID",
- "Bad Association ID Length",
- "No Association",
- "Not Connection ID",
- "Bad Connection ID Length",
- "No Connection",
- "Not CR_CONN Cmd",
- "Bad CR_CONN Cmd Length",
- "Bad DISCONN Length",
- "Bad DISCONN Rqst Length",
- "Not DISCONN Cmd",
- "Bad DISCONN Cmd Length",
- "Bad Disconnect Scope",
- "Bad RS Length",
- "Bad RS Rqst Length",
- "Not RS Cmd",
- "Bad RS Cmd Length",
- "Bad RS R_CTL",
- "Bad RS Relative Offset",
-};
-
-static void
nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_cr_assoc_rqst *rqst =
- (struct fcnvme_ls_cr_assoc_rqst *)iod->rqstbuf;
- struct fcnvme_ls_cr_assoc_acc *acc =
- (struct fcnvme_ls_cr_assoc_acc *)iod->rspbuf;
+ struct fcnvme_ls_cr_assoc_rqst *rqst = &iod->rqstbuf->rq_cr_assoc;
+ struct fcnvme_ls_cr_assoc_acc *acc = &iod->rspbuf->rsp_cr_assoc;
struct nvmet_fc_tgt_queue *queue;
int ret = 0;
@@ -1356,7 +1668,8 @@
else {
/* new association w/ admin queue */
- iod->assoc = nvmet_fc_alloc_target_assoc(tgtport);
+ iod->assoc = nvmet_fc_alloc_target_assoc(
+ tgtport, iod->hosthandle);
if (!iod->assoc)
ret = VERR_ASSOC_ALLOC_FAIL;
else {
@@ -1371,8 +1684,8 @@
dev_err(tgtport->dev,
"Create Association LS failed: %s\n",
validation_errors[ret]);
- iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
- NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
FCNVME_RJT_RC_LOGIC,
FCNVME_RJT_EXP_NONE, 0);
return;
@@ -1382,11 +1695,15 @@
atomic_set(&queue->connected, 1);
queue->sqhd = 0; /* best place to init value */
+ dev_info(tgtport->dev,
+ "{%d:%d} Association created\n",
+ tgtport->fc_target_port.port_num, iod->assoc->a_id);
+
/* format a response */
- iod->lsreq->rsplen = sizeof(*acc);
+ iod->lsrsp->rsplen = sizeof(*acc);
- nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
sizeof(struct fcnvme_ls_cr_assoc_acc)),
FCNVME_LS_CREATE_ASSOCIATION);
@@ -1407,10 +1724,8 @@
nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_cr_conn_rqst *rqst =
- (struct fcnvme_ls_cr_conn_rqst *)iod->rqstbuf;
- struct fcnvme_ls_cr_conn_acc *acc =
- (struct fcnvme_ls_cr_conn_acc *)iod->rspbuf;
+ struct fcnvme_ls_cr_conn_rqst *rqst = &iod->rqstbuf->rq_cr_conn;
+ struct fcnvme_ls_cr_conn_acc *acc = &iod->rspbuf->rsp_cr_conn;
struct nvmet_fc_tgt_queue *queue;
int ret = 0;
@@ -1462,8 +1777,8 @@
dev_err(tgtport->dev,
"Create Connection LS failed: %s\n",
validation_errors[ret]);
- iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
- NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
(ret == VERR_NO_ASSOC) ?
FCNVME_RJT_RC_INV_ASSOC :
FCNVME_RJT_RC_LOGIC,
@@ -1477,9 +1792,9 @@
/* format a response */
- iod->lsreq->rsplen = sizeof(*acc);
+ iod->lsrsp->rsplen = sizeof(*acc);
- nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)),
FCNVME_LS_CREATE_CONNECTION);
acc->connectid.desc_tag = cpu_to_be32(FCNVME_LSDESC_CONN_ID);
@@ -1491,43 +1806,28 @@
be16_to_cpu(rqst->connect_cmd.qid)));
}
-static void
+/*
+ * Returns true if the LS response is to be transmitted
+ * Returns false if the LS response is to be delayed
+ */
+static int
nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_disconnect_rqst *rqst =
- (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf;
- struct fcnvme_ls_disconnect_acc *acc =
- (struct fcnvme_ls_disconnect_acc *)iod->rspbuf;
- struct nvmet_fc_tgt_assoc *assoc;
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
+ &iod->rqstbuf->rq_dis_assoc;
+ struct fcnvme_ls_disconnect_assoc_acc *acc =
+ &iod->rspbuf->rsp_dis_assoc;
+ struct nvmet_fc_tgt_assoc *assoc = NULL;
+ struct nvmet_fc_ls_iod *oldls = NULL;
+ unsigned long flags;
int ret = 0;
memset(acc, 0, sizeof(*acc));
- if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_rqst))
- ret = VERR_DISCONN_LEN;
- else if (rqst->desc_list_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_ls_disconnect_rqst)))
- ret = VERR_DISCONN_RQST_LEN;
- else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
- ret = VERR_ASSOC_ID;
- else if (rqst->associd.desc_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_assoc_id)))
- ret = VERR_ASSOC_ID_LEN;
- else if (rqst->discon_cmd.desc_tag !=
- cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD))
- ret = VERR_DISCONN_CMD;
- else if (rqst->discon_cmd.desc_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_disconn_cmd)))
- ret = VERR_DISCONN_CMD_LEN;
- else if ((rqst->discon_cmd.scope != FCNVME_DISCONN_ASSOCIATION) &&
- (rqst->discon_cmd.scope != FCNVME_DISCONN_CONNECTION))
- ret = VERR_DISCONN_SCOPE;
- else {
- /* match an active association */
+ ret = nvmefc_vldt_lsreq_discon_assoc(iod->rqstdatalen, rqst);
+ if (!ret) {
+ /* match an active association - takes an assoc ref if !NULL */
assoc = nvmet_fc_find_target_assoc(tgtport,
be64_to_cpu(rqst->associd.association_id));
iod->assoc = assoc;
@@ -1535,34 +1835,63 @@
ret = VERR_NO_ASSOC;
}
- if (ret) {
+ if (ret || !assoc) {
dev_err(tgtport->dev,
"Disconnect LS failed: %s\n",
validation_errors[ret]);
- iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
- NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
(ret == VERR_NO_ASSOC) ?
FCNVME_RJT_RC_INV_ASSOC :
- (ret == VERR_NO_CONN) ?
- FCNVME_RJT_RC_INV_CONN :
- FCNVME_RJT_RC_LOGIC,
+ FCNVME_RJT_RC_LOGIC,
FCNVME_RJT_EXP_NONE, 0);
- return;
+ return true;
}
/* format a response */
- iod->lsreq->rsplen = sizeof(*acc);
+ iod->lsrsp->rsplen = sizeof(*acc);
- nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
- sizeof(struct fcnvme_ls_disconnect_acc)),
- FCNVME_LS_DISCONNECT);
+ sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
+ FCNVME_LS_DISCONNECT_ASSOC);
/* release get taken in nvmet_fc_find_target_assoc */
- nvmet_fc_tgt_a_put(iod->assoc);
+ nvmet_fc_tgt_a_put(assoc);
- nvmet_fc_delete_target_assoc(iod->assoc);
+ /*
+ * The rules for LS response says the response cannot
+ * go back until ABTS's have been sent for all outstanding
+ * I/O and a Disconnect Association LS has been sent.
+ * So... save off the Disconnect LS to send the response
+ * later. If there was a prior LS already saved, replace
+ * it with the newer one and send a can't perform reject
+ * on the older one.
+ */
+ spin_lock_irqsave(&tgtport->lock, flags);
+ oldls = assoc->rcv_disconn;
+ assoc->rcv_disconn = iod;
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ nvmet_fc_delete_target_assoc(assoc);
+
+ if (oldls) {
+ dev_info(tgtport->dev,
+ "{%d:%d} Multiple Disconnect Association LS's "
+ "received\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
+ /* overwrite good response with bogus failure */
+ oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf,
+ sizeof(*iod->rspbuf),
+ /* ok to use rqst, LS is same */
+ rqst->w0.ls_cmd,
+ FCNVME_RJT_RC_UNAB,
+ FCNVME_RJT_EXP_NONE, 0);
+ nvmet_fc_xmt_ls_rsp(tgtport, oldls);
+ }
+
+ return false;
}
@@ -1574,13 +1903,13 @@
static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops;
static void
-nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq)
+nvmet_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
{
- struct nvmet_fc_ls_iod *iod = lsreq->nvmet_fc_private;
+ struct nvmet_fc_ls_iod *iod = lsrsp->nvme_fc_private;
struct nvmet_fc_tgtport *tgtport = iod->tgtport;
fc_dma_sync_single_for_cpu(tgtport->dev, iod->rspdma,
- NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE);
+ sizeof(*iod->rspbuf), DMA_TO_DEVICE);
nvmet_fc_free_ls_iod(tgtport, iod);
nvmet_fc_tgtport_put(tgtport);
}
@@ -1592,11 +1921,11 @@
int ret;
fc_dma_sync_single_for_device(tgtport->dev, iod->rspdma,
- NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE);
+ sizeof(*iod->rspbuf), DMA_TO_DEVICE);
- ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsreq);
+ ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsrsp);
if (ret)
- nvmet_fc_xmt_ls_rsp_done(iod->lsreq);
+ nvmet_fc_xmt_ls_rsp_done(iod->lsrsp);
}
/*
@@ -1606,15 +1935,15 @@
nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_rqst_w0 *w0 =
- (struct fcnvme_ls_rqst_w0 *)iod->rqstbuf;
+ struct fcnvme_ls_rqst_w0 *w0 = &iod->rqstbuf->rq_cr_assoc.w0;
+ bool sendrsp = true;
- iod->lsreq->nvmet_fc_private = iod;
- iod->lsreq->rspbuf = iod->rspbuf;
- iod->lsreq->rspdma = iod->rspdma;
- iod->lsreq->done = nvmet_fc_xmt_ls_rsp_done;
+ iod->lsrsp->nvme_fc_private = iod;
+ iod->lsrsp->rspbuf = iod->rspbuf;
+ iod->lsrsp->rspdma = iod->rspdma;
+ iod->lsrsp->done = nvmet_fc_xmt_ls_rsp_done;
/* Be preventative. handlers will later set to valid length */
- iod->lsreq->rsplen = 0;
+ iod->lsrsp->rsplen = 0;
iod->assoc = NULL;
@@ -1632,17 +1961,18 @@
/* Creates an IO Queue/Connection */
nvmet_fc_ls_create_connection(tgtport, iod);
break;
- case FCNVME_LS_DISCONNECT:
+ case FCNVME_LS_DISCONNECT_ASSOC:
/* Terminate a Queue/Connection or the Association */
- nvmet_fc_ls_disconnect(tgtport, iod);
+ sendrsp = nvmet_fc_ls_disconnect(tgtport, iod);
break;
default:
- iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf,
- NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf,
+ sizeof(*iod->rspbuf), w0->ls_cmd,
FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
}
- nvmet_fc_xmt_ls_rsp(tgtport, iod);
+ if (sendrsp)
+ nvmet_fc_xmt_ls_rsp(tgtport, iod);
}
/*
@@ -1671,35 +2001,53 @@
*
* @target_port: pointer to the (registered) target port the LS was
* received on.
- * @lsreq: pointer to a lsreq request structure to be used to reference
+ * @lsrsp: pointer to a lsrsp structure to be used to reference
* the exchange corresponding to the LS.
* @lsreqbuf: pointer to the buffer containing the LS Request
* @lsreqbuf_len: length, in bytes, of the received LS request
*/
int
nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port,
- struct nvmefc_tgt_ls_req *lsreq,
+ void *hosthandle,
+ struct nvmefc_ls_rsp *lsrsp,
void *lsreqbuf, u32 lsreqbuf_len)
{
struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port);
struct nvmet_fc_ls_iod *iod;
+ struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf;
- if (lsreqbuf_len > NVME_FC_MAX_LS_BUFFER_SIZE)
+ if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) {
+ dev_info(tgtport->dev,
+ "RCV %s LS failed: payload too large (%d)\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "",
+ lsreqbuf_len);
return -E2BIG;
+ }
- if (!nvmet_fc_tgtport_get(tgtport))
+ if (!nvmet_fc_tgtport_get(tgtport)) {
+ dev_info(tgtport->dev,
+ "RCV %s LS failed: target deleting\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
return -ESHUTDOWN;
+ }
iod = nvmet_fc_alloc_ls_iod(tgtport);
if (!iod) {
+ dev_info(tgtport->dev,
+ "RCV %s LS failed: context allocation failed\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
nvmet_fc_tgtport_put(tgtport);
return -ENOENT;
}
- iod->lsreq = lsreq;
+ iod->lsrsp = lsrsp;
iod->fcpreq = NULL;
memcpy(iod->rqstbuf, lsreqbuf, lsreqbuf_len);
iod->rqstdatalen = lsreqbuf_len;
+ iod->hosthandle = hosthandle;
schedule_work(&iod->work);
@@ -2015,7 +2363,7 @@
}
/* data transfer complete, resume with nvmet layer */
- nvmet_req_execute(&fod->req);
+ fod->req.execute(&fod->req);
break;
case NVMET_FCOP_READDATA:
@@ -2225,7 +2573,7 @@
* can invoke the nvmet_layer now. If read data, cmd completion will
* push the data
*/
- nvmet_req_execute(&fod->req);
+ fod->req.execute(&fod->req);
return;
transport_error:
@@ -2293,7 +2641,7 @@
/* validate iu, so the connection id can be used to find the queue */
if ((cmdiubuf_len != sizeof(*cmdiu)) ||
- (cmdiu->scsi_id != NVME_CMD_SCSI_ID) ||
+ (cmdiu->format_id != NVME_CMD_FORMAT_ID) ||
(cmdiu->fc_id != NVME_CMD_FC_ID) ||
(be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4)))
return -EIO;
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index b50b53d..3da067a 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -43,6 +43,17 @@
{ NVMF_OPT_ERR, NULL }
};
+/*
+ * Sanity-check the text of a port-name option before it is parsed as a
+ * u64: it must be exactly NVME_FC_TRADDR_HEXNAMELEN + 2 characters long
+ * and begin with "0x".
+ *
+ * Returns 0 when the text has that shape, -EINVAL otherwise.
+ */
+static int fcloop_verify_addr(substring_t *s)
+{
+	/* bound the length scan to the matched substring plus one byte */
+	size_t span = s->to - s->from + 1;
+
+	if (strnlen(s->from, span) != NVME_FC_TRADDR_HEXNAMELEN + 2)
+		return -EINVAL;
+
+	if (strncmp(s->from, "0x", 2) != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
static int
fcloop_parse_options(struct fcloop_ctrl_options *opts,
const char *buf)
@@ -64,14 +75,16 @@
opts->mask |= token;
switch (token) {
case NVMF_OPT_WWNN:
- if (match_u64(args, &token64)) {
+ if (fcloop_verify_addr(args) ||
+ match_u64(args, &token64)) {
ret = -EINVAL;
goto out_free_options;
}
opts->wwnn = token64;
break;
case NVMF_OPT_WWPN:
- if (match_u64(args, &token64)) {
+ if (fcloop_verify_addr(args) ||
+ match_u64(args, &token64)) {
ret = -EINVAL;
goto out_free_options;
}
@@ -92,14 +105,16 @@
opts->fcaddr = token;
break;
case NVMF_OPT_LPWWNN:
- if (match_u64(args, &token64)) {
+ if (fcloop_verify_addr(args) ||
+ match_u64(args, &token64)) {
ret = -EINVAL;
goto out_free_options;
}
opts->lpwwnn = token64;
break;
case NVMF_OPT_LPWWPN:
- if (match_u64(args, &token64)) {
+ if (fcloop_verify_addr(args) ||
+ match_u64(args, &token64)) {
ret = -EINVAL;
goto out_free_options;
}
@@ -141,14 +156,16 @@
token = match_token(p, opt_tokens, args);
switch (token) {
case NVMF_OPT_WWNN:
- if (match_u64(args, &token64)) {
+ if (fcloop_verify_addr(args) ||
+ match_u64(args, &token64)) {
ret = -EINVAL;
goto out_free_options;
}
*nname = token64;
break;
case NVMF_OPT_WWPN:
- if (match_u64(args, &token64)) {
+ if (fcloop_verify_addr(args) ||
+ match_u64(args, &token64)) {
ret = -EINVAL;
goto out_free_options;
}
@@ -198,17 +215,23 @@
};
struct fcloop_rport {
- struct nvme_fc_remote_port *remoteport;
- struct nvmet_fc_target_port *targetport;
- struct fcloop_nport *nport;
- struct fcloop_lport *lport;
+ struct nvme_fc_remote_port *remoteport;
+ struct nvmet_fc_target_port *targetport;
+ struct fcloop_nport *nport;
+ struct fcloop_lport *lport;
+ spinlock_t lock;
+ struct list_head ls_list;
+ struct work_struct ls_work;
};
struct fcloop_tport {
- struct nvmet_fc_target_port *targetport;
- struct nvme_fc_remote_port *remoteport;
- struct fcloop_nport *nport;
- struct fcloop_lport *lport;
+ struct nvmet_fc_target_port *targetport;
+ struct nvme_fc_remote_port *remoteport;
+ struct fcloop_nport *nport;
+ struct fcloop_lport *lport;
+ spinlock_t lock;
+ struct list_head ls_list;
+ struct work_struct ls_work;
};
struct fcloop_nport {
@@ -224,11 +247,11 @@
};
struct fcloop_lsreq {
- struct fcloop_tport *tport;
struct nvmefc_ls_req *lsreq;
- struct work_struct work;
- struct nvmefc_tgt_ls_req tgt_ls_req;
+ struct nvmefc_ls_rsp ls_rsp;
+ int lsdir; /* H2T or T2H */
int status;
+ struct list_head ls_list; /* fcloop_rport->ls_list */
};
struct fcloop_rscn {
@@ -265,9 +288,9 @@
};
static inline struct fcloop_lsreq *
-tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq)
+ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp)
{
- return container_of(tgt_lsreq, struct fcloop_lsreq, tgt_ls_req);
+ return container_of(lsrsp, struct fcloop_lsreq, ls_rsp);
}
static inline struct fcloop_fcpreq *
@@ -292,25 +315,36 @@
{
}
-
-/*
- * Transmit of LS RSP done (e.g. buffers all set). call back up
- * initiator "done" flows.
- */
static void
-fcloop_tgt_lsrqst_done_work(struct work_struct *work)
+fcloop_rport_lsrqst_work(struct work_struct *work)
{
- struct fcloop_lsreq *tls_req =
- container_of(work, struct fcloop_lsreq, work);
- struct fcloop_tport *tport = tls_req->tport;
- struct nvmefc_ls_req *lsreq = tls_req->lsreq;
+ struct fcloop_rport *rport =
+ container_of(work, struct fcloop_rport, ls_work);
+ struct fcloop_lsreq *tls_req;
- if (!tport || tport->remoteport)
- lsreq->done(lsreq, tls_req->status);
+ spin_lock(&rport->lock);
+ for (;;) {
+ tls_req = list_first_entry_or_null(&rport->ls_list,
+ struct fcloop_lsreq, ls_list);
+ if (!tls_req)
+ break;
+
+ list_del(&tls_req->ls_list);
+ spin_unlock(&rport->lock);
+
+ tls_req->lsreq->done(tls_req->lsreq, tls_req->status);
+ /*
+ * callee may free memory containing tls_req.
+ * do not reference lsreq after this.
+ */
+
+ spin_lock(&rport->lock);
+ }
+ spin_unlock(&rport->lock);
}
static int
-fcloop_ls_req(struct nvme_fc_local_port *localport,
+fcloop_h2t_ls_req(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_req *lsreq)
{
@@ -319,40 +353,145 @@
int ret = 0;
tls_req->lsreq = lsreq;
- INIT_WORK(&tls_req->work, fcloop_tgt_lsrqst_done_work);
+ INIT_LIST_HEAD(&tls_req->ls_list);
if (!rport->targetport) {
tls_req->status = -ECONNREFUSED;
- tls_req->tport = NULL;
- schedule_work(&tls_req->work);
+ spin_lock(&rport->lock);
+ list_add_tail(&tls_req->ls_list, &rport->ls_list); /* (new entry, list head) */
+ spin_unlock(&rport->lock);
+ schedule_work(&rport->ls_work); /* worker completes the request with -ECONNREFUSED */
return ret;
}
tls_req->status = 0;
- tls_req->tport = rport->targetport->private;
- ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req,
+ ret = nvmet_fc_rcv_ls_req(rport->targetport, rport,
+ &tls_req->ls_rsp,
+ lsreq->rqstaddr, lsreq->rqstlen);
+
+ return ret;
+}
+
+static int
+fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
+ struct nvmefc_ls_rsp *lsrsp)
+{
+ struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp);
+ struct nvmefc_ls_req *lsreq = tls_req->lsreq;
+ struct fcloop_tport *tport = targetport->private;
+ struct nvme_fc_remote_port *remoteport = tport->remoteport;
+ struct fcloop_rport *rport;
+
+ memcpy(lsreq->rspaddr, lsrsp->rspbuf, /* copy the shorter of the two lengths */
+ ((lsreq->rsplen < lsrsp->rsplen) ?
+ lsreq->rsplen : lsrsp->rsplen));
+
+ lsrsp->done(lsrsp);
+
+ if (remoteport) { /* without a remoteport the request is not queued for completion */
+ rport = remoteport->private;
+ spin_lock(&rport->lock);
+ list_add_tail(&tls_req->ls_list, &rport->ls_list); /* (new entry, list head) */
+ spin_unlock(&rport->lock);
+ schedule_work(&rport->ls_work);
+ }
+
+ return 0;
+}
+
+static void
+fcloop_tport_lsrqst_work(struct work_struct *work)
+{
+ struct fcloop_tport *tport =
+ container_of(work, struct fcloop_tport, ls_work);
+ struct fcloop_lsreq *tls_req;
+
+ spin_lock(&tport->lock);
+ for (;;) { /* drain every LS request queued on tport->ls_list */
+ tls_req = list_first_entry_or_null(&tport->ls_list,
+ struct fcloop_lsreq, ls_list);
+ if (!tls_req)
+ break;
+
+ list_del(&tls_req->ls_list);
+ spin_unlock(&tport->lock); /* done() is invoked without the lock held */
+
+ tls_req->lsreq->done(tls_req->lsreq, tls_req->status);
+ /*
+ * The done() callee may free the memory containing tls_req;
+ * do not dereference tls_req (or its lsreq) after this call.
+ */
+
+ spin_lock(&tport->lock);
+ }
+ spin_unlock(&tport->lock);
+}
+
+static int
+fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
+ struct nvmefc_ls_req *lsreq)
+{
+ struct fcloop_lsreq *tls_req = lsreq->private;
+ struct fcloop_tport *tport = targetport->private;
+ int ret = 0;
+
+ /*
+ * hosthandle should be the dst.rport value.
+ * hosthandle ignored as fcloop currently is
+ * 1:1 tgtport vs remoteport
+ */
+ tls_req->lsreq = lsreq;
+ INIT_LIST_HEAD(&tls_req->ls_list);
+
+ if (!tport->remoteport) {
+ tls_req->status = -ECONNREFUSED;
+ spin_lock(&tport->lock);
+ list_add_tail(&tls_req->ls_list, &tport->ls_list); /* (new entry, list head) */
+ spin_unlock(&tport->lock);
+ schedule_work(&tport->ls_work); /* worker completes the request with -ECONNREFUSED */
+ return ret;
+ }
+
+ tls_req->status = 0;
+ ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp,
lsreq->rqstaddr, lsreq->rqstlen);
return ret;
}
static int
-fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport,
- struct nvmefc_tgt_ls_req *tgt_lsreq)
+fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
+ struct nvme_fc_remote_port *remoteport,
+ struct nvmefc_ls_rsp *lsrsp)
{
- struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq);
+ struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp);
struct nvmefc_ls_req *lsreq = tls_req->lsreq;
+ struct fcloop_rport *rport = remoteport->private;
+ struct nvmet_fc_target_port *targetport = rport->targetport;
+ struct fcloop_tport *tport;
- memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf,
- ((lsreq->rsplen < tgt_lsreq->rsplen) ?
- lsreq->rsplen : tgt_lsreq->rsplen));
- tgt_lsreq->done(tgt_lsreq);
+ memcpy(lsreq->rspaddr, lsrsp->rspbuf, /* copy the shorter of the two lengths */
+ ((lsreq->rsplen < lsrsp->rsplen) ?
+ lsreq->rsplen : lsrsp->rsplen));
+ lsrsp->done(lsrsp);
- schedule_work(&tls_req->work);
+ if (targetport) { /* without a targetport the request is not queued for completion */
+ tport = targetport->private;
+ spin_lock(&tport->lock);
+ list_add_tail(&tls_req->ls_list, &tport->ls_list); /* (new entry, list head) */
+ spin_unlock(&tport->lock);
+ schedule_work(&tport->ls_work);
+ }
return 0;
}
+static void
+fcloop_t2h_host_release(void *hosthandle)
+{
+ /* host handle ignored for now */
+}
+
/*
* Simulate reception of RSCN and converting it to a initiator transport
* call to rescan a remote port.
@@ -673,7 +812,7 @@
break;
/* Fall-Thru to RSP handling */
- /* FALLTHRU */
+ fallthrough;
case NVMET_FCOP_RSP:
if (fcpreq) {
@@ -738,13 +877,19 @@
}
static void
-fcloop_ls_abort(struct nvme_fc_local_port *localport,
+fcloop_h2t_ls_abort(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_req *lsreq)
{
}
static void
+fcloop_t2h_ls_abort(struct nvmet_fc_target_port *targetport,
+ void *hosthandle, struct nvmefc_ls_req *lsreq)
+{
+}
+
+static void
fcloop_fcp_abort(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
void *hw_queue_handle,
@@ -834,6 +979,7 @@
{
struct fcloop_rport *rport = remoteport->private;
+ flush_work(&rport->ls_work);
fcloop_nport_put(rport->nport);
}
@@ -842,6 +988,7 @@
{
struct fcloop_tport *tport = targetport->private;
+ flush_work(&tport->ls_work);
fcloop_nport_put(tport->nport);
}
@@ -854,10 +1001,11 @@
.remoteport_delete = fcloop_remoteport_delete,
.create_queue = fcloop_create_queue,
.delete_queue = fcloop_delete_queue,
- .ls_req = fcloop_ls_req,
+ .ls_req = fcloop_h2t_ls_req,
.fcp_io = fcloop_fcp_req,
- .ls_abort = fcloop_ls_abort,
+ .ls_abort = fcloop_h2t_ls_abort,
.fcp_abort = fcloop_fcp_abort,
+ .xmt_ls_rsp = fcloop_t2h_xmt_ls_rsp,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
@@ -871,11 +1019,14 @@
static struct nvmet_fc_target_template tgttemplate = {
.targetport_delete = fcloop_targetport_delete,
- .xmt_ls_rsp = fcloop_xmt_ls_rsp,
+ .xmt_ls_rsp = fcloop_h2t_xmt_ls_rsp,
.fcp_op = fcloop_fcp_op,
.fcp_abort = fcloop_tgt_fcp_abort,
.fcp_req_release = fcloop_fcp_req_release,
.discovery_event = fcloop_tgt_discovery_evt,
+ .ls_req = fcloop_t2h_ls_req,
+ .ls_abort = fcloop_t2h_ls_abort,
+ .host_release = fcloop_t2h_host_release,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
@@ -884,6 +1035,7 @@
.target_features = 0,
/* sizes of additional private data for data structures */
.target_priv_sz = sizeof(struct fcloop_tport),
+ .lsrqst_priv_sz = sizeof(struct fcloop_lsreq),
};
static ssize_t
@@ -1135,6 +1287,9 @@
rport->nport = nport;
rport->lport = nport->lport;
nport->rport = rport;
+ spin_lock_init(&rport->lock);
+ INIT_WORK(&rport->ls_work, fcloop_rport_lsrqst_work);
+ INIT_LIST_HEAD(&rport->ls_list);
return count;
}
@@ -1230,6 +1385,9 @@
tport->nport = nport;
tport->lport = nport->lport;
nport->tport = tport;
+ spin_lock_init(&tport->lock);
+ INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work);
+ INIT_LIST_HEAD(&tport->ls_list);
return count;
}
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 32008d8..6a9626f 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -47,6 +47,22 @@
id->nows = to0based(ql->io_opt / ql->logical_block_size);
}
+static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
+{
+ struct blk_integrity *bi = bdev_get_integrity(ns->bdev);
+
+ if (bi) { /* without an integrity profile the ns fields are left untouched */
+ ns->metadata_size = bi->tuple_size; /* provisional; reset below for unsupported profiles */
+ if (bi->profile == &t10_pi_type1_crc)
+ ns->pi_type = NVME_NS_DPS_PI_TYPE1;
+ else if (bi->profile == &t10_pi_type3_crc)
+ ns->pi_type = NVME_NS_DPS_PI_TYPE3;
+ else
+ /* Unsupported metadata type: expose no metadata */
+ ns->metadata_size = 0;
+ }
+}
+
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
int ret;
@@ -64,6 +80,12 @@
}
ns->size = i_size_read(ns->bdev->bd_inode);
ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+
+ ns->pi_type = 0;
+ ns->metadata_size = 0;
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
+ nvmet_bdev_ns_enable_integrity(ns);
+
return 0;
}
@@ -75,6 +97,11 @@
}
}
+void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
+{
+ ns->size = i_size_read(ns->bdev->bd_inode);
+}
+
static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
u16 status = NVME_SC_SUCCESS;
@@ -112,7 +139,6 @@
req->error_loc = offsetof(struct nvme_rw_command, nsid);
break;
case BLK_STS_IOERR:
- /* fallthru */
default:
status = NVME_SC_INTERNAL | NVME_SC_DNR;
req->error_loc = offsetof(struct nvme_common_command, opcode);
@@ -142,13 +168,75 @@
bio_put(bio);
}
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
+ struct sg_mapping_iter *miter)
+{
+ struct blk_integrity *bi;
+ struct bio_integrity_payload *bip;
+ struct block_device *bdev = req->ns->bdev;
+ int rc;
+ size_t resid, len;
+
+ bi = bdev_get_integrity(bdev);
+ if (unlikely(!bi)) {
+ pr_err("Unable to locate bio_integrity\n");
+ return -ENODEV;
+ }
+
+ bip = bio_integrity_alloc(bio, GFP_NOIO,
+ min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES));
+ if (IS_ERR(bip)) {
+ pr_err("Unable to allocate bio_integrity_payload\n");
+ return PTR_ERR(bip);
+ }
+
+ bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
+ /* the seed (virtual start sector) must be in integrity interval units */
+ bip_set_seed(bip, bio->bi_iter.bi_sector >>
+ (bi->interval_exp - SECTOR_SHIFT));
+
+ resid = bip->bip_iter.bi_size; /* integrity bytes still to attach to this bio */
+ while (resid > 0 && sg_miter_next(miter)) {
+ len = min_t(size_t, miter->length, resid);
+ rc = bio_integrity_add_page(bio, miter->page, len,
+ offset_in_page(miter->addr));
+ if (unlikely(rc != len)) { /* anything but a full add is treated as failure */
+ pr_err("bio_integrity_add_page() failed; %d\n", rc);
+ sg_miter_stop(miter);
+ return -ENOMEM;
+ }
+
+ resid -= len;
+ if (len < miter->length) /* only part of this miter chunk was used */
+ miter->consumed -= miter->length - len; /* presumably leaves the tail for the next call - confirm against sg_miter docs */
+ }
+ sg_miter_stop(miter);
+
+ return 0;
+}
+#else
+static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
+ struct sg_mapping_iter *miter)
+{
+ return -EINVAL; /* stub used when CONFIG_BLK_DEV_INTEGRITY is not set */
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
struct bio *bio;
struct scatterlist *sg;
+ struct blk_plug plug;
sector_t sector;
- int op, op_flags = 0, i;
+ int op, i, rc;
+ struct sg_mapping_iter prot_miter;
+ unsigned int iter_flags;
+ unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;
+
+ if (!nvmet_check_transfer_len(req, total_len))
+ return;
if (!req->sg_cnt) {
nvmet_req_complete(req, 0);
@@ -156,21 +244,21 @@
}
if (req->cmd->rw.opcode == nvme_cmd_write) {
- op = REQ_OP_WRITE;
- op_flags = REQ_SYNC | REQ_IDLE;
+ op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
- op_flags |= REQ_FUA;
+ op |= REQ_FUA;
+ iter_flags = SG_MITER_TO_SG;
} else {
op = REQ_OP_READ;
+ iter_flags = SG_MITER_FROM_SG;
}
if (is_pci_p2pdma_page(sg_page(req->sg)))
- op_flags |= REQ_NOMERGE;
+ op |= REQ_NOMERGE;
- sector = le64_to_cpu(req->cmd->rw.slba);
- sector <<= (req->ns->blksize_shift - 9);
+ sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
- if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) {
+ if (nvmet_use_inline_bvec(req)) {
bio = &req->b.inline_bio;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
} else {
@@ -180,17 +268,31 @@
bio->bi_iter.bi_sector = sector;
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
- bio_set_op_attrs(bio, op, op_flags);
+ bio->bi_opf = op;
+
+ blk_start_plug(&plug);
+ if (req->metadata_len)
+ sg_miter_start(&prot_miter, req->metadata_sg,
+ req->metadata_sg_cnt, iter_flags);
for_each_sg(req->sg, sg, req->sg_cnt, i) {
while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
!= sg->length) {
struct bio *prev = bio;
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, bio,
+ &prot_miter);
+ if (unlikely(rc)) {
+ bio_io_error(bio);
+ return;
+ }
+ }
+
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
- bio_set_op_attrs(bio, op, op_flags);
+ bio->bi_opf = op;
bio_chain(bio, prev);
submit_bio(prev);
@@ -200,13 +302,25 @@
sg_cnt--;
}
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
+ if (unlikely(rc)) {
+ bio_io_error(bio);
+ return;
+ }
+ }
+
submit_bio(bio);
+ blk_finish_plug(&plug);
}
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
struct bio *bio = &req->b.inline_bio;
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
+
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
bio_set_dev(bio, req->ns->bdev);
bio->bi_private = req;
@@ -218,7 +332,7 @@
u16 nvmet_bdev_flush(struct nvmet_req *req)
{
- if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL, NULL))
+ if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL))
return NVME_SC_INTERNAL | NVME_SC_DNR;
return 0;
}
@@ -230,7 +344,7 @@
int ret;
ret = __blkdev_issue_discard(ns->bdev,
- le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
+ nvmet_lba_to_sect(ns, range->slba),
le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
GFP_KERNEL, 0, bio);
if (ret && ret != -EOPNOTSUPP) {
@@ -261,12 +375,10 @@
if (bio) {
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
- if (status) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- } else {
+ if (status)
+ bio_io_error(bio);
+ else
submit_bio(bio);
- }
} else {
nvmet_req_complete(req, status);
}
@@ -274,6 +386,9 @@
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
+ return;
+
switch (le32_to_cpu(req->cmd->dsm.attributes)) {
case NVME_DSMGMT_AD:
nvmet_bdev_execute_discard(req);
@@ -295,8 +410,10 @@
sector_t nr_sector;
int ret;
- sector = le64_to_cpu(write_zeroes->slba) <<
- (req->ns->blksize_shift - 9);
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
+
+ sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
(req->ns->blksize_shift - 9));
@@ -319,20 +436,17 @@
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_bdev_execute_rw;
- req->data_len = nvmet_rw_len(req);
+ if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
+ req->metadata_len = nvmet_rw_metadata_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_bdev_execute_flush;
- req->data_len = 0;
return 0;
case nvme_cmd_dsm:
req->execute = nvmet_bdev_execute_dsm;
- req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
- sizeof(struct nvme_dsm_range);
return 0;
case nvme_cmd_write_zeroes:
req->execute = nvmet_bdev_execute_write_zeroes;
- req->data_len = 0;
return 0;
default:
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 6ca17a0..c81690b 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -8,11 +8,24 @@
#include <linux/uio.h>
#include <linux/falloc.h>
#include <linux/file.h>
+#include <linux/fs.h>
#include "nvmet.h"
#define NVMET_MAX_MPOOL_BVEC 16
#define NVMET_MIN_MPOOL_OBJ 16
+int nvmet_file_ns_revalidate(struct nvmet_ns *ns)
+{
+ struct kstat stat;
+ int ret;
+
+ ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE,
+ AT_STATX_FORCE_SYNC);
+ if (!ret) /* on failure ns->size keeps its previous value */
+ ns->size = stat.size;
+ return ret; /* result of vfs_getattr(); 0 when ns->size was refreshed */
+}
+
void nvmet_file_ns_disable(struct nvmet_ns *ns)
{
if (ns->file) {
@@ -30,7 +43,6 @@
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
int flags = O_RDWR | O_LARGEFILE;
- struct kstat stat;
int ret;
if (!ns->buffered_io)
@@ -45,12 +57,10 @@
return ret;
}
- ret = vfs_getattr(&ns->file->f_path,
- &stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
+ ret = nvmet_file_ns_revalidate(ns);
if (ret)
goto err;
- ns->size = stat.size;
/*
* i_blkbits can be greater than the universally accepted upper bound,
* so make sure we export a sane namespace lba_shift.
@@ -128,7 +138,7 @@
mempool_free(req->f.bvec, req->ns->bvec_pool);
}
- if (unlikely(ret != req->data_len))
+ if (unlikely(ret != req->transfer_len))
status = errno_to_nvme_status(req, ret);
nvmet_req_complete(req, status);
}
@@ -148,7 +158,7 @@
is_sync = true;
pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
- if (unlikely(pos + req->data_len > req->ns->size)) {
+ if (unlikely(pos + req->transfer_len > req->ns->size)) {
nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
return true;
}
@@ -175,7 +185,7 @@
nr_bvec--;
}
- if (WARN_ON_ONCE(total_len != req->data_len)) {
+ if (WARN_ON_ONCE(total_len != req->transfer_len)) {
ret = -EIO;
goto complete;
}
@@ -234,6 +244,9 @@
{
ssize_t nr_bvec = req->sg_cnt;
+ if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
+ return;
+
if (!req->sg_cnt || !nr_bvec) {
nvmet_req_complete(req, 0);
return;
@@ -254,7 +267,8 @@
if (req->ns->buffered_io) {
if (likely(!req->f.mpool_alloc) &&
- nvmet_file_execute_io(req, IOCB_NOWAIT))
+ (req->ns->file->f_mode & FMODE_NOWAIT) &&
+ nvmet_file_execute_io(req, IOCB_NOWAIT))
return;
nvmet_file_submit_buffered_io(req);
} else
@@ -275,6 +289,8 @@
static void nvmet_file_execute_flush(struct nvmet_req *req)
{
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
INIT_WORK(&req->f.work, nvmet_file_flush_work);
schedule_work(&req->f.work);
}
@@ -333,6 +349,8 @@
static void nvmet_file_execute_dsm(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
+ return;
INIT_WORK(&req->f.work, nvmet_file_dsm_work);
schedule_work(&req->f.work);
}
@@ -361,6 +379,8 @@
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
schedule_work(&req->f.work);
}
@@ -373,20 +393,15 @@
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_file_execute_rw;
- req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_file_execute_flush;
- req->data_len = 0;
return 0;
case nvme_cmd_dsm:
req->execute = nvmet_file_execute_dsm;
- req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
- sizeof(struct nvme_dsm_range);
return 0;
case nvme_cmd_write_zeroes:
req->execute = nvmet_file_execute_write_zeroes;
- req->data_len = 0;
return 0;
default:
pr_err("unhandled cmd for file ns %d on qid %d\n",
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index f657a12..ff3258c 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -36,7 +36,6 @@
struct nvme_loop_iod async_event_iod;
struct nvme_ctrl ctrl;
- struct nvmet_ctrl *target_ctrl;
struct nvmet_port *port;
};
@@ -76,8 +75,7 @@
{
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
- nvme_cleanup_cmd(req);
- sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
nvme_complete_rq(req);
}
@@ -102,22 +100,23 @@
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvme_loop_queue_idx(queue) == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
+ if (unlikely(nvme_is_aen_req(nvme_loop_queue_idx(queue),
+ cqe->command_id))) {
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
&cqe->result);
} else {
struct request *rq;
- rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id);
+ rq = nvme_find_rq(nvme_loop_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "tag 0x%x on queue %d not found\n",
+ "got bad command_id %#x on queue %d\n",
cqe->command_id, nvme_loop_queue_idx(queue));
return;
}
- nvme_end_request(rq, cqe->status, cqe->result);
+ if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
+ nvme_loop_complete_rq(rq);
}
}
@@ -126,7 +125,7 @@
struct nvme_loop_iod *iod =
container_of(work, struct nvme_loop_iod, work);
- nvmet_req_execute(&iod->req);
+ iod->req.execute(&iod->req);
}
static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -157,7 +156,7 @@
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl, SG_CHUNK_SIZE)) {
+ iod->sg_table.sgl, NVME_INLINE_SG_CNT)) {
nvme_cleanup_cmd(req);
return BLK_STS_RESOURCE;
}
@@ -343,9 +342,9 @@
ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
- ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+ ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->admin_tag_set.nr_hw_queues = 1;
ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
@@ -448,15 +447,16 @@
{
struct nvme_loop_ctrl *ctrl =
container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
- bool changed;
int ret;
nvme_stop_ctrl(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
- /* state change failure should never happen */
- WARN_ON_ONCE(1);
+ if (ctrl->ctrl.state != NVME_CTRL_DELETING &&
+ ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO)
+ /* state change failure for non-deleted ctrl? */
+ WARN_ON_ONCE(1);
return;
}
@@ -475,8 +475,8 @@
blk_mq_update_nr_hw_queues(&ctrl->tag_set,
ctrl->ctrl.queue_count - 1);
- changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
- WARN_ON_ONCE(!changed);
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE))
+ WARN_ON_ONCE(1);
nvme_start_ctrl(&ctrl->ctrl);
@@ -489,7 +489,6 @@
out_disable:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
}
static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
@@ -517,10 +516,10 @@
ctrl->tag_set.ops = &nvme_loop_mq_ops;
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
ctrl->tag_set.reserved_tags = 1; /* fabric connect */
- ctrl->tag_set.numa_node = NUMA_NO_NODE;
+ ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->tag_set.driver_data = ctrl;
ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
@@ -572,7 +571,6 @@
struct nvmf_ctrl_options *opts)
{
struct nvme_loop_ctrl *ctrl;
- bool changed;
int ret;
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
@@ -585,8 +583,13 @@
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
0 /* no quirks, we're perfect! */);
- if (ret)
- goto out_put_ctrl;
+ if (ret) {
+ kfree(ctrl);
+ goto out;
+ }
+
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
+ WARN_ON_ONCE(1);
ret = -ENOMEM;
@@ -622,8 +625,8 @@
dev_info(ctrl->ctrl.device,
"new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn);
- changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
- WARN_ON_ONCE(!changed);
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE))
+ WARN_ON_ONCE(1);
mutex_lock(&nvme_loop_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
@@ -640,8 +643,7 @@
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
-out_put_ctrl:
- nvme_put_ctrl(&ctrl->ctrl);
+out:
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index c51f8dd..4bf6d21 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -19,10 +19,14 @@
#include <linux/rcupdate.h>
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
+#include <linux/t10-pi.h>
+
+#define NVMET_DEFAULT_VS NVME_VS(1, 3, 0)
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
#define NVMET_NO_ERROR_LOC ((u16)-1)
+#define NVMET_DEFAULT_CTRL_MODEL "Linux"
/*
* Supported optional AENs:
@@ -50,7 +54,6 @@
(cpu_to_le32(offsetof(struct nvmf_connect_command, x)))
struct nvmet_ns {
- struct list_head dev_link;
struct percpu_ref ref;
struct block_device *bdev;
struct file *file;
@@ -76,6 +79,8 @@
int use_p2pmem;
struct pci_dev *p2p_dev;
+ int pi_type;
+ int metadata_size;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -141,6 +146,7 @@
bool enabled;
int inline_data_size;
const struct nvmet_fabrics_ops *tr_ops;
+ bool pi_enable;
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@@ -158,10 +164,9 @@
struct nvmet_ctrl {
struct nvmet_subsys *subsys;
- struct nvmet_cq **cqs;
struct nvmet_sq **sqs;
- bool cmd_seen;
+ bool reset_tbkas;
struct mutex lock;
u64 cap;
@@ -200,6 +205,12 @@
spinlock_t error_lock;
u64 err_counter;
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
+ bool pi_support;
+};
+
+struct nvmet_subsys_model {
+ struct rcu_head rcuhead;
+ char number[];
};
struct nvmet_subsys {
@@ -208,9 +219,11 @@
struct mutex lock;
struct kref ref;
- struct list_head namespaces;
+ struct xarray namespaces;
unsigned int nr_namespaces;
unsigned int max_nsid;
+ u16 cntlid_min;
+ u16 cntlid_max;
struct list_head ctrls;
@@ -222,11 +235,20 @@
u64 ver;
u64 serial;
char *subsysnqn;
+ bool pi_support;
struct config_group group;
struct config_group namespaces_group;
struct config_group allowed_hosts_group;
+
+ struct nvmet_subsys_model __rcu *model;
+
+#ifdef CONFIG_NVME_TARGET_PASSTHRU
+ struct nvme_ctrl *passthru_ctrl;
+ char *passthru_ctrl_path;
+ struct config_group passthru_group;
+#endif /* CONFIG_NVME_TARGET_PASSTHRU */
};
static inline struct nvmet_subsys *to_subsys(struct config_item *item)
@@ -270,7 +292,9 @@
struct module *owner;
unsigned int type;
unsigned int msdbd;
- bool has_keyed_sgls : 1;
+ unsigned int flags;
+#define NVMF_KEYED_SGLS (1 << 0)
+#define NVMF_METADATA_SUPPORTED (1 << 1)
void (*queue_response)(struct nvmet_req *req);
int (*add_port)(struct nvmet_port *port);
void (*remove_port)(struct nvmet_port *port);
@@ -279,6 +303,7 @@
struct nvmet_port *port, char *traddr);
u16 (*install_queue)(struct nvmet_sq *nvme_sq);
void (*discovery_chg)(struct nvmet_port *port);
+ u8 (*get_mdts)(const struct nvmet_ctrl *ctrl);
};
#define NVMET_MAX_INLINE_BIOVEC 8
@@ -291,6 +316,7 @@
struct nvmet_cq *cq;
struct nvmet_ns *ns;
struct scatterlist *sg;
+ struct scatterlist *metadata_sg;
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
union {
struct {
@@ -302,12 +328,17 @@
struct bio_vec *bvec;
struct work_struct work;
} f;
+ struct {
+ struct request *rq;
+ struct work_struct work;
+ bool use_workqueue;
+ } p;
};
int sg_cnt;
- /* data length as parsed from the command: */
- size_t data_len;
+ int metadata_sg_cnt;
/* data length as parsed from the SGL descriptor: */
size_t transfer_len;
+ size_t metadata_len;
struct nvmet_port *port;
@@ -363,6 +394,8 @@
u16 nvmet_set_feat_kato(struct nvmet_req *req);
u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
void nvmet_execute_async_event(struct nvmet_req *req);
+void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl);
+void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl);
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id);
@@ -375,11 +408,14 @@
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
-void nvmet_req_execute(struct nvmet_req *req);
+bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len);
+bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
-int nvmet_req_alloc_sgl(struct nvmet_req *req);
-void nvmet_req_free_sgl(struct nvmet_req *req);
+int nvmet_req_alloc_sgls(struct nvmet_req *req);
+void nvmet_req_free_sgls(struct nvmet_req *req);
+void nvmet_execute_set_features(struct nvmet_req *req);
+void nvmet_execute_get_features(struct nvmet_req *req);
void nvmet_execute_keep_alive(struct nvmet_req *req);
void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
@@ -488,13 +524,67 @@
u16 nvmet_bdev_flush(struct nvmet_req *req);
u16 nvmet_file_flush(struct nvmet_req *req);
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
+void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
+int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
+void nvmet_ns_revalidate(struct nvmet_ns *ns);
-static inline u32 nvmet_rw_len(struct nvmet_req *req)
+static inline u32 nvmet_rw_data_len(struct nvmet_req *req)
{
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
+static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req)
+{
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return 0;
+ return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) *
+ req->ns->metadata_size;
+}
+
+static inline u32 nvmet_dsm_len(struct nvmet_req *req)
+{
+ return (le32_to_cpu(req->cmd->dsm.nr) + 1) *
+ sizeof(struct nvme_dsm_range);
+}
+
+#ifdef CONFIG_NVME_TARGET_PASSTHRU
+void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys);
+int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys);
+void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys);
+u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req);
+u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req);
+static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
+{
+ return subsys->passthru_ctrl;
+}
+#else /* CONFIG_NVME_TARGET_PASSTHRU */
+static inline void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
+{
+}
+static inline void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
+{
+}
+static inline u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
+{
+ return 0;
+}
+static inline u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
+{
+ return 0;
+}
+static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
+{
+ return NULL;
+}
+#endif /* CONFIG_NVME_TARGET_PASSTHRU */
+
+static inline struct nvme_ctrl *
+nvmet_req_passthru_ctrl(struct nvmet_req *req)
+{
+ return nvmet_passthru_ctrl(req->sq->ctrl->subsys);
+}
+
u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
/* Convert a 32-bit number to a 16-bit 0's based number */
@@ -503,4 +593,27 @@
return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
}
+static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns)
+{
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return false;
+ return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple);
+}
+
+static inline __le64 nvmet_sect_to_lba(struct nvmet_ns *ns, sector_t sect)
+{
+ return cpu_to_le64(sect >> (ns->blksize_shift - SECTOR_SHIFT));
+}
+
+static inline sector_t nvmet_lba_to_sect(struct nvmet_ns *ns, __le64 lba)
+{
+ return le64_to_cpu(lba) << (ns->blksize_shift - SECTOR_SHIFT);
+}
+
+static inline bool nvmet_use_inline_bvec(struct nvmet_req *req)
+{
+ return req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN &&
+ req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC;
+}
+
#endif /* _NVMET_H */
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
new file mode 100644
index 0000000..8ee94f0
--- /dev/null
+++ b/drivers/nvme/target/passthru.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe Over Fabrics Target Passthrough command implementation.
+ *
+ * Copyright (c) 2017-2018 Western Digital Corporation or its
+ * affiliates.
+ * Copyright (c) 2019-2020, Eideticom Inc.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+
+#include "../host/nvme.h"
+#include "nvmet.h"
+
+MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU);
+
+/*
+ * xarray to maintain one passthru subsystem per nvme controller.
+ */
+static DEFINE_XARRAY(passthru_subsystems);
+
+static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
+ u16 status = NVME_SC_SUCCESS;
+ struct nvme_id_ctrl *id;
+ int max_hw_sectors;
+ int page_shift;
+
+ id = kzalloc(sizeof(*id), GFP_KERNEL);
+ if (!id)
+ return NVME_SC_INTERNAL;
+
+ status = nvmet_copy_from_sgl(req, 0, id, sizeof(*id));
+ if (status)
+ goto out_free;
+
+ id->cntlid = cpu_to_le16(ctrl->cntlid);
+ id->ver = cpu_to_le32(ctrl->subsys->ver);
+
+ /*
+ * The passthru NVMe driver may have a limit on the number of segments
+ * which depends on the host's memory fragmentation. To solve this,
+ * ensure mdts is limited to the pages equal to the number of segments.
+ */
+ max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
+ pctrl->max_hw_sectors);
+
+ /*
+ * nvmet_passthru_map_sg is limited to using a single bio so limit
+ * the mdts based on BIO_MAX_PAGES as well
+ */
+ max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9),
+ max_hw_sectors);
+
+ page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
+
+ id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;
+
+ id->acl = 3;
+ /*
+ * We export aerl limit for the fabrics controller, update this when
+ * passthru based aerl support is added.
+ */
+ id->aerl = NVMET_ASYNC_EVENTS - 1;
+
+ /* emulate kas as most PCIe ctrls don't support kas */
+ id->kas = cpu_to_le16(NVMET_KAS);
+
+ /* don't support host memory buffer */
+ id->hmpre = 0;
+ id->hmmin = 0;
+
+ id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes);
+ id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes);
+ id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
+
+ /* don't support fuse commands */
+ id->fuses = 0;
+
+ id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
+ if (ctrl->ops->flags & NVMF_KEYED_SGLS)
+ id->sgls |= cpu_to_le32(1 << 2);
+ if (req->port->inline_data_size)
+ id->sgls |= cpu_to_le32(1 << 20);
+
+ /*
+ * When passthru controller is set up using nvme-loop transport it will
+ * export the passthru ctrl subsysnqn (PCIe NVMe ctrl) and will fail in
+ * the nvme/host/core.c in the nvme_init_subsystem()->nvme_active_ctrl()
+ * code path with duplicate ctrl subsysnqn. In order to prevent that we
+ * mask the passthru-ctrl subsysnqn with the target ctrl subsysnqn.
+ */
+ memcpy(id->subnqn, ctrl->subsysnqn, sizeof(id->subnqn));
+
+ /* use fabric id-ctrl values */
+ id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
+ req->port->inline_data_size) / 16);
+ id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
+
+ id->msdbd = ctrl->ops->msdbd;
+
+ /* Support multipath connections with fabrics */
+ id->cmic |= 1 << 1;
+
+ /* Disable reservations, see nvmet_parse_passthru_io_cmd() */
+ id->oncs &= cpu_to_le16(~NVME_CTRL_ONCS_RESERVATIONS);
+
+ status = nvmet_copy_to_sgl(req, 0, id, sizeof(struct nvme_id_ctrl));
+
+out_free:
+ kfree(id);
+ return status;
+}
+
+static u16 nvmet_passthru_override_id_ns(struct nvmet_req *req)
+{
+ u16 status = NVME_SC_SUCCESS;
+ struct nvme_id_ns *id;
+ int i;
+
+ id = kzalloc(sizeof(*id), GFP_KERNEL);
+ if (!id)
+ return NVME_SC_INTERNAL;
+
+ status = nvmet_copy_from_sgl(req, 0, id, sizeof(struct nvme_id_ns));
+ if (status)
+ goto out_free;
+
+ for (i = 0; i < (id->nlbaf + 1); i++)
+ if (id->lbaf[i].ms)
+ memset(&id->lbaf[i], 0, sizeof(id->lbaf[i]));
+
+ id->flbas = id->flbas & ~(1 << 4);
+
+ /*
+ * Presently the NVMEof target code does not support sending
+ * metadata, so we must disable it here. This should be updated
+ * once target starts supporting metadata.
+ */
+ id->mc = 0;
+
+ status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+
+out_free:
+ kfree(id);
+ return status;
+}
+
+static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, p.work);
+ struct request *rq = req->p.rq;
+ u16 status;
+
+ nvme_execute_passthru_rq(rq);
+
+ status = nvme_req(rq)->status;
+ if (status == NVME_SC_SUCCESS &&
+ req->cmd->common.opcode == nvme_admin_identify) {
+ switch (req->cmd->identify.cns) {
+ case NVME_ID_CNS_CTRL:
+ nvmet_passthru_override_id_ctrl(req);
+ break;
+ case NVME_ID_CNS_NS:
+ nvmet_passthru_override_id_ns(req);
+ break;
+ }
+ }
+
+ req->cqe->result = nvme_req(rq)->result;
+ nvmet_req_complete(req, status);
+ blk_mq_free_request(rq);
+}
+
+static void nvmet_passthru_req_done(struct request *rq,
+ blk_status_t blk_status)
+{
+ struct nvmet_req *req = rq->end_io_data;
+
+ req->cqe->result = nvme_req(rq)->result;
+ nvmet_req_complete(req, nvme_req(rq)->status);
+ blk_mq_free_request(rq);
+}
+
+static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
+{
+ struct scatterlist *sg;
+ int op_flags = 0;
+ struct bio *bio;
+ int i, ret;
+
+ if (req->sg_cnt > BIO_MAX_PAGES)
+ return -EINVAL;
+
+ if (req->cmd->common.opcode == nvme_cmd_flush)
+ op_flags = REQ_FUA;
+ else if (nvme_is_write(req->cmd))
+ op_flags = REQ_SYNC | REQ_IDLE;
+
+ bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
+ bio->bi_end_io = bio_put;
+ bio->bi_opf = req_op(rq) | op_flags;
+
+ for_each_sg(req->sg, sg, req->sg_cnt, i) {
+ if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
+ sg->offset) < sg->length) {
+ bio_put(bio);
+ return -EINVAL;
+ }
+ }
+
+ ret = blk_rq_append_bio(rq, &bio);
+ if (unlikely(ret)) {
+ bio_put(bio);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
+{
+ struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
+ struct request_queue *q = ctrl->admin_q;
+ struct nvme_ns *ns = NULL;
+ struct request *rq = NULL;
+ u32 effects;
+ u16 status;
+ int ret;
+
+ if (likely(req->sq->qid != 0)) {
+ u32 nsid = le32_to_cpu(req->cmd->common.nsid);
+
+ ns = nvme_find_get_ns(ctrl, nsid);
+ if (unlikely(!ns)) {
+ pr_err("failed to get passthru ns nsid:%u\n", nsid);
+ status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ goto out;
+ }
+
+ q = ns->queue;
+ }
+
+ rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY);
+ if (IS_ERR(rq)) {
+ status = NVME_SC_INTERNAL;
+ goto out_put_ns;
+ }
+
+ if (req->sg_cnt) {
+ ret = nvmet_passthru_map_sg(req, rq);
+ if (unlikely(ret)) {
+ status = NVME_SC_INTERNAL;
+ goto out_put_req;
+ }
+ }
+
+ /*
+ * If there are effects for the command we are about to execute, or
+ * an end_req function we need to use nvme_execute_passthru_rq()
+ * synchronously in a work item since the end_req function and
+ * nvme_passthru_end() can't be called in the request done callback
+ * which is typically in interrupt context.
+ */
+ effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
+ if (req->p.use_workqueue || effects) {
+ INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
+ req->p.rq = rq;
+ schedule_work(&req->p.work);
+ } else {
+ rq->end_io_data = req;
+ blk_execute_rq_nowait(rq->q, ns ? ns->disk : NULL, rq, 0,
+ nvmet_passthru_req_done);
+ }
+
+ if (ns)
+ nvme_put_ns(ns);
+
+ return;
+
+out_put_req:
+ blk_mq_free_request(rq);
+out_put_ns:
+ if (ns)
+ nvme_put_ns(ns);
+out:
+ nvmet_req_complete(req, status);
+}
+
+/*
+ * We need to emulate set host behaviour to ensure that any requested
+ * behaviour of the target's host matches the requested behaviour
+ * of the device's host and fail otherwise.
+ */
+static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req)
+{
+ struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
+ struct nvme_feat_host_behavior *host;
+ u16 status = NVME_SC_INTERNAL;
+ int ret;
+
+ host = kzalloc(sizeof(*host) * 2, GFP_KERNEL);
+ if (!host)
+ goto out_complete_req;
+
+ ret = nvme_get_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
+ host, sizeof(*host), NULL);
+ if (ret)
+ goto out_free_host;
+
+ status = nvmet_copy_from_sgl(req, 0, &host[1], sizeof(*host));
+ if (status)
+ goto out_free_host;
+
+ if (memcmp(&host[0], &host[1], sizeof(host[0]))) {
+ pr_warn("target host has requested different behaviour from the local host\n");
+ status = NVME_SC_INTERNAL;
+ }
+
+out_free_host:
+ kfree(host);
+out_complete_req:
+ nvmet_req_complete(req, status);
+}
+
+static u16 nvmet_setup_passthru_command(struct nvmet_req *req)
+{
+ req->p.use_workqueue = false;
+ req->execute = nvmet_passthru_execute_cmd;
+ return NVME_SC_SUCCESS;
+}
+
+u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
+{
+ /* Reject any commands with non-sgl flags set (ie. fused commands) */
+ if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
+ return NVME_SC_INVALID_FIELD;
+
+ switch (req->cmd->common.opcode) {
+ case nvme_cmd_resv_register:
+ case nvme_cmd_resv_report:
+ case nvme_cmd_resv_acquire:
+ case nvme_cmd_resv_release:
+ /*
+ * Reservations cannot be supported properly because the
+ * underlying device has no way of differentiating different
+ * hosts that connect via fabrics. This could potentially be
+ * emulated in the future if regular targets grow support for
+ * this feature.
+ */
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ }
+
+ return nvmet_setup_passthru_command(req);
+}
+
+/*
+ * Only features that are emulated or specifically allowed in the list are
+ * passed down to the controller. This function implements the allow list for
+ * both get and set features.
+ */
+static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
+{
+ switch (le32_to_cpu(req->cmd->features.fid)) {
+ case NVME_FEAT_ARBITRATION:
+ case NVME_FEAT_POWER_MGMT:
+ case NVME_FEAT_LBA_RANGE:
+ case NVME_FEAT_TEMP_THRESH:
+ case NVME_FEAT_ERR_RECOVERY:
+ case NVME_FEAT_VOLATILE_WC:
+ case NVME_FEAT_WRITE_ATOMIC:
+ case NVME_FEAT_AUTO_PST:
+ case NVME_FEAT_TIMESTAMP:
+ case NVME_FEAT_HCTM:
+ case NVME_FEAT_NOPSC:
+ case NVME_FEAT_RRL:
+ case NVME_FEAT_PLM_CONFIG:
+ case NVME_FEAT_PLM_WINDOW:
+ case NVME_FEAT_HOST_BEHAVIOR:
+ case NVME_FEAT_SANITIZE:
+ case NVME_FEAT_VENDOR_START ... NVME_FEAT_VENDOR_END:
+ return nvmet_setup_passthru_command(req);
+
+ case NVME_FEAT_ASYNC_EVENT:
+ /* There is no support for forwarding ASYNC events */
+ case NVME_FEAT_IRQ_COALESCE:
+ case NVME_FEAT_IRQ_CONFIG:
+ /* The IRQ settings will not apply to the target controller */
+ case NVME_FEAT_HOST_MEM_BUF:
+ /*
+ * Any HMB that's set will not be passed through and will
+ * not work as expected
+ */
+ case NVME_FEAT_SW_PROGRESS:
+ /*
+ * The Pre-Boot Software Load Count doesn't make much
+ * sense for a target to export
+ */
+ case NVME_FEAT_RESV_MASK:
+ case NVME_FEAT_RESV_PERSIST:
+ /* No reservations, see nvmet_parse_passthru_io_cmd() */
+ default:
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ }
+}
+
+u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
+{
+ /* Reject any commands with non-sgl flags set (ie. fused commands) */
+ if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
+ return NVME_SC_INVALID_FIELD;
+
+ /*
+ * Passthru all vendor specific commands
+ */
+ if (req->cmd->common.opcode >= nvme_admin_vendor_start)
+ return nvmet_setup_passthru_command(req);
+
+ switch (req->cmd->common.opcode) {
+ case nvme_admin_async_event:
+ req->execute = nvmet_execute_async_event;
+ return NVME_SC_SUCCESS;
+ case nvme_admin_keep_alive:
+ /*
+ * Most PCIe ctrls don't support keep alive cmd, so we route
+ * keep alive to the non-passthru mode. Change this code once
+ * PCIe ctrls with keep alive support become available.
+ */
+ req->execute = nvmet_execute_keep_alive;
+ return NVME_SC_SUCCESS;
+ case nvme_admin_set_features:
+ switch (le32_to_cpu(req->cmd->features.fid)) {
+ case NVME_FEAT_ASYNC_EVENT:
+ case NVME_FEAT_KATO:
+ case NVME_FEAT_NUM_QUEUES:
+ case NVME_FEAT_HOST_ID:
+ req->execute = nvmet_execute_set_features;
+ return NVME_SC_SUCCESS;
+ case NVME_FEAT_HOST_BEHAVIOR:
+ req->execute = nvmet_passthru_set_host_behaviour;
+ return NVME_SC_SUCCESS;
+ default:
+ return nvmet_passthru_get_set_features(req);
+ }
+ break;
+ case nvme_admin_get_features:
+ switch (le32_to_cpu(req->cmd->features.fid)) {
+ case NVME_FEAT_ASYNC_EVENT:
+ case NVME_FEAT_KATO:
+ case NVME_FEAT_NUM_QUEUES:
+ case NVME_FEAT_HOST_ID:
+ req->execute = nvmet_execute_get_features;
+ return NVME_SC_SUCCESS;
+ default:
+ return nvmet_passthru_get_set_features(req);
+ }
+ break;
+ case nvme_admin_identify:
+ switch (req->cmd->identify.cns) {
+ case NVME_ID_CNS_CTRL:
+ req->execute = nvmet_passthru_execute_cmd;
+ req->p.use_workqueue = true;
+ return NVME_SC_SUCCESS;
+ case NVME_ID_CNS_CS_CTRL:
+ switch (req->cmd->identify.csi) {
+ case NVME_CSI_ZNS:
+ req->execute = nvmet_passthru_execute_cmd;
+ req->p.use_workqueue = true;
+ return NVME_SC_SUCCESS;
+ }
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ case NVME_ID_CNS_NS:
+ req->execute = nvmet_passthru_execute_cmd;
+ req->p.use_workqueue = true;
+ return NVME_SC_SUCCESS;
+ case NVME_ID_CNS_CS_NS:
+ switch (req->cmd->identify.csi) {
+ case NVME_CSI_ZNS:
+ req->execute = nvmet_passthru_execute_cmd;
+ req->p.use_workqueue = true;
+ return NVME_SC_SUCCESS;
+ }
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ default:
+ return nvmet_setup_passthru_command(req);
+ }
+ case nvme_admin_get_log_page:
+ return nvmet_setup_passthru_command(req);
+ default:
+ /* Reject commands not in the allowlist above */
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ }
+}
+
+int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
+{
+ struct nvme_ctrl *ctrl;
+ struct file *file;
+ int ret = -EINVAL;
+ void *old;
+
+ mutex_lock(&subsys->lock);
+ if (!subsys->passthru_ctrl_path)
+ goto out_unlock;
+ if (subsys->passthru_ctrl)
+ goto out_unlock;
+
+ if (subsys->nr_namespaces) {
+ pr_info("cannot enable both passthru and regular namespaces for a single subsystem\n");
+ goto out_unlock;
+ }
+
+ file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ goto out_unlock;
+ }
+
+ ctrl = nvme_ctrl_from_file(file);
+ if (!ctrl) {
+ pr_err("failed to open nvme controller %s\n",
+ subsys->passthru_ctrl_path);
+
+ goto out_put_file;
+ }
+
+ old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
+ subsys, GFP_KERNEL);
+ if (xa_is_err(old)) {
+ ret = xa_err(old);
+ goto out_put_file;
+ }
+
+ if (old)
+ goto out_put_file;
+
+ subsys->passthru_ctrl = ctrl;
+ subsys->ver = ctrl->vs;
+
+ if (subsys->ver < NVME_VS(1, 2, 1)) {
+ pr_warn("nvme controller version is too old: %llu.%llu.%llu, advertising 1.2.1\n",
+ NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver),
+ NVME_TERTIARY(subsys->ver));
+ subsys->ver = NVME_VS(1, 2, 1);
+ }
+ nvme_get_ctrl(ctrl);
+ __module_get(subsys->passthru_ctrl->ops->module);
+ ret = 0;
+
+out_put_file:
+ filp_close(file, NULL);
+out_unlock:
+ mutex_unlock(&subsys->lock);
+ return ret;
+}
+
+static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
+{
+ if (subsys->passthru_ctrl) {
+ xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid);
+ module_put(subsys->passthru_ctrl->ops->module);
+ nvme_put_ctrl(subsys->passthru_ctrl);
+ }
+ subsys->passthru_ctrl = NULL;
+ subsys->ver = NVMET_DEFAULT_VS;
+}
+
+void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
+{
+ mutex_lock(&subsys->lock);
+ __nvmet_passthru_ctrl_disable(subsys);
+ mutex_unlock(&subsys->lock);
+}
+
+void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
+{
+ mutex_lock(&subsys->lock);
+ __nvmet_passthru_ctrl_disable(subsys);
+ mutex_unlock(&subsys->lock);
+ kfree(subsys->passthru_ctrl_path);
+}
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 50e2007..6d5552f 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -20,6 +20,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>
+#include <rdma/ib_cm.h>
#include <linux/nvme-rdma.h>
#include "nvmet.h"
@@ -31,6 +32,12 @@
#define NVMET_RDMA_MAX_INLINE_SGE 4
#define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE)
+/* Assume mpsmin == device_page_size == 4KB */
+#define NVMET_RDMA_MAX_MDTS 8
+#define NVMET_RDMA_MAX_METADATA_MDTS 5
+
+struct nvmet_rdma_srq;
+
struct nvmet_rdma_cmd {
struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
struct ib_cqe cqe;
@@ -38,6 +45,7 @@
struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
struct nvme_command *nvme_cmd;
struct nvmet_rdma_queue *queue;
+ struct nvmet_rdma_srq *nsrq;
};
enum {
@@ -54,6 +62,7 @@
struct nvmet_rdma_queue *queue;
struct ib_cqe read_cqe;
+ struct ib_cqe write_cqe;
struct rdma_rw_ctx rw;
struct nvmet_req req;
@@ -80,6 +89,7 @@
struct ib_cq *cq;
atomic_t sq_wr_avail;
struct nvmet_rdma_device *dev;
+ struct nvmet_rdma_srq *nsrq;
spinlock_t state_lock;
enum nvmet_rdma_queue_state state;
struct nvmet_cq nvme_cq;
@@ -97,17 +107,31 @@
int idx;
int host_qid;
+ int comp_vector;
int recv_queue_size;
int send_queue_size;
struct list_head queue_list;
};
+struct nvmet_rdma_port {
+ struct nvmet_port *nport;
+ struct sockaddr_storage addr;
+ struct rdma_cm_id *cm_id;
+ struct delayed_work repair_work;
+};
+
+struct nvmet_rdma_srq {
+ struct ib_srq *srq;
+ struct nvmet_rdma_cmd *cmds;
+ struct nvmet_rdma_device *ndev;
+};
+
struct nvmet_rdma_device {
struct ib_device *device;
struct ib_pd *pd;
- struct ib_srq *srq;
- struct nvmet_rdma_cmd *srq_cmds;
+ struct nvmet_rdma_srq **srqs;
+ int srq_count;
size_t srq_size;
struct kref ref;
struct list_head entry;
@@ -119,6 +143,16 @@
module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
+static int srq_size_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops srq_size_ops = {
+ .set = srq_size_set,
+ .get = param_get_int,
+};
+
+static int nvmet_rdma_srq_size = 1024;
+module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644);
+MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)");
+
static DEFINE_IDA(nvmet_rdma_queue_ida);
static LIST_HEAD(nvmet_rdma_queue_list);
static DEFINE_MUTEX(nvmet_rdma_queue_mutex);
@@ -130,6 +164,7 @@
static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
+static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
@@ -139,17 +174,22 @@
static const struct nvmet_fabrics_ops nvmet_rdma_ops;
+static int srq_size_set(const char *val, const struct kernel_param *kp)
+{
+ int n = 0, ret;
+
+ ret = kstrtoint(val, 10, &n);
+ if (ret != 0 || n < 256)
+ return -EINVAL;
+
+ return param_set_int(val, kp);
+}
+
static int num_pages(int len)
{
return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
}
-/* XXX: really should move to a generic header sooner or later.. */
-static inline u32 get_unaligned_le24(const u8 *p)
-{
- return (u32)p[0] | (u32)p[1] << 8 | (u32)p[2] << 16;
-}
-
static inline bool nvmet_rdma_need_data_in(struct nvmet_rdma_rsp *rsp)
{
return nvme_is_write(rsp->req.cmd) &&
@@ -374,7 +414,8 @@
if (ib_dma_mapping_error(ndev->device, r->send_sge.addr))
goto out_free_rsp;
- r->req.p2p_client = &ndev->device->dev;
+ if (!ib_uses_virt_dma(ndev->device))
+ r->req.p2p_client = &ndev->device->dev;
r->send_sge.length = sizeof(*r->req.cqe);
r->send_sge.lkey = ndev->pd->local_dma_lkey;
@@ -387,6 +428,9 @@
/* Data In / RDMA READ */
r->read_cqe.done = nvmet_rdma_read_data_done;
+ /* Data Out / RDMA WRITE */
+ r->write_cqe.done = nvmet_rdma_write_data_done;
+
return 0;
out_free_rsp:
@@ -462,8 +506,8 @@
cmd->sge[0].addr, cmd->sge[0].length,
DMA_FROM_DEVICE);
- if (ndev->srq)
- ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
+ if (cmd->nsrq)
+ ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL);
else
ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
@@ -496,6 +540,129 @@
spin_unlock(&queue->rsp_wr_wait_lock);
}
+static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr)
+{
+ struct ib_mr_status mr_status;
+ int ret;
+ u16 status = 0;
+
+ ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
+ if (ret) {
+ pr_err("ib_check_mr_status failed, ret %d\n", ret);
+ return NVME_SC_INVALID_PI;
+ }
+
+ if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
+ switch (mr_status.sig_err.err_type) {
+ case IB_SIG_BAD_GUARD:
+ status = NVME_SC_GUARD_CHECK;
+ break;
+ case IB_SIG_BAD_REFTAG:
+ status = NVME_SC_REFTAG_CHECK;
+ break;
+ case IB_SIG_BAD_APPTAG:
+ status = NVME_SC_APPTAG_CHECK;
+ break;
+ }
+ pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
+ mr_status.sig_err.err_type,
+ mr_status.sig_err.expected,
+ mr_status.sig_err.actual);
+ }
+
+ return status;
+}
+
+static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
+ struct nvme_command *cmd, struct ib_sig_domain *domain,
+ u16 control, u8 pi_type)
+{
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.bg_type = IB_T10DIF_CRC;
+ domain->sig.dif.pi_interval = 1 << bi->interval_exp;
+ domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ domain->sig.dif.ref_remap = true;
+
+ domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
+ domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+ domain->sig.dif.app_escape = true;
+ if (pi_type == NVME_NS_DPS_PI_TYPE3)
+ domain->sig.dif.ref_escape = true;
+}
+
+static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req,
+ struct ib_sig_attrs *sig_attrs)
+{
+ struct nvme_command *cmd = req->cmd;
+ u16 control = le16_to_cpu(cmd->rw.control);
+ u8 pi_type = req->ns->pi_type;
+ struct blk_integrity *bi;
+
+ bi = bdev_get_integrity(req->ns->bdev);
+
+ memset(sig_attrs, 0, sizeof(*sig_attrs));
+
+ if (control & NVME_RW_PRINFO_PRACT) {
+ /* for WRITE_INSERT/READ_STRIP no wire domain */
+ sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ /* Clear the PRACT bit since HCA will generate/verify the PI */
+ control &= ~NVME_RW_PRINFO_PRACT;
+ cmd->rw.control = cpu_to_le16(control);
+ /* PI is added by the HW */
+ req->transfer_len += req->metadata_len;
+ } else {
+ /* for WRITE_PASS/READ_PASS both wire/memory domains exist */
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
+ pi_type);
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ }
+
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
+ if (control & NVME_RW_PRINFO_PRCHK_GUARD)
+ sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
+ if (control & NVME_RW_PRINFO_PRCHK_APP)
+ sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
+}
+
+static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key,
+ struct ib_sig_attrs *sig_attrs)
+{
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ struct nvmet_req *req = &rsp->req;
+ int ret;
+
+ if (req->metadata_len)
+ ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
+ cm_id->port_num, req->sg, req->sg_cnt,
+ req->metadata_sg, req->metadata_sg_cnt, sig_attrs,
+ addr, key, nvmet_data_dir(req));
+ else
+ ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
+ req->sg, req->sg_cnt, 0, addr, key,
+ nvmet_data_dir(req));
+
+ return ret;
+}
+
+static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp)
+{
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ struct nvmet_req *req = &rsp->req;
+
+ if (req->metadata_len)
+ rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp,
+ cm_id->port_num, req->sg, req->sg_cnt,
+ req->metadata_sg, req->metadata_sg_cnt,
+ nvmet_data_dir(req));
+ else
+ rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num,
+ req->sg, req->sg_cnt, nvmet_data_dir(req));
+}
static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
{
@@ -503,14 +670,11 @@
atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
- if (rsp->n_rdma) {
- rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
- queue->cm_id->port_num, rsp->req.sg,
- rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
- }
+ if (rsp->n_rdma)
+ nvmet_rdma_rw_ctx_destroy(rsp);
if (rsp->req.sg != rsp->cmd->inline_sg)
- nvmet_req_free_sgl(&rsp->req);
+ nvmet_req_free_sgls(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
nvmet_rdma_process_wr_wait_list(queue);
@@ -536,7 +700,7 @@
{
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
- struct nvmet_rdma_queue *queue = cq->cq_context;
+ struct nvmet_rdma_queue *queue = wc->qp->qp_context;
nvmet_rdma_release_rsp(rsp);
@@ -562,11 +726,16 @@
rsp->send_wr.opcode = IB_WR_SEND;
}
- if (nvmet_rdma_need_data_out(rsp))
- first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
- cm_id->port_num, NULL, &rsp->send_wr);
- else
+ if (nvmet_rdma_need_data_out(rsp)) {
+ if (rsp->req.metadata_len)
+ first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
+ cm_id->port_num, &rsp->write_cqe, NULL);
+ else
+ first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
+ cm_id->port_num, NULL, &rsp->send_wr);
+ } else {
first_wr = &rsp->send_wr;
+ }
nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
@@ -584,16 +753,15 @@
{
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
- struct nvmet_rdma_queue *queue = cq->cq_context;
+ struct nvmet_rdma_queue *queue = wc->qp->qp_context;
+ u16 status = 0;
WARN_ON(rsp->n_rdma <= 0);
atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
- rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
- queue->cm_id->port_num, rsp->req.sg,
- rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
rsp->n_rdma = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ nvmet_rdma_rw_ctx_destroy(rsp);
nvmet_req_uninit(&rsp->req);
nvmet_rdma_release_rsp(rsp);
if (wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -604,7 +772,57 @@
return;
}
- nvmet_req_execute(&rsp->req);
+ if (rsp->req.metadata_len)
+ status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
+ nvmet_rdma_rw_ctx_destroy(rsp);
+
+ if (unlikely(status))
+ nvmet_req_complete(&rsp->req, status);
+ else
+ rsp->req.execute(&rsp->req);
+}
+
+static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct nvmet_rdma_rsp *rsp =
+ container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
+ struct nvmet_rdma_queue *queue = wc->qp->qp_context;
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ u16 status;
+
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return;
+
+ WARN_ON(rsp->n_rdma <= 0);
+ atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
+ rsp->n_rdma = 0;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ nvmet_rdma_rw_ctx_destroy(rsp);
+ nvmet_req_uninit(&rsp->req);
+ nvmet_rdma_release_rsp(rsp);
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ pr_info("RDMA WRITE for CQE failed with status %s (%d).\n",
+ ib_wc_status_msg(wc->status), wc->status);
+ nvmet_rdma_error_comp(queue);
+ }
+ return;
+ }
+
+ /*
+ * Upon RDMA completion check the signature status
+ * - if succeeded send good NVMe response
+ * - if failed send bad NVMe response with appropriate error
+ */
+ status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
+ if (unlikely(status))
+ rsp->req.cqe->status = cpu_to_le16(status << 1);
+ nvmet_rdma_rw_ctx_destroy(rsp);
+
+ if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) {
+ pr_err("sending cmd response failed\n");
+ nvmet_rdma_release_rsp(rsp);
+ }
}
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
@@ -661,9 +879,9 @@
static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
struct nvme_keyed_sgl_desc *sgl, bool invalidate)
{
- struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u64 addr = le64_to_cpu(sgl->addr);
u32 key = get_unaligned_le32(sgl->key);
+ struct ib_sig_attrs sig_attrs;
int ret;
rsp->req.transfer_len = get_unaligned_le24(sgl->length);
@@ -672,14 +890,15 @@
if (!rsp->req.transfer_len)
return 0;
- ret = nvmet_req_alloc_sgl(&rsp->req);
- if (ret < 0)
+ if (rsp->req.metadata_len)
+ nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs);
+
+ ret = nvmet_req_alloc_sgls(&rsp->req);
+ if (unlikely(ret < 0))
goto error_out;
- ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
- rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
- nvmet_data_dir(&rsp->req));
- if (ret < 0)
+ ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs);
+ if (unlikely(ret < 0))
goto error_out;
rsp->n_rdma += ret;
@@ -747,7 +966,7 @@
queue->cm_id->port_num, &rsp->read_cqe, NULL))
nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
} else {
- nvmet_req_execute(&rsp->req);
+ rsp->req.execute(&rsp->req);
}
return true;
@@ -789,7 +1008,7 @@
{
struct nvmet_rdma_cmd *cmd =
container_of(wc->wr_cqe, struct nvmet_rdma_cmd, cqe);
- struct nvmet_rdma_queue *queue = cq->cq_context;
+ struct nvmet_rdma_queue *queue = wc->qp->qp_context;
struct nvmet_rdma_rsp *rsp;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
@@ -841,23 +1060,40 @@
nvmet_rdma_handle_command(queue, rsp);
}
-static void nvmet_rdma_destroy_srq(struct nvmet_rdma_device *ndev)
+static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq)
{
- if (!ndev->srq)
- return;
+ nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size,
+ false);
+ ib_destroy_srq(nsrq->srq);
- nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
- ib_destroy_srq(ndev->srq);
+ kfree(nsrq);
}
-static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
+static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev)
+{
+ int i;
+
+ if (!ndev->srqs)
+ return;
+
+ for (i = 0; i < ndev->srq_count; i++)
+ nvmet_rdma_destroy_srq(ndev->srqs[i]);
+
+ kfree(ndev->srqs);
+}
+
+static struct nvmet_rdma_srq *
+nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
{
struct ib_srq_init_attr srq_attr = { NULL, };
+ size_t srq_size = ndev->srq_size;
+ struct nvmet_rdma_srq *nsrq;
struct ib_srq *srq;
- size_t srq_size;
int ret, i;
- srq_size = 4095; /* XXX: tune */
+ nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL);
+ if (!nsrq)
+ return ERR_PTR(-ENOMEM);
srq_attr.attr.max_wr = srq_size;
srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
@@ -865,6 +1101,42 @@
srq_attr.srq_type = IB_SRQT_BASIC;
srq = ib_create_srq(ndev->pd, &srq_attr);
if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
+ goto out_free;
+ }
+
+ nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
+ if (IS_ERR(nsrq->cmds)) {
+ ret = PTR_ERR(nsrq->cmds);
+ goto out_destroy_srq;
+ }
+
+ nsrq->srq = srq;
+ nsrq->ndev = ndev;
+
+ for (i = 0; i < srq_size; i++) {
+ nsrq->cmds[i].nsrq = nsrq;
+ ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]);
+ if (ret)
+ goto out_free_cmds;
+ }
+
+ return nsrq;
+
+out_free_cmds:
+ nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false);
+out_destroy_srq:
+ ib_destroy_srq(srq);
+out_free:
+ kfree(nsrq);
+ return ERR_PTR(ret);
+}
+
+static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev)
+{
+ int i, ret;
+
+ if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) {
/*
* If SRQs aren't supported we just go ahead and use normal
* non-shared receive queues.
@@ -873,27 +1145,29 @@
return 0;
}
- ndev->srq_cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
- if (IS_ERR(ndev->srq_cmds)) {
- ret = PTR_ERR(ndev->srq_cmds);
- goto out_destroy_srq;
- }
+ ndev->srq_size = min(ndev->device->attrs.max_srq_wr,
+ nvmet_rdma_srq_size);
+ ndev->srq_count = min(ndev->device->num_comp_vectors,
+ ndev->device->attrs.max_srq);
- ndev->srq = srq;
- ndev->srq_size = srq_size;
+ ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL);
+ if (!ndev->srqs)
+ return -ENOMEM;
- for (i = 0; i < srq_size; i++) {
- ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
- if (ret)
- goto out_free_cmds;
+ for (i = 0; i < ndev->srq_count; i++) {
+ ndev->srqs[i] = nvmet_rdma_init_srq(ndev);
+ if (IS_ERR(ndev->srqs[i])) {
+ ret = PTR_ERR(ndev->srqs[i]);
+ goto err_srq;
+ }
}
return 0;
-out_free_cmds:
- nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
-out_destroy_srq:
- ib_destroy_srq(srq);
+err_srq:
+ while (--i >= 0)
+ nvmet_rdma_destroy_srq(ndev->srqs[i]);
+ kfree(ndev->srqs);
return ret;
}
@@ -906,7 +1180,7 @@
list_del(&ndev->entry);
mutex_unlock(&device_list_mutex);
- nvmet_rdma_destroy_srq(ndev);
+ nvmet_rdma_destroy_srqs(ndev);
ib_dealloc_pd(ndev->pd);
kfree(ndev);
@@ -915,7 +1189,8 @@
static struct nvmet_rdma_device *
nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
{
- struct nvmet_port *port = cm_id->context;
+ struct nvmet_rdma_port *port = cm_id->context;
+ struct nvmet_port *nport = port->nport;
struct nvmet_rdma_device *ndev;
int inline_page_count;
int inline_sge_count;
@@ -932,18 +1207,26 @@
if (!ndev)
goto out_err;
- inline_page_count = num_pages(port->inline_data_size);
+ inline_page_count = num_pages(nport->inline_data_size);
inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
cm_id->device->attrs.max_recv_sge) - 1;
if (inline_page_count > inline_sge_count) {
pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
- port->inline_data_size, cm_id->device->name,
+ nport->inline_data_size, cm_id->device->name,
inline_sge_count * PAGE_SIZE);
- port->inline_data_size = inline_sge_count * PAGE_SIZE;
+ nport->inline_data_size = inline_sge_count * PAGE_SIZE;
inline_page_count = inline_sge_count;
}
- ndev->inline_data_size = port->inline_data_size;
+ ndev->inline_data_size = nport->inline_data_size;
ndev->inline_page_count = inline_page_count;
+
+ if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags &
+ IB_DEVICE_INTEGRITY_HANDOVER)) {
+ pr_warn("T10-PI is not supported by device %s. Disabling it\n",
+ cm_id->device->name);
+ nport->pi_enable = false;
+ }
+
ndev->device = cm_id->device;
kref_init(&ndev->ref);
@@ -952,7 +1235,7 @@
goto out_free_dev;
if (nvmet_rdma_use_srq) {
- ret = nvmet_rdma_init_srq(ndev);
+ ret = nvmet_rdma_init_srqs(ndev);
if (ret)
goto out_free_pd;
}
@@ -976,23 +1259,15 @@
{
struct ib_qp_init_attr qp_attr;
struct nvmet_rdma_device *ndev = queue->dev;
- int comp_vector, nr_cqe, ret, i;
-
- /*
- * Spread the io queues across completion vectors,
- * but still keep all admin queues on vector 0.
- */
- comp_vector = !queue->host_qid ? 0 :
- queue->idx % ndev->device->num_comp_vectors;
+ int nr_cqe, ret, i, factor;
/*
* Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND.
*/
nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size;
- queue->cq = ib_alloc_cq(ndev->device, queue,
- nr_cqe + 1, comp_vector,
- IB_POLL_WORKQUEUE);
+ queue->cq = ib_cq_pool_get(ndev->device, nr_cqe + 1,
+ queue->comp_vector, IB_POLL_WORKQUEUE);
if (IS_ERR(queue->cq)) {
ret = PTR_ERR(queue->cq);
pr_err("failed to create CQ cqe= %d ret= %d\n",
@@ -1009,18 +1284,23 @@
qp_attr.qp_type = IB_QPT_RC;
/* +1 for drain */
qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
- qp_attr.cap.max_rdma_ctxs = queue->send_queue_size;
+ factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num,
+ 1 << NVMET_RDMA_MAX_MDTS);
+ qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor;
qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
ndev->device->attrs.max_send_sge);
- if (ndev->srq) {
- qp_attr.srq = ndev->srq;
+ if (queue->nsrq) {
+ qp_attr.srq = queue->nsrq->srq;
} else {
/* +1 for drain */
qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
}
+ if (queue->port->pi_enable && queue->host_qid)
+ qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
+
ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
if (ret) {
pr_err("failed to create_qp ret= %d\n", ret);
@@ -1034,7 +1314,7 @@
__func__, queue->cq->cqe, qp_attr.cap.max_send_sge,
qp_attr.cap.max_send_wr, queue->cm_id);
- if (!ndev->srq) {
+ if (!queue->nsrq) {
for (i = 0; i < queue->recv_queue_size; i++) {
queue->cmds[i].queue = queue;
ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
@@ -1049,7 +1329,7 @@
err_destroy_qp:
rdma_destroy_qp(queue->cm_id);
err_destroy_cq:
- ib_free_cq(queue->cq);
+ ib_cq_pool_put(queue->cq, nr_cqe + 1);
goto out;
}
@@ -1059,7 +1339,8 @@
if (queue->cm_id)
rdma_destroy_id(queue->cm_id);
ib_destroy_qp(queue->qp);
- ib_free_cq(queue->cq);
+ ib_cq_pool_put(queue->cq, queue->recv_queue_size + 2 *
+ queue->send_queue_size + 1);
}
static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
@@ -1069,7 +1350,7 @@
nvmet_sq_destroy(&queue->nvme_sq);
nvmet_rdma_destroy_queue_ib(queue);
- if (!queue->dev->srq) {
+ if (!queue->nsrq) {
nvmet_rdma_free_cmds(queue->dev, queue->cmds,
queue->recv_queue_size,
!queue->host_qid);
@@ -1131,7 +1412,8 @@
rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
rej.sts = cpu_to_le16(status);
- return rdma_reject(cm_id, (void *)&rej, sizeof(rej));
+ return rdma_reject(cm_id, (void *)&rej, sizeof(rej),
+ IB_CM_REJ_CONSUMER_DEFINED);
}
static struct nvmet_rdma_queue *
@@ -1139,6 +1421,7 @@
struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
+ struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_queue *queue;
int ret;
@@ -1165,6 +1448,7 @@
INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
queue->dev = ndev;
queue->cm_id = cm_id;
+ queue->port = port->nport;
spin_lock_init(&queue->state_lock);
queue->state = NVMET_RDMA_Q_CONNECTING;
@@ -1181,13 +1465,23 @@
goto out_destroy_sq;
}
+ /*
+ * Spread the io queues across completion vectors,
+ * but still keep all admin queues on vector 0.
+ */
+ queue->comp_vector = !queue->host_qid ? 0 :
+ queue->idx % ndev->device->num_comp_vectors;
+
+
ret = nvmet_rdma_alloc_rsps(queue);
if (ret) {
ret = NVME_RDMA_CM_NO_RSC;
goto out_ida_remove;
}
- if (!ndev->srq) {
+ if (ndev->srqs) {
+ queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count];
+ } else {
queue->cmds = nvmet_rdma_alloc_cmds(ndev,
queue->recv_queue_size,
!queue->host_qid);
@@ -1208,7 +1502,7 @@
return queue;
out_free_cmds:
- if (!ndev->srq) {
+ if (!queue->nsrq) {
nvmet_rdma_free_cmds(queue->dev, queue->cmds,
queue->recv_queue_size,
!queue->host_qid);
@@ -1234,6 +1528,10 @@
case IB_EVENT_COMM_EST:
rdma_notify(queue->cm_id, event->event);
break;
+ case IB_EVENT_QP_LAST_WQE_REACHED:
+ pr_debug("received last WQE reached event for queue=0x%p\n",
+ queue);
+ break;
default:
pr_err("received IB QP event: %s (%d)\n",
ib_event_msg(event->event), event->event);
@@ -1283,7 +1581,6 @@
ret = -ENOMEM;
goto put_device;
}
- queue->port = cm_id->context;
if (queue->host_qid == 0) {
/* Let inflight controller teardown complete */
@@ -1423,7 +1720,7 @@
static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
struct nvmet_rdma_queue *queue)
{
- struct nvmet_port *port;
+ struct nvmet_rdma_port *port;
if (queue) {
/*
@@ -1442,7 +1739,7 @@
* cm_id destroy. use atomic xchg to make sure
* we don't compete with remove_port.
*/
- if (xchg(&port->priv, NULL) != cm_id)
+ if (xchg(&port->cm_id, NULL) != cm_id)
return 0;
/*
@@ -1473,6 +1770,13 @@
nvmet_rdma_queue_established(queue);
break;
case RDMA_CM_EVENT_ADDR_CHANGE:
+ if (!queue) {
+ struct nvmet_rdma_port *port = cm_id->context;
+
+ schedule_delayed_work(&port->repair_work, 0);
+ break;
+ }
+ fallthrough;
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
nvmet_rdma_queue_disconnect(queue);
@@ -1483,7 +1787,7 @@
case RDMA_CM_EVENT_REJECTED:
pr_debug("Connection rejected: %s\n",
rdma_reject_msg(cm_id, event->status));
- /* FALLTHROUGH */
+ fallthrough;
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_CONNECT_ERROR:
nvmet_rdma_queue_connect_fail(cm_id, queue);
@@ -1515,43 +1819,44 @@
mutex_unlock(&nvmet_rdma_queue_mutex);
}
-static int nvmet_rdma_add_port(struct nvmet_port *port)
+static void nvmet_rdma_destroy_port_queues(struct nvmet_rdma_port *port)
{
+ struct nvmet_rdma_queue *queue, *tmp;
+ struct nvmet_port *nport = port->nport;
+
+ mutex_lock(&nvmet_rdma_queue_mutex);
+ list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list,
+ queue_list) {
+ if (queue->port != nport)
+ continue;
+
+ list_del_init(&queue->queue_list);
+ __nvmet_rdma_queue_disconnect(queue);
+ }
+ mutex_unlock(&nvmet_rdma_queue_mutex);
+}
+
+static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port)
+{
+ struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL);
+
+ if (cm_id)
+ rdma_destroy_id(cm_id);
+
+ /*
+ * Destroy the remaining queues, which do not belong to any
+ * controller yet. Doing it here, after the RDMA-CM ID was destroyed,
+ * guarantees that no new queue will be created.
+ */
+ nvmet_rdma_destroy_port_queues(port);
+}
+
+static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
+{
+ struct sockaddr *addr = (struct sockaddr *)&port->addr;
struct rdma_cm_id *cm_id;
- struct sockaddr_storage addr = { };
- __kernel_sa_family_t af;
int ret;
- switch (port->disc_addr.adrfam) {
- case NVMF_ADDR_FAMILY_IP4:
- af = AF_INET;
- break;
- case NVMF_ADDR_FAMILY_IP6:
- af = AF_INET6;
- break;
- default:
- pr_err("address family %d not supported\n",
- port->disc_addr.adrfam);
- return -EINVAL;
- }
-
- if (port->inline_data_size < 0) {
- port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
- } else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
- pr_warn("inline_data_size %u is too large, reducing to %u\n",
- port->inline_data_size,
- NVMET_RDMA_MAX_INLINE_DATA_SIZE);
- port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
- }
-
- ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
- port->disc_addr.trsvcid, &addr);
- if (ret) {
- pr_err("malformed ip/port passed: %s:%s\n",
- port->disc_addr.traddr, port->disc_addr.trsvcid);
- return ret;
- }
-
cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id)) {
@@ -1569,23 +1874,19 @@
goto out_destroy_id;
}
- ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr);
+ ret = rdma_bind_addr(cm_id, addr);
if (ret) {
- pr_err("binding CM ID to %pISpcs failed (%d)\n",
- (struct sockaddr *)&addr, ret);
+ pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret);
goto out_destroy_id;
}
ret = rdma_listen(cm_id, 128);
if (ret) {
- pr_err("listening to %pISpcs failed (%d)\n",
- (struct sockaddr *)&addr, ret);
+ pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
goto out_destroy_id;
}
- pr_info("enabling port %d (%pISpcs)\n",
- le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
- port->priv = cm_id;
+ port->cm_id = cm_id;
return 0;
out_destroy_id:
@@ -1593,18 +1894,92 @@
return ret;
}
-static void nvmet_rdma_remove_port(struct nvmet_port *port)
+static void nvmet_rdma_repair_port_work(struct work_struct *w)
{
- struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
+ struct nvmet_rdma_port *port = container_of(to_delayed_work(w),
+ struct nvmet_rdma_port, repair_work);
+ int ret;
- if (cm_id)
- rdma_destroy_id(cm_id);
+ nvmet_rdma_disable_port(port);
+ ret = nvmet_rdma_enable_port(port);
+ if (ret)
+ schedule_delayed_work(&port->repair_work, 5 * HZ);
+}
+
+static int nvmet_rdma_add_port(struct nvmet_port *nport)
+{
+ struct nvmet_rdma_port *port;
+ __kernel_sa_family_t af;
+ int ret;
+
+ port = kzalloc(sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return -ENOMEM;
+
+ nport->priv = port;
+ port->nport = nport;
+ INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work);
+
+ switch (nport->disc_addr.adrfam) {
+ case NVMF_ADDR_FAMILY_IP4:
+ af = AF_INET;
+ break;
+ case NVMF_ADDR_FAMILY_IP6:
+ af = AF_INET6;
+ break;
+ default:
+ pr_err("address family %d not supported\n",
+ nport->disc_addr.adrfam);
+ ret = -EINVAL;
+ goto out_free_port;
+ }
+
+ if (nport->inline_data_size < 0) {
+ nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
+ } else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
+ pr_warn("inline_data_size %u is too large, reducing to %u\n",
+ nport->inline_data_size,
+ NVMET_RDMA_MAX_INLINE_DATA_SIZE);
+ nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
+ }
+
+ ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
+ nport->disc_addr.trsvcid, &port->addr);
+ if (ret) {
+ pr_err("malformed ip/port passed: %s:%s\n",
+ nport->disc_addr.traddr, nport->disc_addr.trsvcid);
+ goto out_free_port;
+ }
+
+ ret = nvmet_rdma_enable_port(port);
+ if (ret)
+ goto out_free_port;
+
+ pr_info("enabling port %d (%pISpcs)\n",
+ le16_to_cpu(nport->disc_addr.portid),
+ (struct sockaddr *)&port->addr);
+
+ return 0;
+
+out_free_port:
+ kfree(port);
+ return ret;
+}
+
+static void nvmet_rdma_remove_port(struct nvmet_port *nport)
+{
+ struct nvmet_rdma_port *port = nport->priv;
+
+ cancel_delayed_work_sync(&port->repair_work);
+ nvmet_rdma_disable_port(port);
+ kfree(port);
}
static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
- struct nvmet_port *port, char *traddr)
+ struct nvmet_port *nport, char *traddr)
{
- struct rdma_cm_id *cm_id = port->priv;
+ struct nvmet_rdma_port *port = nport->priv;
+ struct rdma_cm_id *cm_id = port->cm_id;
if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) {
struct nvmet_rdma_rsp *rsp =
@@ -1614,20 +1989,28 @@
sprintf(traddr, "%pISc", addr);
} else {
- memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
+ memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
}
}
+static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
+{
+ if (ctrl->pi_support)
+ return NVMET_RDMA_MAX_METADATA_MDTS;
+ return NVMET_RDMA_MAX_MDTS;
+}
+
static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_RDMA,
.msdbd = 1,
- .has_keyed_sgls = 1,
+ .flags = NVMF_KEYED_SGLS | NVMF_METADATA_SUPPORTED,
.add_port = nvmet_rdma_add_port,
.remove_port = nvmet_rdma_remove_port,
.queue_response = nvmet_rdma_queue_response,
.delete_ctrl = nvmet_rdma_delete_ctrl,
.disc_traddr = nvmet_rdma_disc_port_addr,
+ .get_mdts = nvmet_rdma_get_mdts,
};
static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 2ae8462..96b67a7 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -19,6 +19,16 @@
#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
+/* Define the socket priority to use for connections where it is desirable
+ * that the NIC consider performing optimized packet processing or filtering.
+ * A non-zero value being sufficient to indicate general consideration of any
+ * possible optimization. Making it a module param allows for alternative
+ * values that may be unique for some NIC implementations.
+ */
+static int so_priority;
+module_param(so_priority, int, 0644);
+MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
+
#define NVMET_TCP_RECV_BUDGET 8
#define NVMET_TCP_SEND_BUDGET 8
#define NVMET_TCP_IO_WORK_BUDGET 64
@@ -84,7 +94,6 @@
struct socket *sock;
struct nvmet_tcp_port *port;
struct work_struct io_work;
- int cpu;
struct nvmet_cq nvme_cq;
struct nvmet_sq nvme_sq;
@@ -134,7 +143,6 @@
struct work_struct accept_work;
struct nvmet_port *nport;
struct sockaddr_storage addr;
- int last_cpu;
void (*data_ready)(struct sock *);
};
@@ -143,7 +151,7 @@
static DEFINE_MUTEX(nvmet_tcp_queue_mutex);
static struct workqueue_struct *nvmet_tcp_wq;
-static struct nvmet_fabrics_ops nvmet_tcp_ops;
+static const struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
@@ -209,6 +217,11 @@
list_add_tail(&cmd->entry, &cmd->queue->free_list);
}
+static inline int queue_cpu(struct nvmet_tcp_queue *queue)
+{
+ return queue->sock->sk->sk_incoming_cpu;
+}
+
static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue)
{
return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
@@ -321,12 +334,20 @@
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
}
+static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status)
+{
+ if (status == -EPIPE || status == -ECONNRESET)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ else
+ nvmet_tcp_fatal_error(queue);
+}
+
static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
{
struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
u32 len = le32_to_cpu(sgl->length);
- if (!cmd->req.data_len)
+ if (!len)
return 0;
if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
@@ -358,7 +379,7 @@
return NVME_SC_INTERNAL;
}
-static void nvmet_tcp_ddgst(struct ahash_request *hash,
+static void nvmet_tcp_send_ddgst(struct ahash_request *hash,
struct nvmet_tcp_cmd *cmd)
{
ahash_request_set_crypt(hash, cmd->req.sg,
@@ -366,6 +387,23 @@
crypto_ahash_digest(hash);
}
+static void nvmet_tcp_recv_ddgst(struct ahash_request *hash,
+ struct nvmet_tcp_cmd *cmd)
+{
+ struct scatterlist sg;
+ struct kvec *iov;
+ int i;
+
+ crypto_ahash_init(hash);
+ for (i = 0, iov = cmd->iov; i < cmd->nr_mapped; i++, iov++) {
+ sg_init_one(&sg, iov->iov_base, iov->iov_len);
+ ahash_request_set_crypt(hash, &sg, NULL, iov->iov_len);
+ crypto_ahash_update(hash);
+ }
+ ahash_request_set_crypt(hash, NULL, (void *)&cmd->exp_ddgst, 0);
+ crypto_ahash_final(hash);
+}
+
static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd)
{
struct nvme_tcp_data_pdu *pdu = cmd->data_pdu;
@@ -390,7 +428,7 @@
if (queue->data_digest) {
pdu->hdr.flags |= NVME_TCP_F_DDGST;
- nvmet_tcp_ddgst(queue->snd_hash, cmd);
+ nvmet_tcp_send_ddgst(queue->snd_hash, cmd);
}
if (cmd->queue->hdr_digest) {
@@ -447,17 +485,11 @@
static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue)
{
struct llist_node *node;
+ struct nvmet_tcp_cmd *cmd;
- node = llist_del_all(&queue->resp_list);
- if (!node)
- return;
-
- while (node) {
- struct nvmet_tcp_cmd *cmd = llist_entry(node,
- struct nvmet_tcp_cmd, lentry);
-
+ for (node = llist_del_all(&queue->resp_list); node; node = node->next) {
+ cmd = llist_entry(node, struct nvmet_tcp_cmd, lentry);
list_add(&cmd->entry, &queue->resp_send_list);
- node = node->next;
queue->send_list_len++;
}
}
@@ -493,9 +525,34 @@
struct nvmet_tcp_cmd *cmd =
container_of(req, struct nvmet_tcp_cmd, req);
struct nvmet_tcp_queue *queue = cmd->queue;
+ struct nvme_sgl_desc *sgl;
+ u32 len;
+
+ if (unlikely(cmd == queue->cmd)) {
+ sgl = &cmd->req.cmd->common.dptr.sgl;
+ len = le32_to_cpu(sgl->length);
+
+ /*
+ * Wait for inline data before processing the response.
+ * Avoid using helpers, this might happen before
+ * nvmet_req_init is completed.
+ */
+ if (queue->rcv_state == NVMET_TCP_RECV_PDU &&
+ len && len <= cmd->req.port->inline_data_size &&
+ nvme_is_write(cmd->req.cmd))
+ return;
+ }
llist_add(&cmd->lentry, &queue->resp_list);
- queue_work_on(cmd->queue->cpu, nvmet_tcp_wq, &cmd->queue->io_work);
+ queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
+}
+
+static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd)
+{
+ if (unlikely(cmd->flags & NVMET_TCP_F_INIT_FAILED))
+ nvmet_tcp_queue_response(&cmd->req);
+ else
+ cmd->req.execute(&cmd->req);
}
static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
@@ -506,7 +563,7 @@
ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
offset_in_page(cmd->data_pdu) + cmd->offset,
- left, MSG_DONTWAIT | MSG_MORE);
+ left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (ret <= 0)
return ret;
@@ -534,7 +591,7 @@
if ((!last_in_batch && cmd->queue->send_list_len) ||
cmd->wbytes_done + left < cmd->req.transfer_len ||
queue->data_digest || !queue->nvme_sq.sqhd_disabled)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
left, flags);
@@ -581,7 +638,7 @@
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@@ -610,7 +667,7 @@
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@@ -628,21 +685,31 @@
return 1;
}
-static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd)
+static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
struct nvmet_tcp_queue *queue = cmd->queue;
+ int left = NVME_TCP_DIGEST_LENGTH - cmd->offset;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = {
- .iov_base = &cmd->exp_ddgst + cmd->offset,
- .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
+ .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset,
+ .iov_len = left
};
int ret;
+ if (!last_in_batch && cmd->queue->send_list_len)
+ msg.msg_flags |= MSG_MORE;
+ else
+ msg.msg_flags |= MSG_EOR;
+
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0))
return ret;
cmd->offset += ret;
+ left -= ret;
+
+ if (left)
+ return -EAGAIN;
if (queue->nvme_sq.sqhd_disabled) {
cmd->queue->snd_cmd = NULL;
@@ -678,7 +745,7 @@
}
if (cmd->state == NVMET_TCP_SEND_DDGST) {
- ret = nvmet_try_send_ddgst(cmd);
+ ret = nvmet_try_send_ddgst(cmd, last_in_batch);
if (ret <= 0)
goto done_send;
}
@@ -709,11 +776,15 @@
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
- if (ret <= 0)
+ if (unlikely(ret < 0)) {
+ nvmet_tcp_socket_error(queue, ret);
+ goto done;
+ } else if (ret == 0) {
break;
+ }
(*sends)++;
}
-
+done:
return ret;
}
@@ -825,13 +896,11 @@
static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
struct nvmet_tcp_cmd *cmd, struct nvmet_req *req)
{
+ size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
int ret;
- /* recover the expected data transfer length */
- req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
-
if (!nvme_is_write(cmd->req.cmd) ||
- req->data_len > cmd->req.port->inline_data_size) {
+ data_len > cmd->req.port->inline_data_size) {
nvmet_prepare_receive_pdu(queue);
return;
}
@@ -922,7 +991,7 @@
le32_to_cpu(req->cmd->common.dptr.sgl.length));
nvmet_tcp_handle_req_failure(queue, queue->cmd, req);
- return -EAGAIN;
+ return 0;
}
ret = nvmet_tcp_map_data(queue->cmd);
@@ -947,7 +1016,7 @@
goto out;
}
- nvmet_req_execute(&queue->cmd->req);
+ queue->cmd->req.execute(&queue->cmd->req);
out:
nvmet_prepare_receive_pdu(queue);
return ret;
@@ -1020,7 +1089,7 @@
}
if (queue->hdr_digest &&
- nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) {
+ nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) {
nvmet_tcp_fatal_error(queue); /* fatal */
return -EPROTO;
}
@@ -1038,7 +1107,7 @@
{
struct nvmet_tcp_queue *queue = cmd->queue;
- nvmet_tcp_ddgst(queue->rcv_hash, cmd);
+ nvmet_tcp_recv_ddgst(queue->rcv_hash, cmd);
queue->offset = 0;
queue->left = NVME_TCP_DIGEST_LENGTH;
queue->rcv_state = NVMET_TCP_RECV_DDGST;
@@ -1060,16 +1129,14 @@
}
nvmet_tcp_unmap_pdu_iovec(cmd);
-
- if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
- cmd->rbytes_done == cmd->req.transfer_len) {
- if (queue->data_digest) {
- nvmet_tcp_prep_recv_ddgst(cmd);
- return 0;
- }
- nvmet_req_execute(&cmd->req);
+ if (queue->data_digest) {
+ nvmet_tcp_prep_recv_ddgst(cmd);
+ return 0;
}
+ if (cmd->rbytes_done == cmd->req.transfer_len)
+ nvmet_tcp_execute_request(cmd);
+
nvmet_prepare_receive_pdu(queue);
return 0;
}
@@ -1105,9 +1172,9 @@
goto out;
}
- if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
- cmd->rbytes_done == cmd->req.transfer_len)
- nvmet_req_execute(&cmd->req);
+ if (cmd->rbytes_done == cmd->req.transfer_len)
+ nvmet_tcp_execute_request(cmd);
+
ret = 0;
out:
nvmet_prepare_receive_pdu(queue);
@@ -1155,11 +1222,15 @@
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_recv_one(queue);
- if (ret <= 0)
+ if (unlikely(ret < 0)) {
+ nvmet_tcp_socket_error(queue, ret);
+ goto done;
+ } else if (ret == 0) {
break;
+ }
(*recvs)++;
}
-
+done:
return ret;
}
@@ -1184,27 +1255,16 @@
pending = false;
ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
- if (ret > 0) {
+ if (ret > 0)
pending = true;
- } else if (ret < 0) {
- if (ret == -EPIPE || ret == -ECONNRESET)
- kernel_sock_shutdown(queue->sock, SHUT_RDWR);
- else
- nvmet_tcp_fatal_error(queue);
+ else if (ret < 0)
return;
- }
ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
- if (ret > 0) {
- /* transmitted message/data */
+ if (ret > 0)
pending = true;
- } else if (ret < 0) {
- if (ret == -EPIPE || ret == -ECONNRESET)
- kernel_sock_shutdown(queue->sock, SHUT_RDWR);
- else
- nvmet_tcp_fatal_error(queue);
+ else if (ret < 0)
return;
- }
} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
@@ -1212,7 +1272,7 @@
* We exahusted our budget, requeue our selves
*/
if (pending)
- queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+ queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}
static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
@@ -1343,6 +1403,7 @@
static void nvmet_tcp_release_queue_work(struct work_struct *w)
{
+ struct page *page;
struct nvmet_tcp_queue *queue =
container_of(w, struct nvmet_tcp_queue, release_work);
@@ -1362,6 +1423,8 @@
nvmet_tcp_free_crypto(queue);
ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
+ page = virt_to_head_page(queue->pf_cache.va);
+ __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
kfree(queue);
}
@@ -1372,7 +1435,7 @@
read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (likely(queue))
- queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+ queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1392,7 +1455,7 @@
if (sk_stream_is_writeable(sk)) {
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+ queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -1426,7 +1489,6 @@
{
struct socket *sock = queue->sock;
struct inet_sock *inet = inet_sk(sock->sk);
- struct linger sol = { .l_onoff = 1, .l_linger = 0 };
int ret;
ret = kernel_getsockname(sock,
@@ -1444,32 +1506,36 @@
* close. This is done to prevent stale data from being sent should
* the network connection be restored before TCP times out.
*/
- ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
- (char *)&sol, sizeof(sol));
- if (ret)
- return ret;
+ sock_no_linger(sock->sk);
+
+ if (so_priority > 0)
+ sock_set_priority(sock->sk, so_priority);
/* Set socket type of service */
- if (inet->rcv_tos > 0) {
- int tos = inet->rcv_tos;
+ if (inet->rcv_tos > 0)
+ ip_sock_set_tos(sock->sk, inet->rcv_tos);
- ret = kernel_setsockopt(sock, SOL_IP, IP_TOS,
- (char *)&tos, sizeof(tos));
- if (ret)
- return ret;
- }
-
+ ret = 0;
write_lock_bh(&sock->sk->sk_callback_lock);
- sock->sk->sk_user_data = queue;
- queue->data_ready = sock->sk->sk_data_ready;
- sock->sk->sk_data_ready = nvmet_tcp_data_ready;
- queue->state_change = sock->sk->sk_state_change;
- sock->sk->sk_state_change = nvmet_tcp_state_change;
- queue->write_space = sock->sk->sk_write_space;
- sock->sk->sk_write_space = nvmet_tcp_write_space;
+ if (sock->sk->sk_state != TCP_ESTABLISHED) {
+ /*
+ * If the socket is already closing, don't even start
+ * consuming it
+ */
+ ret = -ENOTCONN;
+ } else {
+ sock->sk->sk_user_data = queue;
+ queue->data_ready = sock->sk->sk_data_ready;
+ sock->sk->sk_data_ready = nvmet_tcp_data_ready;
+ queue->state_change = sock->sk->sk_state_change;
+ sock->sk->sk_state_change = nvmet_tcp_state_change;
+ queue->write_space = sock->sk->sk_write_space;
+ sock->sk->sk_write_space = nvmet_tcp_write_space;
+ queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+ }
write_unlock_bh(&sock->sk->sk_callback_lock);
- return 0;
+ return ret;
}
static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
@@ -1507,9 +1573,6 @@
if (ret)
goto out_free_connect;
- port->last_cpu = cpumask_next_wrap(port->last_cpu,
- cpu_online_mask, -1, false);
- queue->cpu = port->last_cpu;
nvmet_prepare_receive_pdu(queue);
mutex_lock(&nvmet_tcp_queue_mutex);
@@ -1520,8 +1583,6 @@
if (ret)
goto out_destroy_sq;
- queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
-
return 0;
out_destroy_sq:
mutex_lock(&nvmet_tcp_queue_mutex);
@@ -1578,7 +1639,7 @@
{
struct nvmet_tcp_port *port;
__kernel_sa_family_t af;
- int opt, ret;
+ int ret;
port = kzalloc(sizeof(*port), GFP_KERNEL);
if (!port)
@@ -1607,7 +1668,6 @@
}
port->nport = nport;
- port->last_cpu = -1;
INIT_WORK(&port->accept_work, nvmet_tcp_accept_work);
if (port->nport->inline_data_size < 0)
port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE;
@@ -1622,21 +1682,10 @@
port->sock->sk->sk_user_data = port;
port->data_ready = port->sock->sk->sk_data_ready;
port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
-
- opt = 1;
- ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
- TCP_NODELAY, (char *)&opt, sizeof(opt));
- if (ret) {
- pr_err("failed to set TCP_NODELAY sock opt %d\n", ret);
- goto err_sock;
- }
-
- ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&opt, sizeof(opt));
- if (ret) {
- pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret);
- goto err_sock;
- }
+ sock_set_reuseaddr(port->sock->sk);
+ tcp_sock_set_nodelay(port->sock->sk);
+ if (so_priority > 0)
+ sock_set_priority(port->sock->sk, so_priority);
ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
sizeof(port->addr));
@@ -1664,6 +1713,17 @@
return ret;
}
+static void nvmet_tcp_destroy_port_queues(struct nvmet_tcp_port *port)
+{
+ struct nvmet_tcp_queue *queue;
+
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
+ if (queue->port == port)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+}
+
static void nvmet_tcp_remove_port(struct nvmet_port *nport)
{
struct nvmet_tcp_port *port = nport->priv;
@@ -1673,6 +1733,11 @@
port->sock->sk->sk_user_data = NULL;
write_unlock_bh(&port->sock->sk->sk_callback_lock);
cancel_work_sync(&port->accept_work);
+ /*
+ * Destroy the remaining queues, which do not belong to any
+ * controller yet.
+ */
+ nvmet_tcp_destroy_port_queues(port);
sock_release(port->sock);
kfree(port);
@@ -1721,11 +1786,10 @@
}
}
-static struct nvmet_fabrics_ops nvmet_tcp_ops = {
+static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_TCP,
.msdbd = 1,
- .has_keyed_sgls = 0,
.add_port = nvmet_tcp_add_port,
.remove_port = nvmet_tcp_remove_port,
.queue_response = nvmet_tcp_queue_response,
diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h
index 3f61b66..c14e324 100644
--- a/drivers/nvme/target/trace.h
+++ b/drivers/nvme/target/trace.h
@@ -123,6 +123,34 @@
);
+#define aer_name(aer) { aer, #aer }
+
+TRACE_EVENT(nvmet_async_event,
+ TP_PROTO(struct nvmet_ctrl *ctrl, __le32 result),
+ TP_ARGS(ctrl, result),
+ TP_STRUCT__entry(
+ __field(int, ctrl_id)
+ __field(u32, result)
+ ),
+ TP_fast_assign(
+ __entry->ctrl_id = ctrl->cntlid;
+ __entry->result = (le32_to_cpu(result) & 0xff00) >> 8;
+ ),
+ TP_printk("nvmet%d: NVME_AEN=%#08x [%s]",
+ __entry->ctrl_id, __entry->result,
+ __print_symbolic(__entry->result,
+ aer_name(NVME_AER_NOTICE_NS_CHANGED),
+ aer_name(NVME_AER_NOTICE_ANA),
+ aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
+ aer_name(NVME_AER_NOTICE_DISC_CHANGED),
+ aer_name(NVME_AER_ERROR),
+ aer_name(NVME_AER_SMART),
+ aer_name(NVME_AER_CSS),
+ aer_name(NVME_AER_VS))
+ )
+);
+#undef aer_name
+
#endif /* _TRACE_NVMET_H */
#undef TRACE_INCLUDE_PATH