Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 3c7b61d..d7f48c0 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -1,8 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
config NVME_TARGET
tristate "NVMe Target support"
depends on BLOCK
depends on CONFIGFS_FS
+ select SGL_ALLOC
help
This enabled target side support for the NVMe protocol, that is
it allows the Linux kernel to implement NVMe subsystems and
@@ -60,3 +62,13 @@
to test NVMe-FC transport interfaces.
If unsure, say N.
+
+config NVME_TARGET_TCP
+ tristate "NVMe over Fabrics TCP target support"
+ depends on INET
+ depends on NVME_TARGET
+ help
+ This enables the NVMe TCP target support, which allows exporting NVMe
+ devices over TCP.
+
+ If unsure, say N.
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index 8118c93..2b33836 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -1,10 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
+ccflags-y += -I$(src)
+
obj-$(CONFIG_NVME_TARGET) += nvmet.o
obj-$(CONFIG_NVME_TARGET_LOOP) += nvme-loop.o
obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o
obj-$(CONFIG_NVME_TARGET_FC) += nvmet-fc.o
obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
+obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o
nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
discovery.o io-cmd-file.o io-cmd-bdev.o
@@ -12,3 +15,5 @@
nvmet-rdma-y += rdma.o
nvmet-fc-y += fc.o
nvme-fcloop-y += fcloop.o
+nvmet-tcp-y += tcp.o
+nvmet-$(CONFIG_TRACING) += trace.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 2008fa6..831a062 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* NVMe admin command implementation.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
@@ -19,19 +11,6 @@
#include <asm/unaligned.h>
#include "nvmet.h"
-/*
- * This helper allows us to clear the AEN based on the RAE bit,
- * Please use this helper when processing the log pages which are
- * associated with the AEN.
- */
-static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit)
-{
- int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & 1 << 15;
-
- if (!rae)
- clear_bit(aen_bit, &req->sq->ctrl->aen_masked);
-}
-
u32 nvmet_get_log_page_len(struct nvme_command *cmd)
{
u32 len = le16_to_cpu(cmd->get_log_page.numdu);
@@ -45,11 +24,42 @@
return len;
}
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
+{
+ return le64_to_cpu(cmd->get_log_page.lpo);
+}
+
static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
{
nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
}
+static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ unsigned long flags;
+ off_t offset = 0;
+ u64 slot;
+ u64 i;
+
+ spin_lock_irqsave(&ctrl->error_lock, flags);
+ slot = ctrl->err_counter % NVMET_ERROR_LOG_SLOTS;
+
+ for (i = 0; i < NVMET_ERROR_LOG_SLOTS; i++) {
+ if (nvmet_copy_to_sgl(req, offset, &ctrl->slots[slot],
+ sizeof(struct nvme_error_slot)))
+ break;
+
+ if (slot == 0)
+ slot = NVMET_ERROR_LOG_SLOTS - 1;
+ else
+ slot--;
+ offset += sizeof(struct nvme_error_slot);
+ }
+ spin_unlock_irqrestore(&ctrl->error_lock, flags);
+ nvmet_req_complete(req, 0);
+}
+
static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
struct nvme_smart_log *slog)
{
@@ -58,8 +68,9 @@
ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid);
if (!ns) {
- pr_err("nvmet : Could not find namespace id : %d\n",
+ pr_err("Could not find namespace id : %d\n",
le32_to_cpu(req->cmd->get_log_page.nsid));
+ req->error_loc = offsetof(struct nvme_rw_command, nsid);
return NVME_SC_INVALID_NS;
}
@@ -68,9 +79,11 @@
goto out;
host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]);
- data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]);
+ data_units_read = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part,
+ sectors[READ]), 1000);
host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]);
- data_units_written = part_stat_read(ns->bdev->bd_part, sectors[WRITE]);
+ data_units_written = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part,
+ sectors[WRITE]), 1000);
put_unaligned_le64(host_reads, &slog->host_reads[0]);
put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
@@ -98,11 +111,11 @@
if (!ns->bdev)
continue;
host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]);
- data_units_read +=
- part_stat_read(ns->bdev->bd_part, sectors[READ]);
+ data_units_read += DIV_ROUND_UP(
+ part_stat_read(ns->bdev->bd_part, sectors[READ]), 1000);
host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]);
- data_units_written +=
- part_stat_read(ns->bdev->bd_part, sectors[WRITE]);
+ data_units_written += DIV_ROUND_UP(
+ part_stat_read(ns->bdev->bd_part, sectors[WRITE]), 1000);
}
rcu_read_unlock();
@@ -119,6 +132,7 @@
{
struct nvme_smart_log *log;
u16 status = NVME_SC_INTERNAL;
+ unsigned long flags;
if (req->data_len != sizeof(*log))
goto out;
@@ -134,6 +148,11 @@
if (status)
goto out_free_log;
+ spin_lock_irqsave(&req->sq->ctrl->error_lock, flags);
+ put_unaligned_le64(req->sq->ctrl->err_counter,
+ &log->num_err_log_entries);
+ spin_unlock_irqrestore(&req->sq->ctrl->error_lock, flags);
+
status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
out_free_log:
kfree(log);
@@ -189,7 +208,7 @@
if (!status)
status = nvmet_zero_sgl(req, len, req->data_len - len);
ctrl->nr_changed_ns = 0;
- nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR);
+ nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR);
mutex_unlock(&ctrl->lock);
out:
nvmet_req_complete(req, status);
@@ -252,7 +271,7 @@
hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
hdr.ngrps = cpu_to_le16(ngrps);
- nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE);
+ nvmet_clear_aen_bit(req, NVME_AEN_BIT_ANA_CHANGE);
up_read(&nvmet_ana_sem);
kfree(desc);
@@ -304,7 +323,8 @@
/* XXX: figure out what to do about RTD3R/RTD3 */
id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL);
- id->ctratt = cpu_to_le32(1 << 0);
+ id->ctratt = cpu_to_le32(NVME_CTRL_ATTR_HID_128_BIT |
+ NVME_CTRL_ATTR_TBKAS);
id->oacs = 0;
@@ -353,7 +373,7 @@
if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20);
- strcpy(id->subnqn, ctrl->subsys->subsysnqn);
+ strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
/* Max command capsule size is sqe + single page of in-capsule data */
id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
@@ -392,6 +412,7 @@
u16 status = 0;
if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
+ req->error_loc = offsetof(struct nvme_identify, nsid);
status = NVME_SC_INVALID_NS | NVME_SC_DNR;
goto out;
}
@@ -421,6 +442,9 @@
break;
}
+ if (ns->bdev)
+ nvmet_bdev_set_limits(ns->bdev, id);
+
/*
* We just provide a single LBA format that matches what the
* underlying device reports.
@@ -512,6 +536,7 @@
ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
if (!ns) {
+ req->error_loc = offsetof(struct nvme_identify, nsid);
status = NVME_SC_INVALID_NS | NVME_SC_DNR;
goto out;
}
@@ -569,13 +594,15 @@
static u16 nvmet_set_feat_write_protect(struct nvmet_req *req)
{
- u32 write_protect = le32_to_cpu(req->cmd->common.cdw10[1]);
+ u32 write_protect = le32_to_cpu(req->cmd->common.cdw11);
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
u16 status = NVME_SC_FEATURE_NOT_CHANGEABLE;
req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->rw.nsid);
- if (unlikely(!req->ns))
+ if (unlikely(!req->ns)) {
+ req->error_loc = offsetof(struct nvme_common_command, nsid);
return status;
+ }
mutex_lock(&subsys->lock);
switch (write_protect) {
@@ -599,11 +626,36 @@
return status;
}
+u16 nvmet_set_feat_kato(struct nvmet_req *req)
+{
+ u32 val32 = le32_to_cpu(req->cmd->common.cdw11);
+
+ req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
+
+ nvmet_set_result(req, req->sq->ctrl->kato);
+
+ return 0;
+}
+
+u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask)
+{
+ u32 val32 = le32_to_cpu(req->cmd->common.cdw11);
+
+ if (val32 & ~mask) {
+ req->error_loc = offsetof(struct nvme_common_command, cdw11);
+ return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ }
+
+ WRITE_ONCE(req->sq->ctrl->aen_enabled, val32);
+ nvmet_set_result(req, val32);
+
+ return 0;
+}
+
static void nvmet_execute_set_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
- u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
- u32 val32;
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
switch (cdw10 & 0xff) {
@@ -612,19 +664,10 @@
(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
break;
case NVME_FEAT_KATO:
- val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
- req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
- nvmet_set_result(req, req->sq->ctrl->kato);
+ status = nvmet_set_feat_kato(req);
break;
case NVME_FEAT_ASYNC_EVENT:
- val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
- if (val32 & ~NVMET_AEN_CFG_ALL) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
- break;
- }
-
- WRITE_ONCE(req->sq->ctrl->aen_enabled, val32);
- nvmet_set_result(req, val32);
+ status = nvmet_set_feat_async_event(req, NVMET_AEN_CFG_ALL);
break;
case NVME_FEAT_HOST_ID:
status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
@@ -633,6 +676,7 @@
status = nvmet_set_feat_write_protect(req);
break;
default:
+ req->error_loc = offsetof(struct nvme_common_command, cdw10);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
}
@@ -646,9 +690,10 @@
u32 result;
req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->common.nsid);
- if (!req->ns)
+ if (!req->ns) {
+ req->error_loc = offsetof(struct nvme_common_command, nsid);
return NVME_SC_INVALID_NS | NVME_SC_DNR;
-
+ }
mutex_lock(&subsys->lock);
if (req->ns->readonly == true)
result = NVME_NS_WRITE_PROTECT;
@@ -660,10 +705,20 @@
return 0;
}
+void nvmet_get_feat_kato(struct nvmet_req *req)
+{
+ nvmet_set_result(req, req->sq->ctrl->kato * 1000);
+}
+
+void nvmet_get_feat_async_event(struct nvmet_req *req)
+{
+ nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled));
+}
+
static void nvmet_execute_get_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
- u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
switch (cdw10 & 0xff) {
@@ -689,7 +744,7 @@
break;
#endif
case NVME_FEAT_ASYNC_EVENT:
- nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled));
+ nvmet_get_feat_async_event(req);
break;
case NVME_FEAT_VOLATILE_WC:
nvmet_set_result(req, 1);
@@ -699,11 +754,13 @@
(subsys->max_qid-1) | ((subsys->max_qid-1) << 16));
break;
case NVME_FEAT_KATO:
- nvmet_set_result(req, req->sq->ctrl->kato * 1000);
+ nvmet_get_feat_kato(req);
break;
case NVME_FEAT_HOST_ID:
/* need 128-bit host identifier flag */
- if (!(req->cmd->common.cdw10[1] & cpu_to_le32(1 << 0))) {
+ if (!(req->cmd->common.cdw11 & cpu_to_le32(1 << 0))) {
+ req->error_loc =
+ offsetof(struct nvme_common_command, cdw11);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
}
@@ -715,6 +772,8 @@
status = nvmet_get_feat_write_protect(req);
break;
default:
+ req->error_loc =
+ offsetof(struct nvme_common_command, cdw10);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
}
@@ -722,7 +781,7 @@
nvmet_req_complete(req, status);
}
-static void nvmet_execute_async_event(struct nvmet_req *req)
+void nvmet_execute_async_event(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -738,7 +797,7 @@
schedule_work(&ctrl->async_event_work);
}
-static void nvmet_execute_keep_alive(struct nvmet_req *req)
+void nvmet_execute_keep_alive(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -764,13 +823,7 @@
switch (cmd->get_log_page.lid) {
case NVME_LOG_ERROR:
- /*
- * We currently never set the More bit in the status
- * field, so all error log entries are invalid and can
- * be zeroed out. This is called a minum viable
- * implementation (TM) of this mandatory log page.
- */
- req->execute = nvmet_execute_get_log_page_noop;
+ req->execute = nvmet_execute_get_log_page_error;
return 0;
case NVME_LOG_SMART:
req->execute = nvmet_execute_get_log_page_smart;
@@ -836,5 +889,6 @@
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
req->sq->qid);
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index b37a8e3..98613a4 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Configfs interface for the NVMe target.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
@@ -17,18 +9,24 @@
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/ctype.h>
+#include <linux/pci.h>
+#include <linux/pci-p2pdma.h>
#include "nvmet.h"
static const struct config_item_type nvmet_host_type;
static const struct config_item_type nvmet_subsys_type;
+static LIST_HEAD(nvmet_ports_list);
+struct list_head *nvmet_ports = &nvmet_ports_list;
+
static const struct nvmet_transport_name {
u8 type;
const char *name;
} nvmet_transport_names[] = {
{ NVMF_TRTYPE_RDMA, "rdma" },
{ NVMF_TRTYPE_FC, "fc" },
+ { NVMF_TRTYPE_TCP, "tcp" },
{ NVMF_TRTYPE_LOOP, "loop" },
};
@@ -148,7 +146,8 @@
static ssize_t nvmet_addr_treq_show(struct config_item *item,
char *page)
{
- switch (to_nvmet_port(item)->disc_addr.treq) {
+ switch (to_nvmet_port(item)->disc_addr.treq &
+ NVME_TREQ_SECURE_CHANNEL_MASK) {
case NVMF_TREQ_NOT_SPECIFIED:
return sprintf(page, "not specified\n");
case NVMF_TREQ_REQUIRED:
@@ -164,6 +163,7 @@
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
+ u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK;
if (port->enabled) {
pr_err("Cannot modify address while enabled\n");
@@ -172,15 +172,16 @@
}
if (sysfs_streq(page, "not specified")) {
- port->disc_addr.treq = NVMF_TREQ_NOT_SPECIFIED;
+ treq |= NVMF_TREQ_NOT_SPECIFIED;
} else if (sysfs_streq(page, "required")) {
- port->disc_addr.treq = NVMF_TREQ_REQUIRED;
+ treq |= NVMF_TREQ_REQUIRED;
} else if (sysfs_streq(page, "not required")) {
- port->disc_addr.treq = NVMF_TREQ_NOT_REQUIRED;
+ treq |= NVMF_TREQ_NOT_REQUIRED;
} else {
pr_err("Invalid value '%s' for treq\n", page);
return -EINVAL;
}
+ port->disc_addr.treq = treq;
return count;
}
@@ -340,6 +341,48 @@
CONFIGFS_ATTR(nvmet_ns_, device_path);
+#ifdef CONFIG_PCI_P2PDMA
+static ssize_t nvmet_ns_p2pmem_show(struct config_item *item, char *page)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+
+ return pci_p2pdma_enable_show(page, ns->p2p_dev, ns->use_p2pmem);
+}
+
+static ssize_t nvmet_ns_p2pmem_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ struct pci_dev *p2p_dev = NULL;
+ bool use_p2pmem;
+ int ret = count;
+ int error;
+
+ mutex_lock(&ns->subsys->lock);
+ if (ns->enabled) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ error = pci_p2pdma_enable_store(page, &p2p_dev, &use_p2pmem);
+ if (error) {
+ ret = error;
+ goto out_unlock;
+ }
+
+ ns->use_p2pmem = use_p2pmem;
+ pci_dev_put(ns->p2p_dev);
+ ns->p2p_dev = p2p_dev;
+
+out_unlock:
+ mutex_unlock(&ns->subsys->lock);
+
+ return ret;
+}
+
+CONFIGFS_ATTR(nvmet_ns_, p2pmem);
+#endif /* CONFIG_PCI_P2PDMA */
+
static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
{
return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
@@ -509,6 +552,9 @@
&nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
+#ifdef CONFIG_PCI_P2PDMA
+ &nvmet_ns_attr_p2pmem,
+#endif
NULL,
};
@@ -542,8 +588,10 @@
goto out;
ret = -EINVAL;
- if (nsid == 0 || nsid == NVME_NSID_ALL)
+ if (nsid == 0 || nsid == NVME_NSID_ALL) {
+ pr_err("invalid nsid %#x", nsid);
goto out;
+ }
ret = -ENOMEM;
ns = nvmet_ns_alloc(subsys, nsid);
@@ -599,7 +647,8 @@
}
list_add_tail(&link->entry, &port->subsystems);
- nvmet_genctr++;
+ nvmet_port_disc_changed(port, subsys);
+
up_write(&nvmet_config_sem);
return 0;
@@ -626,7 +675,9 @@
found:
list_del(&p->entry);
- nvmet_genctr++;
+ nvmet_port_del_ctrls(port, subsys);
+ nvmet_port_disc_changed(port, subsys);
+
if (list_empty(&port->subsystems))
nvmet_disable_port(port);
up_write(&nvmet_config_sem);
@@ -675,7 +726,8 @@
goto out_free_link;
}
list_add_tail(&link->entry, &subsys->hosts);
- nvmet_genctr++;
+ nvmet_subsys_disc_changed(subsys, host);
+
up_write(&nvmet_config_sem);
return 0;
out_free_link:
@@ -701,7 +753,8 @@
found:
list_del(&p->entry);
- nvmet_genctr++;
+ nvmet_subsys_disc_changed(subsys, host);
+
up_write(&nvmet_config_sem);
kfree(p);
}
@@ -740,7 +793,11 @@
goto out_unlock;
}
- subsys->allow_any_host = allow_any_host;
+ if (subsys->allow_any_host != allow_any_host) {
+ subsys->allow_any_host = allow_any_host;
+ nvmet_subsys_disc_changed(subsys, NULL);
+ }
+
out_unlock:
up_write(&nvmet_config_sem);
return ret ? ret : count;
@@ -844,8 +901,8 @@
}
subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME);
- if (!subsys)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(subsys))
+ return ERR_CAST(subsys);
config_group_init_type_name(&subsys->group, name, &nvmet_subsys_type);
@@ -889,7 +946,7 @@
if (enable)
nvmet_referral_enable(parent, port);
else
- nvmet_referral_disable(port);
+ nvmet_referral_disable(parent, port);
return count;
inval:
@@ -915,9 +972,10 @@
static void nvmet_referral_release(struct config_item *item)
{
+ struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent);
struct nvmet_port *port = to_nvmet_port(item);
- nvmet_referral_disable(port);
+ nvmet_referral_disable(parent, port);
kfree(port);
}
@@ -1090,6 +1148,8 @@
{
struct nvmet_port *port = to_nvmet_port(item);
+ list_del(&port->global_entry);
+
kfree(port->ana_state);
kfree(port);
}
@@ -1142,12 +1202,15 @@
port->ana_state[i] = NVME_ANA_INACCESSIBLE;
}
+ list_add(&port->global_entry, &nvmet_ports_list);
+
INIT_LIST_HEAD(&port->entry);
INIT_LIST_HEAD(&port->subsystems);
INIT_LIST_HEAD(&port->referrals);
port->inline_data_size = -1; /* < 0 == let the transport choose */
port->disc_addr.portid = cpu_to_le16(portid);
+ port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW;
config_group_init_type_name(&port->group, name, &nvmet_port_type);
config_group_init_type_name(&port->subsys_group,
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b5ec96a..3a67e24 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1,20 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Common code for the NVMe target.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/scatterlist.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
#include "nvmet.h"
@@ -44,28 +41,75 @@
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);
+inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
+{
+ u16 status;
+
+ switch (errno) {
+ case 0:
+ status = NVME_SC_SUCCESS;
+ break;
+ case -ENOSPC:
+ req->error_loc = offsetof(struct nvme_rw_command, length);
+ status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
+ break;
+ case -EREMOTEIO:
+ req->error_loc = offsetof(struct nvme_rw_command, slba);
+ status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ break;
+ case -EOPNOTSUPP:
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
+ switch (req->cmd->common.opcode) {
+ case nvme_cmd_dsm:
+ case nvme_cmd_write_zeroes:
+ status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
+ break;
+ default:
+ status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ }
+ break;
+ case -ENODATA:
+ req->error_loc = offsetof(struct nvme_rw_command, nsid);
+ status = NVME_SC_ACCESS_DENIED;
+ break;
+ case -EIO:
+ /* FALLTHRU */
+ default:
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
+ status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ }
+
+ return status;
+}
+
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
const char *subsysnqn);
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
size_t len)
{
- if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
+ if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ }
return 0;
}
u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
- if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
+ if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ }
return 0;
}
u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
- if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len)
+ if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ }
return 0;
}
@@ -129,7 +173,7 @@
}
}
-static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
+void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
u8 event_info, u8 log_page)
{
struct nvmet_async_event *aen;
@@ -149,13 +193,6 @@
schedule_work(&ctrl->async_event_work);
}
-static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen)
-{
- if (!(READ_ONCE(ctrl->aen_enabled) & aen))
- return true;
- return test_and_set_bit(aen, &ctrl->aen_masked);
-}
-
static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
u32 i;
@@ -184,9 +221,11 @@
{
struct nvmet_ctrl *ctrl;
+ lockdep_assert_held(&subsys->lock);
+
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
- if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR))
+ if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
continue;
nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
NVME_AER_NOTICE_NS_CHANGED,
@@ -203,7 +242,7 @@
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
if (port && ctrl->port != port)
continue;
- if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE))
+ if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
continue;
nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
@@ -244,6 +283,18 @@
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
+void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
+{
+ struct nvmet_ctrl *ctrl;
+
+ mutex_lock(&subsys->lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ if (ctrl->port == port)
+ ctrl->ops->delete_ctrl(ctrl);
+ }
+ mutex_unlock(&subsys->lock);
+}
+
int nvmet_enable_port(struct nvmet_port *port)
{
const struct nvmet_fabrics_ops *ops;
@@ -278,6 +329,7 @@
port->inline_data_size = 0;
port->enabled = true;
+ port->tr_ops = ops;
return 0;
}
@@ -288,6 +340,7 @@
lockdep_assert_held(&nvmet_config_sem);
port->enabled = false;
+ port->tr_ops = NULL;
ops = nvmet_transports[port->disc_addr.trtype];
ops->remove_port(port);
@@ -298,6 +351,15 @@
{
struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvmet_ctrl, ka_work);
+ bool cmd_seen = ctrl->cmd_seen;
+
+ ctrl->cmd_seen = false;
+ if (cmd_seen) {
+ pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
+ ctrl->cntlid);
+ schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ return;
+ }
pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
ctrl->cntlid, ctrl->kato);
@@ -365,25 +427,117 @@
nvmet_file_ns_disable(ns);
}
+static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
+{
+ int ret;
+ struct pci_dev *p2p_dev;
+
+ if (!ns->use_p2pmem)
+ return 0;
+
+ if (!ns->bdev) {
+ pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
+ return -EINVAL;
+ }
+
+ if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
+ pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
+ ns->device_path);
+ return -EINVAL;
+ }
+
+ if (ns->p2p_dev) {
+ ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
+ if (ret < 0)
+ return -EINVAL;
+ } else {
+ /*
+ * Right now we just check that there is p2pmem available so
+ * we can report an error to the user right away if there
+ * is not. We'll find the actual device to use once we
+ * setup the controller when the port's device is available.
+ */
+
+ p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
+ if (!p2p_dev) {
+ pr_err("no peer-to-peer memory is available for %s\n",
+ ns->device_path);
+ return -EINVAL;
+ }
+
+ pci_dev_put(p2p_dev);
+ }
+
+ return 0;
+}
+
+/*
+ * Note: ctrl->subsys->lock should be held when calling this function
+ */
+static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
+ struct nvmet_ns *ns)
+{
+ struct device *clients[2];
+ struct pci_dev *p2p_dev;
+ int ret;
+
+ if (!ctrl->p2p_client || !ns->use_p2pmem)
+ return;
+
+ if (ns->p2p_dev) {
+ ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
+ if (ret < 0)
+ return;
+
+ p2p_dev = pci_dev_get(ns->p2p_dev);
+ } else {
+ clients[0] = ctrl->p2p_client;
+ clients[1] = nvmet_ns_dev(ns);
+
+ p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
+ if (!p2p_dev) {
+ pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
+ dev_name(ctrl->p2p_client), ns->device_path);
+ return;
+ }
+ }
+
+ ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
+ if (ret < 0)
+ pci_dev_put(p2p_dev);
+
+ pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
+ ns->nsid);
+}
+
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
+ struct nvmet_ctrl *ctrl;
int ret;
mutex_lock(&subsys->lock);
- ret = -EMFILE;
- if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
- goto out_unlock;
ret = 0;
if (ns->enabled)
goto out_unlock;
+ ret = -EMFILE;
+ if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
+ goto out_unlock;
+
ret = nvmet_bdev_ns_enable(ns);
if (ret == -ENOTBLK)
ret = nvmet_file_ns_enable(ns);
if (ret)
goto out_unlock;
+ ret = nvmet_p2pmem_ns_enable(ns);
+ if (ret)
+ goto out_dev_disable;
+
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ nvmet_p2pmem_ns_add_p2p(ctrl, ns);
+
ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
0, GFP_KERNEL);
if (ret)
@@ -418,6 +572,9 @@
mutex_unlock(&subsys->lock);
return ret;
out_dev_put:
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
+out_dev_disable:
nvmet_ns_dev_disable(ns);
goto out_unlock;
}
@@ -425,6 +582,7 @@
void nvmet_ns_disable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
+ struct nvmet_ctrl *ctrl;
mutex_lock(&subsys->lock);
if (!ns->enabled)
@@ -434,6 +592,10 @@
list_del_rcu(&ns->dev_link);
if (ns->nsid == subsys->max_nsid)
subsys->max_nsid = nvmet_max_nsid(subsys);
+
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
+
mutex_unlock(&subsys->lock);
/*
@@ -450,6 +612,7 @@
percpu_ref_exit(&ns->ref);
mutex_lock(&subsys->lock);
+
subsys->nr_namespaces--;
nvmet_ns_changed(subsys, ns->nsid);
nvmet_ns_dev_disable(ns);
@@ -494,25 +657,60 @@
return ns;
}
-static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
+static void nvmet_update_sq_head(struct nvmet_req *req)
{
- u32 old_sqhd, new_sqhd;
- u16 sqhd;
-
- if (status)
- nvmet_set_status(req, status);
-
if (req->sq->size) {
+ u32 old_sqhd, new_sqhd;
+
do {
old_sqhd = req->sq->sqhd;
new_sqhd = (old_sqhd + 1) % req->sq->size;
} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
old_sqhd);
}
- sqhd = req->sq->sqhd & 0x0000FFFF;
- req->rsp->sq_head = cpu_to_le16(sqhd);
- req->rsp->sq_id = cpu_to_le16(req->sq->qid);
- req->rsp->command_id = req->cmd->common.command_id;
+ req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
+}
+
+static void nvmet_set_error(struct nvmet_req *req, u16 status)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvme_error_slot *new_error_slot;
+ unsigned long flags;
+
+ req->cqe->status = cpu_to_le16(status << 1);
+
+ if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
+ return;
+
+ spin_lock_irqsave(&ctrl->error_lock, flags);
+ ctrl->err_counter++;
+ new_error_slot =
+ &ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];
+
+ new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
+ new_error_slot->sqid = cpu_to_le16(req->sq->qid);
+ new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
+ new_error_slot->status_field = cpu_to_le16(status << 1);
+ new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
+ new_error_slot->lba = cpu_to_le64(req->error_slba);
+ new_error_slot->nsid = req->cmd->common.nsid;
+ spin_unlock_irqrestore(&ctrl->error_lock, flags);
+
+ /* set the more bit for this request */
+ req->cqe->status |= cpu_to_le16(1 << 14);
+}
+
+static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
+{
+ if (!req->sq->sqhd_disabled)
+ nvmet_update_sq_head(req);
+ req->cqe->sq_id = cpu_to_le16(req->sq->qid);
+ req->cqe->command_id = req->cmd->common.command_id;
+
+ if (unlikely(status))
+ nvmet_set_error(req, status);
+
+ trace_nvmet_req_complete(req);
if (req->ns)
nvmet_put_namespace(req->ns);
@@ -634,14 +832,20 @@
return ret;
req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
- if (unlikely(!req->ns))
+ if (unlikely(!req->ns)) {
+ req->error_loc = offsetof(struct nvme_common_command, nsid);
return NVME_SC_INVALID_NS | NVME_SC_DNR;
+ }
ret = nvmet_check_ana_state(req->port, req->ns);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ req->error_loc = offsetof(struct nvme_common_command, nsid);
return ret;
+ }
ret = nvmet_io_cmd_check_access(req);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ req->error_loc = offsetof(struct nvme_common_command, nsid);
return ret;
+ }
if (req->ns->file)
return nvmet_file_parse_io_cmd(req);
@@ -661,11 +865,17 @@
req->sg = NULL;
req->sg_cnt = 0;
req->transfer_len = 0;
- req->rsp->status = 0;
+ req->cqe->status = 0;
+ req->cqe->sq_head = 0;
req->ns = NULL;
+ req->error_loc = NVMET_NO_ERROR_LOC;
+ req->error_slba = 0;
+
+ trace_nvmet_req_init(req, req->cmd);
/* no support for fused commands yet */
if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
+ req->error_loc = offsetof(struct nvme_common_command, flags);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto fail;
}
@@ -676,6 +886,7 @@
* byte aligned.
*/
if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
+ req->error_loc = offsetof(struct nvme_common_command, flags);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto fail;
}
@@ -685,7 +896,7 @@
status = nvmet_parse_connect_cmd(req);
else if (likely(req->sq->qid != 0))
status = nvmet_parse_io_cmd(req);
- else if (req->cmd->common.opcode == nvme_fabrics_command)
+ else if (nvme_is_fabrics(req->cmd))
status = nvmet_parse_fabrics_cmd(req);
else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
status = nvmet_parse_discovery_cmd(req);
@@ -700,6 +911,9 @@
goto fail;
}
+ if (sq->ctrl)
+ sq->ctrl->cmd_seen = true;
+
return true;
fail:
@@ -718,13 +932,59 @@
void nvmet_req_execute(struct nvmet_req *req)
{
- if (unlikely(req->data_len != req->transfer_len))
+ if (unlikely(req->data_len != req->transfer_len)) {
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
- else
+ } else
req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);
+int nvmet_req_alloc_sgl(struct nvmet_req *req)
+{
+ struct pci_dev *p2p_dev = NULL;
+
+ if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
+ if (req->sq->ctrl && req->ns)
+ p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
+ req->ns->nsid);
+
+ req->p2p_dev = NULL;
+ if (req->sq->qid && p2p_dev) {
+ req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
+ req->transfer_len);
+ if (req->sg) {
+ req->p2p_dev = p2p_dev;
+ return 0;
+ }
+ }
+
+ /*
+ * If no P2P memory was available we fallback to using
+ * regular memory
+ */
+ }
+
+ req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
+ if (!req->sg)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
+
+void nvmet_req_free_sgl(struct nvmet_req *req)
+{
+ if (req->p2p_dev)
+ pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
+ else
+ sgl_free(req->sg);
+
+ req->sg = NULL;
+ req->sg_cnt = 0;
+}
+EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
+
static inline bool nvmet_cc_en(u32 cc)
{
return (cc >> NVME_CC_EN_SHIFT) & 0x1;
@@ -835,7 +1095,7 @@
if (!subsys) {
pr_warn("connect request for invalid subsystem %s!\n",
subsysnqn);
- req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
+ req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
}
@@ -856,7 +1116,7 @@
pr_warn("could not find controller %d for subsys %s / host %s\n",
cntlid, subsysnqn, hostnqn);
- req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
+ req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
out:
@@ -881,14 +1141,18 @@
return 0;
}
-static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
- const char *hostnqn)
+bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
{
struct nvmet_host_link *p;
+ lockdep_assert_held(&nvmet_config_sem);
+
if (subsys->allow_any_host)
return true;
+ if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */
+ return true;
+
list_for_each_entry(p, &subsys->hosts, entry) {
if (!strcmp(nvmet_host_name(p->host), hostnqn))
return true;
@@ -897,28 +1161,44 @@
return false;
}
-static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
- const char *hostnqn)
+/*
+ * Note: ctrl->subsys->lock should be held when calling this function
+ */
+static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
+ struct nvmet_req *req)
{
- struct nvmet_subsys_link *s;
+ struct nvmet_ns *ns;
- list_for_each_entry(s, &req->port->subsystems, entry) {
- if (__nvmet_host_allowed(s->subsys, hostnqn))
- return true;
- }
+ if (!req->p2p_client)
+ return;
- return false;
+ ctrl->p2p_client = get_device(req->p2p_client);
+
+ list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
+ nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}
-bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
- const char *hostnqn)
+/*
+ * Note: ctrl->subsys->lock should be held when calling this function
+ */
+static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
{
- lockdep_assert_held(&nvmet_config_sem);
+ struct radix_tree_iter iter;
+ void __rcu **slot;
- if (subsys->type == NVME_NQN_DISC)
- return nvmet_host_discovery_allowed(req, hostnqn);
- else
- return __nvmet_host_allowed(subsys, hostnqn);
+ radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
+ pci_dev_put(radix_tree_deref_slot(slot));
+
+ put_device(ctrl->p2p_client);
+}
+
+static void nvmet_fatal_error_handler(struct work_struct *work)
+{
+ struct nvmet_ctrl *ctrl =
+ container_of(work, struct nvmet_ctrl, fatal_err_work);
+
+ pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
+ ctrl->ops->delete_ctrl(ctrl);
}
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
@@ -934,16 +1214,16 @@
if (!subsys) {
pr_warn("connect request for invalid subsystem %s!\n",
subsysnqn);
- req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
+ req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
goto out;
}
status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
down_read(&nvmet_config_sem);
- if (!nvmet_host_allowed(req, subsys, hostnqn)) {
+ if (!nvmet_host_allowed(subsys, hostnqn)) {
pr_info("connect by host %s for subsystem %s not allowed\n",
hostnqn, subsysnqn);
- req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
+ req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
up_read(&nvmet_config_sem);
status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
goto out_put_subsystem;
@@ -962,6 +1242,8 @@
INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
INIT_LIST_HEAD(&ctrl->async_events);
+ INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
+ INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
@@ -997,42 +1279,30 @@
ctrl->cntlid = ret;
ctrl->ops = req->ops;
- if (ctrl->subsys->type == NVME_NQN_DISC) {
- /* Don't accept keep-alive timeout for discovery controllers */
- if (kato) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
- goto out_remove_ida;
- }
- /*
- * Discovery controllers use some arbitrary high value in order
- * to cleanup stale discovery sessions
- *
- * From the latest base diff RC:
- * "The Keep Alive command is not supported by
- * Discovery controllers. A transport may specify a
- * fixed Discovery controller activity timeout value
- * (e.g., 2 minutes). If no commands are received
- * by a Discovery controller within that time
- * period, the controller may perform the
- * actions for Keep Alive Timer expiration".
- */
- ctrl->kato = NVMET_DISC_KATO;
- } else {
- /* keep-alive timeout in seconds */
- ctrl->kato = DIV_ROUND_UP(kato, 1000);
- }
+ /*
+ * Discovery controllers may use some arbitrary high value
+ * in order to cleanup stale discovery sessions
+ */
+ if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato)
+ kato = NVMET_DISC_KATO_MS;
+
+ /* keep-alive timeout in seconds */
+ ctrl->kato = DIV_ROUND_UP(kato, 1000);
+
+ ctrl->err_counter = 0;
+ spin_lock_init(&ctrl->error_lock);
+
nvmet_start_keep_alive_timer(ctrl);
mutex_lock(&subsys->lock);
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
+ nvmet_setup_p2p_ns_map(ctrl, req);
mutex_unlock(&subsys->lock);
*ctrlp = ctrl;
return 0;
-out_remove_ida:
- ida_simple_remove(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
kfree(ctrl->sqs);
out_free_cqs:
@@ -1053,6 +1323,7 @@
struct nvmet_subsys *subsys = ctrl->subsys;
mutex_lock(&subsys->lock);
+ nvmet_release_p2p_ns_map(ctrl);
list_del(&ctrl->subsys_entry);
mutex_unlock(&subsys->lock);
@@ -1076,21 +1347,11 @@
kref_put(&ctrl->ref, nvmet_ctrl_free);
}
-static void nvmet_fatal_error_handler(struct work_struct *work)
-{
- struct nvmet_ctrl *ctrl =
- container_of(work, struct nvmet_ctrl, fatal_err_work);
-
- pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
- ctrl->ops->delete_ctrl(ctrl);
-}
-
void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
mutex_lock(&ctrl->lock);
if (!(ctrl->csts & NVME_CSTS_CFS)) {
ctrl->csts |= NVME_CSTS_CFS;
- INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
schedule_work(&ctrl->fatal_err_work);
}
mutex_unlock(&ctrl->lock);
@@ -1105,8 +1366,7 @@
if (!port)
return NULL;
- if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
- NVMF_NQN_SIZE)) {
+ if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
return NULL;
return nvmet_disc_subsys;
@@ -1133,7 +1393,7 @@
subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
if (!subsys)
- return NULL;
+ return ERR_PTR(-ENOMEM);
subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
/* generate a random serial number as our controllers are ephemeral: */
@@ -1149,14 +1409,14 @@
default:
pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
kfree(subsys);
- return NULL;
+ return ERR_PTR(-EINVAL);
}
subsys->type = type;
subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
GFP_KERNEL);
if (!subsys->subsysnqn) {
kfree(subsys);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
kref_init(&subsys->ref);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index eae29f4..3764a89 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Discovery service for the NVMe over Fabrics target.
* Copyright (C) 2016 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
@@ -18,7 +10,74 @@
struct nvmet_subsys *nvmet_disc_subsys;
-u64 nvmet_genctr;
+static u64 nvmet_genctr;
+
+static void __nvmet_disc_changed(struct nvmet_port *port,
+ struct nvmet_ctrl *ctrl)
+{
+ if (ctrl->port != port)
+ return;
+
+ if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_DISC_CHANGE))
+ return;
+
+ nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+ NVME_AER_NOTICE_DISC_CHANGED, NVME_LOG_DISC);
+}
+
+void nvmet_port_disc_changed(struct nvmet_port *port,
+ struct nvmet_subsys *subsys)
+{
+ struct nvmet_ctrl *ctrl;
+
+ lockdep_assert_held(&nvmet_config_sem);
+ nvmet_genctr++;
+
+ mutex_lock(&nvmet_disc_subsys->lock);
+ list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) {
+ if (subsys && !nvmet_host_allowed(subsys, ctrl->hostnqn))
+ continue;
+
+ __nvmet_disc_changed(port, ctrl);
+ }
+ mutex_unlock(&nvmet_disc_subsys->lock);
+
+ /* If transport can signal change, notify transport */
+ if (port->tr_ops && port->tr_ops->discovery_chg)
+ port->tr_ops->discovery_chg(port);
+}
+
+static void __nvmet_subsys_disc_changed(struct nvmet_port *port,
+ struct nvmet_subsys *subsys,
+ struct nvmet_host *host)
+{
+ struct nvmet_ctrl *ctrl;
+
+ mutex_lock(&nvmet_disc_subsys->lock);
+ list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) {
+ if (host && strcmp(nvmet_host_name(host), ctrl->hostnqn))
+ continue;
+
+ __nvmet_disc_changed(port, ctrl);
+ }
+ mutex_unlock(&nvmet_disc_subsys->lock);
+}
+
+void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
+ struct nvmet_host *host)
+{
+ struct nvmet_port *port;
+ struct nvmet_subsys_link *s;
+
+ nvmet_genctr++;
+
+ list_for_each_entry(port, nvmet_ports, global_entry)
+ list_for_each_entry(s, &port->subsystems, entry) {
+ if (s->subsys != subsys)
+ continue;
+ __nvmet_subsys_disc_changed(port, subsys, host);
+ }
+}
void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port)
{
@@ -26,18 +85,18 @@
if (list_empty(&port->entry)) {
list_add_tail(&port->entry, &parent->referrals);
port->enabled = true;
- nvmet_genctr++;
+ nvmet_port_disc_changed(parent, NULL);
}
up_write(&nvmet_config_sem);
}
-void nvmet_referral_disable(struct nvmet_port *port)
+void nvmet_referral_disable(struct nvmet_port *parent, struct nvmet_port *port)
{
down_write(&nvmet_config_sem);
if (!list_empty(&port->entry)) {
port->enabled = false;
list_del_init(&port->entry);
- nvmet_genctr++;
+ nvmet_port_disc_changed(parent, NULL);
}
up_write(&nvmet_config_sem);
}
@@ -81,54 +140,76 @@
memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
}
+static size_t discovery_log_entries(struct nvmet_req *req)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_subsys_link *p;
+ struct nvmet_port *r;
+ size_t entries = 0;
+
+ list_for_each_entry(p, &req->port->subsystems, entry) {
+ if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
+ continue;
+ entries++;
+ }
+ list_for_each_entry(r, &req->port->referrals, entry)
+ entries++;
+ return entries;
+}
+
static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
{
const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmf_disc_rsp_page_hdr *hdr;
+ u64 offset = nvmet_get_log_page_offset(req->cmd);
size_t data_len = nvmet_get_log_page_len(req->cmd);
- size_t alloc_len = max(data_len, sizeof(*hdr));
- int residual_len = data_len - sizeof(*hdr);
+ size_t alloc_len;
struct nvmet_subsys_link *p;
struct nvmet_port *r;
u32 numrec = 0;
u16 status = 0;
+ void *buffer;
+
+ /* Spec requires dword aligned offsets */
+ if (offset & 0x3) {
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ goto out;
+ }
/*
* Make sure we're passing at least a buffer of response header size.
* If host provided data len is less than the header size, only the
* number of bytes requested by host will be sent to host.
*/
- hdr = kzalloc(alloc_len, GFP_KERNEL);
- if (!hdr) {
+ down_read(&nvmet_config_sem);
+ alloc_len = sizeof(*hdr) + entry_size * discovery_log_entries(req);
+ buffer = kzalloc(alloc_len, GFP_KERNEL);
+ if (!buffer) {
+ up_read(&nvmet_config_sem);
status = NVME_SC_INTERNAL;
goto out;
}
- down_read(&nvmet_config_sem);
+ hdr = buffer;
list_for_each_entry(p, &req->port->subsystems, entry) {
- if (!nvmet_host_allowed(req, p->subsys, ctrl->hostnqn))
- continue;
- if (residual_len >= entry_size) {
- char traddr[NVMF_TRADDR_SIZE];
+ char traddr[NVMF_TRADDR_SIZE];
- nvmet_set_disc_traddr(req, req->port, traddr);
- nvmet_format_discovery_entry(hdr, req->port,
- p->subsys->subsysnqn, traddr,
- NVME_NQN_NVME, numrec);
- residual_len -= entry_size;
- }
+ if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
+ continue;
+
+ nvmet_set_disc_traddr(req, req->port, traddr);
+ nvmet_format_discovery_entry(hdr, req->port,
+ p->subsys->subsysnqn, traddr,
+ NVME_NQN_NVME, numrec);
numrec++;
}
list_for_each_entry(r, &req->port->referrals, entry) {
- if (residual_len >= entry_size) {
- nvmet_format_discovery_entry(hdr, r,
- NVME_DISC_SUBSYS_NAME,
- r->disc_addr.traddr,
- NVME_NQN_DISC, numrec);
- residual_len -= entry_size;
- }
+ nvmet_format_discovery_entry(hdr, r,
+ NVME_DISC_SUBSYS_NAME,
+ r->disc_addr.traddr,
+ NVME_NQN_DISC, numrec);
numrec++;
}
@@ -136,10 +217,12 @@
hdr->numrec = cpu_to_le64(numrec);
hdr->recfmt = cpu_to_le16(0);
+ nvmet_clear_aen_bit(req, NVME_AEN_BIT_DISC_CHANGE);
+
up_read(&nvmet_config_sem);
- status = nvmet_copy_to_sgl(req, 0, hdr, data_len);
- kfree(hdr);
+ status = nvmet_copy_to_sgl(req, 0, buffer + offset, data_len);
+ kfree(buffer);
out:
nvmet_req_complete(req, status);
}
@@ -174,7 +257,9 @@
if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20);
- strcpy(id->subnqn, ctrl->subsys->subsysnqn);
+ id->oaes = cpu_to_le32(NVMET_DISC_AEN_CFG_OPTIONAL);
+
+ strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
@@ -183,6 +268,51 @@
nvmet_req_complete(req, status);
}
+static void nvmet_execute_disc_set_features(struct nvmet_req *req)
+{
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ u16 stat;
+
+ switch (cdw10 & 0xff) {
+ case NVME_FEAT_KATO:
+ stat = nvmet_set_feat_kato(req);
+ break;
+ case NVME_FEAT_ASYNC_EVENT:
+ stat = nvmet_set_feat_async_event(req,
+ NVMET_DISC_AEN_CFG_OPTIONAL);
+ break;
+ default:
+ req->error_loc =
+ offsetof(struct nvme_common_command, cdw10);
+ stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
+
+ nvmet_req_complete(req, stat);
+}
+
+static void nvmet_execute_disc_get_features(struct nvmet_req *req)
+{
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ u16 stat = 0;
+
+ switch (cdw10 & 0xff) {
+ case NVME_FEAT_KATO:
+ nvmet_get_feat_kato(req);
+ break;
+ case NVME_FEAT_ASYNC_EVENT:
+ nvmet_get_feat_async_event(req);
+ break;
+ default:
+ req->error_loc =
+ offsetof(struct nvme_common_command, cdw10);
+ stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
+
+ nvmet_req_complete(req, stat);
+}
+
u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
@@ -190,10 +320,28 @@
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
pr_err("got cmd %d while not ready\n",
cmd->common.opcode);
+ req->error_loc =
+ offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
switch (cmd->common.opcode) {
+ case nvme_admin_set_features:
+ req->execute = nvmet_execute_disc_set_features;
+ req->data_len = 0;
+ return 0;
+ case nvme_admin_get_features:
+ req->execute = nvmet_execute_disc_get_features;
+ req->data_len = 0;
+ return 0;
+ case nvme_admin_async_event:
+ req->execute = nvmet_execute_async_event;
+ req->data_len = 0;
+ return 0;
+ case nvme_admin_keep_alive:
+ req->execute = nvmet_execute_keep_alive;
+ req->data_len = 0;
+ return 0;
case nvme_admin_get_log_page:
req->data_len = nvmet_get_log_page_len(cmd);
@@ -204,7 +352,9 @@
default:
pr_err("unsupported get_log_page lid %d\n",
cmd->get_log_page.lid);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ req->error_loc =
+ offsetof(struct nvme_get_log_page_command, lid);
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
case nvme_admin_identify:
req->data_len = NVME_IDENTIFY_DATA_SIZE;
@@ -216,24 +366,22 @@
default:
pr_err("unsupported identify cns %d\n",
cmd->identify.cns);
+ req->error_loc = offsetof(struct nvme_identify, cns);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
default:
- pr_err("unsupported cmd %d\n", cmd->common.opcode);
+ pr_err("unhandled cmd %d\n", cmd->common.opcode);
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
- pr_err("unhandled cmd %d\n", cmd->common.opcode);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
int __init nvmet_init_discovery(void)
{
nvmet_disc_subsys =
nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_DISC);
- if (!nvmet_disc_subsys)
- return -ENOMEM;
- return 0;
+ return PTR_ERR_OR_ZERO(nvmet_disc_subsys);
}
void nvmet_exit_discovery(void)
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index d84ae00..d16b55f 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* NVMe Fabrics command implementation.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
@@ -17,23 +9,26 @@
static void nvmet_execute_prop_set(struct nvmet_req *req)
{
+ u64 val = le64_to_cpu(req->cmd->prop_set.value);
u16 status = 0;
- if (!(req->cmd->prop_set.attrib & 1)) {
- u64 val = le64_to_cpu(req->cmd->prop_set.value);
-
- switch (le32_to_cpu(req->cmd->prop_set.offset)) {
- case NVME_REG_CC:
- nvmet_update_cc(req->sq->ctrl, val);
- break;
- default:
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
- break;
- }
- } else {
+ if (req->cmd->prop_set.attrib & 1) {
+ req->error_loc =
+ offsetof(struct nvmf_property_set_command, attrib);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ goto out;
}
+ switch (le32_to_cpu(req->cmd->prop_set.offset)) {
+ case NVME_REG_CC:
+ nvmet_update_cc(req->sq->ctrl, val);
+ break;
+ default:
+ req->error_loc =
+ offsetof(struct nvmf_property_set_command, offset);
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ }
+out:
nvmet_req_complete(req, status);
}
@@ -69,7 +64,15 @@
}
}
- req->rsp->result.u64 = cpu_to_le64(val);
+ if (status && req->cmd->prop_get.attrib & 1) {
+ req->error_loc =
+ offsetof(struct nvmf_property_get_command, offset);
+ } else {
+ req->error_loc =
+ offsetof(struct nvmf_property_get_command, attrib);
+ }
+
+ req->cqe->result.u64 = cpu_to_le64(val);
nvmet_req_complete(req, status);
}
@@ -89,6 +92,7 @@
default:
pr_err("received unknown capsule type 0x%x\n",
cmd->fabrics.fctype);
+ req->error_loc = offsetof(struct nvmf_common_command, fctype);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
@@ -105,16 +109,34 @@
old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
if (old) {
pr_warn("queue already connected!\n");
+ req->error_loc = offsetof(struct nvmf_connect_command, opcode);
return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
}
if (!sqsize) {
pr_warn("queue size zero!\n");
+ req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
}
/* note: convert queue size from 0's-based value to 1's-based value */
nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
+
+ if (c->cattr & NVME_CONNECT_DISABLE_SQFLOW) {
+ req->sq->sqhd_disabled = true;
+ req->cqe->sq_head = cpu_to_le16(0xffff);
+ }
+
+ if (ctrl->ops->install_queue) {
+ u16 ret = ctrl->ops->install_queue(req->sq);
+
+ if (ret) {
+ pr_err("failed to install queue %d cntlid %d ret %x\n",
+ qid, ret, ctrl->cntlid);
+ return ret;
+ }
+ }
+
return 0;
}
@@ -136,11 +158,12 @@
goto out;
/* zero out initial completion result, assign values as needed */
- req->rsp->result.u32 = 0;
+ req->cqe->result.u32 = 0;
if (c->recfmt != 0) {
pr_warn("invalid connect version (%d).\n",
le16_to_cpu(c->recfmt));
+ req->error_loc = offsetof(struct nvmf_connect_command, recfmt);
status = NVME_SC_CONNECT_FORMAT | NVME_SC_DNR;
goto out;
}
@@ -149,14 +172,19 @@
pr_warn("connect attempt for invalid controller ID %#x\n",
d->cntlid);
status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
- req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
+ req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
goto out;
}
status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
le32_to_cpu(c->kato), &ctrl);
- if (status)
+ if (status) {
+ if (status == (NVME_SC_INVALID_FIELD | NVME_SC_DNR))
+ req->error_loc =
+ offsetof(struct nvme_common_command, opcode);
goto out;
+ }
+
uuid_copy(&ctrl->hostid, &d->hostid);
status = nvmet_install_queue(ctrl, req);
@@ -167,7 +195,7 @@
pr_info("creating controller %d for subsystem %s for NQN %s.\n",
ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
- req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
+ req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
out:
kfree(d);
@@ -194,7 +222,7 @@
goto out;
/* zero out initial completion result, assign values as needed */
- req->rsp->result.u32 = 0;
+ req->cqe->result.u32 = 0;
if (c->recfmt != 0) {
pr_warn("invalid connect version (%d).\n",
@@ -212,14 +240,14 @@
if (unlikely(qid > ctrl->subsys->max_qid)) {
pr_warn("invalid queue id (%d)\n", qid);
status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
- req->rsp->result.u32 = IPO_IATTR_CONNECT_SQE(qid);
+ req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(qid);
goto out_ctrl_put;
}
status = nvmet_install_queue(ctrl, req);
if (status) {
/* pass back cntlid that had the issue of installing queue */
- req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
+ req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
goto out_ctrl_put;
}
@@ -240,14 +268,16 @@
{
struct nvme_command *cmd = req->cmd;
- if (cmd->common.opcode != nvme_fabrics_command) {
+ if (!nvme_is_fabrics(cmd)) {
pr_err("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
if (cmd->fabrics.fctype != nvme_fabrics_type_connect) {
pr_err("invalid capsule type 0x%x on unconnected queue.\n",
cmd->fabrics.fctype);
+ req->error_loc = offsetof(struct nvmf_common_command, fctype);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 29b4b23..ce8d819 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016 Avago Technologies. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful.
- * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
- * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
- * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
- * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
- * See the GNU General Public License for more details, a copy of which
- * can be found in the file COPYING included with this package
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
@@ -86,8 +74,6 @@
spinlock_t flock;
struct nvmet_req req;
- struct work_struct work;
- struct work_struct done_work;
struct work_struct defer_work;
struct nvmet_fc_tgtport *tgtport;
@@ -110,11 +96,19 @@
struct list_head ls_busylist;
struct list_head assoc_list;
struct ida assoc_cnt;
- struct nvmet_port *port;
+ struct nvmet_fc_port_entry *pe;
struct kref ref;
u32 max_sg_cnt;
};
+struct nvmet_fc_port_entry {
+ struct nvmet_fc_tgtport *tgtport;
+ struct nvmet_port *port;
+ u64 node_name;
+ u64 port_name;
+ struct list_head pe_list;
+};
+
struct nvmet_fc_defer_fcp_req {
struct list_head req_list;
struct nvmefc_tgt_fcp_req *fcp_req;
@@ -126,22 +120,20 @@
u16 sqsize;
u16 ersp_ratio;
__le16 sqhd;
- int cpu;
atomic_t connected;
atomic_t sqtail;
atomic_t zrspcnt;
atomic_t rsn;
spinlock_t qlock;
- struct nvmet_port *port;
struct nvmet_cq nvme_cq;
struct nvmet_sq nvme_sq;
struct nvmet_fc_tgt_assoc *assoc;
- struct nvmet_fc_fcp_iod *fod; /* array of fcp_iods */
struct list_head fod_list;
struct list_head pending_cmd_list;
struct list_head avail_defer_list;
struct workqueue_struct *work_q;
struct kref ref;
+ struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */
} __aligned(sizeof(unsigned long long));
struct nvmet_fc_tgt_assoc {
@@ -221,11 +213,10 @@
static LIST_HEAD(nvmet_fc_target_list);
static DEFINE_IDA(nvmet_fc_tgtport_cnt);
+static LIST_HEAD(nvmet_fc_portentry_list);
static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
-static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work);
-static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work);
static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work);
static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
@@ -430,8 +421,6 @@
int i;
for (i = 0; i < queue->sqsize; fod++, i++) {
- INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work);
- INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work);
INIT_WORK(&fod->defer_work, nvmet_fc_fcp_rqst_op_defer_work);
fod->tgtport = tgtport;
fod->queue = queue;
@@ -509,10 +498,7 @@
fcpreq->hwqid = queue->qid ?
((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
- if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
- queue_work_on(queue->cpu, queue->work_q, &fod->work);
- else
- nvmet_fc_handle_fcp_rqst(tgtport, fod);
+ nvmet_fc_handle_fcp_rqst(tgtport, fod);
}
static void
@@ -591,30 +577,6 @@
queue_work(queue->work_q, &fod->defer_work);
}
-static int
-nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid)
-{
- int cpu, idx, cnt;
-
- if (tgtport->ops->max_hw_queues == 1)
- return WORK_CPU_UNBOUND;
-
- /* Simple cpu selection based on qid modulo active cpu count */
- idx = !qid ? 0 : (qid - 1) % num_active_cpus();
-
- /* find the n'th active cpu */
- for (cpu = 0, cnt = 0; ; ) {
- if (cpu_active(cpu)) {
- if (cnt == idx)
- break;
- cnt++;
- }
- cpu = (cpu + 1) % num_possible_cpus();
- }
-
- return cpu;
-}
-
static struct nvmet_fc_tgt_queue *
nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
u16 qid, u16 sqsize)
@@ -626,9 +588,7 @@
if (qid > NVMET_NR_QUEUES)
return NULL;
- queue = kzalloc((sizeof(*queue) +
- (sizeof(struct nvmet_fc_fcp_iod) * sqsize)),
- GFP_KERNEL);
+ queue = kzalloc(struct_size(queue, fod, sqsize), GFP_KERNEL);
if (!queue)
return NULL;
@@ -641,12 +601,9 @@
if (!queue->work_q)
goto out_a_put;
- queue->fod = (struct nvmet_fc_fcp_iod *)&queue[1];
queue->qid = qid;
queue->sqsize = sqsize;
queue->assoc = assoc;
- queue->port = assoc->tgtport->port;
- queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid);
INIT_LIST_HEAD(&queue->fod_list);
INIT_LIST_HEAD(&queue->avail_defer_list);
INIT_LIST_HEAD(&queue->pending_cmd_list);
@@ -957,6 +914,83 @@
return ret;
}
+static void
+nvmet_fc_portentry_bind(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_port_entry *pe,
+ struct nvmet_port *port)
+{
+ lockdep_assert_held(&nvmet_fc_tgtlock);
+
+ pe->tgtport = tgtport;
+ tgtport->pe = pe;
+
+ pe->port = port;
+ port->priv = pe;
+
+ pe->node_name = tgtport->fc_target_port.node_name;
+ pe->port_name = tgtport->fc_target_port.port_name;
+ INIT_LIST_HEAD(&pe->pe_list);
+
+ list_add_tail(&pe->pe_list, &nvmet_fc_portentry_list);
+}
+
+static void
+nvmet_fc_portentry_unbind(struct nvmet_fc_port_entry *pe)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
+ if (pe->tgtport)
+ pe->tgtport->pe = NULL;
+ list_del(&pe->pe_list);
+ spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+}
+
+/*
+ * called when a targetport deregisters. Breaks the relationship
+ * with the nvmet port, but leaves the port_entry in place so that
+ * re-registration can resume operation.
+ */
+static void
+nvmet_fc_portentry_unbind_tgt(struct nvmet_fc_tgtport *tgtport)
+{
+ struct nvmet_fc_port_entry *pe;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
+ pe = tgtport->pe;
+ if (pe)
+ pe->tgtport = NULL;
+ tgtport->pe = NULL;
+ spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+}
+
+/*
+ * called when a new targetport is registered. Looks in the
+ * existing nvmet port_entries to see if the nvmet layer is
+ * configured for the targetport's wwn's. (the targetport existed,
+ * nvmet configured, the lldd unregistered the tgtport, and is now
+ * reregistering the same targetport). If so, set the nvmet port
+ * port entry on the targetport.
+ */
+static void
+nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport)
+{
+ struct nvmet_fc_port_entry *pe;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
+ list_for_each_entry(pe, &nvmet_fc_portentry_list, pe_list) {
+ if (tgtport->fc_target_port.node_name == pe->node_name &&
+ tgtport->fc_target_port.port_name == pe->port_name) {
+ WARN_ON(pe->tgtport);
+ tgtport->pe = pe;
+ pe->tgtport = tgtport;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+}
/**
* nvme_fc_register_targetport - transport entry point called by an
@@ -1034,6 +1068,8 @@
goto out_free_newrec;
}
+ nvmet_fc_portentry_rebind_tgt(newrec);
+
spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
list_add_tail(&newrec->tgt_list, &nvmet_fc_target_list);
spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
@@ -1104,10 +1140,8 @@
&tgtport->assoc_list, a_list) {
if (!nvmet_fc_tgt_a_get(assoc))
continue;
- spin_unlock_irqrestore(&tgtport->lock, flags);
- nvmet_fc_delete_target_assoc(assoc);
- nvmet_fc_tgt_a_put(assoc);
- spin_lock_irqsave(&tgtport->lock, flags);
+ if (!schedule_work(&assoc->del_work))
+ nvmet_fc_tgt_a_put(assoc);
}
spin_unlock_irqrestore(&tgtport->lock, flags);
}
@@ -1146,7 +1180,8 @@
nvmet_fc_tgtport_put(tgtport);
if (found_ctrl) {
- schedule_work(&assoc->del_work);
+ if (!schedule_work(&assoc->del_work))
+ nvmet_fc_tgt_a_put(assoc);
return;
}
@@ -1159,8 +1194,8 @@
* nvme_fc_unregister_targetport - transport entry point called by an
* LLDD to deregister/remove a previously
* registered a local NVME subsystem FC port.
- * @tgtport: pointer to the (registered) target port that is to be
- * deregistered.
+ * @target_port: pointer to the (registered) target port that is to be
+ * deregistered.
*
* Returns:
* a completion status. Must be 0 upon success; a negative errno
@@ -1171,6 +1206,8 @@
{
struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port);
+ nvmet_fc_portentry_unbind_tgt(tgtport);
+
/* terminate any outstanding associations */
__nvmet_fc_free_assocs(tgtport);
@@ -1462,10 +1499,8 @@
(struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf;
struct fcnvme_ls_disconnect_acc *acc =
(struct fcnvme_ls_disconnect_acc *)iod->rspbuf;
- struct nvmet_fc_tgt_queue *queue = NULL;
struct nvmet_fc_tgt_assoc *assoc;
int ret = 0;
- bool del_assoc = false;
memset(acc, 0, sizeof(*acc));
@@ -1496,18 +1531,7 @@
assoc = nvmet_fc_find_target_assoc(tgtport,
be64_to_cpu(rqst->associd.association_id));
iod->assoc = assoc;
- if (assoc) {
- if (rqst->discon_cmd.scope ==
- FCNVME_DISCONN_CONNECTION) {
- queue = nvmet_fc_find_target_queue(tgtport,
- be64_to_cpu(
- rqst->discon_cmd.id));
- if (!queue) {
- nvmet_fc_tgt_a_put(assoc);
- ret = VERR_NO_CONN;
- }
- }
- } else
+ if (!assoc)
ret = VERR_NO_ASSOC;
}
@@ -1535,26 +1559,10 @@
sizeof(struct fcnvme_ls_disconnect_acc)),
FCNVME_LS_DISCONNECT);
-
- /* are we to delete a Connection ID (queue) */
- if (queue) {
- int qid = queue->qid;
-
- nvmet_fc_delete_target_queue(queue);
-
- /* release the get taken by find_target_queue */
- nvmet_fc_tgt_q_put(queue);
-
- /* tear association down if io queue terminated */
- if (!qid)
- del_assoc = true;
- }
-
/* release get taken in nvmet_fc_find_target_assoc */
nvmet_fc_tgt_a_put(iod->assoc);
- if (del_assoc)
- nvmet_fc_delete_target_assoc(iod->assoc);
+ nvmet_fc_delete_target_assoc(iod->assoc);
}
@@ -1661,7 +1669,7 @@
*
* If this routine returns error, the LLDD should abort the exchange.
*
- * @tgtport: pointer to the (registered) target port the LS was
+ * @target_port: pointer to the (registered) target port the LS was
* received on.
* @lsreq: pointer to a lsreq request structure to be used to reference
* the exchange corresponding to the LS.
@@ -1798,7 +1806,7 @@
*/
rspcnt = atomic_inc_return(&fod->queue->zrspcnt);
if (!(rspcnt % fod->queue->ersp_ratio) ||
- sqe->opcode == nvme_fabrics_command ||
+ nvme_is_fabrics((struct nvme_command *) sqe) ||
xfr_length != fod->req.transfer_len ||
(le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] ||
(sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) ||
@@ -2058,25 +2066,11 @@
}
static void
-nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work)
-{
- struct nvmet_fc_fcp_iod *fod =
- container_of(work, struct nvmet_fc_fcp_iod, done_work);
-
- nvmet_fc_fod_op_done(fod);
-}
-
-static void
nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
{
struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
- struct nvmet_fc_tgt_queue *queue = fod->queue;
- if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR)
- /* context switch so completion is not in ISR context */
- queue_work_on(queue->cpu, queue->work_q, &fod->done_work);
- else
- nvmet_fc_fod_op_done(fod);
+ nvmet_fc_fod_op_done(fod);
}
/*
@@ -2147,7 +2141,7 @@
/*
- * Actual processing routine for received FC-NVME LS Requests from the LLD
+ * Actual processing routine for received FC-NVME I/O Requests from the LLD
*/
static void
nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
@@ -2158,6 +2152,13 @@
int ret;
/*
+ * if there is no nvmet mapping to the targetport there
+ * shouldn't be requests. just terminate them.
+ */
+ if (!tgtport->pe)
+ goto transport_error;
+
+ /*
* Fused commands are currently not supported in the linux
* implementation.
*
@@ -2183,8 +2184,8 @@
}
fod->req.cmd = &fod->cmdiubuf.sqe;
- fod->req.rsp = &fod->rspiubuf.cqe;
- fod->req.port = fod->queue->port;
+ fod->req.cqe = &fod->rspiubuf.cqe;
+ fod->req.port = tgtport->pe->port;
/* clear any response payload */
memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
@@ -2237,19 +2238,6 @@
nvmet_fc_abort_op(tgtport, fod);
}
-/*
- * Actual processing routine for received FC-NVME LS Requests from the LLD
- */
-static void
-nvmet_fc_handle_fcp_rqst_work(struct work_struct *work)
-{
- struct nvmet_fc_fcp_iod *fod =
- container_of(work, struct nvmet_fc_fcp_iod, work);
- struct nvmet_fc_tgtport *tgtport = fod->tgtport;
-
- nvmet_fc_handle_fcp_rqst(tgtport, fod);
-}
-
/**
* nvmet_fc_rcv_fcp_req - transport entry point called by an LLDD
* upon the reception of a NVME FCP CMD IU.
@@ -2468,7 +2456,7 @@
substring_t wwn = { name, &name[sizeof(name)-1] };
int nnoffset, pnoffset;
- /* validate it string one of the 2 allowed formats */
+ /* validate if string is one of the 2 allowed formats */
if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
@@ -2508,6 +2496,7 @@
nvmet_fc_add_port(struct nvmet_port *port)
{
struct nvmet_fc_tgtport *tgtport;
+ struct nvmet_fc_port_entry *pe;
struct nvmet_fc_traddr traddr = { 0L, 0L };
unsigned long flags;
int ret;
@@ -2524,24 +2513,50 @@
if (ret)
return ret;
+ pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+ if (!pe)
+ return -ENOMEM;
+
ret = -ENXIO;
spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) {
if ((tgtport->fc_target_port.node_name == traddr.nn) &&
(tgtport->fc_target_port.port_name == traddr.pn)) {
- tgtport->port = port;
- ret = 0;
+ /* a FC port can only be 1 nvmet port id */
+ if (!tgtport->pe) {
+ nvmet_fc_portentry_bind(tgtport, pe, port);
+ ret = 0;
+ } else
+ ret = -EALREADY;
break;
}
}
spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+
+ if (ret)
+ kfree(pe);
+
return ret;
}
static void
nvmet_fc_remove_port(struct nvmet_port *port)
{
- /* nothing to do */
+ struct nvmet_fc_port_entry *pe = port->priv;
+
+ nvmet_fc_portentry_unbind(pe);
+
+ kfree(pe);
+}
+
+static void
+nvmet_fc_discovery_chg(struct nvmet_port *port)
+{
+ struct nvmet_fc_port_entry *pe = port->priv;
+ struct nvmet_fc_tgtport *tgtport = pe->tgtport;
+
+ if (tgtport && tgtport->ops->discovery_event)
+ tgtport->ops->discovery_event(&tgtport->fc_target_port);
}
static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
@@ -2552,6 +2567,7 @@
.remove_port = nvmet_fc_remove_port,
.queue_response = nvmet_fc_fcp_nvme_cmd_done,
.delete_ctrl = nvmet_fc_delete_ctrl,
+ .discovery_chg = nvmet_fc_discovery_chg,
};
static int __init nvmet_fc_init_module(void)
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 5251689..b50b53d 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016 Avago Technologies. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful.
- * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
- * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
- * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
- * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
- * See the GNU General Public License for more details, a copy of which
- * can be found in the file COPYING included with this package
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
@@ -242,6 +231,11 @@
int status;
};
+struct fcloop_rscn {
+ struct fcloop_tport *tport;
+ struct work_struct work;
+};
+
enum {
INI_IO_START = 0,
INI_IO_ACTIVE = 1,
@@ -359,6 +353,37 @@
return 0;
}
+/*
+ * Simulate reception of RSCN and converting it to a initiator transport
+ * call to rescan a remote port.
+ */
+static void
+fcloop_tgt_rscn_work(struct work_struct *work)
+{
+ struct fcloop_rscn *tgt_rscn =
+ container_of(work, struct fcloop_rscn, work);
+ struct fcloop_tport *tport = tgt_rscn->tport;
+
+ if (tport->remoteport)
+ nvme_fc_rescan_remoteport(tport->remoteport);
+ kfree(tgt_rscn);
+}
+
+static void
+fcloop_tgt_discovery_evt(struct nvmet_fc_target_port *tgtport)
+{
+ struct fcloop_rscn *tgt_rscn;
+
+ tgt_rscn = kzalloc(sizeof(*tgt_rscn), GFP_KERNEL);
+ if (!tgt_rscn)
+ return;
+
+ tgt_rscn->tport = tgtport->private;
+ INIT_WORK(&tgt_rscn->work, fcloop_tgt_rscn_work);
+
+ schedule_work(&tgt_rscn->work);
+}
+
static void
fcloop_tfcp_req_free(struct kref *ref)
{
@@ -409,7 +434,7 @@
int ret = 0;
bool aborted = false;
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
switch (tfcp_req->inistate) {
case INI_IO_START:
tfcp_req->inistate = INI_IO_ACTIVE;
@@ -418,11 +443,11 @@
aborted = true;
break;
default:
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
WARN_ON(1);
return;
}
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
if (unlikely(aborted))
ret = -ECANCELED;
@@ -444,7 +469,7 @@
struct nvmefc_fcp_req *fcpreq;
bool completed = false;
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
fcpreq = tfcp_req->fcpreq;
switch (tfcp_req->inistate) {
case INI_IO_ABORTED:
@@ -453,11 +478,11 @@
completed = true;
break;
default:
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
WARN_ON(1);
return;
}
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
if (unlikely(completed)) {
/* remove reference taken in original abort downcall */
@@ -469,9 +494,9 @@
nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport,
&tfcp_req->tgt_fcp_req);
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
tfcp_req->fcpreq = NULL;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED);
/* call_host_done releases reference for abort downcall */
@@ -488,10 +513,10 @@
container_of(work, struct fcloop_fcpreq, tio_done_work);
struct nvmefc_fcp_req *fcpreq;
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
fcpreq = tfcp_req->fcpreq;
tfcp_req->inistate = INI_IO_COMPLETED;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status);
}
@@ -510,7 +535,7 @@
if (!rport->targetport)
return -ECONNREFUSED;
- tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL);
+ tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_ATOMIC);
if (!tfcp_req)
return -ENOMEM;
@@ -596,12 +621,12 @@
int fcp_err = 0, active, aborted;
u8 op = tgt_fcpreq->op;
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
fcpreq = tfcp_req->fcpreq;
active = tfcp_req->active;
aborted = tfcp_req->aborted;
tfcp_req->active = true;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
if (unlikely(active))
/* illegal - call while i/o active */
@@ -609,9 +634,9 @@
if (unlikely(aborted)) {
/* target transport has aborted i/o prior */
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
tfcp_req->active = false;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
tgt_fcpreq->transferred_length = 0;
tgt_fcpreq->fcp_error = -ECANCELED;
tgt_fcpreq->done(tgt_fcpreq);
@@ -648,6 +673,7 @@
break;
/* Fall-Thru to RSP handling */
+ /* FALLTHRU */
case NVMET_FCOP_RSP:
if (fcpreq) {
@@ -667,9 +693,9 @@
break;
}
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
tfcp_req->active = false;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
tgt_fcpreq->transferred_length = xfrlen;
tgt_fcpreq->fcp_error = fcp_err;
@@ -689,9 +715,9 @@
* (one doing io, other doing abort) and only kills ops posted
* after the abort request
*/
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
tfcp_req->aborted = true;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
tfcp_req->status = NVME_SC_INTERNAL;
@@ -739,7 +765,7 @@
return;
/* break initiator/target relationship for io */
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
switch (tfcp_req->inistate) {
case INI_IO_START:
case INI_IO_ACTIVE:
@@ -749,11 +775,11 @@
abortio = false;
break;
default:
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
WARN_ON(1);
return;
}
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
if (abortio)
/* leave the reference while the work item is scheduled */
@@ -849,6 +875,7 @@
.fcp_op = fcloop_fcp_op,
.fcp_abort = fcloop_tgt_fcp_abort,
.fcp_req_release = fcloop_fcp_req_release,
+ .discovery_event = fcloop_tgt_discovery_evt,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 7bc9f62..32008d8 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -1,21 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* NVMe I/O command implementation.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/module.h>
#include "nvmet.h"
+void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
+{
+ const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
+ /* Number of logical blocks per physical block. */
+ const u32 lpp = ql->physical_block_size / ql->logical_block_size;
+ /* Logical blocks per physical block, 0's based. */
+ const __le16 lpp0b = to0based(lpp);
+
+ /*
+ * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
+ * NAWUPF, and NACWU are defined for this namespace and should be
+ * used by the host for this namespace instead of the AWUN, AWUPF,
+ * and ACWU fields in the Identify Controller data structure. If
+ * any of these fields are zero that means that the corresponding
+ * field from the identify controller data structure should be used.
+ */
+ id->nsfeat |= 1 << 1;
+ id->nawun = lpp0b;
+ id->nawupf = lpp0b;
+ id->nacwu = lpp0b;
+
+ /*
+ * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
+ * NOWS are defined for this namespace and should be used by
+ * the host for I/O optimization.
+ */
+ id->nsfeat |= 1 << 4;
+ /* NPWG = Namespace Preferred Write Granularity. 0's based */
+ id->npwg = lpp0b;
+ /* NPWA = Namespace Preferred Write Alignment. 0's based */
+ id->npwa = id->npwg;
+ /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
+ id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
+ /* NPDG = Namespace Preferred Deallocate Alignment */
+ id->npda = id->npdg;
+ /* NOWS = Namespace Optimal Write Size */
+ id->nows = to0based(ql->io_opt / ql->logical_block_size);
+}
+
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
int ret;
@@ -44,13 +75,69 @@
}
}
+static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
+{
+ u16 status = NVME_SC_SUCCESS;
+
+ if (likely(blk_sts == BLK_STS_OK))
+ return status;
+ /*
+ * Right now there exists M : 1 mapping between block layer error
+ * to the NVMe status code (see nvme_error_status()). For consistency,
+ * when we reverse map we use most appropriate NVMe Status code from
+ * the group of the NVMe staus codes used in the nvme_error_status().
+ */
+ switch (blk_sts) {
+ case BLK_STS_NOSPC:
+ status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
+ req->error_loc = offsetof(struct nvme_rw_command, length);
+ break;
+ case BLK_STS_TARGET:
+ status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ req->error_loc = offsetof(struct nvme_rw_command, slba);
+ break;
+ case BLK_STS_NOTSUPP:
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
+ switch (req->cmd->common.opcode) {
+ case nvme_cmd_dsm:
+ case nvme_cmd_write_zeroes:
+ status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
+ break;
+ default:
+ status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ }
+ break;
+ case BLK_STS_MEDIUM:
+ status = NVME_SC_ACCESS_DENIED;
+ req->error_loc = offsetof(struct nvme_rw_command, nsid);
+ break;
+ case BLK_STS_IOERR:
+ /* fallthru */
+ default:
+ status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
+ }
+
+ switch (req->cmd->common.opcode) {
+ case nvme_cmd_read:
+ case nvme_cmd_write:
+ req->error_slba = le64_to_cpu(req->cmd->rw.slba);
+ break;
+ case nvme_cmd_write_zeroes:
+ req->error_slba =
+ le64_to_cpu(req->cmd->write_zeroes.slba);
+ break;
+ default:
+ req->error_slba = 0;
+ }
+ return status;
+}
+
static void nvmet_bio_done(struct bio *bio)
{
struct nvmet_req *req = bio->bi_private;
- nvmet_req_complete(req,
- bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
-
+ nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
if (bio != &req->b.inline_bio)
bio_put(bio);
}
@@ -58,10 +145,9 @@
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
- struct bio *bio = &req->b.inline_bio;
+ struct bio *bio;
struct scatterlist *sg;
sector_t sector;
- blk_qc_t cookie;
int op, op_flags = 0, i;
if (!req->sg_cnt) {
@@ -78,10 +164,18 @@
op = REQ_OP_READ;
}
+ if (is_pci_p2pdma_page(sg_page(req->sg)))
+ op_flags |= REQ_NOMERGE;
+
sector = le64_to_cpu(req->cmd->rw.slba);
sector <<= (req->ns->blksize_shift - 9);
- bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+ if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) {
+ bio = &req->b.inline_bio;
+ bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+ } else {
+ bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+ }
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
bio->bi_private = req;
@@ -106,9 +200,7 @@
sg_cnt--;
}
- cookie = submit_bio(bio);
-
- blk_poll(bdev_get_queue(req->ns->bdev), cookie);
+ submit_bio(bio);
}
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
@@ -131,18 +223,21 @@
return 0;
}
-static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
+static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
struct nvme_dsm_range *range, struct bio **bio)
{
+ struct nvmet_ns *ns = req->ns;
int ret;
ret = __blkdev_issue_discard(ns->bdev,
le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
GFP_KERNEL, 0, bio);
- if (ret && ret != -EOPNOTSUPP)
- return NVME_SC_INTERNAL | NVME_SC_DNR;
- return 0;
+ if (ret && ret != -EOPNOTSUPP) {
+ req->error_slba = le64_to_cpu(range->slba);
+ return errno_to_nvme_status(req, ret);
+ }
+ return NVME_SC_SUCCESS;
}
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
@@ -158,7 +253,7 @@
if (status)
break;
- status = nvmet_bdev_discard_range(req->ns, &range, &bio);
+ status = nvmet_bdev_discard_range(req, &range, &bio);
if (status)
break;
}
@@ -196,25 +291,23 @@
{
struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
struct bio *bio = NULL;
- u16 status = NVME_SC_SUCCESS;
sector_t sector;
sector_t nr_sector;
+ int ret;
sector = le64_to_cpu(write_zeroes->slba) <<
(req->ns->blksize_shift - 9);
nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
(req->ns->blksize_shift - 9));
- if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
- GFP_KERNEL, &bio, 0))
- status = NVME_SC_INTERNAL | NVME_SC_DNR;
-
+ ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
+ GFP_KERNEL, &bio, 0);
if (bio) {
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
submit_bio(bio);
} else {
- nvmet_req_complete(req, status);
+ nvmet_req_complete(req, errno_to_nvme_status(req, ret));
}
}
@@ -239,10 +332,12 @@
return 0;
case nvme_cmd_write_zeroes:
req->execute = nvmet_bdev_execute_write_zeroes;
+ req->data_len = 0;
return 0;
default:
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
req->sq->qid);
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
}
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 81a9dc5..05453f5 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -49,7 +49,12 @@
goto err;
ns->size = stat.size;
- ns->blksize_shift = file_inode(ns->file)->i_blkbits;
+ /*
+ * i_blkbits can be greater than the universally accepted upper bound,
+ * so make sure we export a sane namespace lba_shift.
+ */
+ ns->blksize_shift = min_t(u8,
+ file_inode(ns->file)->i_blkbits, 12);
ns->bvec_cache = kmem_cache_create("nvmet-bvec",
NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
@@ -75,25 +80,24 @@
return ret;
}
-static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
+static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg)
{
- bv->bv_page = sg_page_iter_page(iter);
- bv->bv_offset = iter->sg->offset;
- bv->bv_len = PAGE_SIZE - iter->sg->offset;
+ bv->bv_page = sg_page(sg);
+ bv->bv_offset = sg->offset;
+ bv->bv_len = sg->length;
}
static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
- unsigned long nr_segs, size_t count)
+ unsigned long nr_segs, size_t count, int ki_flags)
{
struct kiocb *iocb = &req->f.iocb;
ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
struct iov_iter iter;
- int ki_flags = 0, rw;
- ssize_t ret;
+ int rw;
if (req->cmd->rw.opcode == nvme_cmd_write) {
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
- ki_flags = IOCB_DSYNC;
+ ki_flags |= IOCB_DSYNC;
call_iter = req->ns->file->f_op->write_iter;
rw = WRITE;
} else {
@@ -101,23 +105,19 @@
rw = READ;
}
- iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+ iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);
iocb->ki_pos = pos;
iocb->ki_filp = req->ns->file;
iocb->ki_flags = ki_flags | iocb_flags(req->ns->file);
- ret = call_iter(iocb, &iter);
-
- if (ret != -EIOCBQUEUED && iocb->ki_complete)
- iocb->ki_complete(iocb, ret, 0);
-
- return ret;
+ return call_iter(iocb, &iter);
}
static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
{
struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
+ u16 status = NVME_SC_SUCCESS;
if (req->f.bvec != req->inline_bvec) {
if (likely(req->f.mpool_alloc == false))
@@ -126,28 +126,114 @@
mempool_free(req->f.bvec, req->ns->bvec_pool);
}
- nvmet_req_complete(req, ret != req->data_len ?
- NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+ if (unlikely(ret != req->data_len))
+ status = errno_to_nvme_status(req, ret);
+ nvmet_req_complete(req, status);
}
-static void nvmet_file_execute_rw(struct nvmet_req *req)
+static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
{
- ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
- struct sg_page_iter sg_pg_iter;
+ ssize_t nr_bvec = req->sg_cnt;
unsigned long bv_cnt = 0;
bool is_sync = false;
size_t len = 0, total_len = 0;
ssize_t ret = 0;
loff_t pos;
+ int i;
+ struct scatterlist *sg;
- if (!req->sg_cnt || !nr_bvec) {
- nvmet_req_complete(req, 0);
- return;
- }
+ if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
+ is_sync = true;
pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
if (unlikely(pos + req->data_len > req->ns->size)) {
- nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR);
+ nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
+ return true;
+ }
+
+ memset(&req->f.iocb, 0, sizeof(struct kiocb));
+ for_each_sg(req->sg, sg, req->sg_cnt, i) {
+ nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg);
+ len += req->f.bvec[bv_cnt].bv_len;
+ total_len += req->f.bvec[bv_cnt].bv_len;
+ bv_cnt++;
+
+ WARN_ON_ONCE((nr_bvec - 1) < 0);
+
+ if (unlikely(is_sync) &&
+ (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
+ ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len, 0);
+ if (ret < 0)
+ goto complete;
+
+ pos += len;
+ bv_cnt = 0;
+ len = 0;
+ }
+ nr_bvec--;
+ }
+
+ if (WARN_ON_ONCE(total_len != req->data_len)) {
+ ret = -EIO;
+ goto complete;
+ }
+
+ if (unlikely(is_sync)) {
+ ret = total_len;
+ goto complete;
+ }
+
+ /*
+ * A NULL ki_complete ask for synchronous execution, which we want
+ * for the IOCB_NOWAIT case.
+ */
+ if (!(ki_flags & IOCB_NOWAIT))
+ req->f.iocb.ki_complete = nvmet_file_io_done;
+
+ ret = nvmet_file_submit_bvec(req, pos, bv_cnt, total_len, ki_flags);
+
+ switch (ret) {
+ case -EIOCBQUEUED:
+ return true;
+ case -EAGAIN:
+ if (WARN_ON_ONCE(!(ki_flags & IOCB_NOWAIT)))
+ goto complete;
+ return false;
+ case -EOPNOTSUPP:
+ /*
+ * For file systems returning error -EOPNOTSUPP, handle
+ * IOCB_NOWAIT error case separately and retry without
+ * IOCB_NOWAIT.
+ */
+ if ((ki_flags & IOCB_NOWAIT))
+ return false;
+ break;
+ }
+
+complete:
+ nvmet_file_io_done(&req->f.iocb, ret, 0);
+ return true;
+}
+
+static void nvmet_file_buffered_io_work(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+
+ nvmet_file_execute_io(req, 0);
+}
+
+static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
+{
+ INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
+ queue_work(buffered_io_wq, &req->f.work);
+}
+
+static void nvmet_file_execute_rw(struct nvmet_req *req)
+{
+ ssize_t nr_bvec = req->sg_cnt;
+
+ if (!req->sg_cnt || !nr_bvec) {
+ nvmet_req_complete(req, 0);
return;
}
@@ -157,65 +243,25 @@
else
req->f.bvec = req->inline_bvec;
- req->f.mpool_alloc = false;
if (unlikely(!req->f.bvec)) {
/* fallback under memory pressure */
req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
req->f.mpool_alloc = true;
- if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
- is_sync = true;
- }
+ } else
+ req->f.mpool_alloc = false;
- memset(&req->f.iocb, 0, sizeof(struct kiocb));
- for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
- nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
- len += req->f.bvec[bv_cnt].bv_len;
- total_len += req->f.bvec[bv_cnt].bv_len;
- bv_cnt++;
-
- WARN_ON_ONCE((nr_bvec - 1) < 0);
-
- if (unlikely(is_sync) &&
- (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
- ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len);
- if (ret < 0)
- goto out;
- pos += len;
- bv_cnt = 0;
- len = 0;
- }
- nr_bvec--;
- }
-
- if (WARN_ON_ONCE(total_len != req->data_len))
- ret = -EIO;
-out:
- if (unlikely(is_sync || ret)) {
- nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0);
- return;
- }
- req->f.iocb.ki_complete = nvmet_file_io_done;
- nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
-}
-
-static void nvmet_file_buffered_io_work(struct work_struct *w)
-{
- struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
-
- nvmet_file_execute_rw(req);
-}
-
-static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
-{
- INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
- queue_work(buffered_io_wq, &req->f.work);
+ if (req->ns->buffered_io) {
+ if (likely(!req->f.mpool_alloc) &&
+ nvmet_file_execute_io(req, IOCB_NOWAIT))
+ return;
+ nvmet_file_submit_buffered_io(req);
+ } else
+ nvmet_file_execute_io(req, 0);
}
u16 nvmet_file_flush(struct nvmet_req *req)
{
- if (vfs_fsync(req->ns->file, 1) < 0)
- return NVME_SC_INTERNAL | NVME_SC_DNR;
- return 0;
+ return errno_to_nvme_status(req, vfs_fsync(req->ns->file, 1));
}
static void nvmet_file_flush_work(struct work_struct *w)
@@ -236,29 +282,34 @@
int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
struct nvme_dsm_range range;
loff_t offset, len;
- u16 ret;
+ u16 status = 0;
+ int ret;
int i;
for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
- ret = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+ status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
sizeof(range));
- if (ret)
+ if (status)
break;
offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
- len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
+ len = le32_to_cpu(range.nlb);
+ len <<= req->ns->blksize_shift;
if (offset + len > req->ns->size) {
- ret = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ req->error_slba = le64_to_cpu(range.slba);
+ status = errno_to_nvme_status(req, -ENOSPC);
break;
}
- if (vfs_fallocate(req->ns->file, mode, offset, len)) {
- ret = NVME_SC_INTERNAL | NVME_SC_DNR;
+ ret = vfs_fallocate(req->ns->file, mode, offset, len);
+ if (ret && ret != -EOPNOTSUPP) {
+ req->error_slba = le64_to_cpu(range.slba);
+ status = errno_to_nvme_status(req, ret);
break;
}
}
- nvmet_req_complete(req, ret);
+ nvmet_req_complete(req, status);
}
static void nvmet_file_dsm_work(struct work_struct *w)
@@ -298,12 +349,12 @@
req->ns->blksize_shift);
if (unlikely(offset + len > req->ns->size)) {
- nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR);
+ nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
return;
}
ret = vfs_fallocate(req->ns->file, mode, offset, len);
- nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+ nvmet_req_complete(req, ret < 0 ? errno_to_nvme_status(req, ret) : 0);
}
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
@@ -319,10 +370,7 @@
switch (cmd->common.opcode) {
case nvme_cmd_read:
case nvme_cmd_write:
- if (req->ns->buffered_io)
- req->execute = nvmet_file_execute_rw_buffered_io;
- else
- req->execute = nvmet_file_execute_rw;
+ req->execute = nvmet_file_execute_rw;
req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
@@ -341,6 +389,7 @@
default:
pr_err("unhandled cmd for file ns %d on qid %d\n",
cmd->common.opcode, req->sq->qid);
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 9908082..11f5aea 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* NVMe over Fabrics loopback device.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/scatterlist.h>
@@ -26,7 +18,7 @@
struct nvme_loop_iod {
struct nvme_request nvme_req;
struct nvme_command cmd;
- struct nvme_completion rsp;
+ struct nvme_completion cqe;
struct nvmet_req req;
struct nvme_loop_queue *queue;
struct work_struct work;
@@ -85,7 +77,7 @@
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
nvme_cleanup_cmd(req);
- sg_free_table_chained(&iod->sg_table, true);
+ sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE);
nvme_complete_rq(req);
}
@@ -102,7 +94,7 @@
{
struct nvme_loop_queue *queue =
container_of(req->sq, struct nvme_loop_queue, nvme_sq);
- struct nvme_completion *cqe = req->rsp;
+ struct nvme_completion *cqe = req->cqe;
/*
* AEN requests are special as they don't time out and can
@@ -137,20 +129,6 @@
nvmet_req_execute(&iod->req);
}
-static enum blk_eh_timer_return
-nvme_loop_timeout(struct request *rq, bool reserved)
-{
- struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
-
- /* queue error recovery */
- nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
-
- /* fail with DNR on admin cmd timeout */
- nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
-
- return BLK_EH_DONE;
-}
-
static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -179,8 +157,10 @@
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl))
+ iod->sg_table.sgl, SG_CHUNK_SIZE)) {
+ nvme_cleanup_cmd(req);
return BLK_STS_RESOURCE;
+ }
iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
@@ -215,7 +195,7 @@
struct nvme_loop_iod *iod, unsigned int queue_idx)
{
iod->req.cmd = &iod->cmd;
- iod->req.rsp = &iod->rsp;
+ iod->req.cqe = &iod->cqe;
iod->queue = &ctrl->queues[queue_idx];
INIT_WORK(&iod->work, nvme_loop_execute_work);
return 0;
@@ -261,7 +241,6 @@
.complete = nvme_loop_complete_rq,
.init_request = nvme_loop_init_request,
.init_hctx = nvme_loop_init_hctx,
- .timeout = nvme_loop_timeout,
};
static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
@@ -269,7 +248,6 @@
.complete = nvme_loop_complete_rq,
.init_request = nvme_loop_init_request,
.init_hctx = nvme_loop_init_admin_hctx,
- .timeout = nvme_loop_timeout,
};
static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
@@ -277,6 +255,7 @@
clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
blk_cleanup_queue(ctrl->ctrl.admin_q);
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
blk_mq_free_tag_set(&ctrl->admin_tag_set);
}
@@ -345,7 +324,7 @@
int i, ret;
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
- ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
+ ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false);
if (ret)
return ret;
set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags);
@@ -381,10 +360,16 @@
goto out_free_sq;
ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
+ ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+ if (IS_ERR(ctrl->ctrl.fabrics_q)) {
+ error = PTR_ERR(ctrl->ctrl.fabrics_q);
+ goto out_free_tagset;
+ }
+
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {
error = PTR_ERR(ctrl->ctrl.admin_q);
- goto out_free_tagset;
+ goto out_cleanup_fabrics_q;
}
error = nvmf_connect_admin_queue(&ctrl->ctrl);
@@ -393,23 +378,15 @@
set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
- error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
- if (error) {
- dev_err(ctrl->ctrl.device,
- "prop_get NVME_REG_CAP failed\n");
- goto out_cleanup_queue;
- }
-
- ctrl->ctrl.sqsize =
- min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
-
- error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
+ error = nvme_enable_ctrl(&ctrl->ctrl);
if (error)
goto out_cleanup_queue;
ctrl->ctrl.max_hw_sectors =
(NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+
error = nvme_init_identify(&ctrl->ctrl);
if (error)
goto out_cleanup_queue;
@@ -418,6 +395,8 @@
out_cleanup_queue:
blk_cleanup_queue(ctrl->ctrl.admin_q);
+out_cleanup_fabrics_q:
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
out_free_tagset:
blk_mq_free_tag_set(&ctrl->admin_tag_set);
out_free_sq:
@@ -431,16 +410,17 @@
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
+ blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
nvme_loop_destroy_io_queues(ctrl);
}
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
nvme_shutdown_ctrl(&ctrl->ctrl);
- blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+ blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
nvme_loop_destroy_admin_queue(ctrl);
}
@@ -678,6 +658,14 @@
mutex_lock(&nvme_loop_ports_mutex);
list_del_init(&port->entry);
mutex_unlock(&nvme_loop_ports_mutex);
+
+ /*
+ * Ensure any ctrls that are in the process of being
+ * deleted are in fact deleted before we return
+ * and free the port. This is to prevent active
+ * ctrls from using a port after it's freed.
+ */
+ flush_workqueue(nvme_delete_wq);
}
static const struct nvmet_fabrics_ops nvme_loop_ops = {
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index ec9af4e..c51f8dd 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#ifndef _NVMET_H
@@ -26,15 +18,19 @@
#include <linux/configfs.h>
#include <linux/rcupdate.h>
#include <linux/blkdev.h>
+#include <linux/radix-tree.h>
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
+#define NVMET_NO_ERROR_LOC ((u16)-1)
/*
* Supported optional AENs:
*/
#define NVMET_AEN_CFG_OPTIONAL \
(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE)
+#define NVMET_DISC_AEN_CFG_OPTIONAL \
+ (NVME_AEN_CFG_DISC_CHANGE)
/*
* Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
@@ -77,6 +73,9 @@
struct completion disable_done;
mempool_t *bvec_pool;
struct kmem_cache *bvec_cache;
+
+ int use_p2pmem;
+ struct pci_dev *p2p_dev;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -84,6 +83,11 @@
return container_of(to_config_group(item), struct nvmet_ns, group);
}
+static inline struct device *nvmet_ns_dev(struct nvmet_ns *ns)
+{
+ return ns->bdev ? disk_to_dev(ns->bdev->bd_disk) : NULL;
+}
+
struct nvmet_cq {
u16 qid;
u16 size;
@@ -95,6 +99,7 @@
u16 qid;
u16 size;
u32 sqhd;
+ bool sqhd_disabled;
struct completion free_done;
struct completion confirm_done;
};
@@ -128,12 +133,14 @@
struct list_head subsystems;
struct config_group referrals_group;
struct list_head referrals;
+ struct list_head global_entry;
struct config_group ana_groups_group;
struct nvmet_ana_group ana_default_group;
enum nvme_ana_state *ana_state;
void *priv;
bool enabled;
int inline_data_size;
+ const struct nvmet_fabrics_ops *tr_ops;
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@@ -154,6 +161,8 @@
struct nvmet_cq **cqs;
struct nvmet_sq **sqs;
+ bool cmd_seen;
+
struct mutex lock;
u64 cap;
u32 cc;
@@ -184,6 +193,13 @@
char subsysnqn[NVMF_NQN_FIELD_LEN];
char hostnqn[NVMF_NQN_FIELD_LEN];
+
+ struct device *p2p_client;
+ struct radix_tree_root p2p_ns_map;
+
+ spinlock_t error_lock;
+ u64 err_counter;
+ struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
};
struct nvmet_subsys {
@@ -261,13 +277,16 @@
void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
void (*disc_traddr)(struct nvmet_req *req,
struct nvmet_port *port, char *traddr);
+ u16 (*install_queue)(struct nvmet_sq *nvme_sq);
+ void (*discovery_chg)(struct nvmet_port *port);
};
#define NVMET_MAX_INLINE_BIOVEC 8
+#define NVMET_MAX_INLINE_DATA_LEN NVMET_MAX_INLINE_BIOVEC * PAGE_SIZE
struct nvmet_req {
struct nvme_command *cmd;
- struct nvme_completion *rsp;
+ struct nvme_completion *cqe;
struct nvmet_sq *sq;
struct nvmet_cq *cq;
struct nvmet_ns *ns;
@@ -294,18 +313,18 @@
void (*execute)(struct nvmet_req *req);
const struct nvmet_fabrics_ops *ops;
+
+ struct pci_dev *p2p_dev;
+ struct device *p2p_client;
+ u16 error_loc;
+ u64 error_slba;
};
extern struct workqueue_struct *buffered_io_wq;
-static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
-{
- req->rsp->status = cpu_to_le16(status << 1);
-}
-
static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
{
- req->rsp->result.u32 = cpu_to_le32(result);
+ req->cqe->result.u32 = cpu_to_le32(result);
}
/*
@@ -324,7 +343,29 @@
u8 log_page;
};
+static inline void nvmet_clear_aen_bit(struct nvmet_req *req, u32 bn)
+{
+ int rae = le32_to_cpu(req->cmd->common.cdw10) & 1 << 15;
+
+ if (!rae)
+ clear_bit(bn, &req->sq->ctrl->aen_masked);
+}
+
+static inline bool nvmet_aen_bit_disabled(struct nvmet_ctrl *ctrl, u32 bn)
+{
+ if (!(READ_ONCE(ctrl->aen_enabled) & (1 << bn)))
+ return true;
+ return test_and_set_bit(bn, &ctrl->aen_masked);
+}
+
+void nvmet_get_feat_kato(struct nvmet_req *req);
+void nvmet_get_feat_async_event(struct nvmet_req *req);
+u16 nvmet_set_feat_kato(struct nvmet_req *req);
+u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
+void nvmet_execute_async_event(struct nvmet_req *req);
+
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
+void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id);
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
@@ -336,6 +377,10 @@
void nvmet_req_uninit(struct nvmet_req *req);
void nvmet_req_execute(struct nvmet_req *req);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
+int nvmet_req_alloc_sgl(struct nvmet_req *req);
+void nvmet_req_free_sgl(struct nvmet_req *req);
+
+void nvmet_execute_keep_alive(struct nvmet_req *req);
void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
u16 size);
@@ -373,11 +418,14 @@
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
+void nvmet_port_del_ctrls(struct nvmet_port *port,
+ struct nvmet_subsys *subsys);
+
int nvmet_enable_port(struct nvmet_port *port);
void nvmet_disable_port(struct nvmet_port *port);
void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port);
-void nvmet_referral_disable(struct nvmet_port *port);
+void nvmet_referral_disable(struct nvmet_port *parent, struct nvmet_port *port);
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
size_t len);
@@ -386,6 +434,15 @@
u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
u32 nvmet_get_log_page_len(struct nvme_command *cmd);
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd);
+
+extern struct list_head *nvmet_ports;
+void nvmet_port_disc_changed(struct nvmet_port *port,
+ struct nvmet_subsys *subsys);
+void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
+ struct nvmet_host *host);
+void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
+ u8 event_info, u8 log_page);
#define NVMET_QUEUE_SIZE 1024
#define NVMET_NR_QUEUES 128
@@ -407,7 +464,7 @@
#define NVMET_DEFAULT_ANA_GRPID 1
#define NVMET_KAS 10
-#define NVMET_DISC_KATO 120
+#define NVMET_DISC_KATO_MS 120000
int __init nvmet_init_configfs(void);
void __exit nvmet_exit_configfs(void);
@@ -416,15 +473,13 @@
void nvmet_exit_discovery(void);
extern struct nvmet_subsys *nvmet_disc_subsys;
-extern u64 nvmet_genctr;
extern struct rw_semaphore nvmet_config_sem;
extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
extern u64 nvmet_ana_chgcnt;
extern struct rw_semaphore nvmet_ana_sem;
-bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
- const char *hostnqn);
+bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn);
int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
int nvmet_file_ns_enable(struct nvmet_ns *ns);
@@ -439,4 +494,13 @@
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
+
+u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
+
+/* Convert a 32-bit number to a 16-bit 0's based number */
+static inline __le16 to0based(u32 a)
+{
+ return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
+}
+
#endif /* _NVMET_H */
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index e57f390..36d906a 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* NVMe over Fabrics RDMA target.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/atomic.h>
@@ -139,6 +131,10 @@
static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
+static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
+ struct nvmet_rdma_rsp *r);
+static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
+ struct nvmet_rdma_rsp *r);
static const struct nvmet_fabrics_ops nvmet_rdma_ops;
@@ -164,7 +160,7 @@
{
return !nvme_is_write(rsp->req.cmd) &&
rsp->req.transfer_len &&
- !rsp->req.rsp->status &&
+ !rsp->req.cqe->status &&
!(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA);
}
@@ -182,9 +178,17 @@
spin_unlock_irqrestore(&queue->rsps_lock, flags);
if (unlikely(!rsp)) {
- rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
+ int ret;
+
+ rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
if (unlikely(!rsp))
return NULL;
+ ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
+ if (unlikely(ret)) {
+ kfree(rsp);
+ return NULL;
+ }
+
rsp->allocated = true;
}
@@ -196,7 +200,8 @@
{
unsigned long flags;
- if (rsp->allocated) {
+ if (unlikely(rsp->allocated)) {
+ nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
kfree(rsp);
return;
}
@@ -359,16 +364,17 @@
struct nvmet_rdma_rsp *r)
{
/* NVMe CQE / RDMA SEND */
- r->req.rsp = kmalloc(sizeof(*r->req.rsp), GFP_KERNEL);
- if (!r->req.rsp)
+ r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
+ if (!r->req.cqe)
goto out;
- r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.rsp,
- sizeof(*r->req.rsp), DMA_TO_DEVICE);
+ r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.cqe,
+ sizeof(*r->req.cqe), DMA_TO_DEVICE);
if (ib_dma_mapping_error(ndev->device, r->send_sge.addr))
goto out_free_rsp;
- r->send_sge.length = sizeof(*r->req.rsp);
+ r->req.p2p_client = &ndev->device->dev;
+ r->send_sge.length = sizeof(*r->req.cqe);
r->send_sge.lkey = ndev->pd->local_dma_lkey;
r->send_cqe.done = nvmet_rdma_send_done;
@@ -383,7 +389,7 @@
return 0;
out_free_rsp:
- kfree(r->req.rsp);
+ kfree(r->req.cqe);
out:
return -ENOMEM;
}
@@ -392,8 +398,8 @@
struct nvmet_rdma_rsp *r)
{
ib_dma_unmap_single(ndev->device, r->send_sge.addr,
- sizeof(*r->req.rsp), DMA_TO_DEVICE);
- kfree(r->req.rsp);
+ sizeof(*r->req.cqe), DMA_TO_DEVICE);
+ kfree(r->req.cqe);
}
static int
@@ -503,7 +509,7 @@
}
if (rsp->req.sg != rsp->cmd->inline_sg)
- sgl_free(rsp->req.sg);
+ nvmet_req_free_sgl(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
nvmet_rdma_process_wr_wait_list(queue);
@@ -630,8 +636,11 @@
u64 off = le64_to_cpu(sgl->addr);
u32 len = le32_to_cpu(sgl->length);
- if (!nvme_is_write(rsp->req.cmd))
+ if (!nvme_is_write(rsp->req.cmd)) {
+ rsp->req.error_loc =
+ offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ }
if (off + len > rsp->queue->dev->inline_data_size) {
pr_err("invalid inline data offset!\n");
@@ -653,24 +662,24 @@
{
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u64 addr = le64_to_cpu(sgl->addr);
- u32 len = get_unaligned_le24(sgl->length);
u32 key = get_unaligned_le32(sgl->key);
int ret;
+ rsp->req.transfer_len = get_unaligned_le24(sgl->length);
+
/* no data command? */
- if (!len)
+ if (!rsp->req.transfer_len)
return 0;
- rsp->req.sg = sgl_alloc(len, GFP_KERNEL, &rsp->req.sg_cnt);
- if (!rsp->req.sg)
- return NVME_SC_INTERNAL;
+ ret = nvmet_req_alloc_sgl(&rsp->req);
+ if (ret < 0)
+ goto error_out;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
nvmet_data_dir(&rsp->req));
if (ret < 0)
- return NVME_SC_INTERNAL;
- rsp->req.transfer_len += len;
+ goto error_out;
rsp->n_rdma += ret;
if (invalidate) {
@@ -679,6 +688,10 @@
}
return 0;
+
+error_out:
+ rsp->req.transfer_len = 0;
+ return NVME_SC_INTERNAL;
}
static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
@@ -692,6 +705,8 @@
return nvmet_rdma_map_sgl_inline(rsp);
default:
pr_err("invalid SGL subtype: %#x\n", sgl->type);
+ rsp->req.error_loc =
+ offsetof(struct nvme_common_command, dptr);
return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
}
case NVME_KEY_SGL_FMT_DATA_DESC:
@@ -702,10 +717,13 @@
return nvmet_rdma_map_sgl_keyed(rsp, sgl, false);
default:
pr_err("invalid SGL subtype: %#x\n", sgl->type);
+ rsp->req.error_loc =
+ offsetof(struct nvme_common_command, dptr);
return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
}
default:
pr_err("invalid SGL type: %#x\n", sgl->type);
+ rsp->req.error_loc = offsetof(struct nvme_common_command, dptr);
return NVME_SC_SGL_INVALID_TYPE | NVME_SC_DNR;
}
}
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
new file mode 100644
index 0000000..d535080
--- /dev/null
+++ b/drivers/nvme/target/tcp.c
@@ -0,0 +1,1761 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe over Fabrics TCP target.
+ * Copyright (c) 2018 Lightbits Labs. All rights reserved.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/nvme-tcp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/inet.h>
+#include <linux/llist.h>
+#include <crypto/hash.h>
+
+#include "nvmet.h"
+
+#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
+
+#define NVMET_TCP_RECV_BUDGET 8
+#define NVMET_TCP_SEND_BUDGET 8
+#define NVMET_TCP_IO_WORK_BUDGET 64
+
+enum nvmet_tcp_send_state {
+ NVMET_TCP_SEND_DATA_PDU,
+ NVMET_TCP_SEND_DATA,
+ NVMET_TCP_SEND_R2T,
+ NVMET_TCP_SEND_DDGST,
+ NVMET_TCP_SEND_RESPONSE
+};
+
+enum nvmet_tcp_recv_state {
+ NVMET_TCP_RECV_PDU,
+ NVMET_TCP_RECV_DATA,
+ NVMET_TCP_RECV_DDGST,
+ NVMET_TCP_RECV_ERR,
+};
+
+enum {
+ NVMET_TCP_F_INIT_FAILED = (1 << 0),
+};
+
+struct nvmet_tcp_cmd {
+ struct nvmet_tcp_queue *queue;
+ struct nvmet_req req;
+
+ struct nvme_tcp_cmd_pdu *cmd_pdu;
+ struct nvme_tcp_rsp_pdu *rsp_pdu;
+ struct nvme_tcp_data_pdu *data_pdu;
+ struct nvme_tcp_r2t_pdu *r2t_pdu;
+
+ u32 rbytes_done;
+ u32 wbytes_done;
+
+ u32 pdu_len;
+ u32 pdu_recv;
+ int sg_idx;
+ int nr_mapped;
+ struct msghdr recv_msg;
+ struct kvec *iov;
+ u32 flags;
+
+ struct list_head entry;
+ struct llist_node lentry;
+
+ /* send state */
+ u32 offset;
+ struct scatterlist *cur_sg;
+ enum nvmet_tcp_send_state state;
+
+ __le32 exp_ddgst;
+ __le32 recv_ddgst;
+};
+
+enum nvmet_tcp_queue_state {
+ NVMET_TCP_Q_CONNECTING,
+ NVMET_TCP_Q_LIVE,
+ NVMET_TCP_Q_DISCONNECTING,
+};
+
+struct nvmet_tcp_queue {
+ struct socket *sock;
+ struct nvmet_tcp_port *port;
+ struct work_struct io_work;
+ int cpu;
+ struct nvmet_cq nvme_cq;
+ struct nvmet_sq nvme_sq;
+
+ /* send state */
+ struct nvmet_tcp_cmd *cmds;
+ unsigned int nr_cmds;
+ struct list_head free_list;
+ struct llist_head resp_list;
+ struct list_head resp_send_list;
+ int send_list_len;
+ struct nvmet_tcp_cmd *snd_cmd;
+
+ /* recv state */
+ int offset;
+ int left;
+ enum nvmet_tcp_recv_state rcv_state;
+ struct nvmet_tcp_cmd *cmd;
+ union nvme_tcp_pdu pdu;
+
+ /* digest state */
+ bool hdr_digest;
+ bool data_digest;
+ struct ahash_request *snd_hash;
+ struct ahash_request *rcv_hash;
+
+ spinlock_t state_lock;
+ enum nvmet_tcp_queue_state state;
+
+ struct sockaddr_storage sockaddr;
+ struct sockaddr_storage sockaddr_peer;
+ struct work_struct release_work;
+
+ int idx;
+ struct list_head queue_list;
+
+ struct nvmet_tcp_cmd connect;
+
+ struct page_frag_cache pf_cache;
+
+ void (*data_ready)(struct sock *);
+ void (*state_change)(struct sock *);
+ void (*write_space)(struct sock *);
+};
+
+struct nvmet_tcp_port {
+ struct socket *sock;
+ struct work_struct accept_work;
+ struct nvmet_port *nport;
+ struct sockaddr_storage addr;
+ int last_cpu;
+ void (*data_ready)(struct sock *);
+};
+
+static DEFINE_IDA(nvmet_tcp_queue_ida);
+static LIST_HEAD(nvmet_tcp_queue_list);
+static DEFINE_MUTEX(nvmet_tcp_queue_mutex);
+
+static struct workqueue_struct *nvmet_tcp_wq;
+static struct nvmet_fabrics_ops nvmet_tcp_ops;
+static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
+static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
+
+static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
+ struct nvmet_tcp_cmd *cmd)
+{
+ return cmd - queue->cmds;
+}
+
+static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd)
+{
+ return nvme_is_write(cmd->req.cmd) &&
+ cmd->rbytes_done < cmd->req.transfer_len;
+}
+
+static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd)
+{
+ return nvmet_tcp_has_data_in(cmd) && !cmd->req.cqe->status;
+}
+
+static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd)
+{
+ return !nvme_is_write(cmd->req.cmd) &&
+ cmd->req.transfer_len > 0 &&
+ !cmd->req.cqe->status;
+}
+
+static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd)
+{
+ return nvme_is_write(cmd->req.cmd) && cmd->pdu_len &&
+ !cmd->rbytes_done;
+}
+
+static inline struct nvmet_tcp_cmd *
+nvmet_tcp_get_cmd(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmd;
+
+ cmd = list_first_entry_or_null(&queue->free_list,
+ struct nvmet_tcp_cmd, entry);
+ if (!cmd)
+ return NULL;
+ list_del_init(&cmd->entry);
+
+ cmd->rbytes_done = cmd->wbytes_done = 0;
+ cmd->pdu_len = 0;
+ cmd->pdu_recv = 0;
+ cmd->iov = NULL;
+ cmd->flags = 0;
+ return cmd;
+}
+
+static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd)
+{
+ if (unlikely(cmd == &cmd->queue->connect))
+ return;
+
+ list_add_tail(&cmd->entry, &cmd->queue->free_list);
+}
+
+static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue)
+{
+ return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
+}
+
+static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue)
+{
+ return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
+}
+
+static inline void nvmet_tcp_hdgst(struct ahash_request *hash,
+ void *pdu, size_t len)
+{
+ struct scatterlist sg;
+
+ sg_init_one(&sg, pdu, len);
+ ahash_request_set_crypt(hash, &sg, pdu + len, len);
+ crypto_ahash_digest(hash);
+}
+
+static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue,
+ void *pdu, size_t len)
+{
+ struct nvme_tcp_hdr *hdr = pdu;
+ __le32 recv_digest;
+ __le32 exp_digest;
+
+ if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
+ pr_err("queue %d: header digest enabled but no header digest\n",
+ queue->idx);
+ return -EPROTO;
+ }
+
+ recv_digest = *(__le32 *)(pdu + hdr->hlen);
+ nvmet_tcp_hdgst(queue->rcv_hash, pdu, len);
+ exp_digest = *(__le32 *)(pdu + hdr->hlen);
+ if (recv_digest != exp_digest) {
+ pr_err("queue %d: header digest error: recv %#x expected %#x\n",
+ queue->idx, le32_to_cpu(recv_digest),
+ le32_to_cpu(exp_digest));
+ return -EPROTO;
+ }
+
+ return 0;
+}
+
+static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)
+{
+ struct nvme_tcp_hdr *hdr = pdu;
+ u8 digest_len = nvmet_tcp_hdgst_len(queue);
+ u32 len;
+
+ len = le32_to_cpu(hdr->plen) - hdr->hlen -
+ (hdr->flags & NVME_TCP_F_HDGST ? digest_len : 0);
+
+ if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
+ pr_err("queue %d: data digest flag is cleared\n", queue->idx);
+ return -EPROTO;
+ }
+
+ return 0;
+}
+
+static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
+{
+ struct scatterlist *sg;
+ int i;
+
+ sg = &cmd->req.sg[cmd->sg_idx];
+
+ for (i = 0; i < cmd->nr_mapped; i++)
+ kunmap(sg_page(&sg[i]));
+}
+
+static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
+{
+ struct kvec *iov = cmd->iov;
+ struct scatterlist *sg;
+ u32 length, offset, sg_offset;
+
+ length = cmd->pdu_len;
+ cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE);
+ offset = cmd->rbytes_done;
+ cmd->sg_idx = DIV_ROUND_UP(offset, PAGE_SIZE);
+ sg_offset = offset % PAGE_SIZE;
+ sg = &cmd->req.sg[cmd->sg_idx];
+
+ while (length) {
+ u32 iov_len = min_t(u32, length, sg->length - sg_offset);
+
+ iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset;
+ iov->iov_len = iov_len;
+
+ length -= iov_len;
+ sg = sg_next(sg);
+ iov++;
+ }
+
+ iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
+ cmd->nr_mapped, cmd->pdu_len);
+}
+
+static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
+{
+ queue->rcv_state = NVMET_TCP_RECV_ERR;
+ if (queue->nvme_sq.ctrl)
+ nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
+ else
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+}
+
+static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
+ u32 len = le32_to_cpu(sgl->length);
+
+ if (!cmd->req.data_len)
+ return 0;
+
+ if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
+ NVME_SGL_FMT_OFFSET)) {
+ if (!nvme_is_write(cmd->req.cmd))
+ return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+
+ if (len > cmd->req.port->inline_data_size)
+ return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
+ cmd->pdu_len = len;
+ }
+ cmd->req.transfer_len += len;
+
+ cmd->req.sg = sgl_alloc(len, GFP_KERNEL, &cmd->req.sg_cnt);
+ if (!cmd->req.sg)
+ return NVME_SC_INTERNAL;
+ cmd->cur_sg = cmd->req.sg;
+
+ if (nvmet_tcp_has_data_in(cmd)) {
+ cmd->iov = kmalloc_array(cmd->req.sg_cnt,
+ sizeof(*cmd->iov), GFP_KERNEL);
+ if (!cmd->iov)
+ goto err;
+ }
+
+ return 0;
+err:
+ sgl_free(cmd->req.sg);
+ return NVME_SC_INTERNAL;
+}
+
+static void nvmet_tcp_ddgst(struct ahash_request *hash,
+ struct nvmet_tcp_cmd *cmd)
+{
+ ahash_request_set_crypt(hash, cmd->req.sg,
+ (void *)&cmd->exp_ddgst, cmd->req.transfer_len);
+ crypto_ahash_digest(hash);
+}
+
+static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvme_tcp_data_pdu *pdu = cmd->data_pdu;
+ struct nvmet_tcp_queue *queue = cmd->queue;
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+ u8 ddgst = nvmet_tcp_ddgst_len(cmd->queue);
+
+ cmd->offset = 0;
+ cmd->state = NVMET_TCP_SEND_DATA_PDU;
+
+ pdu->hdr.type = nvme_tcp_c2h_data;
+ pdu->hdr.flags = NVME_TCP_F_DATA_LAST | (queue->nvme_sq.sqhd_disabled ?
+ NVME_TCP_F_DATA_SUCCESS : 0);
+ pdu->hdr.hlen = sizeof(*pdu);
+ pdu->hdr.pdo = pdu->hdr.hlen + hdgst;
+ pdu->hdr.plen =
+ cpu_to_le32(pdu->hdr.hlen + hdgst +
+ cmd->req.transfer_len + ddgst);
+ pdu->command_id = cmd->req.cqe->command_id;
+ pdu->data_length = cpu_to_le32(cmd->req.transfer_len);
+ pdu->data_offset = cpu_to_le32(cmd->wbytes_done);
+
+ if (queue->data_digest) {
+ pdu->hdr.flags |= NVME_TCP_F_DDGST;
+ nvmet_tcp_ddgst(queue->snd_hash, cmd);
+ }
+
+ if (cmd->queue->hdr_digest) {
+ pdu->hdr.flags |= NVME_TCP_F_HDGST;
+ nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ }
+}
+
+static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu;
+ struct nvmet_tcp_queue *queue = cmd->queue;
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+
+ cmd->offset = 0;
+ cmd->state = NVMET_TCP_SEND_R2T;
+
+ pdu->hdr.type = nvme_tcp_r2t;
+ pdu->hdr.flags = 0;
+ pdu->hdr.hlen = sizeof(*pdu);
+ pdu->hdr.pdo = 0;
+ pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
+
+ pdu->command_id = cmd->req.cmd->common.command_id;
+ pdu->ttag = nvmet_tcp_cmd_tag(cmd->queue, cmd);
+ pdu->r2t_length = cpu_to_le32(cmd->req.transfer_len - cmd->rbytes_done);
+ pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done);
+ if (cmd->queue->hdr_digest) {
+ pdu->hdr.flags |= NVME_TCP_F_HDGST;
+ nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ }
+}
+
+static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu;
+ struct nvmet_tcp_queue *queue = cmd->queue;
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+
+ cmd->offset = 0;
+ cmd->state = NVMET_TCP_SEND_RESPONSE;
+
+ pdu->hdr.type = nvme_tcp_rsp;
+ pdu->hdr.flags = 0;
+ pdu->hdr.hlen = sizeof(*pdu);
+ pdu->hdr.pdo = 0;
+ pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
+ if (cmd->queue->hdr_digest) {
+ pdu->hdr.flags |= NVME_TCP_F_HDGST;
+ nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ }
+}
+
+static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue)
+{
+ struct llist_node *node;
+
+ node = llist_del_all(&queue->resp_list);
+ if (!node)
+ return;
+
+ while (node) {
+ struct nvmet_tcp_cmd *cmd = llist_entry(node,
+ struct nvmet_tcp_cmd, lentry);
+
+ list_add(&cmd->entry, &queue->resp_send_list);
+ node = node->next;
+ queue->send_list_len++;
+ }
+}
+
+static struct nvmet_tcp_cmd *nvmet_tcp_fetch_cmd(struct nvmet_tcp_queue *queue)
+{
+ queue->snd_cmd = list_first_entry_or_null(&queue->resp_send_list,
+ struct nvmet_tcp_cmd, entry);
+ if (!queue->snd_cmd) {
+ nvmet_tcp_process_resp_list(queue);
+ queue->snd_cmd =
+ list_first_entry_or_null(&queue->resp_send_list,
+ struct nvmet_tcp_cmd, entry);
+ if (unlikely(!queue->snd_cmd))
+ return NULL;
+ }
+
+ list_del_init(&queue->snd_cmd->entry);
+ queue->send_list_len--;
+
+ if (nvmet_tcp_need_data_out(queue->snd_cmd))
+ nvmet_setup_c2h_data_pdu(queue->snd_cmd);
+ else if (nvmet_tcp_need_data_in(queue->snd_cmd))
+ nvmet_setup_r2t_pdu(queue->snd_cmd);
+ else
+ nvmet_setup_response_pdu(queue->snd_cmd);
+
+ return queue->snd_cmd;
+}
+
+static void nvmet_tcp_queue_response(struct nvmet_req *req)
+{
+ struct nvmet_tcp_cmd *cmd =
+ container_of(req, struct nvmet_tcp_cmd, req);
+ struct nvmet_tcp_queue *queue = cmd->queue;
+
+ llist_add(&cmd->lentry, &queue->resp_list);
+ queue_work_on(cmd->queue->cpu, nvmet_tcp_wq, &cmd->queue->io_work);
+}
+
+static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
+{
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+ int left = sizeof(*cmd->data_pdu) - cmd->offset + hdgst;
+ int ret;
+
+ ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
+ offset_in_page(cmd->data_pdu) + cmd->offset,
+ left, MSG_DONTWAIT | MSG_MORE);
+ if (ret <= 0)
+ return ret;
+
+ cmd->offset += ret;
+ left -= ret;
+
+ if (left)
+ return -EAGAIN;
+
+ cmd->state = NVMET_TCP_SEND_DATA;
+ cmd->offset = 0;
+ return 1;
+}
+
+static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvmet_tcp_queue *queue = cmd->queue;
+ int ret;
+
+ while (cmd->cur_sg) {
+ struct page *page = sg_page(cmd->cur_sg);
+ u32 left = cmd->cur_sg->length - cmd->offset;
+
+ ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
+ left, MSG_DONTWAIT | MSG_MORE);
+ if (ret <= 0)
+ return ret;
+
+ cmd->offset += ret;
+ cmd->wbytes_done += ret;
+
+ /* Done with sg?*/
+ if (cmd->offset == cmd->cur_sg->length) {
+ cmd->cur_sg = sg_next(cmd->cur_sg);
+ cmd->offset = 0;
+ }
+ }
+
+ if (queue->data_digest) {
+ cmd->state = NVMET_TCP_SEND_DDGST;
+ cmd->offset = 0;
+ } else {
+ if (queue->nvme_sq.sqhd_disabled) {
+ cmd->queue->snd_cmd = NULL;
+ nvmet_tcp_put_cmd(cmd);
+ } else {
+ nvmet_setup_response_pdu(cmd);
+ }
+ }
+
+ if (queue->nvme_sq.sqhd_disabled) {
+ kfree(cmd->iov);
+ sgl_free(cmd->req.sg);
+ }
+
+ return 1;
+
+}
+
+static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
+ bool last_in_batch)
+{
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+ int left = sizeof(*cmd->rsp_pdu) - cmd->offset + hdgst;
+ int flags = MSG_DONTWAIT;
+ int ret;
+
+ if (!last_in_batch && cmd->queue->send_list_len)
+ flags |= MSG_MORE;
+ else
+ flags |= MSG_EOR;
+
+ ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->rsp_pdu),
+ offset_in_page(cmd->rsp_pdu) + cmd->offset, left, flags);
+ if (ret <= 0)
+ return ret;
+ cmd->offset += ret;
+ left -= ret;
+
+ if (left)
+ return -EAGAIN;
+
+ kfree(cmd->iov);
+ sgl_free(cmd->req.sg);
+ cmd->queue->snd_cmd = NULL;
+ nvmet_tcp_put_cmd(cmd);
+ return 1;
+}
+
+static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
+{
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+ int left = sizeof(*cmd->r2t_pdu) - cmd->offset + hdgst;
+ int flags = MSG_DONTWAIT;
+ int ret;
+
+ if (!last_in_batch && cmd->queue->send_list_len)
+ flags |= MSG_MORE;
+ else
+ flags |= MSG_EOR;
+
+ ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->r2t_pdu),
+ offset_in_page(cmd->r2t_pdu) + cmd->offset, left, flags);
+ if (ret <= 0)
+ return ret;
+ cmd->offset += ret;
+ left -= ret;
+
+ if (left)
+ return -EAGAIN;
+
+ cmd->queue->snd_cmd = NULL;
+ return 1;
+}
+
+static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvmet_tcp_queue *queue = cmd->queue;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
+ struct kvec iov = {
+ .iov_base = &cmd->exp_ddgst + cmd->offset,
+ .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
+ };
+ int ret;
+
+ ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
+ if (unlikely(ret <= 0))
+ return ret;
+
+ cmd->offset += ret;
+
+ if (queue->nvme_sq.sqhd_disabled) {
+ cmd->queue->snd_cmd = NULL;
+ nvmet_tcp_put_cmd(cmd);
+ } else {
+ nvmet_setup_response_pdu(cmd);
+ }
+ return 1;
+}
+
+static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
+ bool last_in_batch)
+{
+ struct nvmet_tcp_cmd *cmd = queue->snd_cmd;
+ int ret = 0;
+
+ if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) {
+ cmd = nvmet_tcp_fetch_cmd(queue);
+ if (unlikely(!cmd))
+ return 0;
+ }
+
+ if (cmd->state == NVMET_TCP_SEND_DATA_PDU) {
+ ret = nvmet_try_send_data_pdu(cmd);
+ if (ret <= 0)
+ goto done_send;
+ }
+
+ if (cmd->state == NVMET_TCP_SEND_DATA) {
+ ret = nvmet_try_send_data(cmd);
+ if (ret <= 0)
+ goto done_send;
+ }
+
+ if (cmd->state == NVMET_TCP_SEND_DDGST) {
+ ret = nvmet_try_send_ddgst(cmd);
+ if (ret <= 0)
+ goto done_send;
+ }
+
+ if (cmd->state == NVMET_TCP_SEND_R2T) {
+ ret = nvmet_try_send_r2t(cmd, last_in_batch);
+ if (ret <= 0)
+ goto done_send;
+ }
+
+ if (cmd->state == NVMET_TCP_SEND_RESPONSE)
+ ret = nvmet_try_send_response(cmd, last_in_batch);
+
+done_send:
+ if (ret < 0) {
+ if (ret == -EAGAIN)
+ return 0;
+ return ret;
+ }
+
+ return 1;
+}
+
+static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
+ int budget, int *sends)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < budget; i++) {
+ ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
+ if (ret <= 0)
+ break;
+ (*sends)++;
+ }
+
+ return ret;
+}
+
+static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue)
+{
+ queue->offset = 0;
+ queue->left = sizeof(struct nvme_tcp_hdr);
+ queue->cmd = NULL;
+ queue->rcv_state = NVMET_TCP_RECV_PDU;
+}
+
+static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
+
+ ahash_request_free(queue->rcv_hash);
+ ahash_request_free(queue->snd_hash);
+ crypto_free_ahash(tfm);
+}
+
+static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue *queue)
+{
+ struct crypto_ahash *tfm;
+
+ tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
+ if (!queue->snd_hash)
+ goto free_tfm;
+ ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
+
+ queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
+ if (!queue->rcv_hash)
+ goto free_snd_hash;
+ ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
+
+ return 0;
+free_snd_hash:
+ ahash_request_free(queue->snd_hash);
+free_tfm:
+ crypto_free_ahash(tfm);
+ return -ENOMEM;
+}
+
+
+static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
+{
+ struct nvme_tcp_icreq_pdu *icreq = &queue->pdu.icreq;
+ struct nvme_tcp_icresp_pdu *icresp = &queue->pdu.icresp;
+ struct msghdr msg = {};
+ struct kvec iov;
+ int ret;
+
+ if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) {
+ pr_err("bad nvme-tcp pdu length (%d)\n",
+ le32_to_cpu(icreq->hdr.plen));
+ nvmet_tcp_fatal_error(queue);
+ }
+
+ if (icreq->pfv != NVME_TCP_PFV_1_0) {
+ pr_err("queue %d: bad pfv %d\n", queue->idx, icreq->pfv);
+ return -EPROTO;
+ }
+
+ if (icreq->hpda != 0) {
+ pr_err("queue %d: unsupported hpda %d\n", queue->idx,
+ icreq->hpda);
+ return -EPROTO;
+ }
+
+ queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE);
+ queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE);
+ if (queue->hdr_digest || queue->data_digest) {
+ ret = nvmet_tcp_alloc_crypto(queue);
+ if (ret)
+ return ret;
+ }
+
+ memset(icresp, 0, sizeof(*icresp));
+ icresp->hdr.type = nvme_tcp_icresp;
+ icresp->hdr.hlen = sizeof(*icresp);
+ icresp->hdr.pdo = 0;
+ icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
+ icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
+ icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */
+ icresp->cpda = 0;
+ if (queue->hdr_digest)
+ icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
+ if (queue->data_digest)
+ icresp->digest |= NVME_TCP_DATA_DIGEST_ENABLE;
+
+ iov.iov_base = icresp;
+ iov.iov_len = sizeof(*icresp);
+ ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
+ if (ret < 0)
+ goto free_crypto;
+
+ queue->state = NVMET_TCP_Q_LIVE;
+ nvmet_prepare_receive_pdu(queue);
+ return 0;
+free_crypto:
+ if (queue->hdr_digest || queue->data_digest)
+ nvmet_tcp_free_crypto(queue);
+ return ret;
+}
+
+static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
+ struct nvmet_tcp_cmd *cmd, struct nvmet_req *req)
+{
+ int ret;
+
+ /* recover the expected data transfer length */
+ req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
+
+ if (!nvme_is_write(cmd->req.cmd) ||
+ req->data_len > cmd->req.port->inline_data_size) {
+ nvmet_prepare_receive_pdu(queue);
+ return;
+ }
+
+ ret = nvmet_tcp_map_data(cmd);
+ if (unlikely(ret)) {
+ pr_err("queue %d: failed to map data\n", queue->idx);
+ nvmet_tcp_fatal_error(queue);
+ return;
+ }
+
+ queue->rcv_state = NVMET_TCP_RECV_DATA;
+ nvmet_tcp_map_pdu_iovec(cmd);
+ cmd->flags |= NVMET_TCP_F_INIT_FAILED;
+}
+
+static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
+{
+ struct nvme_tcp_data_pdu *data = &queue->pdu.data;
+ struct nvmet_tcp_cmd *cmd;
+
+ cmd = &queue->cmds[data->ttag];
+
+ if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) {
+ pr_err("ttag %u unexpected data offset %u (expected %u)\n",
+ data->ttag, le32_to_cpu(data->data_offset),
+ cmd->rbytes_done);
+ /* FIXME: use path and transport errors */
+ nvmet_req_complete(&cmd->req,
+ NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+ return -EPROTO;
+ }
+
+ cmd->pdu_len = le32_to_cpu(data->data_length);
+ cmd->pdu_recv = 0;
+ nvmet_tcp_map_pdu_iovec(cmd);
+ queue->cmd = cmd;
+ queue->rcv_state = NVMET_TCP_RECV_DATA;
+
+ return 0;
+}
+
+static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
+{
+ struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr;
+ struct nvme_command *nvme_cmd = &queue->pdu.cmd.cmd;
+ struct nvmet_req *req;
+ int ret;
+
+ if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) {
+ if (hdr->type != nvme_tcp_icreq) {
+ pr_err("unexpected pdu type (%d) before icreq\n",
+ hdr->type);
+ nvmet_tcp_fatal_error(queue);
+ return -EPROTO;
+ }
+ return nvmet_tcp_handle_icreq(queue);
+ }
+
+ if (hdr->type == nvme_tcp_h2c_data) {
+ ret = nvmet_tcp_handle_h2c_data_pdu(queue);
+ if (unlikely(ret))
+ return ret;
+ return 0;
+ }
+
+ queue->cmd = nvmet_tcp_get_cmd(queue);
+ if (unlikely(!queue->cmd)) {
+ /* This should never happen */
+ pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d",
+ queue->idx, queue->nr_cmds, queue->send_list_len,
+ nvme_cmd->common.opcode);
+ nvmet_tcp_fatal_error(queue);
+ return -ENOMEM;
+ }
+
+ req = &queue->cmd->req;
+ memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd));
+
+ if (unlikely(!nvmet_req_init(req, &queue->nvme_cq,
+ &queue->nvme_sq, &nvmet_tcp_ops))) {
+ pr_err("failed cmd %p id %d opcode %d, data_len: %d\n",
+ req->cmd, req->cmd->common.command_id,
+ req->cmd->common.opcode,
+ le32_to_cpu(req->cmd->common.dptr.sgl.length));
+
+ nvmet_tcp_handle_req_failure(queue, queue->cmd, req);
+ return -EAGAIN;
+ }
+
+ ret = nvmet_tcp_map_data(queue->cmd);
+ if (unlikely(ret)) {
+ pr_err("queue %d: failed to map data\n", queue->idx);
+ if (nvmet_tcp_has_inline_data(queue->cmd))
+ nvmet_tcp_fatal_error(queue);
+ else
+ nvmet_req_complete(req, ret);
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (nvmet_tcp_need_data_in(queue->cmd)) {
+ if (nvmet_tcp_has_inline_data(queue->cmd)) {
+ queue->rcv_state = NVMET_TCP_RECV_DATA;
+ nvmet_tcp_map_pdu_iovec(queue->cmd);
+ return 0;
+ }
+ /* send back R2T */
+ nvmet_tcp_queue_response(&queue->cmd->req);
+ goto out;
+ }
+
+ nvmet_req_execute(&queue->cmd->req);
+out:
+ nvmet_prepare_receive_pdu(queue);
+ return ret;
+}
+
+static const u8 nvme_tcp_pdu_sizes[] = {
+ [nvme_tcp_icreq] = sizeof(struct nvme_tcp_icreq_pdu),
+ [nvme_tcp_cmd] = sizeof(struct nvme_tcp_cmd_pdu),
+ [nvme_tcp_h2c_data] = sizeof(struct nvme_tcp_data_pdu),
+};
+
+static inline u8 nvmet_tcp_pdu_size(u8 type)
+{
+ size_t idx = type;
+
+ return (idx < ARRAY_SIZE(nvme_tcp_pdu_sizes) &&
+ nvme_tcp_pdu_sizes[idx]) ?
+ nvme_tcp_pdu_sizes[idx] : 0;
+}
+
+static inline bool nvmet_tcp_pdu_valid(u8 type)
+{
+ switch (type) {
+ case nvme_tcp_icreq:
+ case nvme_tcp_cmd:
+ case nvme_tcp_h2c_data:
+ /* fallthru */
+ return true;
+ }
+
+ return false;
+}
+
+static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
+{
+ struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr;
+ int len;
+ struct kvec iov;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
+
+recv:
+ iov.iov_base = (void *)&queue->pdu + queue->offset;
+ iov.iov_len = queue->left;
+ len = kernel_recvmsg(queue->sock, &msg, &iov, 1,
+ iov.iov_len, msg.msg_flags);
+ if (unlikely(len < 0))
+ return len;
+
+ queue->offset += len;
+ queue->left -= len;
+ if (queue->left)
+ return -EAGAIN;
+
+ if (queue->offset == sizeof(struct nvme_tcp_hdr)) {
+ u8 hdgst = nvmet_tcp_hdgst_len(queue);
+
+ if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) {
+ pr_err("unexpected pdu type %d\n", hdr->type);
+ nvmet_tcp_fatal_error(queue);
+ return -EIO;
+ }
+
+ if (unlikely(hdr->hlen != nvmet_tcp_pdu_size(hdr->type))) {
+ pr_err("pdu %d bad hlen %d\n", hdr->type, hdr->hlen);
+ return -EIO;
+ }
+
+ queue->left = hdr->hlen - queue->offset + hdgst;
+ goto recv;
+ }
+
+ if (queue->hdr_digest &&
+ nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) {
+ nvmet_tcp_fatal_error(queue); /* fatal */
+ return -EPROTO;
+ }
+
+ if (queue->data_digest &&
+ nvmet_tcp_check_ddgst(queue, &queue->pdu)) {
+ nvmet_tcp_fatal_error(queue); /* fatal */
+ return -EPROTO;
+ }
+
+ return nvmet_tcp_done_recv_pdu(queue);
+}
+
+static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvmet_tcp_queue *queue = cmd->queue;
+
+ nvmet_tcp_ddgst(queue->rcv_hash, cmd);
+ queue->offset = 0;
+ queue->left = NVME_TCP_DIGEST_LENGTH;
+ queue->rcv_state = NVMET_TCP_RECV_DDGST;
+}
+
+static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmd = queue->cmd;
+ int ret;
+
+ while (msg_data_left(&cmd->recv_msg)) {
+ ret = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg,
+ cmd->recv_msg.msg_flags);
+ if (ret <= 0)
+ return ret;
+
+ cmd->pdu_recv += ret;
+ cmd->rbytes_done += ret;
+ }
+
+ nvmet_tcp_unmap_pdu_iovec(cmd);
+
+ if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
+ cmd->rbytes_done == cmd->req.transfer_len) {
+ if (queue->data_digest) {
+ nvmet_tcp_prep_recv_ddgst(cmd);
+ return 0;
+ }
+ nvmet_req_execute(&cmd->req);
+ }
+
+ nvmet_prepare_receive_pdu(queue);
+ return 0;
+}
+
+static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmd = queue->cmd;
+ int ret;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
+ struct kvec iov = {
+ .iov_base = (void *)&cmd->recv_ddgst + queue->offset,
+ .iov_len = queue->left
+ };
+
+ ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
+ iov.iov_len, msg.msg_flags);
+ if (unlikely(ret < 0))
+ return ret;
+
+ queue->offset += ret;
+ queue->left -= ret;
+ if (queue->left)
+ return -EAGAIN;
+
+ if (queue->data_digest && cmd->exp_ddgst != cmd->recv_ddgst) {
+ pr_err("queue %d: cmd %d pdu (%d) data digest error: recv %#x expected %#x\n",
+ queue->idx, cmd->req.cmd->common.command_id,
+ queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst),
+ le32_to_cpu(cmd->exp_ddgst));
+ nvmet_tcp_finish_cmd(cmd);
+ nvmet_tcp_fatal_error(queue);
+ ret = -EPROTO;
+ goto out;
+ }
+
+ if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
+ cmd->rbytes_done == cmd->req.transfer_len)
+ nvmet_req_execute(&cmd->req);
+ ret = 0;
+out:
+ nvmet_prepare_receive_pdu(queue);
+ return ret;
+}
+
+static int nvmet_tcp_try_recv_one(struct nvmet_tcp_queue *queue)
+{
+ int result = 0;
+
+ if (unlikely(queue->rcv_state == NVMET_TCP_RECV_ERR))
+ return 0;
+
+ if (queue->rcv_state == NVMET_TCP_RECV_PDU) {
+ result = nvmet_tcp_try_recv_pdu(queue);
+ if (result != 0)
+ goto done_recv;
+ }
+
+ if (queue->rcv_state == NVMET_TCP_RECV_DATA) {
+ result = nvmet_tcp_try_recv_data(queue);
+ if (result != 0)
+ goto done_recv;
+ }
+
+ if (queue->rcv_state == NVMET_TCP_RECV_DDGST) {
+ result = nvmet_tcp_try_recv_ddgst(queue);
+ if (result != 0)
+ goto done_recv;
+ }
+
+done_recv:
+ if (result < 0) {
+ if (result == -EAGAIN)
+ return 0;
+ return result;
+ }
+ return 1;
+}
+
+static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
+ int budget, int *recvs)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < budget; i++) {
+ ret = nvmet_tcp_try_recv_one(queue);
+ if (ret <= 0)
+ break;
+ (*recvs)++;
+ }
+
+ return ret;
+}
+
+static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
+{
+ spin_lock(&queue->state_lock);
+ if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
+ queue->state = NVMET_TCP_Q_DISCONNECTING;
+ schedule_work(&queue->release_work);
+ }
+ spin_unlock(&queue->state_lock);
+}
+
+static void nvmet_tcp_io_work(struct work_struct *w)
+{
+ struct nvmet_tcp_queue *queue =
+ container_of(w, struct nvmet_tcp_queue, io_work);
+ bool pending;
+ int ret, ops = 0;
+
+ do {
+ pending = false;
+
+ ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
+ if (ret > 0) {
+ pending = true;
+ } else if (ret < 0) {
+ if (ret == -EPIPE || ret == -ECONNRESET)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ else
+ nvmet_tcp_fatal_error(queue);
+ return;
+ }
+
+ ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
+ if (ret > 0) {
+ /* transmitted message/data */
+ pending = true;
+ } else if (ret < 0) {
+ if (ret == -EPIPE || ret == -ECONNRESET)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ else
+ nvmet_tcp_fatal_error(queue);
+ return;
+ }
+
+ } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
+
+ /*
+ * We exahusted our budget, requeue our selves
+ */
+ if (pending)
+ queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+}
+
+static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
+ struct nvmet_tcp_cmd *c)
+{
+ u8 hdgst = nvmet_tcp_hdgst_len(queue);
+
+ c->queue = queue;
+ c->req.port = queue->port->nport;
+
+ c->cmd_pdu = page_frag_alloc(&queue->pf_cache,
+ sizeof(*c->cmd_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
+ if (!c->cmd_pdu)
+ return -ENOMEM;
+ c->req.cmd = &c->cmd_pdu->cmd;
+
+ c->rsp_pdu = page_frag_alloc(&queue->pf_cache,
+ sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
+ if (!c->rsp_pdu)
+ goto out_free_cmd;
+ c->req.cqe = &c->rsp_pdu->cqe;
+
+ c->data_pdu = page_frag_alloc(&queue->pf_cache,
+ sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
+ if (!c->data_pdu)
+ goto out_free_rsp;
+
+ c->r2t_pdu = page_frag_alloc(&queue->pf_cache,
+ sizeof(*c->r2t_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
+ if (!c->r2t_pdu)
+ goto out_free_data;
+
+ c->recv_msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
+
+ list_add_tail(&c->entry, &queue->free_list);
+
+ return 0;
+out_free_data:
+ page_frag_free(c->data_pdu);
+out_free_rsp:
+ page_frag_free(c->rsp_pdu);
+out_free_cmd:
+ page_frag_free(c->cmd_pdu);
+ return -ENOMEM;
+}
+
+static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c)
+{
+ page_frag_free(c->r2t_pdu);
+ page_frag_free(c->data_pdu);
+ page_frag_free(c->rsp_pdu);
+ page_frag_free(c->cmd_pdu);
+}
+
+static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmds;
+ int i, ret = -EINVAL, nr_cmds = queue->nr_cmds;
+
+ cmds = kcalloc(nr_cmds, sizeof(struct nvmet_tcp_cmd), GFP_KERNEL);
+ if (!cmds)
+ goto out;
+
+ for (i = 0; i < nr_cmds; i++) {
+ ret = nvmet_tcp_alloc_cmd(queue, cmds + i);
+ if (ret)
+ goto out_free;
+ }
+
+ queue->cmds = cmds;
+
+ return 0;
+out_free:
+ while (--i >= 0)
+ nvmet_tcp_free_cmd(cmds + i);
+ kfree(cmds);
+out:
+ return ret;
+}
+
+static void nvmet_tcp_free_cmds(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmds = queue->cmds;
+ int i;
+
+ for (i = 0; i < queue->nr_cmds; i++)
+ nvmet_tcp_free_cmd(cmds + i);
+
+ nvmet_tcp_free_cmd(&queue->connect);
+ kfree(cmds);
+}
+
+static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue)
+{
+ struct socket *sock = queue->sock;
+
+ write_lock_bh(&sock->sk->sk_callback_lock);
+ sock->sk->sk_data_ready = queue->data_ready;
+ sock->sk->sk_state_change = queue->state_change;
+ sock->sk->sk_write_space = queue->write_space;
+ sock->sk->sk_user_data = NULL;
+ write_unlock_bh(&sock->sk->sk_callback_lock);
+}
+
+static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
+{
+ nvmet_req_uninit(&cmd->req);
+ nvmet_tcp_unmap_pdu_iovec(cmd);
+ kfree(cmd->iov);
+ sgl_free(cmd->req.sg);
+}
+
+static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmd = queue->cmds;
+ int i;
+
+ for (i = 0; i < queue->nr_cmds; i++, cmd++) {
+ if (nvmet_tcp_need_data_in(cmd))
+ nvmet_tcp_finish_cmd(cmd);
+ }
+
+ if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
+ /* failed in connect */
+ nvmet_tcp_finish_cmd(&queue->connect);
+ }
+}
+
+static void nvmet_tcp_release_queue_work(struct work_struct *w)
+{
+ struct nvmet_tcp_queue *queue =
+ container_of(w, struct nvmet_tcp_queue, release_work);
+
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_del_init(&queue->queue_list);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+
+ nvmet_tcp_restore_socket_callbacks(queue);
+ flush_work(&queue->io_work);
+
+ nvmet_tcp_uninit_data_in_cmds(queue);
+ nvmet_sq_destroy(&queue->nvme_sq);
+ cancel_work_sync(&queue->io_work);
+ sock_release(queue->sock);
+ nvmet_tcp_free_cmds(queue);
+ if (queue->hdr_digest || queue->data_digest)
+ nvmet_tcp_free_crypto(queue);
+ ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
+
+ kfree(queue);
+}
+
+static void nvmet_tcp_data_ready(struct sock *sk)
+{
+ struct nvmet_tcp_queue *queue;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ queue = sk->sk_user_data;
+ if (likely(queue))
+ queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void nvmet_tcp_write_space(struct sock *sk)
+{
+ struct nvmet_tcp_queue *queue;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ queue = sk->sk_user_data;
+ if (unlikely(!queue))
+ goto out;
+
+ if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) {
+ queue->write_space(sk);
+ goto out;
+ }
+
+ if (sk_stream_is_writeable(sk)) {
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+ }
+out:
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void nvmet_tcp_state_change(struct sock *sk)
+{
+ struct nvmet_tcp_queue *queue;
+
+ write_lock_bh(&sk->sk_callback_lock);
+ queue = sk->sk_user_data;
+ if (!queue)
+ goto done;
+
+ switch (sk->sk_state) {
+ case TCP_FIN_WAIT1:
+ case TCP_CLOSE_WAIT:
+ case TCP_CLOSE:
+ /* FALLTHRU */
+ sk->sk_user_data = NULL;
+ nvmet_tcp_schedule_release_queue(queue);
+ break;
+ default:
+ pr_warn("queue %d unhandled state %d\n",
+ queue->idx, sk->sk_state);
+ }
+done:
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
+{
+ struct socket *sock = queue->sock;
+ struct inet_sock *inet = inet_sk(sock->sk);
+ struct linger sol = { .l_onoff = 1, .l_linger = 0 };
+ int ret;
+
+ ret = kernel_getsockname(sock,
+ (struct sockaddr *)&queue->sockaddr);
+ if (ret < 0)
+ return ret;
+
+ ret = kernel_getpeername(sock,
+ (struct sockaddr *)&queue->sockaddr_peer);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Cleanup whatever is sitting in the TCP transmit queue on socket
+ * close. This is done to prevent stale data from being sent should
+ * the network connection be restored before TCP times out.
+ */
+ ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
+ (char *)&sol, sizeof(sol));
+ if (ret)
+ return ret;
+
+ /* Set socket type of service */
+ if (inet->rcv_tos > 0) {
+ int tos = inet->rcv_tos;
+
+ ret = kernel_setsockopt(sock, SOL_IP, IP_TOS,
+ (char *)&tos, sizeof(tos));
+ if (ret)
+ return ret;
+ }
+
+ write_lock_bh(&sock->sk->sk_callback_lock);
+ sock->sk->sk_user_data = queue;
+ queue->data_ready = sock->sk->sk_data_ready;
+ sock->sk->sk_data_ready = nvmet_tcp_data_ready;
+ queue->state_change = sock->sk->sk_state_change;
+ sock->sk->sk_state_change = nvmet_tcp_state_change;
+ queue->write_space = sock->sk->sk_write_space;
+ sock->sk->sk_write_space = nvmet_tcp_write_space;
+ write_unlock_bh(&sock->sk->sk_callback_lock);
+
+ return 0;
+}
+
+static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
+ struct socket *newsock)
+{
+ struct nvmet_tcp_queue *queue;
+ int ret;
+
+ queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+ if (!queue)
+ return -ENOMEM;
+
+ INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
+ INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
+ queue->sock = newsock;
+ queue->port = port;
+ queue->nr_cmds = 0;
+ spin_lock_init(&queue->state_lock);
+ queue->state = NVMET_TCP_Q_CONNECTING;
+ INIT_LIST_HEAD(&queue->free_list);
+ init_llist_head(&queue->resp_list);
+ INIT_LIST_HEAD(&queue->resp_send_list);
+
+ queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL);
+ if (queue->idx < 0) {
+ ret = queue->idx;
+ goto out_free_queue;
+ }
+
+ ret = nvmet_tcp_alloc_cmd(queue, &queue->connect);
+ if (ret)
+ goto out_ida_remove;
+
+ ret = nvmet_sq_init(&queue->nvme_sq);
+ if (ret)
+ goto out_free_connect;
+
+ port->last_cpu = cpumask_next_wrap(port->last_cpu,
+ cpu_online_mask, -1, false);
+ queue->cpu = port->last_cpu;
+ nvmet_prepare_receive_pdu(queue);
+
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+
+ ret = nvmet_tcp_set_queue_sock(queue);
+ if (ret)
+ goto out_destroy_sq;
+
+ queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+
+ return 0;
+out_destroy_sq:
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_del_init(&queue->queue_list);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+ nvmet_sq_destroy(&queue->nvme_sq);
+out_free_connect:
+ nvmet_tcp_free_cmd(&queue->connect);
+out_ida_remove:
+ ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
+out_free_queue:
+ kfree(queue);
+ return ret;
+}
+
+static void nvmet_tcp_accept_work(struct work_struct *w)
+{
+ struct nvmet_tcp_port *port =
+ container_of(w, struct nvmet_tcp_port, accept_work);
+ struct socket *newsock;
+ int ret;
+
+ while (true) {
+ ret = kernel_accept(port->sock, &newsock, O_NONBLOCK);
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ pr_warn("failed to accept err=%d\n", ret);
+ return;
+ }
+ ret = nvmet_tcp_alloc_queue(port, newsock);
+ if (ret) {
+ pr_err("failed to allocate queue\n");
+ sock_release(newsock);
+ }
+ }
+}
+
+static void nvmet_tcp_listen_data_ready(struct sock *sk)
+{
+ struct nvmet_tcp_port *port;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ port = sk->sk_user_data;
+ if (!port)
+ goto out;
+
+ if (sk->sk_state == TCP_LISTEN)
+ schedule_work(&port->accept_work);
+out:
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int nvmet_tcp_add_port(struct nvmet_port *nport)
+{
+ struct nvmet_tcp_port *port;
+ __kernel_sa_family_t af;
+ int opt, ret;
+
+ port = kzalloc(sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return -ENOMEM;
+
+ switch (nport->disc_addr.adrfam) {
+ case NVMF_ADDR_FAMILY_IP4:
+ af = AF_INET;
+ break;
+ case NVMF_ADDR_FAMILY_IP6:
+ af = AF_INET6;
+ break;
+ default:
+ pr_err("address family %d not supported\n",
+ nport->disc_addr.adrfam);
+ ret = -EINVAL;
+ goto err_port;
+ }
+
+ ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
+ nport->disc_addr.trsvcid, &port->addr);
+ if (ret) {
+ pr_err("malformed ip/port passed: %s:%s\n",
+ nport->disc_addr.traddr, nport->disc_addr.trsvcid);
+ goto err_port;
+ }
+
+ port->nport = nport;
+ port->last_cpu = -1;
+ INIT_WORK(&port->accept_work, nvmet_tcp_accept_work);
+ if (port->nport->inline_data_size < 0)
+ port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE;
+
+ ret = sock_create(port->addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &port->sock);
+ if (ret) {
+ pr_err("failed to create a socket\n");
+ goto err_port;
+ }
+
+ port->sock->sk->sk_user_data = port;
+ port->data_ready = port->sock->sk->sk_data_ready;
+ port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
+
+ opt = 1;
+ ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
+ TCP_NODELAY, (char *)&opt, sizeof(opt));
+ if (ret) {
+ pr_err("failed to set TCP_NODELAY sock opt %d\n", ret);
+ goto err_sock;
+ }
+
+ ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR,
+ (char *)&opt, sizeof(opt));
+ if (ret) {
+ pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret);
+ goto err_sock;
+ }
+
+ ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
+ sizeof(port->addr));
+ if (ret) {
+ pr_err("failed to bind port socket %d\n", ret);
+ goto err_sock;
+ }
+
+ ret = kernel_listen(port->sock, 128);
+ if (ret) {
+ pr_err("failed to listen %d on port sock\n", ret);
+ goto err_sock;
+ }
+
+ nport->priv = port;
+ pr_info("enabling port %d (%pISpc)\n",
+ le16_to_cpu(nport->disc_addr.portid), &port->addr);
+
+ return 0;
+
+err_sock:
+ sock_release(port->sock);
+err_port:
+ kfree(port);
+ return ret;
+}
+
+static void nvmet_tcp_remove_port(struct nvmet_port *nport)
+{
+ struct nvmet_tcp_port *port = nport->priv;
+
+ write_lock_bh(&port->sock->sk->sk_callback_lock);
+ port->sock->sk->sk_data_ready = port->data_ready;
+ port->sock->sk->sk_user_data = NULL;
+ write_unlock_bh(&port->sock->sk->sk_callback_lock);
+ cancel_work_sync(&port->accept_work);
+
+ sock_release(port->sock);
+ kfree(port);
+}
+
+static void nvmet_tcp_delete_ctrl(struct nvmet_ctrl *ctrl)
+{
+ struct nvmet_tcp_queue *queue;
+
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
+ if (queue->nvme_sq.ctrl == ctrl)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+}
+
+static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq)
+{
+ struct nvmet_tcp_queue *queue =
+ container_of(sq, struct nvmet_tcp_queue, nvme_sq);
+
+ if (sq->qid == 0) {
+ /* Let inflight controller teardown complete */
+ flush_scheduled_work();
+ }
+
+ queue->nr_cmds = sq->size * 2;
+ if (nvmet_tcp_alloc_cmds(queue))
+ return NVME_SC_INTERNAL;
+ return 0;
+}
+
+static void nvmet_tcp_disc_port_addr(struct nvmet_req *req,
+ struct nvmet_port *nport, char *traddr)
+{
+ struct nvmet_tcp_port *port = nport->priv;
+
+ if (inet_addr_is_any((struct sockaddr *)&port->addr)) {
+ struct nvmet_tcp_cmd *cmd =
+ container_of(req, struct nvmet_tcp_cmd, req);
+ struct nvmet_tcp_queue *queue = cmd->queue;
+
+ sprintf(traddr, "%pISc", (struct sockaddr *)&queue->sockaddr);
+ } else {
+ memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
+ }
+}
+
+static struct nvmet_fabrics_ops nvmet_tcp_ops = {
+ .owner = THIS_MODULE,
+ .type = NVMF_TRTYPE_TCP,
+ .msdbd = 1,
+ .has_keyed_sgls = 0,
+ .add_port = nvmet_tcp_add_port,
+ .remove_port = nvmet_tcp_remove_port,
+ .queue_response = nvmet_tcp_queue_response,
+ .delete_ctrl = nvmet_tcp_delete_ctrl,
+ .install_queue = nvmet_tcp_install_queue,
+ .disc_traddr = nvmet_tcp_disc_port_addr,
+};
+
+static int __init nvmet_tcp_init(void)
+{
+ int ret;
+
+ nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", WQ_HIGHPRI, 0);
+ if (!nvmet_tcp_wq)
+ return -ENOMEM;
+
+ ret = nvmet_register_transport(&nvmet_tcp_ops);
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ destroy_workqueue(nvmet_tcp_wq);
+ return ret;
+}
+
+static void __exit nvmet_tcp_exit(void)
+{
+ struct nvmet_tcp_queue *queue;
+
+ nvmet_unregister_transport(&nvmet_tcp_ops);
+
+ flush_scheduled_work();
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+ flush_scheduled_work();
+
+ destroy_workqueue(nvmet_tcp_wq);
+}
+
+module_init(nvmet_tcp_init);
+module_exit(nvmet_tcp_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */
diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c
new file mode 100644
index 0000000..1373a3c
--- /dev/null
+++ b/drivers/nvme/target/trace.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVM Express target device driver tracepoints
+ * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH
+ */
+
+#include <asm/unaligned.h>
+#include "trace.h"
+
+static const char *nvmet_trace_admin_identify(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 cns = cdw10[0];
+ u16 ctrlid = get_unaligned_le16(cdw10 + 2);
+
+ trace_seq_printf(p, "cns=%u, ctrlid=%u", cns, ctrlid);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_admin_get_features(struct trace_seq *p,
+ u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 fid = cdw10[0];
+ u8 sel = cdw10[1] & 0x7;
+ u32 cdw11 = get_unaligned_le32(cdw10 + 4);
+
+ trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_get_lba_status(struct trace_seq *p,
+ u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u64 slba = get_unaligned_le64(cdw10);
+ u32 mndw = get_unaligned_le32(cdw10 + 8);
+ u16 rl = get_unaligned_le16(cdw10 + 12);
+ u8 atype = cdw10[15];
+
+ trace_seq_printf(p, "slba=0x%llx, mndw=0x%x, rl=0x%x, atype=%u",
+ slba, mndw, rl, atype);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_read_write(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u64 slba = get_unaligned_le64(cdw10);
+ u16 length = get_unaligned_le16(cdw10 + 8);
+ u16 control = get_unaligned_le16(cdw10 + 10);
+ u32 dsmgmt = get_unaligned_le32(cdw10 + 12);
+ u32 reftag = get_unaligned_le32(cdw10 + 16);
+
+ trace_seq_printf(p,
+ "slba=%llu, len=%u, ctrl=0x%x, dsmgmt=%u, reftag=%u",
+ slba, length, control, dsmgmt, reftag);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_dsm(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "nr=%u, attributes=%u",
+ get_unaligned_le32(cdw10),
+ get_unaligned_le32(cdw10 + 4));
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_common(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "cdw10=%*ph", 24, cdw10);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p,
+ u8 opcode, u8 *cdw10)
+{
+ switch (opcode) {
+ case nvme_admin_identify:
+ return nvmet_trace_admin_identify(p, cdw10);
+ case nvme_admin_get_features:
+ return nvmet_trace_admin_get_features(p, cdw10);
+ case nvme_admin_get_lba_status:
+ return nvmet_trace_get_lba_status(p, cdw10);
+ default:
+ return nvmet_trace_common(p, cdw10);
+ }
+}
+
+const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p,
+ u8 opcode, u8 *cdw10)
+{
+ switch (opcode) {
+ case nvme_cmd_read:
+ case nvme_cmd_write:
+ case nvme_cmd_write_zeroes:
+ return nvmet_trace_read_write(p, cdw10);
+ case nvme_cmd_dsm:
+ return nvmet_trace_dsm(p, cdw10);
+ default:
+ return nvmet_trace_common(p, cdw10);
+ }
+}
+
+static const char *nvmet_trace_fabrics_property_set(struct trace_seq *p,
+ u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 attrib = spc[0];
+ u32 ofst = get_unaligned_le32(spc + 4);
+ u64 value = get_unaligned_le64(spc + 8);
+
+ trace_seq_printf(p, "attrib=%u, ofst=0x%x, value=0x%llx",
+ attrib, ofst, value);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *nvmet_trace_fabrics_connect(struct trace_seq *p,
+ u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u16 recfmt = get_unaligned_le16(spc);
+ u16 qid = get_unaligned_le16(spc + 2);
+ u16 sqsize = get_unaligned_le16(spc + 4);
+ u8 cattr = spc[6];
+ u32 kato = get_unaligned_le32(spc + 8);
+
+ trace_seq_printf(p, "recfmt=%u, qid=%u, sqsize=%u, cattr=%u, kato=%u",
+ recfmt, qid, sqsize, cattr, kato);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *nvmet_trace_fabrics_property_get(struct trace_seq *p,
+ u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 attrib = spc[0];
+ u32 ofst = get_unaligned_le32(spc + 4);
+
+ trace_seq_printf(p, "attrib=%u, ofst=0x%x", attrib, ofst);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *nvmet_trace_fabrics_common(struct trace_seq *p, u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "specific=%*ph", 24, spc);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p,
+ u8 fctype, u8 *spc)
+{
+ switch (fctype) {
+ case nvme_fabrics_type_property_set:
+ return nvmet_trace_fabrics_property_set(p, spc);
+ case nvme_fabrics_type_connect:
+ return nvmet_trace_fabrics_connect(p, spc);
+ case nvme_fabrics_type_property_get:
+ return nvmet_trace_fabrics_property_get(p, spc);
+ default:
+ return nvmet_trace_fabrics_common(p, spc);
+ }
+}
+
+const char *nvmet_trace_disk_name(struct trace_seq *p, char *name)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (*name)
+ trace_seq_printf(p, "disk=%s, ", name);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ /*
+ * XXX: We don't know the controller instance before executing the
+ * connect command itself because the connect command for the admin
+ * queue will not provide the cntlid which will be allocated in this
+ * command. In case of io queues, the controller instance will be
+ * mapped by the extra data of the connect command.
+ * If we can know the extra data of the connect command in this stage,
+ * we can update this print statement later.
+ */
+ if (ctrl)
+ trace_seq_printf(p, "%d", ctrl->cntlid);
+ else
+ trace_seq_printf(p, "_");
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h
new file mode 100644
index 0000000..e645caa
--- /dev/null
+++ b/drivers/nvme/target/trace.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NVM Express target device driver tracepoints
+ * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH
+ *
+ * This is entirely based on drivers/nvme/host/trace.h
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nvmet
+
+#if !defined(_TRACE_NVMET_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NVMET_H
+
+#include <linux/nvme.h>
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "nvmet.h"
+
+const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
+ u8 *cdw10);
+const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode,
+ u8 *cdw10);
+const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype,
+ u8 *spc);
+
+#define parse_nvme_cmd(qid, opcode, fctype, cdw10) \
+ ((opcode) == nvme_fabrics_command ? \
+ nvmet_trace_parse_fabrics_cmd(p, fctype, cdw10) : \
+ (qid ? \
+ nvmet_trace_parse_nvm_cmd(p, opcode, cdw10) : \
+ nvmet_trace_parse_admin_cmd(p, opcode, cdw10)))
+
+const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl);
+#define __print_ctrl_name(ctrl) \
+ nvmet_trace_ctrl_name(p, ctrl)
+
+const char *nvmet_trace_disk_name(struct trace_seq *p, char *name);
+#define __print_disk_name(name) \
+ nvmet_trace_disk_name(p, name)
+
+#ifndef TRACE_HEADER_MULTI_READ
+static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req)
+{
+ return req->sq->ctrl;
+}
+
+static inline void __assign_disk_name(char *name, struct nvmet_req *req,
+ bool init)
+{
+ struct nvmet_ctrl *ctrl = nvmet_req_to_ctrl(req);
+ struct nvmet_ns *ns;
+
+ if ((init && req->sq->qid) || (!init && req->cq->qid)) {
+ ns = nvmet_find_namespace(ctrl, req->cmd->rw.nsid);
+ strncpy(name, ns->device_path, DISK_NAME_LEN);
+ return;
+ }
+
+ memset(name, 0, DISK_NAME_LEN);
+}
+#endif
+
+TRACE_EVENT(nvmet_req_init,
+ TP_PROTO(struct nvmet_req *req, struct nvme_command *cmd),
+ TP_ARGS(req, cmd),
+ TP_STRUCT__entry(
+ __field(struct nvme_command *, cmd)
+ __field(struct nvmet_ctrl *, ctrl)
+ __array(char, disk, DISK_NAME_LEN)
+ __field(int, qid)
+ __field(u16, cid)
+ __field(u8, opcode)
+ __field(u8, fctype)
+ __field(u8, flags)
+ __field(u32, nsid)
+ __field(u64, metadata)
+ __array(u8, cdw10, 24)
+ ),
+ TP_fast_assign(
+ __entry->cmd = cmd;
+ __entry->ctrl = nvmet_req_to_ctrl(req);
+ __assign_disk_name(__entry->disk, req, true);
+ __entry->qid = req->sq->qid;
+ __entry->cid = cmd->common.command_id;
+ __entry->opcode = cmd->common.opcode;
+ __entry->fctype = cmd->fabrics.fctype;
+ __entry->flags = cmd->common.flags;
+ __entry->nsid = le32_to_cpu(cmd->common.nsid);
+ __entry->metadata = le64_to_cpu(cmd->common.metadata);
+ memcpy(__entry->cdw10, &cmd->common.cdw10,
+ sizeof(__entry->cdw10));
+ ),
+ TP_printk("nvmet%s: %sqid=%d, cmdid=%u, nsid=%u, flags=%#x, "
+ "meta=%#llx, cmd=(%s, %s)",
+ __print_ctrl_name(__entry->ctrl),
+ __print_disk_name(__entry->disk),
+ __entry->qid, __entry->cid, __entry->nsid,
+ __entry->flags, __entry->metadata,
+ show_opcode_name(__entry->qid, __entry->opcode,
+ __entry->fctype),
+ parse_nvme_cmd(__entry->qid, __entry->opcode,
+ __entry->fctype, __entry->cdw10))
+);
+
+TRACE_EVENT(nvmet_req_complete,
+ TP_PROTO(struct nvmet_req *req),
+ TP_ARGS(req),
+ TP_STRUCT__entry(
+ __field(struct nvmet_ctrl *, ctrl)
+ __array(char, disk, DISK_NAME_LEN)
+ __field(int, qid)
+ __field(int, cid)
+ __field(u64, result)
+ __field(u16, status)
+ ),
+ TP_fast_assign(
+ __entry->ctrl = nvmet_req_to_ctrl(req);
+ __entry->qid = req->cq->qid;
+ __entry->cid = req->cqe->command_id;
+ __entry->result = le64_to_cpu(req->cqe->result.u64);
+ __entry->status = le16_to_cpu(req->cqe->status) >> 1;
+ __assign_disk_name(__entry->disk, req, false);
+ ),
+ TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x",
+ __print_ctrl_name(__entry->ctrl),
+ __print_disk_name(__entry->disk),
+ __entry->qid, __entry->cid, __entry->result, __entry->status)
+
+);
+
+#endif /* _TRACE_NVMET_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>