Update Linux to v5.4.148

Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 7544be8..38bbbbb 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -164,16 +164,14 @@
static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
{
struct request *rq;
- unsigned int bytes;
if (unlikely(nvme_tcp_async_req(req)))
return false; /* async events don't have a request */
rq = blk_mq_rq_from_pdu(req);
- bytes = blk_rq_payload_bytes(rq);
- return rq_data_dir(rq) == WRITE && bytes &&
- bytes <= nvme_tcp_inline_data_size(req->queue);
+ return rq_data_dir(rq) == WRITE && req->data_len &&
+ req->data_len <= nvme_tcp_inline_data_size(req->queue);
}
static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
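For reference, the inline-data threshold both the old and new versions compare against comes from a small helper defined earlier in tcp.c (unchanged by this update; reproduced here for context, not part of the patch):

static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
{
        /* capsule room left for data once the SQE itself is accounted for */
        return queue->cmnd_capsule_len - sizeof(struct nvme_command);
}

That is, a write is sent inline only when its payload fits in the command capsule alongside the SQE.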
@@ -188,7 +186,7 @@
static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req)
{
- return min_t(size_t, req->iter.bvec->bv_len - req->iter.iov_offset,
+ return min_t(size_t, iov_iter_single_seg_count(&req->iter),
req->pdu_len - req->pdu_sent);
}
@@ -422,7 +420,8 @@
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
return;
- queue_work(nvme_wq, &to_tcp_ctrl(ctrl)->err_work);
+ dev_warn(ctrl->device, "starting error recovery\n");
+ queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
}
static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
@@ -513,6 +512,13 @@
req->pdu_len = le32_to_cpu(pdu->r2t_length);
req->pdu_sent = 0;
+ if (unlikely(!req->pdu_len)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "req %d r2t len is %u, probably a bug...\n",
+ rq->tag, req->pdu_len);
+ return -EPROTO;
+ }
+
if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
dev_err(queue->ctrl->ctrl.device,
"req %d r2t len %u exceeded data len %u (%zu sent)\n",
@@ -636,17 +642,9 @@
unsigned int *offset, size_t *len)
{
struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
- struct nvme_tcp_request *req;
- struct request *rq;
-
- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
- if (!rq) {
- dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
- return -ENOENT;
- }
- req = blk_mq_rq_to_pdu(rq);
+ struct request *rq =
+ blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
while (true) {
int recv_len, ret;
@@ -786,11 +784,11 @@
{
struct nvme_tcp_queue *queue;
- read_lock(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (likely(queue && queue->rd_enabled))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
- read_unlock(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
static void nvme_tcp_write_space(struct sock *sk)
@@ -810,7 +808,7 @@
{
struct nvme_tcp_queue *queue;
- read_lock(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (!queue)
goto done;
@@ -832,7 +830,7 @@
queue->state_change(sk);
done:
- read_unlock(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
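The read_lock_bh() conversions above align the softirq-side callback readers with the write-side teardown path, which already takes sk_callback_lock with bottom halves disabled. For context, the writer elsewhere in tcp.c looks roughly like this (reproduced for reference, not part of this patch):

static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
{
        struct socket *sock = queue->sock;

        write_lock_bh(&sock->sk->sk_callback_lock);
        sock->sk->sk_user_data = NULL;
        /* put back the callbacks saved at queue allocation time */
        sock->sk->sk_data_ready = queue->data_ready;
        sock->sk->sk_state_change = queue->state_change;
        sock->sk->sk_write_space = queue->write_space;
        write_unlock_bh(&sock->sk->sk_callback_lock);
}

Because the socket callbacks can also fire from process context (e.g. while the backlog is drained during a receive), taking the lock there without disabling BHs leaves a window where a softirq on the same CPU can contend with the in-progress reader; the _bh variants close that window.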
@@ -861,12 +859,11 @@
else
flags |= MSG_MORE;
- /* can't zcopy slab pages */
- if (unlikely(PageSlab(page))) {
- ret = sock_no_sendpage(queue->sock, page, offset, len,
+ if (sendpage_ok(page)) {
+ ret = kernel_sendpage(queue->sock, page, offset, len,
flags);
} else {
- ret = kernel_sendpage(queue->sock, page, offset, len,
+ ret = sock_no_sendpage(queue->sock, page, offset, len,
flags);
}
if (ret <= 0)
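The rewritten branch depends on sendpage_ok(), a helper introduced upstream in include/linux/net.h as part of this fix series; its definition is essentially:

/* can this page be handed to kernel_sendpage() for zero-copy send? */
static inline bool sendpage_ok(struct page *page)
{
        /* slab pages and pages without a reference must not be sent zero-copy */
        return !PageSlab(page) && page_count(page) >= 1;
}

So in addition to the old slab-page test, pages with a zero refcount are now also routed through sock_no_sendpage(), which copies the payload instead of letting the TCP stack pin the page.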
@@ -1054,7 +1051,12 @@
} else if (unlikely(result < 0)) {
dev_err(queue->ctrl->ctrl.device,
"failed to send request %d\n", result);
- if (result != -EPIPE)
+
+ /*
+ * Fail the request unless peer closed the connection,
+ * in which case error recovery flow will complete all.
+ */
+ if ((result != -EPIPE) && (result != -ECONNRESET))
nvme_tcp_fail_request(queue->request);
nvme_tcp_done_send_req(queue);
return;
@@ -1316,6 +1318,9 @@
}
}
+ /* Set 10 seconds timeout for icresp recvmsg */
+ queue->sock->sk->sk_rcvtimeo = 10 * HZ;
+
queue->sock->sk->sk_allocation = GFP_ATOMIC;
if (!qid)
n = 0;
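sk_rcvtimeo bounds the blocking receive that waits for the controller's ICResp during connection setup. A sketch of the affected call, paraphrased from nvme_tcp_init_connection() in the same file (icresp is the response buffer allocated there):

struct msghdr msg = {};
struct kvec iov = { .iov_base = icresp, .iov_len = sizeof(*icresp) };

/* with sk_rcvtimeo set, this gives up after ~10 s instead of hanging */
ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len,
                msg.msg_flags);

If the controller never answers, kernel_recvmsg() now returns -EAGAIN once the timeout elapses rather than blocking the connect attempt indefinitely.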
@@ -1432,7 +1437,6 @@
if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
return;
-
__nvme_tcp_stop_queue(queue);
}
@@ -1500,6 +1504,7 @@
static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
{
if (to_tcp_ctrl(ctrl)->async_req.pdu) {
+ cancel_work_sync(&ctrl->async_event_work);
nvme_tcp_free_async_req(to_tcp_ctrl(ctrl));
to_tcp_ctrl(ctrl)->async_req.pdu = NULL;
}
@@ -1636,10 +1641,13 @@
if (ret)
return ret;
- ctrl->queue_count = nr_io_queues + 1;
- if (ctrl->queue_count < 2)
- return 0;
+ if (nr_io_queues == 0) {
+ dev_err(ctrl->device,
+ "unable to set any I/O queues\n");
+ return -ENOMEM;
+ }
+ ctrl->queue_count = nr_io_queues + 1;
dev_info(ctrl->device,
"creating %d I/O queues.\n", nr_io_queues);
@@ -1678,18 +1686,36 @@
ret = PTR_ERR(ctrl->connect_q);
goto out_free_tag_set;
}
- } else {
- blk_mq_update_nr_hw_queues(ctrl->tagset,
- ctrl->queue_count - 1);
}
ret = nvme_tcp_start_io_queues(ctrl);
if (ret)
goto out_cleanup_connect_q;
+ if (!new) {
+ nvme_start_queues(ctrl);
+ if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
+ /*
+ * If we timed out waiting for freeze we are likely to
+ * be stuck. Fail the controller initialization just
+ * to be safe.
+ */
+ ret = -ENODEV;
+ goto out_wait_freeze_timed_out;
+ }
+ blk_mq_update_nr_hw_queues(ctrl->tagset,
+ ctrl->queue_count - 1);
+ nvme_unfreeze(ctrl);
+ }
+
return 0;
+out_wait_freeze_timed_out:
+ nvme_stop_queues(ctrl);
+ nvme_sync_io_queues(ctrl);
+ nvme_tcp_stop_io_queues(ctrl);
out_cleanup_connect_q:
+ nvme_cancel_tagset(ctrl);
if (new)
blk_cleanup_queue(ctrl->connect_q);
out_free_tag_set:
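nvme_wait_freeze_timeout() (nvme core, backported alongside this change) returns the time remaining, or a value <= 0 once the timeout expires before every namespace queue reaches the frozen state, which is why a falsy return is treated as failure above. Its v5.4-era shape, paraphrased for reference:

int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
{
        struct nvme_ns *ns;

        down_read(&ctrl->namespaces_rwsem);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                /* each wait consumes part of the remaining budget */
                timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
                if (timeout <= 0)
                        break;
        }
        up_read(&ctrl->namespaces_rwsem);
        return timeout;
}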
@@ -1751,12 +1777,16 @@
error = nvme_init_identify(ctrl);
if (error)
- goto out_stop_queue;
+ goto out_quiesce_queue;
return 0;
+out_quiesce_queue:
+ blk_mq_quiesce_queue(ctrl->admin_q);
+ blk_sync_queue(ctrl->admin_q);
out_stop_queue:
nvme_tcp_stop_queue(ctrl, 0);
+ nvme_cancel_admin_tagset(ctrl);
out_cleanup_queue:
if (new)
blk_cleanup_queue(ctrl->admin_q);
@@ -1775,6 +1805,7 @@
bool remove)
{
blk_mq_quiesce_queue(ctrl->admin_q);
+ blk_sync_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
if (ctrl->admin_tagset) {
blk_mq_tagset_busy_iter(ctrl->admin_tagset,
@@ -1791,7 +1822,10 @@
{
if (ctrl->queue_count <= 1)
return;
+ blk_mq_quiesce_queue(ctrl->admin_q);
+ nvme_start_freeze(ctrl);
nvme_stop_queues(ctrl);
+ nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
if (ctrl->tagset) {
blk_mq_tagset_busy_iter(ctrl->tagset,
@@ -1866,10 +1900,18 @@
return 0;
destroy_io:
- if (ctrl->queue_count > 1)
+ if (ctrl->queue_count > 1) {
+ nvme_stop_queues(ctrl);
+ nvme_sync_io_queues(ctrl);
+ nvme_tcp_stop_io_queues(ctrl);
+ nvme_cancel_tagset(ctrl);
nvme_tcp_destroy_io_queues(ctrl, new);
+ }
destroy_admin:
+ blk_mq_quiesce_queue(ctrl->admin_q);
+ blk_sync_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
+ nvme_cancel_admin_tagset(ctrl);
nvme_tcp_destroy_admin_queue(ctrl, new);
return ret;
}
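nvme_cancel_tagset() and nvme_cancel_admin_tagset(), used in the error paths above, are core helpers backported together with these fixes; they force-complete every outstanding request on a tagset, roughly:

void nvme_cancel_tagset(struct nvme_ctrl *ctrl)
{
        if (ctrl->tagset) {
                /* cancel each busy request, then wait for completions */
                blk_mq_tagset_busy_iter(ctrl->tagset,
                                nvme_cancel_request, ctrl);
                blk_mq_tagset_wait_completed_request(ctrl->tagset);
        }
}

with nvme_cancel_admin_tagset() doing the same for ctrl->admin_tagset.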
@@ -2039,40 +2081,52 @@
nvme_tcp_queue_request(&ctrl->async_req);
}
+static void nvme_tcp_complete_timed_out(struct request *rq)
+{
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
+
+ nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
+ if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
+ nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
+ blk_mq_complete_request(rq);
+ }
+}
+
static enum blk_eh_timer_return
nvme_tcp_timeout(struct request *rq, bool reserved)
{
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
- struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
+ struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
- /*
- * Restart the timer if a controller reset is already scheduled. Any
- * timed out commands would be handled before entering the connecting
- * state.
- */
- if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
- return BLK_EH_RESET_TIMER;
-
- dev_warn(ctrl->ctrl.device,
+ dev_warn(ctrl->device,
"queue %d: timeout request %#x type %d\n",
nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
- if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
+ if (ctrl->state != NVME_CTRL_LIVE) {
/*
- * Teardown immediately if controller times out while starting
- * or we are already started error recovery. all outstanding
- * requests are completed on shutdown, so we return BLK_EH_DONE.
+ * If we are resetting, connecting or deleting we should
+ * complete immediately because we may block controller
+ * teardown or setup sequence
+ * - ctrl disable/shutdown fabrics requests
+ * - connect requests
+ * - initialization admin requests
+ * - I/O requests that entered after unquiescing and
+ * the controller stopped responding
+ *
+ * All other requests should be cancelled by the error
+ * recovery work, so it's fine that we fail it here.
*/
- flush_work(&ctrl->err_work);
- nvme_tcp_teardown_io_queues(&ctrl->ctrl, false);
- nvme_tcp_teardown_admin_queue(&ctrl->ctrl, false);
+ nvme_tcp_complete_timed_out(rq);
return BLK_EH_DONE;
}
- dev_warn(ctrl->ctrl.device, "starting error recovery\n");
- nvme_tcp_error_recovery(&ctrl->ctrl);
-
+ /*
+ * LIVE state should trigger the normal error recovery which will
+ * handle completing this request.
+ */
+ nvme_tcp_error_recovery(ctrl);
return BLK_EH_RESET_TIMER;
}
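For reference, the return contract the reworked handler follows is the block-layer timeout enum (include/linux/blkdev.h in v5.4; comments paraphrased):

enum blk_eh_timer_return {
        BLK_EH_DONE,            /* driver has completed (or will complete) the request */
        BLK_EH_RESET_TIMER,     /* re-arm the timer and let the request keep running */
};

Outside of LIVE the request is completed synchronously via nvme_tcp_complete_timed_out() and BLK_EH_DONE is returned; in LIVE state the handler only kicks error recovery and re-arms the timer, leaving completion to the recovery work.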
@@ -2085,7 +2139,9 @@
c->common.flags |= NVME_CMD_SGL_METABUF;
- if (rq_data_dir(rq) == WRITE && req->data_len &&
+ if (!blk_rq_nr_phys_segments(rq))
+ nvme_tcp_set_sg_null(c);
+ else if (rq_data_dir(rq) == WRITE &&
req->data_len <= nvme_tcp_inline_data_size(queue))
nvme_tcp_set_sg_inline(queue, c, req->data_len);
else
@@ -2112,7 +2168,8 @@
req->data_sent = 0;
req->pdu_len = 0;
req->pdu_sent = 0;
- req->data_len = blk_rq_payload_bytes(rq);
+ req->data_len = blk_rq_nr_phys_segments(rq) ?
+ blk_rq_payload_bytes(rq) : 0;
req->curr_bio = rq->bio;
if (rq_data_dir(rq) == WRITE &&
@@ -2354,8 +2411,6 @@
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
- nvme_get_ctrl(&ctrl->ctrl);
-
mutex_lock(&nvme_tcp_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
mutex_unlock(&nvme_tcp_ctrl_mutex);
@@ -2365,6 +2420,7 @@
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
+ nvme_put_ctrl(&ctrl->ctrl);
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);