1/*
2 * Copyright (C) 1999 Eric Youngdale
3 * Copyright (C) 2014 Christoph Hellwig
4 *
5 * SCSI queueing library.
6 * Initial versions: Eric Youngdale (eric@andante.org).
7 * Based upon conversations with large numbers
8 * of people at Linux Expo.
9 */
10
11#include <linux/bio.h>
12#include <linux/bitops.h>
13#include <linux/blkdev.h>
14#include <linux/completion.h>
15#include <linux/kernel.h>
16#include <linux/export.h>
17#include <linux/init.h>
18#include <linux/pci.h>
19#include <linux/delay.h>
20#include <linux/hardirq.h>
21#include <linux/scatterlist.h>
22#include <linux/blk-mq.h>
23#include <linux/ratelimit.h>
24#include <asm/unaligned.h>
25
26#include <scsi/scsi.h>
27#include <scsi/scsi_cmnd.h>
28#include <scsi/scsi_dbg.h>
29#include <scsi/scsi_device.h>
30#include <scsi/scsi_driver.h>
31#include <scsi/scsi_eh.h>
32#include <scsi/scsi_host.h>
33#include <scsi/scsi_transport.h> /* __scsi_init_queue() */
34#include <scsi/scsi_dh.h>
35
36#include <trace/events/scsi.h>
37
38#include "scsi_debugfs.h"
39#include "scsi_priv.h"
40#include "scsi_logging.h"
41
42static struct kmem_cache *scsi_sdb_cache;
43static struct kmem_cache *scsi_sense_cache;
44static struct kmem_cache *scsi_sense_isadma_cache;
45static DEFINE_MUTEX(scsi_sense_cache_mutex);
46
47static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd);
48
49static inline struct kmem_cache *
50scsi_select_sense_cache(bool unchecked_isa_dma)
51{
52 return unchecked_isa_dma ? scsi_sense_isadma_cache : scsi_sense_cache;
53}
54
55static void scsi_free_sense_buffer(bool unchecked_isa_dma,
56 unsigned char *sense_buffer)
57{
58 kmem_cache_free(scsi_select_sense_cache(unchecked_isa_dma),
59 sense_buffer);
60}
61
62static unsigned char *scsi_alloc_sense_buffer(bool unchecked_isa_dma,
63 gfp_t gfp_mask, int numa_node)
64{
65 return kmem_cache_alloc_node(scsi_select_sense_cache(unchecked_isa_dma),
66 gfp_mask, numa_node);
67}
68
69int scsi_init_sense_cache(struct Scsi_Host *shost)
70{
71 struct kmem_cache *cache;
72 int ret = 0;
73
74 cache = scsi_select_sense_cache(shost->unchecked_isa_dma);
75 if (cache)
76 return 0;
77
78 mutex_lock(&scsi_sense_cache_mutex);
79 if (shost->unchecked_isa_dma) {
80 scsi_sense_isadma_cache =
81 kmem_cache_create("scsi_sense_cache(DMA)",
82 SCSI_SENSE_BUFFERSIZE, 0,
83 SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
84 if (!scsi_sense_isadma_cache)
85 ret = -ENOMEM;
86 } else {
87 scsi_sense_cache =
88 kmem_cache_create_usercopy("scsi_sense_cache",
89 SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN,
90 0, SCSI_SENSE_BUFFERSIZE, NULL);
91 if (!scsi_sense_cache)
92 ret = -ENOMEM;
93 }
94
95 mutex_unlock(&scsi_sense_cache_mutex);
96 return ret;
97}
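/*
 * Note: the usercopy window passed to kmem_cache_create_usercopy() above
 * covers the whole object (offset 0, size SCSI_SENSE_BUFFERSIZE) because
 * sense data allocated from this cache may be copied straight to user space,
 * e.g. on the SG_IO path; hardened usercopy would otherwise reject the copy.
 */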
98
99/*
100 * When to reinvoke queueing after a resource shortage. It's 3 msecs so as
101 * not to change behaviour from the previous unplug mechanism; experimentation
102 * may prove this needs changing.
103 */
104#define SCSI_QUEUE_DELAY 3
105
106static void
107scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
108{
109 struct Scsi_Host *host = cmd->device->host;
110 struct scsi_device *device = cmd->device;
111 struct scsi_target *starget = scsi_target(device);
112
113 /*
114 * Set the appropriate busy bit for the device/host.
115 *
116 * If the host/device isn't busy, assume that something actually
117 * completed, and that we should be able to queue a command now.
118 *
119 * Note that the prior mid-layer assumption that any host could
120 * always queue at least one command is now broken. The mid-layer
121 * will implement a user specifiable stall (see
122 * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
123 * if a command is requeued with no other commands outstanding
124 * either for the device or for the host.
125 */
126 switch (reason) {
127 case SCSI_MLQUEUE_HOST_BUSY:
128 atomic_set(&host->host_blocked, host->max_host_blocked);
129 break;
130 case SCSI_MLQUEUE_DEVICE_BUSY:
131 case SCSI_MLQUEUE_EH_RETRY:
132 atomic_set(&device->device_blocked,
133 device->max_device_blocked);
134 break;
135 case SCSI_MLQUEUE_TARGET_BUSY:
136 atomic_set(&starget->target_blocked,
137 starget->max_target_blocked);
138 break;
139 }
140}
141
142static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
143{
144 struct scsi_device *sdev = cmd->device;
145
146 if (cmd->request->rq_flags & RQF_DONTPREP) {
147 cmd->request->rq_flags &= ~RQF_DONTPREP;
148 scsi_mq_uninit_cmd(cmd);
149 } else {
150 WARN_ON_ONCE(true);
151 }
152 blk_mq_requeue_request(cmd->request, true);
153 put_device(&sdev->sdev_gendev);
154}
155
156/**
157 * __scsi_queue_insert - private queue insertion
158 * @cmd: The SCSI command being requeued
159 * @reason: The reason for the requeue
160 * @unbusy: Whether the queue should be unbusied
161 *
162 * This is a private queue insertion. The public interface
163 * scsi_queue_insert() always assumes the queue should be unbusied
164 * because it's always called before the completion. This function is
165 * for a requeue after completion, which should only occur in this
166 * file.
167 */
168static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy)
169{
170 struct scsi_device *device = cmd->device;
171 struct request_queue *q = device->request_queue;
172 unsigned long flags;
173
174 SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd,
175 "Inserting command %p into mlqueue\n", cmd));
176
177 scsi_set_blocked(cmd, reason);
178
179 /*
180 * Decrement the counters, since these commands are no longer
181 * active on the host/device.
182 */
183 if (unbusy)
184 scsi_device_unbusy(device);
185
186 /*
187 * Requeue this command. It will go before all other commands
188 * that are already in the queue. Schedule requeue work under
189 * lock such that the kblockd_schedule_work() call happens
190 * before blk_cleanup_queue() finishes.
191 */
192 cmd->result = 0;
193 if (q->mq_ops) {
194 /*
195 * Before a SCSI command is dispatched,
196 * get_device(&sdev->sdev_gendev) is called and the host,
197 * target and device busy counters are increased. Since
198 * requeuing a request causes these actions to be repeated and
199 * since scsi_device_unbusy() has already been called,
200 * put_device(&device->sdev_gendev) must still be called. Call
201 * put_device() after blk_mq_requeue_request() to avoid that
202 * removal of the SCSI device can start before requeueing has
203 * happened.
204 */
205 blk_mq_requeue_request(cmd->request, true);
206 put_device(&device->sdev_gendev);
207 return;
208 }
209 spin_lock_irqsave(q->queue_lock, flags);
210 blk_requeue_request(q, cmd->request);
211 kblockd_schedule_work(&device->requeue_work);
212 spin_unlock_irqrestore(q->queue_lock, flags);
213}
214
215/*
216 * Function: scsi_queue_insert()
217 *
218 * Purpose: Insert a command in the midlevel queue.
219 *
220 * Arguments: cmd - command that we are adding to queue.
221 * reason - why we are inserting command to queue.
222 *
223 * Lock status: Assumed that lock is not held upon entry.
224 *
225 * Returns: Nothing.
226 *
227 * Notes: We do this for one of two cases. Either the host is busy
228 * and it cannot accept any more commands for the time being,
229 * or the device returned QUEUE_FULL and can accept no more
230 * commands.
231 * Notes: This could be called either from an interrupt context or a
232 * normal process context.
233 */
234void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
235{
236 __scsi_queue_insert(cmd, reason, true);
237}
238
239
240/**
241 * __scsi_execute - insert request and wait for the result
242 * @sdev: scsi device
243 * @cmd: scsi command
244 * @data_direction: data direction
245 * @buffer: data buffer
246 * @bufflen: len of buffer
247 * @sense: optional sense buffer
248 * @sshdr: optional decoded sense header
249 * @timeout: request timeout in seconds
250 * @retries: number of times to retry request
251 * @flags: flags for ->cmd_flags
252 * @rq_flags: flags for ->rq_flags
253 * @resid: optional residual length
254 *
255 * Returns the scsi_cmnd result field if a command was executed, or a negative
256 * Linux error code if we didn't get that far.
257 */
258int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
259 int data_direction, void *buffer, unsigned bufflen,
260 unsigned char *sense, struct scsi_sense_hdr *sshdr,
261 int timeout, int retries, u64 flags, req_flags_t rq_flags,
262 int *resid)
263{
264 struct request *req;
265 struct scsi_request *rq;
266 int ret = DRIVER_ERROR << 24;
267
268 req = blk_get_request(sdev->request_queue,
269 data_direction == DMA_TO_DEVICE ?
270 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT);
271 if (IS_ERR(req))
272 return ret;
273 rq = scsi_req(req);
274
275 if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
276 buffer, bufflen, GFP_NOIO))
277 goto out;
278
279 rq->cmd_len = COMMAND_SIZE(cmd[0]);
280 memcpy(rq->cmd, cmd, rq->cmd_len);
281 rq->retries = retries;
282 req->timeout = timeout;
283 req->cmd_flags |= flags;
284 req->rq_flags |= rq_flags | RQF_QUIET;
285
286 /*
287 * head injection *required* here otherwise quiesce won't work
288 */
289 blk_execute_rq(req->q, NULL, req, 1);
290
291 /*
292 * Some devices (USB mass-storage in particular) may transfer
293 * garbage data together with a residue indicating that the data
294 * is invalid. Prevent the garbage from being misinterpreted
295 * and prevent security leaks by zeroing out the excess data.
296 */
297 if (unlikely(rq->resid_len > 0 && rq->resid_len <= bufflen))
298 memset(buffer + (bufflen - rq->resid_len), 0, rq->resid_len);
299
300 if (resid)
301 *resid = rq->resid_len;
302 if (sense && rq->sense_len)
303 memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
304 if (sshdr)
305 scsi_normalize_sense(rq->sense, rq->sense_len, sshdr);
306 ret = rq->result;
307 out:
308 blk_put_request(req);
309
310 return ret;
311}
312EXPORT_SYMBOL(__scsi_execute);
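/*
 * Usage sketch (hypothetical caller, for illustration): most code reaches the
 * helper above through the scsi_execute()/scsi_execute_req() wrappers, but a
 * direct call issuing a TEST UNIT READY could look roughly like this,
 * assuming "sdev" is a valid scsi_device:
 *
 *	const unsigned char tur[6] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
 *	struct scsi_sense_hdr sshdr;
 *	int result;
 *
 *	result = __scsi_execute(sdev, tur, DMA_NONE, NULL, 0, NULL, &sshdr,
 *				30 * HZ, 3, 0, 0, NULL);
 *
 * A negative result is a Linux error code; otherwise it is the scsi_cmnd
 * result field, and sshdr holds any decoded sense data.
 */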
313
314/*
315 * Function: scsi_init_cmd_errh()
316 *
317 * Purpose: Initialize cmd fields related to error handling.
318 *
319 * Arguments: cmd - command that is ready to be queued.
320 *
321 * Notes: This function has the job of initializing a number of
322 * fields related to error handling. Typically this will
323 * be called once for each command, as required.
324 */
325static void scsi_init_cmd_errh(struct scsi_cmnd *cmd)
326{
327 cmd->serial_number = 0;
328 scsi_set_resid(cmd, 0);
329 memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
330 if (cmd->cmd_len == 0)
331 cmd->cmd_len = scsi_command_size(cmd->cmnd);
332}
333
334/*
335 * Decrement the host_busy counter and wake up the error handler if necessary.
336 * To avoid missing a wakeup when shost->host_busy == shost->host_failed,
337 * scsi_eh_scmd_add() uses call_rcu() while this function holds an RCU read
338 * lock across its body. This ensures that this function, in its entirety,
339 * either finishes before scsi_eh_scmd_add() increases the host_failed
340 * counter or notices the shost state change made by
341 * scsi_eh_scmd_add().
342 */
343static void scsi_dec_host_busy(struct Scsi_Host *shost)
344{
345 unsigned long flags;
346
347 rcu_read_lock();
348 atomic_dec(&shost->host_busy);
349 if (unlikely(scsi_host_in_recovery(shost))) {
350 spin_lock_irqsave(shost->host_lock, flags);
351 if (shost->host_failed || shost->host_eh_scheduled)
352 scsi_eh_wakeup(shost);
353 spin_unlock_irqrestore(shost->host_lock, flags);
354 }
355 rcu_read_unlock();
356}
357
358void scsi_device_unbusy(struct scsi_device *sdev)
359{
360 struct Scsi_Host *shost = sdev->host;
361 struct scsi_target *starget = scsi_target(sdev);
362
363 scsi_dec_host_busy(shost);
364
365 if (starget->can_queue > 0)
366 atomic_dec(&starget->target_busy);
367
368 atomic_dec(&sdev->device_busy);
369}
370
371static void scsi_kick_queue(struct request_queue *q)
372{
373 if (q->mq_ops)
374 blk_mq_run_hw_queues(q, false);
375 else
376 blk_run_queue(q);
377}
378
379/*
380 * Called for single_lun devices on IO completion. Clear starget_sdev_user,
381 * and call blk_run_queue for all the scsi_devices on the target -
382 * including current_sdev first.
383 *
384 * Called with *no* scsi locks held.
385 */
386static void scsi_single_lun_run(struct scsi_device *current_sdev)
387{
388 struct Scsi_Host *shost = current_sdev->host;
389 struct scsi_device *sdev, *tmp;
390 struct scsi_target *starget = scsi_target(current_sdev);
391 unsigned long flags;
392
393 spin_lock_irqsave(shost->host_lock, flags);
394 starget->starget_sdev_user = NULL;
395 spin_unlock_irqrestore(shost->host_lock, flags);
396
397 /*
398 * Call blk_run_queue for all LUNs on the target, starting with
399 * current_sdev. We race with others (to set starget_sdev_user),
400 * but in most cases, we will be first. Ideally, each LU on the
401 * target would get some limited time or requests on the target.
402 */
403 scsi_kick_queue(current_sdev->request_queue);
404
405 spin_lock_irqsave(shost->host_lock, flags);
406 if (starget->starget_sdev_user)
407 goto out;
408 list_for_each_entry_safe(sdev, tmp, &starget->devices,
409 same_target_siblings) {
410 if (sdev == current_sdev)
411 continue;
412 if (scsi_device_get(sdev))
413 continue;
414
415 spin_unlock_irqrestore(shost->host_lock, flags);
416 scsi_kick_queue(sdev->request_queue);
417 spin_lock_irqsave(shost->host_lock, flags);
418
419 scsi_device_put(sdev);
420 }
421 out:
422 spin_unlock_irqrestore(shost->host_lock, flags);
423}
424
425static inline bool scsi_device_is_busy(struct scsi_device *sdev)
426{
427 if (atomic_read(&sdev->device_busy) >= sdev->queue_depth)
428 return true;
429 if (atomic_read(&sdev->device_blocked) > 0)
430 return true;
431 return false;
432}
433
434static inline bool scsi_target_is_busy(struct scsi_target *starget)
435{
436 if (starget->can_queue > 0) {
437 if (atomic_read(&starget->target_busy) >= starget->can_queue)
438 return true;
439 if (atomic_read(&starget->target_blocked) > 0)
440 return true;
441 }
442 return false;
443}
444
445static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
446{
447 if (shost->can_queue > 0 &&
448 atomic_read(&shost->host_busy) >= shost->can_queue)
449 return true;
450 if (atomic_read(&shost->host_blocked) > 0)
451 return true;
452 if (shost->host_self_blocked)
453 return true;
454 return false;
455}
456
457static void scsi_starved_list_run(struct Scsi_Host *shost)
458{
459 LIST_HEAD(starved_list);
460 struct scsi_device *sdev;
461 unsigned long flags;
462
463 spin_lock_irqsave(shost->host_lock, flags);
464 list_splice_init(&shost->starved_list, &starved_list);
465
466 while (!list_empty(&starved_list)) {
467 struct request_queue *slq;
468
469 /*
470 * As long as shost is accepting commands and we have
471 * starved queues, call blk_run_queue. scsi_request_fn
472 * drops the queue_lock and can add us back to the
473 * starved_list.
474 *
475 * host_lock protects the starved_list and starved_entry.
476 * scsi_request_fn must get the host_lock before checking
477 * or modifying starved_list or starved_entry.
478 */
479 if (scsi_host_is_busy(shost))
480 break;
481
482 sdev = list_entry(starved_list.next,
483 struct scsi_device, starved_entry);
484 list_del_init(&sdev->starved_entry);
485 if (scsi_target_is_busy(scsi_target(sdev))) {
486 list_move_tail(&sdev->starved_entry,
487 &shost->starved_list);
488 continue;
489 }
490
491 /*
492 * Once we drop the host lock, a racing scsi_remove_device()
493 * call may remove the sdev from the starved list and destroy
494 * it and the queue. Mitigate by taking a reference to the
495 * queue and never touching the sdev again after we drop the
496 * host lock. Note: if __scsi_remove_device() invokes
497 * blk_cleanup_queue() before the queue is run from this
498 * function then blk_run_queue() will return immediately since
499 * blk_cleanup_queue() marks the queue with QUEUE_FLAG_DYING.
500 */
501 slq = sdev->request_queue;
502 if (!blk_get_queue(slq))
503 continue;
504 spin_unlock_irqrestore(shost->host_lock, flags);
505
506 scsi_kick_queue(slq);
507 blk_put_queue(slq);
508
509 spin_lock_irqsave(shost->host_lock, flags);
510 }
511 /* put any unprocessed entries back */
512 list_splice(&starved_list, &shost->starved_list);
513 spin_unlock_irqrestore(shost->host_lock, flags);
514}
515
516/*
517 * Function: scsi_run_queue()
518 *
519 * Purpose: Select a proper request queue to serve next
520 *
521 * Arguments: q - last request's queue
522 *
523 * Returns: Nothing
524 *
525 * Notes: The previous command was completely finished, start
526 * a new one if possible.
527 */
528static void scsi_run_queue(struct request_queue *q)
529{
530 struct scsi_device *sdev = q->queuedata;
531
532 if (scsi_target(sdev)->single_lun)
533 scsi_single_lun_run(sdev);
534 if (!list_empty(&sdev->host->starved_list))
535 scsi_starved_list_run(sdev->host);
536
537 if (q->mq_ops)
538 blk_mq_run_hw_queues(q, false);
539 else
540 blk_run_queue(q);
541}
542
543void scsi_requeue_run_queue(struct work_struct *work)
544{
545 struct scsi_device *sdev;
546 struct request_queue *q;
547
548 sdev = container_of(work, struct scsi_device, requeue_work);
549 q = sdev->request_queue;
550 scsi_run_queue(q);
551}
552
553/*
554 * Function: scsi_requeue_command()
555 *
556 * Purpose: Handle post-processing of completed commands.
557 *
558 * Arguments: q - queue to operate on
559 * cmd - command that may need to be requeued.
560 *
561 * Returns: Nothing
562 *
563 * Notes: After command completion, there may be blocks left
564 * over which weren't finished by the previous command;
565 * this can be for a number of reasons - the main one is
566 * I/O errors in the middle of the request, in which case
567 * we need to request the blocks that come after the bad
568 * sector.
569 * Notes: Upon return, cmd is a stale pointer.
570 */
571static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
572{
573 struct scsi_device *sdev = cmd->device;
574 struct request *req = cmd->request;
575 unsigned long flags;
576
577 spin_lock_irqsave(q->queue_lock, flags);
578 blk_unprep_request(req);
579 req->special = NULL;
580 scsi_put_command(cmd);
581 blk_requeue_request(q, req);
582 spin_unlock_irqrestore(q->queue_lock, flags);
583
584 scsi_run_queue(q);
585
586 put_device(&sdev->sdev_gendev);
587}
588
589void scsi_run_host_queues(struct Scsi_Host *shost)
590{
591 struct scsi_device *sdev;
592
593 shost_for_each_device(sdev, shost)
594 scsi_run_queue(sdev->request_queue);
595}
596
597static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
598{
599 if (!blk_rq_is_passthrough(cmd->request)) {
600 struct scsi_driver *drv = scsi_cmd_to_driver(cmd);
601
602 if (drv->uninit_command)
603 drv->uninit_command(cmd);
604 }
605}
606
607static void scsi_mq_free_sgtables(struct scsi_cmnd *cmd)
608{
609 struct scsi_data_buffer *sdb;
610
611 if (cmd->sdb.table.nents)
612 sg_free_table_chained(&cmd->sdb.table, true);
613 if (cmd->request->next_rq) {
614 sdb = cmd->request->next_rq->special;
615 if (sdb)
616 sg_free_table_chained(&sdb->table, true);
617 }
618 if (scsi_prot_sg_count(cmd))
619 sg_free_table_chained(&cmd->prot_sdb->table, true);
620}
621
622static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
623{
624 scsi_mq_free_sgtables(cmd);
625 scsi_uninit_cmd(cmd);
626 scsi_del_cmd_from_list(cmd);
627}
628
629/*
630 * Function: scsi_release_buffers()
631 *
632 * Purpose: Free resources allocated for a scsi_command.
633 *
634 * Arguments: cmd - command that we are bailing.
635 *
636 * Lock status: Assumed that no lock is held upon entry.
637 *
638 * Returns: Nothing
639 *
640 * Notes: In the event that an upper level driver rejects a
641 * command, we must release resources allocated during
642 * the __init_io() function. Primarily this would involve
643 * the scatter-gather table.
644 */
645static void scsi_release_buffers(struct scsi_cmnd *cmd)
646{
647 if (cmd->sdb.table.nents)
648 sg_free_table_chained(&cmd->sdb.table, false);
649
650 memset(&cmd->sdb, 0, sizeof(cmd->sdb));
651
652 if (scsi_prot_sg_count(cmd))
653 sg_free_table_chained(&cmd->prot_sdb->table, false);
654}
655
656static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
657{
658 struct scsi_data_buffer *bidi_sdb = cmd->request->next_rq->special;
659
660 sg_free_table_chained(&bidi_sdb->table, false);
661 kmem_cache_free(scsi_sdb_cache, bidi_sdb);
662 cmd->request->next_rq->special = NULL;
663}
664
665/* Returns false when no more bytes to process, true if there are more */
666static bool scsi_end_request(struct request *req, blk_status_t error,
667 unsigned int bytes, unsigned int bidi_bytes)
668{
669 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
670 struct scsi_device *sdev = cmd->device;
671 struct request_queue *q = sdev->request_queue;
672
673 if (blk_update_request(req, error, bytes))
674 return true;
675
676 /* Bidi request must be completed as a whole */
677 if (unlikely(bidi_bytes) &&
678 blk_update_request(req->next_rq, error, bidi_bytes))
679 return true;
680
681 if (blk_queue_add_random(q))
682 add_disk_randomness(req->rq_disk);
683
684 if (!blk_rq_is_scsi(req)) {
685 WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
686 cmd->flags &= ~SCMD_INITIALIZED;
687 destroy_rcu_head(&cmd->rcu);
688 }
689
690 if (req->mq_ctx) {
691 /*
692 * In the MQ case the command gets freed by __blk_mq_end_request,
693 * so we have to do all cleanup that depends on it earlier.
694 *
695 * We also can't kick the queues from irq context, so we
696 * will have to defer it to a workqueue.
697 */
698 scsi_mq_uninit_cmd(cmd);
699
700 /*
701 * queue is still alive, so grab the ref for preventing it
702 * from being cleaned up during running queue.
703 */
704 percpu_ref_get(&q->q_usage_counter);
705
706 __blk_mq_end_request(req, error);
707
708 if (scsi_target(sdev)->single_lun ||
709 !list_empty(&sdev->host->starved_list))
710 kblockd_schedule_work(&sdev->requeue_work);
711 else
712 blk_mq_run_hw_queues(q, true);
713
714 percpu_ref_put(&q->q_usage_counter);
715 } else {
716 unsigned long flags;
717
718 if (bidi_bytes)
719 scsi_release_bidi_buffers(cmd);
720 scsi_release_buffers(cmd);
721 scsi_put_command(cmd);
722
723 spin_lock_irqsave(q->queue_lock, flags);
724 blk_finish_request(req, error);
725 spin_unlock_irqrestore(q->queue_lock, flags);
726
727 scsi_run_queue(q);
728 }
729
730 put_device(&sdev->sdev_gendev);
731 return false;
732}
733
734/**
735 * scsi_result_to_blk_status - translate a SCSI result code into blk_status_t
736 * @cmd: SCSI command
737 * @result: scsi error code
738 *
739 * Translate a SCSI result code into a blk_status_t value. May reset the host
740 * byte of @cmd->result.
741 */
742static blk_status_t scsi_result_to_blk_status(struct scsi_cmnd *cmd, int result)
743{
744 switch (host_byte(result)) {
745 case DID_OK:
746 /*
747 * Also check the other bytes than the status byte in result
748 * to handle the case when a SCSI LLD sets result to
749 * DRIVER_SENSE << 24 without setting SAM_STAT_CHECK_CONDITION.
750 */
751 if (scsi_status_is_good(result) && (result & ~0xff) == 0)
752 return BLK_STS_OK;
753 return BLK_STS_IOERR;
754 case DID_TRANSPORT_FAILFAST:
755 return BLK_STS_TRANSPORT;
756 case DID_TARGET_FAILURE:
757 set_host_byte(cmd, DID_OK);
758 return BLK_STS_TARGET;
759 case DID_NEXUS_FAILURE:
760 return BLK_STS_NEXUS;
761 case DID_ALLOC_FAILURE:
762 set_host_byte(cmd, DID_OK);
763 return BLK_STS_NOSPC;
764 case DID_MEDIUM_ERROR:
765 set_host_byte(cmd, DID_OK);
766 return BLK_STS_MEDIUM;
767 default:
768 return BLK_STS_IOERR;
769 }
770}
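/*
 * Background sketch for the mapping above: cmd->result packs four byte-wide
 * fields, which is what host_byte()/driver_byte() and the
 * "(result & ~0xff) == 0" test rely on:
 *
 *	result = (driver byte << 24) | (host byte << 16) |
 *		 (message byte << 8) | SCSI status byte
 *
 * For example, an LLD that sets DRIVER_SENSE << 24 but leaves the SCSI status
 * at zero yields a "good" status and a DID_OK host byte, yet result & ~0xff
 * is non-zero, so the DID_OK branch still returns BLK_STS_IOERR.
 */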
771
772/* Helper for scsi_io_completion() when "reprep" action required. */
773static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
774 struct request_queue *q)
775{
776 /* A new command will be prepared and issued. */
777 if (q->mq_ops) {
778 scsi_mq_requeue_cmd(cmd);
779 } else {
780 /* Unprep request and put it back at head of the queue. */
781 scsi_release_buffers(cmd);
782 scsi_requeue_command(q, cmd);
783 }
784}
785
786/* Helper for scsi_io_completion() when special action required. */
787static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
788{
789 struct request_queue *q = cmd->device->request_queue;
790 struct request *req = cmd->request;
791 int level = 0;
792 enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
793 ACTION_DELAYED_RETRY} action;
794 unsigned long wait_for = (cmd->allowed + 1) * req->timeout;
795 struct scsi_sense_hdr sshdr;
796 bool sense_valid;
797 bool sense_current = true; /* false implies "deferred sense" */
798 blk_status_t blk_stat;
799
800 sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
801 if (sense_valid)
802 sense_current = !scsi_sense_is_deferred(&sshdr);
803
804 blk_stat = scsi_result_to_blk_status(cmd, result);
805
806 if (host_byte(result) == DID_RESET) {
807 /* Third party bus reset or reset for error recovery
808 * reasons. Just retry the command and see what
809 * happens.
810 */
811 action = ACTION_RETRY;
812 } else if (sense_valid && sense_current) {
813 switch (sshdr.sense_key) {
814 case UNIT_ATTENTION:
815 if (cmd->device->removable) {
816 /* Detected disc change. Set a bit
817 * and quietly refuse further access.
818 */
819 cmd->device->changed = 1;
820 action = ACTION_FAIL;
821 } else {
822 /* Must have been a power glitch, or a
823 * bus reset. Could not have been a
824 * media change, so we just retry the
825 * command and see what happens.
826 */
827 action = ACTION_RETRY;
828 }
829 break;
830 case ILLEGAL_REQUEST:
831 /* If we had an ILLEGAL REQUEST returned, then
832 * we may have performed an unsupported
833 * command. The only thing this should be
834 * would be a ten byte read where only a six
835 * byte read was supported. Also, on a system
836 * where READ CAPACITY failed, we may have
837 * read past the end of the disk.
838 */
839 if ((cmd->device->use_10_for_rw &&
840 sshdr.asc == 0x20 && sshdr.ascq == 0x00) &&
841 (cmd->cmnd[0] == READ_10 ||
842 cmd->cmnd[0] == WRITE_10)) {
843 /* This will issue a new 6-byte command. */
844 cmd->device->use_10_for_rw = 0;
845 action = ACTION_REPREP;
846 } else if (sshdr.asc == 0x10) /* DIX */ {
847 action = ACTION_FAIL;
848 blk_stat = BLK_STS_PROTECTION;
849 /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
850 } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
851 action = ACTION_FAIL;
852 blk_stat = BLK_STS_TARGET;
853 } else
854 action = ACTION_FAIL;
855 break;
856 case ABORTED_COMMAND:
857 action = ACTION_FAIL;
858 if (sshdr.asc == 0x10) /* DIF */
859 blk_stat = BLK_STS_PROTECTION;
860 break;
861 case NOT_READY:
862 /* If the device is in the process of becoming
863 * ready, or has a temporary blockage, retry.
864 */
865 if (sshdr.asc == 0x04) {
866 switch (sshdr.ascq) {
867 case 0x01: /* becoming ready */
868 case 0x04: /* format in progress */
869 case 0x05: /* rebuild in progress */
870 case 0x06: /* recalculation in progress */
871 case 0x07: /* operation in progress */
872 case 0x08: /* Long write in progress */
873 case 0x09: /* self test in progress */
874 case 0x14: /* space allocation in progress */
875 case 0x1a: /* start stop unit in progress */
876 case 0x1b: /* sanitize in progress */
877 case 0x1d: /* configuration in progress */
878 case 0x24: /* depopulation in progress */
879 action = ACTION_DELAYED_RETRY;
880 break;
881 default:
882 action = ACTION_FAIL;
883 break;
884 }
885 } else
886 action = ACTION_FAIL;
887 break;
888 case VOLUME_OVERFLOW:
889 /* See SSC3rXX or current. */
890 action = ACTION_FAIL;
891 break;
892 default:
893 action = ACTION_FAIL;
894 break;
895 }
896 } else
897 action = ACTION_FAIL;
898
899 if (action != ACTION_FAIL &&
900 time_before(cmd->jiffies_at_alloc + wait_for, jiffies))
901 action = ACTION_FAIL;
902
903 switch (action) {
904 case ACTION_FAIL:
905 /* Give up and fail the remainder of the request */
906 if (!(req->rq_flags & RQF_QUIET)) {
907 static DEFINE_RATELIMIT_STATE(_rs,
908 DEFAULT_RATELIMIT_INTERVAL,
909 DEFAULT_RATELIMIT_BURST);
910
911 if (unlikely(scsi_logging_level))
912 level =
913 SCSI_LOG_LEVEL(SCSI_LOG_MLCOMPLETE_SHIFT,
914 SCSI_LOG_MLCOMPLETE_BITS);
915
916 /*
917 * if logging is enabled the failure will be printed
918 * in scsi_log_completion(), so avoid duplicate messages
919 */
920 if (!level && __ratelimit(&_rs)) {
921 scsi_print_result(cmd, NULL, FAILED);
922 if (driver_byte(result) == DRIVER_SENSE)
923 scsi_print_sense(cmd);
924 scsi_print_command(cmd);
925 }
926 }
927 if (!scsi_end_request(req, blk_stat, blk_rq_err_bytes(req), 0))
928 return;
929 /*FALLTHRU*/
930 case ACTION_REPREP:
931 scsi_io_completion_reprep(cmd, q);
932 break;
933 case ACTION_RETRY:
934 /* Retry the same command immediately */
935 __scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY, false);
936 break;
937 case ACTION_DELAYED_RETRY:
938 /* Retry the same command after a delay */
939 __scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, false);
940 break;
941 }
942}
943
944/*
945 * Helper for scsi_io_completion() when cmd->result is non-zero. Returns a
946 * new result that may suppress further error checking. Also modifies
947 * *blk_statp in some cases.
948 */
949static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result,
950 blk_status_t *blk_statp)
951{
952 bool sense_valid;
953 bool sense_current = true; /* false implies "deferred sense" */
954 struct request *req = cmd->request;
955 struct scsi_sense_hdr sshdr;
956
957 sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
958 if (sense_valid)
959 sense_current = !scsi_sense_is_deferred(&sshdr);
960
961 if (blk_rq_is_passthrough(req)) {
962 if (sense_valid) {
963 /*
964 * SG_IO wants current and deferred errors
965 */
966 scsi_req(req)->sense_len =
967 min(8 + cmd->sense_buffer[7],
968 SCSI_SENSE_BUFFERSIZE);
969 }
970 if (sense_current)
971 *blk_statp = scsi_result_to_blk_status(cmd, result);
972 } else if (blk_rq_bytes(req) == 0 && sense_current) {
973 /*
974 * Flush commands do not transfer any data, and thus cannot use
975 * good_bytes != blk_rq_bytes(req) as the signal for an error.
976 * This sets *blk_statp explicitly for the problem case.
977 */
978 *blk_statp = scsi_result_to_blk_status(cmd, result);
979 }
980 /*
981 * Recovered errors need reporting, but they're always treated as
982 * success, so fiddle the result code here. For passthrough requests
983 * we already took a copy of the original into sreq->result which
984 * is what gets returned to the user
985 */
986 if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) {
987 bool do_print = true;
988 /*
989 * if ATA PASS-THROUGH INFORMATION AVAILABLE [0x0, 0x1d]
990 * skip print since caller wants ATA registers. Only occurs
991 * on SCSI ATA PASS_THROUGH commands when CK_COND=1
992 */
993 if ((sshdr.asc == 0x0) && (sshdr.ascq == 0x1d))
994 do_print = false;
995 else if (req->rq_flags & RQF_QUIET)
996 do_print = false;
997 if (do_print)
998 scsi_print_sense(cmd);
999 result = 0;
1000 /* for passthrough, *blk_statp may be set */
1001 *blk_statp = BLK_STS_OK;
1002 }
1003 /*
1004 * Another corner case: the SCSI status byte is non-zero but 'good'.
1005 * Example: PRE-FETCH command returns SAM_STAT_CONDITION_MET when
1006 * it is able to fit nominated LBs in its cache (and SAM_STAT_GOOD
1007 * if it can't fit). Treat SAM_STAT_CONDITION_MET and the related
1008 * intermediate statuses (both obsolete in SAM-4) as good.
1009 */
1010 if (status_byte(result) && scsi_status_is_good(result)) {
1011 result = 0;
1012 *blk_statp = BLK_STS_OK;
1013 }
1014 return result;
1015}
1016
1017/*
1018 * Function: scsi_io_completion()
1019 *
1020 * Purpose: Completion processing for block device I/O requests.
1021 *
1022 * Arguments: cmd - command that is finished.
1023 *
1024 * Lock status: Assumed that no lock is held upon entry.
1025 *
1026 * Returns: Nothing
1027 *
1028 * Notes: We will finish off the specified number of sectors. If we
1029 * are done, the command block will be released and the queue
1030 * function will be goosed. If we are not done then we have to
1031 * figure out what to do next:
1032 *
1033 * a) We can call scsi_requeue_command(). The request
1034 * will be unprepared and put back on the queue. Then
1035 * a new command will be created for it. This should
1036 * be used if we made forward progress, or if we want
1037 * to switch from READ(10) to READ(6) for example.
1038 *
1039 * b) We can call __scsi_queue_insert(). The request will
1040 * be put back on the queue and retried using the same
1041 * command as before, possibly after a delay.
1042 *
1043 * c) We can call scsi_end_request() with blk_stat other than
1044 * BLK_STS_OK, to fail the remainder of the request.
1045 */
1046void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
1047{
1048 int result = cmd->result;
1049 struct request_queue *q = cmd->device->request_queue;
1050 struct request *req = cmd->request;
1051 blk_status_t blk_stat = BLK_STS_OK;
1052
1053 if (unlikely(result)) /* a nz result may or may not be an error */
1054 result = scsi_io_completion_nz_result(cmd, result, &blk_stat);
1055
1056 if (unlikely(blk_rq_is_passthrough(req))) {
1057 /*
1058 * scsi_result_to_blk_status may have reset the host_byte
1059 */
1060 scsi_req(req)->result = cmd->result;
1061 scsi_req(req)->resid_len = scsi_get_resid(cmd);
1062
1063 if (unlikely(scsi_bidi_cmnd(cmd))) {
1064 /*
1065 * Bidi commands Must be complete as a whole,
1066 * both sides at once.
1067 */
1068 scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
1069 if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
1070 blk_rq_bytes(req->next_rq)))
1071 WARN_ONCE(true,
1072 "Bidi command with remaining bytes");
1073 return;
1074 }
1075 }
1076
1077 /* no bidi support yet, other than in pass-through */
1078 if (unlikely(blk_bidi_rq(req))) {
1079 WARN_ONCE(true, "Only support bidi command in passthrough");
1080 scmd_printk(KERN_ERR, cmd, "Killing bidi command\n");
1081 if (scsi_end_request(req, BLK_STS_IOERR, blk_rq_bytes(req),
1082 blk_rq_bytes(req->next_rq)))
1083 WARN_ONCE(true, "Bidi command with remaining bytes");
1084 return;
1085 }
1086
1087 /*
1088 * Next deal with any sectors which we were able to correctly
1089 * handle.
1090 */
1091 SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, cmd,
1092 "%u sectors total, %d bytes done.\n",
1093 blk_rq_sectors(req), good_bytes));
1094
1095 /*
1096 * Failed, zero length commands always need to drop down to the retry
1097 * code below; everything else takes the fast path and should return
1098 * from within this block.
1099 */
1100 if (likely(blk_rq_bytes(req) > 0 || blk_stat == BLK_STS_OK)) {
1101 if (likely(!scsi_end_request(req, blk_stat, good_bytes, 0)))
1102 return; /* no bytes remaining */
1103 }
1104
1105 /* Kill remainder if no retries. */
1106 if (unlikely(blk_stat && scsi_noretry_cmd(cmd))) {
1107 if (scsi_end_request(req, blk_stat, blk_rq_bytes(req), 0))
1108 WARN_ONCE(true,
1109 "Bytes remaining after failed, no-retry command");
1110 return;
1111 }
1112
1113 /*
1114 * If there had been no error, but we have leftover bytes in the
1115 * request, just queue the command up again.
1116 */
1117 if (likely(result == 0))
1118 scsi_io_completion_reprep(cmd, q);
1119 else
1120 scsi_io_completion_action(cmd, result);
1121}
1122
1123static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
1124{
1125 int count;
1126
1127 /*
1128 * If sg table allocation fails, requeue request later.
1129 */
1130 if (unlikely(sg_alloc_table_chained(&sdb->table,
1131 blk_rq_nr_phys_segments(req), sdb->table.sgl)))
1132 return BLKPREP_DEFER;
1133
1134 /*
1135 * Next, walk the list, and fill in the addresses and sizes of
1136 * each segment.
1137 */
1138 count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
1139 BUG_ON(count > sdb->table.nents);
1140 sdb->table.nents = count;
1141 sdb->length = blk_rq_payload_bytes(req);
1142 return BLKPREP_OK;
1143}
1144
1145/*
1146 * Function: scsi_init_io()
1147 *
1148 * Purpose: SCSI I/O initialize function.
1149 *
1150 * Arguments: cmd - Command descriptor we wish to initialize
1151 *
1152 * Returns: 0 on success
1153 * BLKPREP_DEFER if the failure is retryable
1154 * BLKPREP_KILL if the failure is fatal
1155 */
1156int scsi_init_io(struct scsi_cmnd *cmd)
1157{
1158 struct scsi_device *sdev = cmd->device;
1159 struct request *rq = cmd->request;
1160 bool is_mq = (rq->mq_ctx != NULL);
1161 int error = BLKPREP_KILL;
1162
1163 if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq)))
1164 goto err_exit;
1165
1166 error = scsi_init_sgtable(rq, &cmd->sdb);
1167 if (error)
1168 goto err_exit;
1169
1170 if (blk_bidi_rq(rq)) {
1171 if (!rq->q->mq_ops) {
1172 struct scsi_data_buffer *bidi_sdb =
1173 kmem_cache_zalloc(scsi_sdb_cache, GFP_ATOMIC);
1174 if (!bidi_sdb) {
1175 error = BLKPREP_DEFER;
1176 goto err_exit;
1177 }
1178
1179 rq->next_rq->special = bidi_sdb;
1180 }
1181
1182 error = scsi_init_sgtable(rq->next_rq, rq->next_rq->special);
1183 if (error)
1184 goto err_exit;
1185 }
1186
1187 if (blk_integrity_rq(rq)) {
1188 struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
1189 int ivecs, count;
1190
1191 if (prot_sdb == NULL) {
1192 /*
1193 * This can happen if someone (e.g. multipath)
1194 * queues a command to a device on an adapter
1195 * that does not support DIX.
1196 */
1197 WARN_ON_ONCE(1);
1198 error = BLKPREP_KILL;
1199 goto err_exit;
1200 }
1201
1202 ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio);
1203
1204 if (sg_alloc_table_chained(&prot_sdb->table, ivecs,
1205 prot_sdb->table.sgl)) {
1206 error = BLKPREP_DEFER;
1207 goto err_exit;
1208 }
1209
1210 count = blk_rq_map_integrity_sg(rq->q, rq->bio,
1211 prot_sdb->table.sgl);
1212 BUG_ON(unlikely(count > ivecs));
1213 BUG_ON(unlikely(count > queue_max_integrity_segments(rq->q)));
1214
1215 cmd->prot_sdb = prot_sdb;
1216 cmd->prot_sdb->table.nents = count;
1217 }
1218
1219 return BLKPREP_OK;
1220err_exit:
1221 if (is_mq) {
1222 scsi_mq_free_sgtables(cmd);
1223 } else {
1224 scsi_release_buffers(cmd);
1225 cmd->request->special = NULL;
1226 scsi_put_command(cmd);
1227 put_device(&sdev->sdev_gendev);
1228 }
1229 return error;
1230}
1231EXPORT_SYMBOL(scsi_init_io);
1232
1233/**
1234 * scsi_initialize_rq - initialize struct scsi_cmnd partially
1235 * @rq: Request associated with the SCSI command to be initialized.
1236 *
1237 * This function initializes the members of struct scsi_cmnd that must be
1238 * initialized before request processing starts and that won't be
1239 * reinitialized if a SCSI command is requeued.
1240 *
1241 * Called from inside blk_get_request() for pass-through requests and from
1242 * inside scsi_init_command() for filesystem requests.
1243 */
1244static void scsi_initialize_rq(struct request *rq)
1245{
1246 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
1247
1248 scsi_req_init(&cmd->req);
1249 init_rcu_head(&cmd->rcu);
1250 cmd->jiffies_at_alloc = jiffies;
1251 cmd->retries = 0;
1252}
1253
1254/* Add a command to the list used by the aacraid and dpt_i2o drivers */
1255void scsi_add_cmd_to_list(struct scsi_cmnd *cmd)
1256{
1257 struct scsi_device *sdev = cmd->device;
1258 struct Scsi_Host *shost = sdev->host;
1259 unsigned long flags;
1260
1261 if (shost->use_cmd_list) {
1262 spin_lock_irqsave(&sdev->list_lock, flags);
1263 list_add_tail(&cmd->list, &sdev->cmd_list);
1264 spin_unlock_irqrestore(&sdev->list_lock, flags);
1265 }
1266}
1267
1268/* Remove a command from the list used by the aacraid and dpt_i2o drivers */
1269void scsi_del_cmd_from_list(struct scsi_cmnd *cmd)
1270{
1271 struct scsi_device *sdev = cmd->device;
1272 struct Scsi_Host *shost = sdev->host;
1273 unsigned long flags;
1274
1275 if (shost->use_cmd_list) {
1276 spin_lock_irqsave(&sdev->list_lock, flags);
1277 BUG_ON(list_empty(&cmd->list));
1278 list_del_init(&cmd->list);
1279 spin_unlock_irqrestore(&sdev->list_lock, flags);
1280 }
1281}
1282
1283/* Called after a request has been started. */
1284void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
1285{
1286 void *buf = cmd->sense_buffer;
1287 void *prot = cmd->prot_sdb;
1288 struct request *rq = blk_mq_rq_from_pdu(cmd);
1289 unsigned int flags = cmd->flags & SCMD_PRESERVED_FLAGS;
1290 unsigned long jiffies_at_alloc;
1291 int retries;
1292
1293 if (!blk_rq_is_scsi(rq) && !(flags & SCMD_INITIALIZED)) {
1294 flags |= SCMD_INITIALIZED;
1295 scsi_initialize_rq(rq);
1296 }
1297
1298 jiffies_at_alloc = cmd->jiffies_at_alloc;
1299 retries = cmd->retries;
1300 /* zero out the cmd, except for the embedded scsi_request */
1301 memset((char *)cmd + sizeof(cmd->req), 0,
1302 sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size);
1303
1304 cmd->device = dev;
1305 cmd->sense_buffer = buf;
1306 cmd->prot_sdb = prot;
1307 cmd->flags = flags;
1308 INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
1309 cmd->jiffies_at_alloc = jiffies_at_alloc;
1310 cmd->retries = retries;
1311
1312 scsi_add_cmd_to_list(cmd);
1313}
1314
1315static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req)
1316{
1317 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
1318
1319 /*
1320 * Passthrough requests may transfer data, in which case they must have
1321 * a bio attached to them. Or they might contain a SCSI command
1322 * that does not transfer data, in which case they may optionally
1323 * submit a request without an attached bio.
1324 */
1325 if (req->bio) {
1326 int ret = scsi_init_io(cmd);
1327 if (unlikely(ret))
1328 return ret;
1329 } else {
1330 BUG_ON(blk_rq_bytes(req));
1331
1332 memset(&cmd->sdb, 0, sizeof(cmd->sdb));
1333 }
1334
1335 cmd->cmd_len = scsi_req(req)->cmd_len;
1336 cmd->cmnd = scsi_req(req)->cmd;
1337 cmd->transfersize = blk_rq_bytes(req);
1338 cmd->allowed = scsi_req(req)->retries;
1339 return BLKPREP_OK;
1340}
1341
1342/*
1343 * Setup a normal block command. These are simple request from filesystems
1344 * that still need to be translated to SCSI CDBs from the ULD.
1345 */
1346static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
1347{
1348 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
1349
1350 if (unlikely(sdev->handler && sdev->handler->prep_fn)) {
1351 int ret = sdev->handler->prep_fn(sdev, req);
1352 if (ret != BLKPREP_OK)
1353 return ret;
1354 }
1355
1356 cmd->cmnd = scsi_req(req)->cmd = scsi_req(req)->__cmd;
1357 memset(cmd->cmnd, 0, BLK_MAX_CDB);
1358 return scsi_cmd_to_driver(cmd)->init_command(cmd);
1359}
1360
1361static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req)
1362{
1363 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
1364
1365 if (!blk_rq_bytes(req))
1366 cmd->sc_data_direction = DMA_NONE;
1367 else if (rq_data_dir(req) == WRITE)
1368 cmd->sc_data_direction = DMA_TO_DEVICE;
1369 else
1370 cmd->sc_data_direction = DMA_FROM_DEVICE;
1371
1372 if (blk_rq_is_scsi(req))
1373 return scsi_setup_scsi_cmnd(sdev, req);
1374 else
1375 return scsi_setup_fs_cmnd(sdev, req);
1376}
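/*
 * For illustration of the split above: a READ(10) submitted by a filesystem
 * arrives as a non-passthrough request and goes through scsi_setup_fs_cmnd(),
 * where the ULD's ->init_command() hook (for disks, sd_init_command() in
 * sd.c) builds the CDB, whereas an SG_IO ioctl arrives as a REQ_OP_SCSI_IN/OUT
 * passthrough request and takes scsi_setup_scsi_cmnd() with the CDB already
 * supplied by the caller.
 */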
1377
1378static int
1379scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
1380{
1381 int ret = BLKPREP_OK;
1382
1383 /*
1384 * If the device is not in running state we will reject some
1385 * or all commands.
1386 */
1387 if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
1388 switch (sdev->sdev_state) {
1389 case SDEV_OFFLINE:
1390 case SDEV_TRANSPORT_OFFLINE:
1391 /*
1392 * If the device is offline we refuse to process any
1393 * commands. The device must be brought online
1394 * before trying any recovery commands.
1395 */
1396 sdev_printk(KERN_ERR, sdev,
1397 "rejecting I/O to offline device\n");
1398 ret = BLKPREP_KILL;
1399 break;
1400 case SDEV_DEL:
1401 /*
1402 * If the device is fully deleted, we refuse to
1403 * process any commands as well.
1404 */
1405 sdev_printk(KERN_ERR, sdev,
1406 "rejecting I/O to dead device\n");
1407 ret = BLKPREP_KILL;
1408 break;
1409 case SDEV_BLOCK:
1410 case SDEV_CREATED_BLOCK:
1411 ret = BLKPREP_DEFER;
1412 break;
1413 case SDEV_QUIESCE:
1414 /*
1415 * If the device is quiesced we defer normal commands.
1416 */
1417 if (req && !(req->rq_flags & RQF_PREEMPT))
1418 ret = BLKPREP_DEFER;
1419 break;
1420 default:
1421 /*
1422 * For any other not fully online state we only allow
1423 * special commands. In particular any user initiated
1424 * command is not allowed.
1425 */
1426 if (req && !(req->rq_flags & RQF_PREEMPT))
1427 ret = BLKPREP_KILL;
1428 break;
1429 }
1430 }
1431 return ret;
1432}
1433
1434static int
1435scsi_prep_return(struct request_queue *q, struct request *req, int ret)
1436{
1437 struct scsi_device *sdev = q->queuedata;
1438
1439 switch (ret) {
1440 case BLKPREP_KILL:
1441 case BLKPREP_INVALID:
1442 scsi_req(req)->result = DID_NO_CONNECT << 16;
1443 /* release the command and kill it */
1444 if (req->special) {
1445 struct scsi_cmnd *cmd = req->special;
1446 scsi_release_buffers(cmd);
1447 scsi_put_command(cmd);
1448 put_device(&sdev->sdev_gendev);
1449 req->special = NULL;
1450 }
1451 break;
1452 case BLKPREP_DEFER:
1453 /*
1454 * If we defer, the blk_peek_request() returns NULL, but the
1455 * queue must be restarted, so we schedule a callback to happen
1456 * shortly.
1457 */
1458 if (atomic_read(&sdev->device_busy) == 0)
1459 blk_delay_queue(q, SCSI_QUEUE_DELAY);
1460 break;
1461 default:
1462 req->rq_flags |= RQF_DONTPREP;
1463 }
1464
1465 return ret;
1466}
1467
1468static int scsi_prep_fn(struct request_queue *q, struct request *req)
1469{
1470 struct scsi_device *sdev = q->queuedata;
1471 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
1472 int ret;
1473
1474 ret = scsi_prep_state_check(sdev, req);
1475 if (ret != BLKPREP_OK)
1476 goto out;
1477
1478 if (!req->special) {
1479 /* Bail if we can't get a reference to the device */
1480 if (unlikely(!get_device(&sdev->sdev_gendev))) {
1481 ret = BLKPREP_DEFER;
1482 goto out;
1483 }
1484
1485 scsi_init_command(sdev, cmd);
1486 req->special = cmd;
1487 }
1488
1489 cmd->tag = req->tag;
1490 cmd->request = req;
1491 cmd->prot_op = SCSI_PROT_NORMAL;
1492
1493 ret = scsi_setup_cmnd(sdev, req);
1494out:
1495 return scsi_prep_return(q, req, ret);
1496}
1497
1498static void scsi_unprep_fn(struct request_queue *q, struct request *req)
1499{
1500 scsi_uninit_cmd(blk_mq_rq_to_pdu(req));
1501}
1502
1503/*
1504 * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
1505 * return 0.
1506 *
1507 * Called with the queue_lock held.
1508 */
1509static inline int scsi_dev_queue_ready(struct request_queue *q,
1510 struct scsi_device *sdev)
1511{
1512 unsigned int busy;
1513
1514 busy = atomic_inc_return(&sdev->device_busy) - 1;
1515 if (atomic_read(&sdev->device_blocked)) {
1516 if (busy)
1517 goto out_dec;
1518
1519 /*
1520 * unblock after device_blocked iterates to zero
1521 */
1522 if (atomic_dec_return(&sdev->device_blocked) > 0) {
1523 /*
1524 * For the MQ case we take care of this in the caller.
1525 */
1526 if (!q->mq_ops)
1527 blk_delay_queue(q, SCSI_QUEUE_DELAY);
1528 goto out_dec;
1529 }
1530 SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev,
1531 "unblocking device at zero depth\n"));
1532 }
1533
1534 if (busy >= sdev->queue_depth)
1535 goto out_dec;
1536
1537 return 1;
1538out_dec:
1539 atomic_dec(&sdev->device_busy);
1540 return 0;
1541}
1542
1543/*
1544 * scsi_target_queue_ready: checks if we can send commands to the target
1545 * @sdev: scsi device on starget to check.
1546 */
1547static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
1548 struct scsi_device *sdev)
1549{
1550 struct scsi_target *starget = scsi_target(sdev);
1551 unsigned int busy;
1552
1553 if (starget->single_lun) {
1554 spin_lock_irq(shost->host_lock);
1555 if (starget->starget_sdev_user &&
1556 starget->starget_sdev_user != sdev) {
1557 spin_unlock_irq(shost->host_lock);
1558 return 0;
1559 }
1560 starget->starget_sdev_user = sdev;
1561 spin_unlock_irq(shost->host_lock);
1562 }
1563
1564 if (starget->can_queue <= 0)
1565 return 1;
1566
1567 busy = atomic_inc_return(&starget->target_busy) - 1;
1568 if (atomic_read(&starget->target_blocked) > 0) {
1569 if (busy)
1570 goto starved;
1571
1572 /*
1573 * unblock after target_blocked iterates to zero
1574 */
1575 if (atomic_dec_return(&starget->target_blocked) > 0)
1576 goto out_dec;
1577
1578 SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget,
1579 "unblocking target at zero depth\n"));
1580 }
1581
1582 if (busy >= starget->can_queue)
1583 goto starved;
1584
1585 return 1;
1586
1587starved:
1588 spin_lock_irq(shost->host_lock);
1589 list_move_tail(&sdev->starved_entry, &shost->starved_list);
1590 spin_unlock_irq(shost->host_lock);
1591out_dec:
1592 if (starget->can_queue > 0)
1593 atomic_dec(&starget->target_busy);
1594 return 0;
1595}
1596
1597/*
1598 * scsi_host_queue_ready: if we can send requests to shost, return 1 else
1599 * return 0. We must end up running the queue again whenever 0 is
1600 * returned, else IO can hang.
1601 */
1602static inline int scsi_host_queue_ready(struct request_queue *q,
1603 struct Scsi_Host *shost,
1604 struct scsi_device *sdev)
1605{
1606 unsigned int busy;
1607
1608 if (scsi_host_in_recovery(shost))
1609 return 0;
1610
1611 busy = atomic_inc_return(&shost->host_busy) - 1;
1612 if (atomic_read(&shost->host_blocked) > 0) {
1613 if (busy)
1614 goto starved;
1615
1616 /*
1617 * unblock after host_blocked iterates to zero
1618 */
1619 if (atomic_dec_return(&shost->host_blocked) > 0)
1620 goto out_dec;
1621
1622 SCSI_LOG_MLQUEUE(3,
1623 shost_printk(KERN_INFO, shost,
1624 "unblocking host at zero depth\n"));
1625 }
1626
1627 if (shost->can_queue > 0 && busy >= shost->can_queue)
1628 goto starved;
1629 if (shost->host_self_blocked)
1630 goto starved;
1631
1632 /* We're OK to process the command, so we can't be starved */
1633 if (!list_empty(&sdev->starved_entry)) {
1634 spin_lock_irq(shost->host_lock);
1635 if (!list_empty(&sdev->starved_entry))
1636 list_del_init(&sdev->starved_entry);
1637 spin_unlock_irq(shost->host_lock);
1638 }
1639
1640 return 1;
1641
1642starved:
1643 spin_lock_irq(shost->host_lock);
1644 if (list_empty(&sdev->starved_entry))
1645 list_add_tail(&sdev->starved_entry, &shost->starved_list);
1646 spin_unlock_irq(shost->host_lock);
1647out_dec:
1648 scsi_dec_host_busy(shost);
1649 return 0;
1650}
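/*
 * Note on the pattern shared by the *_queue_ready() helpers above: each one
 * atomically bumps the relevant busy counter, then backs the increment out
 * (and, for target/host, parks the device on the starved list) when the
 * blocked counter or the queue depth says the command cannot be sent yet.
 * A blocked counter is decremented at most once per idle check, which paces
 * out the stall programmed by scsi_set_blocked() via max_device_blocked,
 * max_target_blocked and max_host_blocked.
 */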
1651
1652/*
1653 * Busy state exporting function for request stacking drivers.
1654 *
1655 * For efficiency, no lock is taken to check the busy state of
1656 * shost/starget/sdev, since the returned value is not guaranteed and
1657 * may be changed after request stacking drivers call the function,
1658 * regardless of taking lock or not.
1659 *
1660 * When SCSI can't dispatch I/Os any more and needs to kill them, it
1661 * needs to return 'not busy'. Otherwise, request stacking drivers
1662 * may hold requests forever.
1663 */
1664static int scsi_lld_busy(struct request_queue *q)
1665{
1666 struct scsi_device *sdev = q->queuedata;
1667 struct Scsi_Host *shost;
1668
1669 if (blk_queue_dying(q))
1670 return 0;
1671
1672 shost = sdev->host;
1673
1674 /*
1675 * Ignore host/starget busy state.
1676 * Since the block layer does not have a concept of fairness across
1677 * multiple queues, congestion of the host/starget needs to be handled
1678 * in the SCSI layer.
1679 */
1680 if (scsi_host_in_recovery(shost) || scsi_device_is_busy(sdev))
1681 return 1;
1682
1683 return 0;
1684}
1685
1686/*
1687 * Kill a request for a dead device
1688 */
1689static void scsi_kill_request(struct request *req, struct request_queue *q)
1690{
1691 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
1692 struct scsi_device *sdev;
1693 struct scsi_target *starget;
1694 struct Scsi_Host *shost;
1695
1696 blk_start_request(req);
1697
1698 scmd_printk(KERN_INFO, cmd, "killing request\n");
1699
1700 sdev = cmd->device;
1701 starget = scsi_target(sdev);
1702 shost = sdev->host;
1703 scsi_init_cmd_errh(cmd);
1704 cmd->result = DID_NO_CONNECT << 16;
1705 atomic_inc(&cmd->device->iorequest_cnt);
1706
1707 /*
1708 * The SCSI request completion path will do scsi_device_unbusy(), so
1709 * bump the busy counts here first. To bump the counters, we need to
1710 * dance with the locks as the normal issue path does.
1711 */
1712 atomic_inc(&sdev->device_busy);
1713 atomic_inc(&shost->host_busy);
1714 if (starget->can_queue > 0)
1715 atomic_inc(&starget->target_busy);
1716
1717 blk_complete_request(req);
1718}
1719
1720static void scsi_softirq_done(struct request *rq)
1721{
1722 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
1723 unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
1724 int disposition;
1725
1726 INIT_LIST_HEAD(&cmd->eh_entry);
1727
1728 atomic_inc(&cmd->device->iodone_cnt);
1729 if (cmd->result)
1730 atomic_inc(&cmd->device->ioerr_cnt);
1731
1732 disposition = scsi_decide_disposition(cmd);
1733 if (disposition != SUCCESS &&
1734 time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
1735 sdev_printk(KERN_ERR, cmd->device,
1736 "timing out command, waited %lus\n",
1737 wait_for/HZ);
1738 disposition = SUCCESS;
1739 }
1740
1741 scsi_log_completion(cmd, disposition);
1742
1743 switch (disposition) {
1744 case SUCCESS:
1745 scsi_finish_command(cmd);
1746 break;
1747 case NEEDS_RETRY:
1748 scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);
1749 break;
1750 case ADD_TO_MLQUEUE:
1751 scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
1752 break;
1753 default:
1754 scsi_eh_scmd_add(cmd);
1755 break;
1756 }
1757}
1758
1759/**
1760 * scsi_dispatch_command - Dispatch a command to the low-level driver.
1761 * @cmd: command block we are dispatching.
1762 *
1763 * Return: nonzero if the request was rejected and the device's queue needs
1764 * to be plugged.
1765 */
1766static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
1767{
1768 struct Scsi_Host *host = cmd->device->host;
1769 int rtn = 0;
1770
1771 atomic_inc(&cmd->device->iorequest_cnt);
1772
1773 /* check if the device is still usable */
1774 if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
1775 /* in SDEV_DEL we error all commands. DID_NO_CONNECT
1776 * returns an immediate error upwards, and signals
1777 * that the device is no longer present */
1778 cmd->result = DID_NO_CONNECT << 16;
1779 goto done;
1780 }
1781
1782 /* Check to see if the scsi lld made this device blocked. */
1783 if (unlikely(scsi_device_blocked(cmd->device))) {
1784 /*
1785 * in blocked state, the command is just put back on
1786 * the device queue. The suspend state has already
1787 * blocked the queue so future requests should not
1788 * occur until the device transitions out of the
1789 * suspend state.
1790 */
1791 SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
1792 "queuecommand : device blocked\n"));
1793 return SCSI_MLQUEUE_DEVICE_BUSY;
1794 }
1795
1796 /* Store the LUN value in cmnd, if needed. */
1797 if (cmd->device->lun_in_cdb)
1798 cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) |
1799 (cmd->device->lun << 5 & 0xe0);
1800
1801 scsi_log_send(cmd);
1802
1803 /*
1804 * Before we queue this command, check if the command
1805 * length exceeds what the host adapter can handle.
1806 */
1807 if (cmd->cmd_len > cmd->device->host->max_cmd_len) {
1808 SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
1809 "queuecommand : command too long. "
1810 "cdb_size=%d host->max_cmd_len=%d\n",
1811 cmd->cmd_len, cmd->device->host->max_cmd_len));
1812 cmd->result = (DID_ABORT << 16);
1813 goto done;
1814 }
1815
1816 if (unlikely(host->shost_state == SHOST_DEL)) {
1817 cmd->result = (DID_NO_CONNECT << 16);
1818 goto done;
1819
1820 }
1821
1822 trace_scsi_dispatch_cmd_start(cmd);
1823 rtn = host->hostt->queuecommand(host, cmd);
1824 if (rtn) {
1825 trace_scsi_dispatch_cmd_error(cmd, rtn);
1826 if (rtn != SCSI_MLQUEUE_DEVICE_BUSY &&
1827 rtn != SCSI_MLQUEUE_TARGET_BUSY)
1828 rtn = SCSI_MLQUEUE_HOST_BUSY;
1829
1830 SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
1831 "queuecommand : request rejected\n"));
1832 }
1833
1834 return rtn;
1835 done:
1836 cmd->scsi_done(cmd);
1837 return 0;
1838}
1839
1840/**
1841 * scsi_done - Invoke completion on finished SCSI command.
1842 * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
1843 * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
1844 *
1845 * Description: This function is the mid-level's (SCSI Core) interrupt routine,
1846 * which regains ownership of the SCSI command (de facto) from a LLDD, and
1847 * calls blk_complete_request() for further processing.
1848 *
1849 * This function is interrupt context safe.
1850 */
1851static void scsi_done(struct scsi_cmnd *cmd)
1852{
1853 trace_scsi_dispatch_cmd_done(cmd);
1854 blk_complete_request(cmd->request);
1855}
1856
1857/*
1858 * Function: scsi_request_fn()
1859 *
1860 * Purpose: Main strategy routine for SCSI.
1861 *
1862 * Arguments: q - Pointer to actual queue.
1863 *
1864 * Returns: Nothing
1865 *
1866 * Lock status: request queue lock assumed to be held when called.
1867 *
1868 * Note: See sd_zbc.c sd_zbc_write_lock_zone() for write order
1869 * protection for ZBC disks.
1870 */
1871static void scsi_request_fn(struct request_queue *q)
1872 __releases(q->queue_lock)
1873 __acquires(q->queue_lock)
1874{
1875 struct scsi_device *sdev = q->queuedata;
1876 struct Scsi_Host *shost;
1877 struct scsi_cmnd *cmd;
1878 struct request *req;
1879
1880 /*
1881 * To start with, we keep looping until the queue is empty, or until
1882 * the host is no longer able to accept any more requests.
1883 */
1884 shost = sdev->host;
1885 for (;;) {
1886 int rtn;
1887 /*
1888 * get next queueable request. We do this early to make sure
1889 * that the request is fully prepared even if we cannot
1890 * accept it.
1891 */
1892 req = blk_peek_request(q);
1893 if (!req)
1894 break;
1895
1896 if (unlikely(!scsi_device_online(sdev))) {
1897 sdev_printk(KERN_ERR, sdev,
1898 "rejecting I/O to offline device\n");
1899 scsi_kill_request(req, q);
1900 continue;
1901 }
1902
1903 if (!scsi_dev_queue_ready(q, sdev))
1904 break;
1905
1906 /*
1907 * Remove the request from the request list.
1908 */
1909 if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
1910 blk_start_request(req);
1911
1912 spin_unlock_irq(q->queue_lock);
1913 cmd = blk_mq_rq_to_pdu(req);
1914 if (cmd != req->special) {
1915 printk(KERN_CRIT "impossible request in %s.\n"
1916 "please mail a stack trace to "
1917 "linux-scsi@vger.kernel.org\n",
1918 __func__);
1919 blk_dump_rq_flags(req, "foo");
1920 BUG();
1921 }
1922
1923 /*
1924 * We hit this when the driver is using a host wide
1925 * tag map. For device level tag maps the queue_depth check
1926 * in the device ready fn would prevent us from trying
1927 * to allocate a tag. Since the map is a shared host resource
1928 * we add the dev to the starved list so it eventually gets
1929 * a run when a tag is freed.
1930 */
1931 if (blk_queue_tagged(q) && !(req->rq_flags & RQF_QUEUED)) {
1932 spin_lock_irq(shost->host_lock);
1933 if (list_empty(&sdev->starved_entry))
1934 list_add_tail(&sdev->starved_entry,
1935 &shost->starved_list);
1936 spin_unlock_irq(shost->host_lock);
1937 goto not_ready;
1938 }
1939
1940 if (!scsi_target_queue_ready(shost, sdev))
1941 goto not_ready;
1942
1943 if (!scsi_host_queue_ready(q, shost, sdev))
1944 goto host_not_ready;
1945
1946 if (sdev->simple_tags)
1947 cmd->flags |= SCMD_TAGGED;
1948 else
1949 cmd->flags &= ~SCMD_TAGGED;
1950
1951 /*
1952 * Finally, initialize any error handling parameters, and set up
1953 * the timers for timeouts.
1954 */
1955 scsi_init_cmd_errh(cmd);
1956
1957 /*
1958 * Dispatch the command to the low-level driver.
1959 */
1960 cmd->scsi_done = scsi_done;
1961 rtn = scsi_dispatch_cmd(cmd);
1962 if (rtn) {
1963 scsi_queue_insert(cmd, rtn);
1964 spin_lock_irq(q->queue_lock);
1965 goto out_delay;
1966 }
1967 spin_lock_irq(q->queue_lock);
1968 }
1969
1970 return;
1971
1972 host_not_ready:
1973 if (scsi_target(sdev)->can_queue > 0)
1974 atomic_dec(&scsi_target(sdev)->target_busy);
1975 not_ready:
1976 /*
1977 * lock q, handle tag, requeue req, and decrement device_busy. We
1978 * must return with queue_lock held.
1979 *
1980 * Decrementing device_busy without checking it is OK, as all such
1981 * cases (host limits or settings) should run the queue at some
1982 * later time.
1983 */
1984 spin_lock_irq(q->queue_lock);
1985 blk_requeue_request(q, req);
1986 atomic_dec(&sdev->device_busy);
1987out_delay:
1988 if (!atomic_read(&sdev->device_busy) && !scsi_device_blocked(sdev))
1989 blk_delay_queue(q, SCSI_QUEUE_DELAY);
1990}
1991
1992static inline blk_status_t prep_to_mq(int ret)
1993{
1994 switch (ret) {
1995 case BLKPREP_OK:
1996 return BLK_STS_OK;
1997 case BLKPREP_DEFER:
1998 return BLK_STS_RESOURCE;
1999 default:
2000 return BLK_STS_IOERR;
2001 }
2002}
2003
2004/* Size in bytes of the sg-list stored in the scsi-mq command-private data. */
2005static unsigned int scsi_mq_sgl_size(struct Scsi_Host *shost)
2006{
2007 return min_t(unsigned int, shost->sg_tablesize, SG_CHUNK_SIZE) *
2008 sizeof(struct scatterlist);
2009}
2010
2011static int scsi_mq_prep_fn(struct request *req)
2012{
2013 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
2014 struct scsi_device *sdev = req->q->queuedata;
2015 struct Scsi_Host *shost = sdev->host;
2016 struct scatterlist *sg;
2017
2018 scsi_init_command(sdev, cmd);
2019
2020 req->special = cmd;
2021
2022 cmd->request = req;
2023
2024 cmd->tag = req->tag;
2025 cmd->prot_op = SCSI_PROT_NORMAL;
2026
2027 sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
2028 cmd->sdb.table.sgl = sg;
2029
2030 if (scsi_host_get_prot(shost)) {
2031 memset(cmd->prot_sdb, 0, sizeof(struct scsi_data_buffer));
2032
2033 cmd->prot_sdb->table.sgl =
2034 (struct scatterlist *)(cmd->prot_sdb + 1);
2035 }
2036
2037 if (blk_bidi_rq(req)) {
2038 struct request *next_rq = req->next_rq;
2039 struct scsi_data_buffer *bidi_sdb = blk_mq_rq_to_pdu(next_rq);
2040
2041 memset(bidi_sdb, 0, sizeof(struct scsi_data_buffer));
2042 bidi_sdb->table.sgl =
2043 (struct scatterlist *)(bidi_sdb + 1);
2044
2045 next_rq->special = bidi_sdb;
2046 }
2047
2048 blk_mq_start_request(req);
2049
2050 return scsi_setup_cmnd(sdev, req);
2051}
2052
2053static void scsi_mq_done(struct scsi_cmnd *cmd)
2054{
2055 trace_scsi_dispatch_cmd_done(cmd);
2056 blk_mq_complete_request(cmd->request);
2057}
2058
2059static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx)
2060{
2061 struct request_queue *q = hctx->queue;
2062 struct scsi_device *sdev = q->queuedata;
2063
2064 atomic_dec(&sdev->device_busy);
2065 put_device(&sdev->sdev_gendev);
2066}
2067
2068static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
2069{
2070 struct request_queue *q = hctx->queue;
2071 struct scsi_device *sdev = q->queuedata;
2072
2073 if (!get_device(&sdev->sdev_gendev))
2074 goto out;
2075 if (!scsi_dev_queue_ready(q, sdev))
2076 goto out_put_device;
2077
2078 return true;
2079
2080out_put_device:
2081 put_device(&sdev->sdev_gendev);
2082out:
2083 if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev))
2084 blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
2085 return false;
2086}
2087
2088static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
2089 const struct blk_mq_queue_data *bd)
2090{
2091 struct request *req = bd->rq;
2092 struct request_queue *q = req->q;
2093 struct scsi_device *sdev = q->queuedata;
2094 struct Scsi_Host *shost = sdev->host;
2095 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
2096 blk_status_t ret;
2097 int reason;
2098
2099 ret = prep_to_mq(scsi_prep_state_check(sdev, req));
2100 if (ret != BLK_STS_OK)
2101 goto out_put_budget;
2102
2103 ret = BLK_STS_RESOURCE;
2104 if (!scsi_target_queue_ready(shost, sdev))
2105 goto out_put_budget;
2106 if (!scsi_host_queue_ready(q, shost, sdev))
2107 goto out_dec_target_busy;
2108
2109 if (!(req->rq_flags & RQF_DONTPREP)) {
2110 ret = prep_to_mq(scsi_mq_prep_fn(req));
2111 if (ret != BLK_STS_OK)
2112 goto out_dec_host_busy;
2113 req->rq_flags |= RQF_DONTPREP;
2114 } else {
2115 blk_mq_start_request(req);
2116 }
2117
2118 if (sdev->simple_tags)
2119 cmd->flags |= SCMD_TAGGED;
2120 else
2121 cmd->flags &= ~SCMD_TAGGED;
2122
2123 scsi_init_cmd_errh(cmd);
2124 cmd->scsi_done = scsi_mq_done;
2125
2126 reason = scsi_dispatch_cmd(cmd);
2127 if (reason) {
2128 scsi_set_blocked(cmd, reason);
2129 ret = BLK_STS_RESOURCE;
2130 goto out_dec_host_busy;
2131 }
2132
2133 return BLK_STS_OK;
2134
2135out_dec_host_busy:
2136 scsi_dec_host_busy(shost);
2137out_dec_target_busy:
2138 if (scsi_target(sdev)->can_queue > 0)
2139 atomic_dec(&scsi_target(sdev)->target_busy);
2140out_put_budget:
2141 scsi_mq_put_budget(hctx);
2142 switch (ret) {
2143 case BLK_STS_OK:
2144 break;
2145 case BLK_STS_RESOURCE:
2146 if (atomic_read(&sdev->device_busy) ||
2147 scsi_device_blocked(sdev))
2148 ret = BLK_STS_DEV_RESOURCE;
2149 break;
2150 default:
2151 /*
2152		 * Make sure to release all allocated resources when
2153 * we hit an error, as we will never see this command
2154 * again.
2155 */
2156 if (req->rq_flags & RQF_DONTPREP)
2157 scsi_mq_uninit_cmd(cmd);
2158 break;
2159 }
2160 return ret;
2161}
2162
2163static enum blk_eh_timer_return scsi_timeout(struct request *req,
2164 bool reserved)
2165{
2166 if (reserved)
2167 return BLK_EH_RESET_TIMER;
2168 return scsi_times_out(req);
2169}
2170
2171static int scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
2172 unsigned int hctx_idx, unsigned int numa_node)
2173{
2174 struct Scsi_Host *shost = set->driver_data;
2175 const bool unchecked_isa_dma = shost->unchecked_isa_dma;
2176 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
2177 struct scatterlist *sg;
2178
2179 if (unchecked_isa_dma)
2180 cmd->flags |= SCMD_UNCHECKED_ISA_DMA;
2181 cmd->sense_buffer = scsi_alloc_sense_buffer(unchecked_isa_dma,
2182 GFP_KERNEL, numa_node);
2183 if (!cmd->sense_buffer)
2184 return -ENOMEM;
2185 cmd->req.sense = cmd->sense_buffer;
2186
2187 if (scsi_host_get_prot(shost)) {
2188 sg = (void *)cmd + sizeof(struct scsi_cmnd) +
2189 shost->hostt->cmd_size;
2190 cmd->prot_sdb = (void *)sg + scsi_mq_sgl_size(shost);
2191 }
2192
2193 return 0;
2194}
2195
2196static void scsi_mq_exit_request(struct blk_mq_tag_set *set, struct request *rq,
2197 unsigned int hctx_idx)
2198{
2199 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
2200
2201 scsi_free_sense_buffer(cmd->flags & SCMD_UNCHECKED_ISA_DMA,
2202 cmd->sense_buffer);
2203}
2204
2205static int scsi_map_queues(struct blk_mq_tag_set *set)
2206{
2207 struct Scsi_Host *shost = container_of(set, struct Scsi_Host, tag_set);
2208
2209 if (shost->hostt->map_queues)
2210 return shost->hostt->map_queues(shost);
2211 return blk_mq_map_queues(set);
2212}
2213
2214void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
2215{
2216 struct device *dev = shost->dma_dev;
2217
2218 /*
2219 * this limit is imposed by hardware restrictions
2220 */
2221 blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize,
2222 SG_MAX_SEGMENTS));
2223
2224 if (scsi_host_prot_dma(shost)) {
2225 shost->sg_prot_tablesize =
2226 min_not_zero(shost->sg_prot_tablesize,
2227 (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS);
2228 BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize);
2229 blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize);
2230 }
2231
2232 blk_queue_max_hw_sectors(q, shost->max_sectors);
2233 if (shost->unchecked_isa_dma)
2234 blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);
2235 blk_queue_segment_boundary(q, shost->dma_boundary);
2236 dma_set_seg_boundary(dev, shost->dma_boundary);
2237
2238 blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
2239
2240 if (!shost->use_clustering)
2241 q->limits.cluster = 0;
2242
2243 /*
2244 * Set a reasonable default alignment: The larger of 32-byte (dword),
2245 * which is a common minimum for HBAs, and the minimum DMA alignment,
2246 * which is set by the platform.
2247 *
2248 * Devices that require a bigger alignment can increase it later.
2249 */
2250 blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1);
2251}
2252EXPORT_SYMBOL_GPL(__scsi_init_queue);
2253
2254static int scsi_old_init_rq(struct request_queue *q, struct request *rq,
2255 gfp_t gfp)
2256{
2257 struct Scsi_Host *shost = q->rq_alloc_data;
2258 const bool unchecked_isa_dma = shost->unchecked_isa_dma;
2259 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
2260
2261 memset(cmd, 0, sizeof(*cmd));
2262
2263 if (unchecked_isa_dma)
2264 cmd->flags |= SCMD_UNCHECKED_ISA_DMA;
2265 cmd->sense_buffer = scsi_alloc_sense_buffer(unchecked_isa_dma, gfp,
2266 NUMA_NO_NODE);
2267 if (!cmd->sense_buffer)
2268 goto fail;
2269 cmd->req.sense = cmd->sense_buffer;
2270
2271 if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) {
2272 cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp);
2273 if (!cmd->prot_sdb)
2274 goto fail_free_sense;
2275 }
2276
2277 return 0;
2278
2279fail_free_sense:
2280 scsi_free_sense_buffer(unchecked_isa_dma, cmd->sense_buffer);
2281fail:
2282 return -ENOMEM;
2283}
2284
2285static void scsi_old_exit_rq(struct request_queue *q, struct request *rq)
2286{
2287 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
2288
2289 if (cmd->prot_sdb)
2290 kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb);
2291 scsi_free_sense_buffer(cmd->flags & SCMD_UNCHECKED_ISA_DMA,
2292 cmd->sense_buffer);
2293}
2294
2295struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev)
2296{
2297 struct Scsi_Host *shost = sdev->host;
2298 struct request_queue *q;
2299
2300 q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
2301 if (!q)
2302 return NULL;
2303 q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
2304 q->rq_alloc_data = shost;
2305 q->request_fn = scsi_request_fn;
2306 q->init_rq_fn = scsi_old_init_rq;
2307 q->exit_rq_fn = scsi_old_exit_rq;
2308 q->initialize_rq_fn = scsi_initialize_rq;
2309
2310 if (blk_init_allocated_queue(q) < 0) {
2311 blk_cleanup_queue(q);
2312 return NULL;
2313 }
2314
2315 __scsi_init_queue(shost, q);
2316 blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
2317 blk_queue_prep_rq(q, scsi_prep_fn);
2318 blk_queue_unprep_rq(q, scsi_unprep_fn);
2319 blk_queue_softirq_done(q, scsi_softirq_done);
2320 blk_queue_rq_timed_out(q, scsi_times_out);
2321 blk_queue_lld_busy(q, scsi_lld_busy);
2322 return q;
2323}
2324
2325static const struct blk_mq_ops scsi_mq_ops = {
2326 .get_budget = scsi_mq_get_budget,
2327 .put_budget = scsi_mq_put_budget,
2328 .queue_rq = scsi_queue_rq,
2329 .complete = scsi_softirq_done,
2330 .timeout = scsi_timeout,
2331#ifdef CONFIG_BLK_DEBUG_FS
2332 .show_rq = scsi_show_rq,
2333#endif
2334 .init_request = scsi_mq_init_request,
2335 .exit_request = scsi_mq_exit_request,
2336 .initialize_rq_fn = scsi_initialize_rq,
2337 .map_queues = scsi_map_queues,
2338};
2339
2340struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
2341{
2342 sdev->request_queue = blk_mq_init_queue(&sdev->host->tag_set);
2343 if (IS_ERR(sdev->request_queue))
2344 return NULL;
2345
2346 sdev->request_queue->queuedata = sdev;
2347 __scsi_init_queue(sdev->host, sdev->request_queue);
2348 blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, sdev->request_queue);
2349 return sdev->request_queue;
2350}
2351
2352int scsi_mq_setup_tags(struct Scsi_Host *shost)
2353{
2354 unsigned int cmd_size, sgl_size;
2355
2356 sgl_size = scsi_mq_sgl_size(shost);
2357 cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size;
2358 if (scsi_host_get_prot(shost))
2359 cmd_size += sizeof(struct scsi_data_buffer) + sgl_size;
2360
2361 memset(&shost->tag_set, 0, sizeof(shost->tag_set));
2362 shost->tag_set.ops = &scsi_mq_ops;
2363 shost->tag_set.nr_hw_queues = shost->nr_hw_queues ? : 1;
2364 shost->tag_set.queue_depth = shost->can_queue;
2365 shost->tag_set.cmd_size = cmd_size;
2366 shost->tag_set.numa_node = NUMA_NO_NODE;
2367 shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
2368 shost->tag_set.flags |=
2369 BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
2370 shost->tag_set.driver_data = shost;
2371
2372 return blk_mq_alloc_tag_set(&shost->tag_set);
2373}
2374
2375void scsi_mq_destroy_tags(struct Scsi_Host *shost)
2376{
2377 blk_mq_free_tag_set(&shost->tag_set);
2378}
2379
2380/**
2381 * scsi_device_from_queue - return sdev associated with a request_queue
2382 * @q: The request queue to return the sdev from
2383 *
2384 * Return the sdev associated with a request queue or NULL if the
2385 * request_queue does not reference a SCSI device.
2386 */
2387struct scsi_device *scsi_device_from_queue(struct request_queue *q)
2388{
2389 struct scsi_device *sdev = NULL;
2390
2391 if (q->mq_ops) {
2392 if (q->mq_ops == &scsi_mq_ops)
2393 sdev = q->queuedata;
2394 } else if (q->request_fn == scsi_request_fn)
2395 sdev = q->queuedata;
2396 if (!sdev || !get_device(&sdev->sdev_gendev))
2397 sdev = NULL;
2398
2399 return sdev;
2400}
2401EXPORT_SYMBOL_GPL(scsi_device_from_queue);
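
/*
 * Illustrative sketch (not code from this file): a caller that resolves a
 * request_queue back to its scsi_device owns a device reference on success
 * and must drop it with put_device() once it is done with the sdev.
 *
 *	struct scsi_device *sdev = scsi_device_from_queue(q);
 *
 *	if (sdev) {
 *		... inspect or use sdev here ...
 *		put_device(&sdev->sdev_gendev);
 *	}
 */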
2402
2403/*
2404 * Function: scsi_block_requests()
2405 *
2406 * Purpose: Utility function used by low-level drivers to prevent further
2407 * commands from being queued to the device.
2408 *
2409 * Arguments: shost - Host in question
2410 *
2411 * Returns: Nothing
2412 *
2413 * Lock status: No locks are assumed held.
2414 *
2415 * Notes: There is no timer nor any other means by which the requests
2416 * get unblocked other than the low-level driver calling
2417 * scsi_unblock_requests().
2418 */
2419void scsi_block_requests(struct Scsi_Host *shost)
2420{
2421 shost->host_self_blocked = 1;
2422}
2423EXPORT_SYMBOL(scsi_block_requests);
2424
2425/*
2426 * Function: scsi_unblock_requests()
2427 *
2428 * Purpose: Utility function used by low-level drivers to allow further
2429 *		commands to be queued to the device.
2430 *
2431 * Arguments: shost - Host in question
2432 *
2433 * Returns: Nothing
2434 *
2435 * Lock status: No locks are assumed held.
2436 *
2437 * Notes: There is no timer nor any other means by which the requests
2438 * get unblocked other than the low-level driver calling
2439 * scsi_unblock_requests().
2440 *
2441 * This is done as an API function so that changes to the
2442 * internals of the scsi mid-layer won't require wholesale
2443 * changes to drivers that use this feature.
2444 */
2445void scsi_unblock_requests(struct Scsi_Host *shost)
2446{
2447 shost->host_self_blocked = 0;
2448 scsi_run_host_queues(shost);
2449}
2450EXPORT_SYMBOL(scsi_unblock_requests);
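
/*
 * Illustrative sketch (hypothetical LLDD code, not from this file;
 * my_hba_reset_hardware() is an invented driver-internal helper): the two
 * calls are expected to be paired by the low-level driver, typically around
 * an operation during which the host cannot accept new commands.
 *
 *	scsi_block_requests(shost);
 *	my_hba_reset_hardware(hba);
 *	scsi_unblock_requests(shost);
 */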
2451
2452int __init scsi_init_queue(void)
2453{
2454 scsi_sdb_cache = kmem_cache_create("scsi_data_buffer",
2455 sizeof(struct scsi_data_buffer),
2456 0, 0, NULL);
2457 if (!scsi_sdb_cache) {
2458 printk(KERN_ERR "SCSI: can't init scsi sdb cache\n");
2459 return -ENOMEM;
2460 }
2461
2462 return 0;
2463}
2464
2465void scsi_exit_queue(void)
2466{
2467 kmem_cache_destroy(scsi_sense_cache);
2468 kmem_cache_destroy(scsi_sense_isadma_cache);
2469 kmem_cache_destroy(scsi_sdb_cache);
2470}
2471
2472/**
2473 * scsi_mode_select - issue a mode select
2474 * @sdev: SCSI device to be queried
2475 * @pf: Page format bit (1 == standard, 0 == vendor specific)
2476 * @sp: Save page bit (0 == don't save, 1 == save)
2477 * @modepage: mode page being requested
2478 * @buffer: request buffer (may not be smaller than eight bytes)
2479 * @len: length of request buffer.
2480 * @timeout: command timeout
2481 * @retries: number of retries before failing
2482 * @data: returns a structure abstracting the mode header data
2483 * @sshdr: place to put sense data (or NULL if no sense to be collected).
2484 * must be SCSI_SENSE_BUFFERSIZE big.
2485 *
2486 * Returns zero if successful; negative error number or scsi
2487 * status on error
2488 *
2489 */
2490int
2491scsi_mode_select(struct scsi_device *sdev, int pf, int sp, int modepage,
2492 unsigned char *buffer, int len, int timeout, int retries,
2493 struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
2494{
2495 unsigned char cmd[10];
2496 unsigned char *real_buffer;
2497 int ret;
2498
2499 memset(cmd, 0, sizeof(cmd));
2500 cmd[1] = (pf ? 0x10 : 0) | (sp ? 0x01 : 0);
2501
2502 if (sdev->use_10_for_ms) {
2503 if (len > 65535)
2504 return -EINVAL;
2505 real_buffer = kmalloc(8 + len, GFP_KERNEL);
2506 if (!real_buffer)
2507 return -ENOMEM;
2508 memcpy(real_buffer + 8, buffer, len);
2509 len += 8;
2510 real_buffer[0] = 0;
2511 real_buffer[1] = 0;
2512 real_buffer[2] = data->medium_type;
2513 real_buffer[3] = data->device_specific;
2514 real_buffer[4] = data->longlba ? 0x01 : 0;
2515 real_buffer[5] = 0;
2516 real_buffer[6] = data->block_descriptor_length >> 8;
2517 real_buffer[7] = data->block_descriptor_length;
2518
2519 cmd[0] = MODE_SELECT_10;
2520 cmd[7] = len >> 8;
2521 cmd[8] = len;
2522 } else {
2523 if (len > 255 || data->block_descriptor_length > 255 ||
2524 data->longlba)
2525 return -EINVAL;
2526
2527 real_buffer = kmalloc(4 + len, GFP_KERNEL);
2528 if (!real_buffer)
2529 return -ENOMEM;
2530 memcpy(real_buffer + 4, buffer, len);
2531 len += 4;
2532 real_buffer[0] = 0;
2533 real_buffer[1] = data->medium_type;
2534 real_buffer[2] = data->device_specific;
2535 real_buffer[3] = data->block_descriptor_length;
2536
2537
2538 cmd[0] = MODE_SELECT;
2539 cmd[4] = len;
2540 }
2541
2542 ret = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, real_buffer, len,
2543 sshdr, timeout, retries, NULL);
2544 kfree(real_buffer);
2545 return ret;
2546}
2547EXPORT_SYMBOL_GPL(scsi_mode_select);
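
/*
 * Illustrative sketch (modelled on typical callers such as sd; not code from
 * this file): a MODE SELECT is usually built from the result of a prior
 * scsi_mode_sense() call, with @buffer pointing at the mode page itself,
 * i.e. past the header and any block descriptors described by @data.  The
 * example flips the WCE bit in the caching page (0x08).
 *
 *	struct scsi_mode_data data;
 *	struct scsi_sense_hdr sshdr;
 *	unsigned char buf[128], *page;
 *	int len, res;
 *
 *	res = scsi_mode_sense(sdev, 0x08, 0x08, buf, sizeof(buf),
 *			      30 * HZ, 3, &data, &sshdr);
 *	if (!scsi_status_is_good(res))
 *		return;
 *	page = buf + data.header_length + data.block_descriptor_length;
 *	len = data.length - data.header_length - data.block_descriptor_length;
 *	page[2] |= 0x04;
 *	data.device_specific = 0;
 *	res = scsi_mode_select(sdev, 1, 0, 0x08, page, len,
 *			       30 * HZ, 3, &data, &sshdr);
 */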
2548
2549/**
2550 * scsi_mode_sense - issue a mode sense, falling back from 10 to six bytes if necessary.
2551 * @sdev: SCSI device to be queried
2552 *	@dbd:	set the Disable Block Descriptors (DBD) bit if block
2552 *		descriptors should not be returned
2553 * @modepage: mode page being requested
2554 * @buffer: request buffer (may not be smaller than eight bytes)
2555 * @len: length of request buffer.
2556 * @timeout: command timeout
2557 * @retries: number of retries before failing
2558 * @data: returns a structure abstracting the mode header data
2559 * @sshdr: place to put sense data (or NULL if no sense to be collected).
2560 * must be SCSI_SENSE_BUFFERSIZE big.
2561 *
2562 *	Returns zero if successful, or a SCSI result value on failure.  The
2563 *	length of the mode parameter header (4 or 8 bytes, depending on whether
2564 *	a six- or ten-byte command was issued) is reported in @data->header_length.
2565 */
2566int
2567scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
2568 unsigned char *buffer, int len, int timeout, int retries,
2569 struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
2570{
2571 unsigned char cmd[12];
2572 int use_10_for_ms;
2573 int header_length;
2574 int result, retry_count = retries;
2575 struct scsi_sense_hdr my_sshdr;
2576
2577 memset(data, 0, sizeof(*data));
2578 memset(&cmd[0], 0, 12);
2579 cmd[1] = dbd & 0x18; /* allows DBD and LLBA bits */
2580 cmd[2] = modepage;
2581
2582 /* caller might not be interested in sense, but we need it */
2583 if (!sshdr)
2584 sshdr = &my_sshdr;
2585
2586 retry:
2587 use_10_for_ms = sdev->use_10_for_ms;
2588
2589 if (use_10_for_ms) {
2590 if (len < 8)
2591 len = 8;
2592
2593 cmd[0] = MODE_SENSE_10;
2594 cmd[8] = len;
2595 header_length = 8;
2596 } else {
2597 if (len < 4)
2598 len = 4;
2599
2600 cmd[0] = MODE_SENSE;
2601 cmd[4] = len;
2602 header_length = 4;
2603 }
2604
2605 memset(buffer, 0, len);
2606
2607 result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buffer, len,
2608 sshdr, timeout, retries, NULL);
2609
2610 /* This code looks awful: what it's doing is making sure an
2611 * ILLEGAL REQUEST sense return identifies the actual command
2612 * byte as the problem. MODE_SENSE commands can return
2613	 * ILLEGAL REQUEST if the requested page code isn't supported. */
2614
2615 if (use_10_for_ms && !scsi_status_is_good(result) &&
2616 driver_byte(result) == DRIVER_SENSE) {
2617 if (scsi_sense_valid(sshdr)) {
2618 if ((sshdr->sense_key == ILLEGAL_REQUEST) &&
2619 (sshdr->asc == 0x20) && (sshdr->ascq == 0)) {
2620 /*
2621 * Invalid command operation code
2622 */
2623 sdev->use_10_for_ms = 0;
2624 goto retry;
2625 }
2626 }
2627 }
2628
2629	if (scsi_status_is_good(result)) {
2630 if (unlikely(buffer[0] == 0x86 && buffer[1] == 0x0b &&
2631 (modepage == 6 || modepage == 8))) {
2632 /* Initio breakage? */
2633 header_length = 0;
2634 data->length = 13;
2635 data->medium_type = 0;
2636 data->device_specific = 0;
2637 data->longlba = 0;
2638 data->block_descriptor_length = 0;
2639		} else if (use_10_for_ms) {
2640 data->length = buffer[0]*256 + buffer[1] + 2;
2641 data->medium_type = buffer[2];
2642 data->device_specific = buffer[3];
2643 data->longlba = buffer[4] & 0x01;
2644 data->block_descriptor_length = buffer[6]*256
2645 + buffer[7];
2646 } else {
2647 data->length = buffer[0] + 1;
2648 data->medium_type = buffer[1];
2649 data->device_specific = buffer[2];
2650 data->block_descriptor_length = buffer[3];
2651 }
2652 data->header_length = header_length;
2653 } else if ((status_byte(result) == CHECK_CONDITION) &&
2654 scsi_sense_valid(sshdr) &&
2655 sshdr->sense_key == UNIT_ATTENTION && retry_count) {
2656 retry_count--;
2657 goto retry;
2658 }
2659
2660 return result;
2661}
2662EXPORT_SYMBOL(scsi_mode_sense);
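
/*
 * Illustrative sketch (not code from this file): reading the caching mode
 * page (0x08) and testing the WCE bit; the header and block-descriptor
 * lengths come back through @data, so the page itself starts at that offset.
 *
 *	struct scsi_mode_data data;
 *	struct scsi_sense_hdr sshdr;
 *	unsigned char buf[64];
 *	int res;
 *
 *	res = scsi_mode_sense(sdev, 0, 0x08, buf, sizeof(buf),
 *			      30 * HZ, 3, &data, &sshdr);
 *	if (scsi_status_is_good(res)) {
 *		unsigned char *page = buf + data.header_length +
 *				      data.block_descriptor_length;
 *		bool wce = page[2] & 0x04;
 *		...
 *	}
 */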
2663
2664/**
2665 * scsi_test_unit_ready - test if unit is ready
2666 *	@sdev:	scsi device to test.
2667 * @timeout: command timeout
2668 * @retries: number of retries before failing
2669 *	@sshdr: output pointer for decoded sense information.
2670 *
2671 *	Returns zero if successful, or an error code if the TUR failed.  For
2672 * removable media, UNIT_ATTENTION sets ->changed flag.
2673 **/
2674int
2675scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
2676 struct scsi_sense_hdr *sshdr)
2677{
2678 char cmd[] = {
2679 TEST_UNIT_READY, 0, 0, 0, 0, 0,
2680 };
2681 int result;
2682
2683 /* try to eat the UNIT_ATTENTION if there are enough retries */
2684 do {
2685 result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, sshdr,
2686 timeout, 1, NULL);
2687 if (sdev->removable && scsi_sense_valid(sshdr) &&
2688 sshdr->sense_key == UNIT_ATTENTION)
2689 sdev->changed = 1;
2690 } while (scsi_sense_valid(sshdr) &&
2691 sshdr->sense_key == UNIT_ATTENTION && --retries);
2692
2693 return result;
2694}
2695EXPORT_SYMBOL(scsi_test_unit_ready);
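
/*
 * Illustrative sketch (not code from this file): polling for readiness and
 * decoding the sense data on failure.
 *
 *	struct scsi_sense_hdr sshdr;
 *	int res = scsi_test_unit_ready(sdev, 30 * HZ, 3, &sshdr);
 *
 *	if (res && scsi_sense_valid(&sshdr) &&
 *	    sshdr.sense_key == NOT_READY)
 *		... medium not present or still spinning up, retry later ...
 */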
2696
2697/**
2698 * scsi_device_set_state - Take the given device through the device state model.
2699 * @sdev: scsi device to change the state of.
2700 * @state: state to change to.
2701 *
2702 * Returns zero if successful or an error if the requested
2703 * transition is illegal.
2704 */
2705int
2706scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
2707{
2708 enum scsi_device_state oldstate = sdev->sdev_state;
2709
2710 if (state == oldstate)
2711 return 0;
2712
2713 switch (state) {
2714 case SDEV_CREATED:
2715 switch (oldstate) {
2716 case SDEV_CREATED_BLOCK:
2717 break;
2718 default:
2719 goto illegal;
2720 }
2721 break;
2722
2723 case SDEV_RUNNING:
2724 switch (oldstate) {
2725 case SDEV_CREATED:
2726 case SDEV_OFFLINE:
2727 case SDEV_TRANSPORT_OFFLINE:
2728 case SDEV_QUIESCE:
2729 case SDEV_BLOCK:
2730 break;
2731 default:
2732 goto illegal;
2733 }
2734 break;
2735
2736 case SDEV_QUIESCE:
2737 switch (oldstate) {
2738 case SDEV_RUNNING:
2739 case SDEV_OFFLINE:
2740 case SDEV_TRANSPORT_OFFLINE:
2741 break;
2742 default:
2743 goto illegal;
2744 }
2745 break;
2746
2747 case SDEV_OFFLINE:
2748 case SDEV_TRANSPORT_OFFLINE:
2749 switch (oldstate) {
2750 case SDEV_CREATED:
2751 case SDEV_RUNNING:
2752 case SDEV_QUIESCE:
2753 case SDEV_BLOCK:
2754 break;
2755 default:
2756 goto illegal;
2757 }
2758 break;
2759
2760 case SDEV_BLOCK:
2761 switch (oldstate) {
2762 case SDEV_RUNNING:
2763 case SDEV_CREATED_BLOCK:
2764 break;
2765 default:
2766 goto illegal;
2767 }
2768 break;
2769
2770 case SDEV_CREATED_BLOCK:
2771 switch (oldstate) {
2772 case SDEV_CREATED:
2773 break;
2774 default:
2775 goto illegal;
2776 }
2777 break;
2778
2779 case SDEV_CANCEL:
2780 switch (oldstate) {
2781 case SDEV_CREATED:
2782 case SDEV_RUNNING:
2783 case SDEV_QUIESCE:
2784 case SDEV_OFFLINE:
2785 case SDEV_TRANSPORT_OFFLINE:
2786 break;
2787 default:
2788 goto illegal;
2789 }
2790 break;
2791
2792 case SDEV_DEL:
2793 switch (oldstate) {
2794 case SDEV_CREATED:
2795 case SDEV_RUNNING:
2796 case SDEV_OFFLINE:
2797 case SDEV_TRANSPORT_OFFLINE:
2798 case SDEV_CANCEL:
2799 case SDEV_BLOCK:
2800 case SDEV_CREATED_BLOCK:
2801 break;
2802 default:
2803 goto illegal;
2804 }
2805 break;
2806
2807 }
2808 sdev->sdev_state = state;
2809 return 0;
2810
2811 illegal:
2812 SCSI_LOG_ERROR_RECOVERY(1,
2813 sdev_printk(KERN_ERR, sdev,
2814 "Illegal state transition %s->%s",
2815 scsi_device_state_name(oldstate),
2816 scsi_device_state_name(state))
2817 );
2818 return -EINVAL;
2819}
2820EXPORT_SYMBOL(scsi_device_set_state);
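
/*
 * Illustrative sketch (mirrors how this file drives the state machine; not a
 * new code path): state changes are normally made under sdev->state_mutex so
 * they cannot race with other transitions.
 *
 *	mutex_lock(&sdev->state_mutex);
 *	err = scsi_device_set_state(sdev, SDEV_RUNNING);
 *	mutex_unlock(&sdev->state_mutex);
 *	if (err)
 *		... the transition was illegal for the current state ...
 */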
2821
2822/**
2823 * scsi_evt_emit - emit a single SCSI device uevent
2824 * @sdev: associated SCSI device
2825 * @evt: event to emit
2826 *
2827 * Send a single uevent (scsi_event) to the associated scsi_device.
2828 */
2829static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt)
2830{
2831 int idx = 0;
2832 char *envp[3];
2833
2834 switch (evt->evt_type) {
2835 case SDEV_EVT_MEDIA_CHANGE:
2836 envp[idx++] = "SDEV_MEDIA_CHANGE=1";
2837 break;
2838 case SDEV_EVT_INQUIRY_CHANGE_REPORTED:
2839 scsi_rescan_device(&sdev->sdev_gendev);
2840 envp[idx++] = "SDEV_UA=INQUIRY_DATA_HAS_CHANGED";
2841 break;
2842 case SDEV_EVT_CAPACITY_CHANGE_REPORTED:
2843 envp[idx++] = "SDEV_UA=CAPACITY_DATA_HAS_CHANGED";
2844 break;
2845 case SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED:
2846 envp[idx++] = "SDEV_UA=THIN_PROVISIONING_SOFT_THRESHOLD_REACHED";
2847 break;
2848 case SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED:
2849 envp[idx++] = "SDEV_UA=MODE_PARAMETERS_CHANGED";
2850 break;
2851 case SDEV_EVT_LUN_CHANGE_REPORTED:
2852 envp[idx++] = "SDEV_UA=REPORTED_LUNS_DATA_HAS_CHANGED";
2853 break;
2854 case SDEV_EVT_ALUA_STATE_CHANGE_REPORTED:
2855 envp[idx++] = "SDEV_UA=ASYMMETRIC_ACCESS_STATE_CHANGED";
2856 break;
2857 case SDEV_EVT_POWER_ON_RESET_OCCURRED:
2858 envp[idx++] = "SDEV_UA=POWER_ON_RESET_OCCURRED";
2859 break;
2860 default:
2861 /* do nothing */
2862 break;
2863 }
2864
2865 envp[idx++] = NULL;
2866
2867 kobject_uevent_env(&sdev->sdev_gendev.kobj, KOBJ_CHANGE, envp);
2868}
2869
2870/**
2871 * scsi_evt_thread - send a uevent for each scsi event
2872 * @work: work struct for scsi_device
2873 *
2874 * Dispatch queued events to their associated scsi_device kobjects
2875 * as uevents.
2876 */
2877void scsi_evt_thread(struct work_struct *work)
2878{
2879 struct scsi_device *sdev;
2880 enum scsi_device_event evt_type;
2881 LIST_HEAD(event_list);
2882
2883 sdev = container_of(work, struct scsi_device, event_work);
2884
2885 for (evt_type = SDEV_EVT_FIRST; evt_type <= SDEV_EVT_LAST; evt_type++)
2886 if (test_and_clear_bit(evt_type, sdev->pending_events))
2887 sdev_evt_send_simple(sdev, evt_type, GFP_KERNEL);
2888
2889 while (1) {
2890 struct scsi_event *evt;
2891 struct list_head *this, *tmp;
2892 unsigned long flags;
2893
2894 spin_lock_irqsave(&sdev->list_lock, flags);
2895 list_splice_init(&sdev->event_list, &event_list);
2896 spin_unlock_irqrestore(&sdev->list_lock, flags);
2897
2898 if (list_empty(&event_list))
2899 break;
2900
2901 list_for_each_safe(this, tmp, &event_list) {
2902 evt = list_entry(this, struct scsi_event, node);
2903 list_del(&evt->node);
2904 scsi_evt_emit(sdev, evt);
2905 kfree(evt);
2906 }
2907 }
2908}
2909
2910/**
2911 * sdev_evt_send - send asserted event to uevent thread
2912 * @sdev: scsi_device event occurred on
2913 * @evt: event to send
2914 *
2915 * Assert scsi device event asynchronously.
2916 */
2917void sdev_evt_send(struct scsi_device *sdev, struct scsi_event *evt)
2918{
2919 unsigned long flags;
2920
2921#if 0
2922 /* FIXME: currently this check eliminates all media change events
2923 * for polled devices. Need to update to discriminate between AN
2924 * and polled events */
2925 if (!test_bit(evt->evt_type, sdev->supported_events)) {
2926 kfree(evt);
2927 return;
2928 }
2929#endif
2930
2931 spin_lock_irqsave(&sdev->list_lock, flags);
2932 list_add_tail(&evt->node, &sdev->event_list);
2933 schedule_work(&sdev->event_work);
2934 spin_unlock_irqrestore(&sdev->list_lock, flags);
2935}
2936EXPORT_SYMBOL_GPL(sdev_evt_send);
2937
2938/**
2939 * sdev_evt_alloc - allocate a new scsi event
2940 * @evt_type: type of event to allocate
2941 * @gfpflags: GFP flags for allocation
2942 *
2943 * Allocates and returns a new scsi_event.
2944 */
2945struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type,
2946 gfp_t gfpflags)
2947{
2948 struct scsi_event *evt = kzalloc(sizeof(struct scsi_event), gfpflags);
2949 if (!evt)
2950 return NULL;
2951
2952 evt->evt_type = evt_type;
2953 INIT_LIST_HEAD(&evt->node);
2954
2955 /* evt_type-specific initialization, if any */
2956 switch (evt_type) {
2957 case SDEV_EVT_MEDIA_CHANGE:
2958 case SDEV_EVT_INQUIRY_CHANGE_REPORTED:
2959 case SDEV_EVT_CAPACITY_CHANGE_REPORTED:
2960 case SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED:
2961 case SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED:
2962 case SDEV_EVT_LUN_CHANGE_REPORTED:
2963 case SDEV_EVT_ALUA_STATE_CHANGE_REPORTED:
2964 case SDEV_EVT_POWER_ON_RESET_OCCURRED:
2965 default:
2966 /* do nothing */
2967 break;
2968 }
2969
2970 return evt;
2971}
2972EXPORT_SYMBOL_GPL(sdev_evt_alloc);
2973
2974/**
2975 * sdev_evt_send_simple - send asserted event to uevent thread
2976 * @sdev: scsi_device event occurred on
2977 * @evt_type: type of event to send
2978 * @gfpflags: GFP flags for allocation
2979 *
2980 * Assert scsi device event asynchronously, given an event type.
2981 */
2982void sdev_evt_send_simple(struct scsi_device *sdev,
2983 enum scsi_device_event evt_type, gfp_t gfpflags)
2984{
2985 struct scsi_event *evt = sdev_evt_alloc(evt_type, gfpflags);
2986 if (!evt) {
2987 sdev_printk(KERN_ERR, sdev, "event %d eaten due to OOM\n",
2988 evt_type);
2989 return;
2990 }
2991
2992 sdev_evt_send(sdev, evt);
2993}
2994EXPORT_SYMBOL_GPL(sdev_evt_send_simple);
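
/*
 * Illustrative sketch (not code from this file): an LLDD or transport class
 * that has detected a unit attention condition can push the corresponding
 * event with a single call; allocation and queueing are handled internally.
 *
 *	sdev_evt_send_simple(sdev, SDEV_EVT_MEDIA_CHANGE, GFP_ATOMIC);
 */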
2995
2996/**
2997 * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn()
2998 * @sdev: SCSI device to count the number of scsi_request_fn() callers for.
2999 */
3000static int scsi_request_fn_active(struct scsi_device *sdev)
3001{
3002 struct request_queue *q = sdev->request_queue;
3003 int request_fn_active;
3004
3005 WARN_ON_ONCE(sdev->host->use_blk_mq);
3006
3007 spin_lock_irq(q->queue_lock);
3008 request_fn_active = q->request_fn_active;
3009 spin_unlock_irq(q->queue_lock);
3010
3011 return request_fn_active;
3012}
3013
3014/**
3015 * scsi_wait_for_queuecommand() - wait for ongoing queuecommand() calls
3016 * @sdev: SCSI device pointer.
3017 *
3018 * Wait until the ongoing shost->hostt->queuecommand() calls that are
3019 * invoked from scsi_request_fn() have finished.
3020 */
3021static void scsi_wait_for_queuecommand(struct scsi_device *sdev)
3022{
3023 WARN_ON_ONCE(sdev->host->use_blk_mq);
3024
3025 while (scsi_request_fn_active(sdev))
3026 msleep(20);
3027}
3028
3029/**
3030 * scsi_device_quiesce - Block user issued commands.
3031 * @sdev: scsi device to quiesce.
3032 *
3033 * This works by trying to transition to the SDEV_QUIESCE state
3034 * (which must be a legal transition). When the device is in this
3035 * state, only special requests will be accepted, all others will
3036 * be deferred. Since special requests may also be requeued requests,
3037 * a successful return doesn't guarantee the device will be
3038 * totally quiescent.
3039 *
3040 * Must be called with user context, may sleep.
3041 *
3042 *	Returns zero if successful or an error if not.
3043 */
3044int
3045scsi_device_quiesce(struct scsi_device *sdev)
3046{
3047 struct request_queue *q = sdev->request_queue;
3048 int err;
3049
3050 /*
3051 * It is allowed to call scsi_device_quiesce() multiple times from
3052 * the same context but concurrent scsi_device_quiesce() calls are
3053 * not allowed.
3054 */
3055 WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);
3056
3057 blk_set_preempt_only(q);
3058
3059 blk_mq_freeze_queue(q);
3060 /*
3061 * Ensure that the effect of blk_set_preempt_only() will be visible
3062 * for percpu_ref_tryget() callers that occur after the queue
3063 * unfreeze even if the queue was already frozen before this function
3064 * was called. See also https://lwn.net/Articles/573497/.
3065 */
3066 synchronize_rcu();
3067 blk_mq_unfreeze_queue(q);
3068
3069 mutex_lock(&sdev->state_mutex);
3070 err = scsi_device_set_state(sdev, SDEV_QUIESCE);
3071 if (err == 0)
3072 sdev->quiesced_by = current;
3073 else
3074 blk_clear_preempt_only(q);
3075 mutex_unlock(&sdev->state_mutex);
3076
3077 return err;
3078}
3079EXPORT_SYMBOL(scsi_device_quiesce);
3080
3081/**
3082 * scsi_device_resume - Restart user issued commands to a quiesced device.
3083 * @sdev: scsi device to resume.
3084 *
3085 * Moves the device from quiesced back to running and restarts the
3086 * queues.
3087 *
3088 * Must be called with user context, may sleep.
3089 */
3090void scsi_device_resume(struct scsi_device *sdev)
3091{
3092 /* check if the device state was mutated prior to resume, and if
3093 * so assume the state is being managed elsewhere (for example
3094 * device deleted during suspend)
3095 */
3096 mutex_lock(&sdev->state_mutex);
3097 WARN_ON_ONCE(!sdev->quiesced_by);
3098 sdev->quiesced_by = NULL;
3099 blk_clear_preempt_only(sdev->request_queue);
3100 if (sdev->sdev_state == SDEV_QUIESCE)
3101 scsi_device_set_state(sdev, SDEV_RUNNING);
3102 mutex_unlock(&sdev->state_mutex);
3103}
3104EXPORT_SYMBOL(scsi_device_resume);
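
/*
 * Illustrative sketch (the usual suspend-style pairing; not code from this
 * file): a quiesced device must be resumed by the same context once the
 * special requests it was quiesced for have been issued.
 *
 *	if (scsi_device_quiesce(sdev) == 0) {
 *		... issue power-management or other special requests ...
 *		scsi_device_resume(sdev);
 *	}
 */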
3105
3106static void
3107device_quiesce_fn(struct scsi_device *sdev, void *data)
3108{
3109 scsi_device_quiesce(sdev);
3110}
3111
3112void
3113scsi_target_quiesce(struct scsi_target *starget)
3114{
3115 starget_for_each_device(starget, NULL, device_quiesce_fn);
3116}
3117EXPORT_SYMBOL(scsi_target_quiesce);
3118
3119static void
3120device_resume_fn(struct scsi_device *sdev, void *data)
3121{
3122 scsi_device_resume(sdev);
3123}
3124
3125void
3126scsi_target_resume(struct scsi_target *starget)
3127{
3128 starget_for_each_device(starget, NULL, device_resume_fn);
3129}
3130EXPORT_SYMBOL(scsi_target_resume);
3131
3132/**
3133 * scsi_internal_device_block_nowait - try to transition to the SDEV_BLOCK state
3134 * @sdev: device to block
3135 *
3136 * Pause SCSI command processing on the specified device. Does not sleep.
3137 *
3138 * Returns zero if successful or a negative error code upon failure.
3139 *
3140 * Notes:
3141 * This routine transitions the device to the SDEV_BLOCK state (which must be
3142 * a legal transition). When the device is in this state, command processing
3143 * is paused until the device leaves the SDEV_BLOCK state. See also
3144 * scsi_internal_device_unblock_nowait().
3145 */
3146int scsi_internal_device_block_nowait(struct scsi_device *sdev)
3147{
3148 struct request_queue *q = sdev->request_queue;
3149 unsigned long flags;
3150 int err = 0;
3151
3152 err = scsi_device_set_state(sdev, SDEV_BLOCK);
3153 if (err) {
3154 err = scsi_device_set_state(sdev, SDEV_CREATED_BLOCK);
3155
3156 if (err)
3157 return err;
3158 }
3159
3160 /*
3161 * The device has transitioned to SDEV_BLOCK. Stop the
3162 * block layer from calling the midlayer with this device's
3163 * request queue.
3164 */
3165 if (q->mq_ops) {
3166 blk_mq_quiesce_queue_nowait(q);
3167 } else {
3168 spin_lock_irqsave(q->queue_lock, flags);
3169 blk_stop_queue(q);
3170 spin_unlock_irqrestore(q->queue_lock, flags);
3171 }
3172
3173 return 0;
3174}
3175EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait);
3176
3177/**
3178 * scsi_internal_device_block - try to transition to the SDEV_BLOCK state
3179 * @sdev: device to block
3180 *
3181 * Pause SCSI command processing on the specified device and wait until all
3182 * ongoing scsi_request_fn() / scsi_queue_rq() calls have finished. May sleep.
3183 *
3184 * Returns zero if successful or a negative error code upon failure.
3185 *
3186 * Note:
3187 * This routine transitions the device to the SDEV_BLOCK state (which must be
3188 * a legal transition). When the device is in this state, command processing
3189 * is paused until the device leaves the SDEV_BLOCK state. See also
3190 * scsi_internal_device_unblock().
3191 *
3192 * To do: avoid that scsi_send_eh_cmnd() calls queuecommand() after
3193 * scsi_internal_device_block() has blocked a SCSI device and also
3194 * remove the rport mutex lock and unlock calls from srp_queuecommand().
3195 */
3196static int scsi_internal_device_block(struct scsi_device *sdev)
3197{
3198 struct request_queue *q = sdev->request_queue;
3199 int err;
3200
3201 mutex_lock(&sdev->state_mutex);
3202 err = scsi_internal_device_block_nowait(sdev);
3203 if (err == 0) {
3204 if (q->mq_ops)
3205 blk_mq_quiesce_queue(q);
3206 else
3207 scsi_wait_for_queuecommand(sdev);
3208 }
3209 mutex_unlock(&sdev->state_mutex);
3210
3211 return err;
3212}
3213
3214void scsi_start_queue(struct scsi_device *sdev)
3215{
3216 struct request_queue *q = sdev->request_queue;
3217 unsigned long flags;
3218
3219 if (q->mq_ops) {
3220 blk_mq_unquiesce_queue(q);
3221 } else {
3222 spin_lock_irqsave(q->queue_lock, flags);
3223 blk_start_queue(q);
3224 spin_unlock_irqrestore(q->queue_lock, flags);
3225 }
3226}
3227
3228/**
3229 * scsi_internal_device_unblock_nowait - resume a device after a block request
3230 * @sdev: device to resume
3231 * @new_state: state to set the device to after unblocking
3232 *
3233 * Restart the device queue for a previously suspended SCSI device. Does not
3234 * sleep.
3235 *
3236 * Returns zero if successful or a negative error code upon failure.
3237 *
3238 * Notes:
3239 * This routine transitions the device to the SDEV_RUNNING state or to one of
3240 * the offline states (which must be a legal transition) allowing the midlayer
3241 * to goose the queue for this device.
3242 */
3243int scsi_internal_device_unblock_nowait(struct scsi_device *sdev,
3244 enum scsi_device_state new_state)
3245{
3246 /*
3247 * Try to transition the scsi device to SDEV_RUNNING or one of the
3248 * offlined states and goose the device queue if successful.
3249 */
3250 switch (sdev->sdev_state) {
3251 case SDEV_BLOCK:
3252 case SDEV_TRANSPORT_OFFLINE:
3253 sdev->sdev_state = new_state;
3254 break;
3255 case SDEV_CREATED_BLOCK:
3256 if (new_state == SDEV_TRANSPORT_OFFLINE ||
3257 new_state == SDEV_OFFLINE)
3258 sdev->sdev_state = new_state;
3259 else
3260 sdev->sdev_state = SDEV_CREATED;
3261 break;
3262 case SDEV_CANCEL:
3263 case SDEV_OFFLINE:
3264 break;
3265 default:
3266 return -EINVAL;
3267 }
3268 scsi_start_queue(sdev);
3269
3270 return 0;
3271}
3272EXPORT_SYMBOL_GPL(scsi_internal_device_unblock_nowait);
3273
3274/**
3275 * scsi_internal_device_unblock - resume a device after a block request
3276 * @sdev: device to resume
3277 * @new_state: state to set the device to after unblocking
3278 *
3279 * Restart the device queue for a previously suspended SCSI device. May sleep.
3280 *
3281 * Returns zero if successful or a negative error code upon failure.
3282 *
3283 * Notes:
3284 * This routine transitions the device to the SDEV_RUNNING state or to one of
3285 * the offline states (which must be a legal transition) allowing the midlayer
3286 * to goose the queue for this device.
3287 */
3288static int scsi_internal_device_unblock(struct scsi_device *sdev,
3289 enum scsi_device_state new_state)
3290{
3291 int ret;
3292
3293 mutex_lock(&sdev->state_mutex);
3294 ret = scsi_internal_device_unblock_nowait(sdev, new_state);
3295 mutex_unlock(&sdev->state_mutex);
3296
3297 return ret;
3298}
3299
3300static void
3301device_block(struct scsi_device *sdev, void *data)
3302{
3303 scsi_internal_device_block(sdev);
3304}
3305
3306static int
3307target_block(struct device *dev, void *data)
3308{
3309 if (scsi_is_target_device(dev))
3310 starget_for_each_device(to_scsi_target(dev), NULL,
3311 device_block);
3312 return 0;
3313}
3314
3315void
3316scsi_target_block(struct device *dev)
3317{
3318 if (scsi_is_target_device(dev))
3319 starget_for_each_device(to_scsi_target(dev), NULL,
3320 device_block);
3321 else
3322 device_for_each_child(dev, NULL, target_block);
3323}
3324EXPORT_SYMBOL_GPL(scsi_target_block);
3325
3326static void
3327device_unblock(struct scsi_device *sdev, void *data)
3328{
3329 scsi_internal_device_unblock(sdev, *(enum scsi_device_state *)data);
3330}
3331
3332static int
3333target_unblock(struct device *dev, void *data)
3334{
3335 if (scsi_is_target_device(dev))
3336 starget_for_each_device(to_scsi_target(dev), data,
3337 device_unblock);
3338 return 0;
3339}
3340
3341void
3342scsi_target_unblock(struct device *dev, enum scsi_device_state new_state)
3343{
3344 if (scsi_is_target_device(dev))
3345 starget_for_each_device(to_scsi_target(dev), &new_state,
3346 device_unblock);
3347 else
3348 device_for_each_child(dev, &new_state, target_unblock);
3349}
3350EXPORT_SYMBOL_GPL(scsi_target_unblock);
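
/*
 * Illustrative sketch (typical transport-class usage; not code from this
 * file): a transport blocks a target while its link is down and unblocks it
 * again once it returns, choosing the state the devices should end up in.
 *
 *	scsi_target_block(&starget->dev);
 *	... wait for the transport / remote port to recover ...
 *	scsi_target_unblock(&starget->dev, SDEV_RUNNING);
 */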
3351
3352/**
3353 * scsi_kmap_atomic_sg - find and atomically map an sg element
3354 * @sgl: scatter-gather list
3355 * @sg_count: number of segments in sg
3356 * @offset: offset in bytes into sg, on return offset into the mapped area
3357 * @len: bytes to map, on return number of bytes mapped
3358 *
3359 * Returns virtual address of the start of the mapped page
3360 */
3361void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count,
3362 size_t *offset, size_t *len)
3363{
3364 int i;
3365 size_t sg_len = 0, len_complete = 0;
3366 struct scatterlist *sg;
3367 struct page *page;
3368
3369 WARN_ON(!irqs_disabled());
3370
3371 for_each_sg(sgl, sg, sg_count, i) {
3372 len_complete = sg_len; /* Complete sg-entries */
3373 sg_len += sg->length;
3374 if (sg_len > *offset)
3375 break;
3376 }
3377
3378 if (unlikely(i == sg_count)) {
3379 printk(KERN_ERR "%s: Bytes in sg: %zu, requested offset %zu, "
3380 "elements %d\n",
3381 __func__, sg_len, *offset, sg_count);
3382 WARN_ON(1);
3383 return NULL;
3384 }
3385
3386 /* Offset starting from the beginning of first page in this sg-entry */
3387 *offset = *offset - len_complete + sg->offset;
3388
3389 /* Assumption: contiguous pages can be accessed as "page + i" */
3390 page = nth_page(sg_page(sg), (*offset >> PAGE_SHIFT));
3391 *offset &= ~PAGE_MASK;
3392
3393 /* Bytes in this sg-entry from *offset to the end of the page */
3394 sg_len = PAGE_SIZE - *offset;
3395 if (*len > sg_len)
3396 *len = sg_len;
3397
3398 return kmap_atomic(page);
3399}
3400EXPORT_SYMBOL(scsi_kmap_atomic_sg);
3401
3402/**
3403 * scsi_kunmap_atomic_sg - atomically unmap a virtual address, previously mapped with scsi_kmap_atomic_sg
3404 * @virt: virtual address to be unmapped
3405 */
3406void scsi_kunmap_atomic_sg(void *virt)
3407{
3408 kunmap_atomic(virt);
3409}
3410EXPORT_SYMBOL(scsi_kunmap_atomic_sg);
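
/*
 * Illustrative sketch (not code from this file): the map/unmap pair is used
 * to peek at a few bytes of a command's data buffer.  @offset and @len are
 * both input and output parameters, and the caller is expected to run with
 * interrupts disabled (the mapping routine warns otherwise).
 *
 *	size_t offset = 0, len = 8;
 *	void *vaddr = scsi_kmap_atomic_sg(scsi_sglist(cmd),
 *					  scsi_sg_count(cmd), &offset, &len);
 *
 *	if (vaddr) {
 *		... read at most len bytes starting at vaddr + offset ...
 *		scsi_kunmap_atomic_sg(vaddr);
 *	}
 */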
3411
3412void sdev_disable_disk_events(struct scsi_device *sdev)
3413{
3414 atomic_inc(&sdev->disk_events_disable_depth);
3415}
3416EXPORT_SYMBOL(sdev_disable_disk_events);
3417
3418void sdev_enable_disk_events(struct scsi_device *sdev)
3419{
3420 if (WARN_ON_ONCE(atomic_read(&sdev->disk_events_disable_depth) <= 0))
3421 return;
3422 atomic_dec(&sdev->disk_events_disable_depth);
3423}
3424EXPORT_SYMBOL(sdev_enable_disk_events);
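
/*
 * Illustrative sketch (not code from this file): the disable/enable calls
 * nest, so a caller that temporarily suppresses disk events simply brackets
 * the critical region.
 *
 *	sdev_disable_disk_events(sdev);
 *	... perform an operation that would otherwise trigger spurious
 *	    media-change polling ...
 *	sdev_enable_disk_events(sdev);
 */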
3425
3426/**
3427 * scsi_vpd_lun_id - return a unique device identification
3428 * @sdev: SCSI device
3429 * @id: buffer for the identification
3430 * @id_len: length of the buffer
3431 *
3432 * Copies a unique device identification into @id based
3433 * on the information in the VPD page 0x83 of the device.
3434 * The string will be formatted as a SCSI name string.
3435 *
3436 * Returns the length of the identification or error on failure.
3437 * If the identifier is longer than the supplied buffer the actual
3438 * identifier length is returned and the buffer is not zero-padded.
3439 */
3440int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len)
3441{
3442 u8 cur_id_type = 0xff;
3443 u8 cur_id_size = 0;
3444 const unsigned char *d, *cur_id_str;
3445 const struct scsi_vpd *vpd_pg83;
3446 int id_size = -EINVAL;
3447
3448 rcu_read_lock();
3449 vpd_pg83 = rcu_dereference(sdev->vpd_pg83);
3450 if (!vpd_pg83) {
3451 rcu_read_unlock();
3452 return -ENXIO;
3453 }
3454
3455 /*
3456 * Look for the correct descriptor.
3457 * Order of preference for lun descriptor:
3458 * - SCSI name string
3459 * - NAA IEEE Registered Extended
3460 * - EUI-64 based 16-byte
3461 * - EUI-64 based 12-byte
3462 * - NAA IEEE Registered
3463 * - NAA IEEE Extended
3464 * - T10 Vendor ID
3465	 * as longer descriptors reduce the likelihood
3466 * of identification clashes.
3467 */
3468
3469 /* The id string must be at least 20 bytes + terminating NULL byte */
3470 if (id_len < 21) {
3471 rcu_read_unlock();
3472 return -EINVAL;
3473 }
3474
3475 memset(id, 0, id_len);
3476 d = vpd_pg83->data + 4;
3477 while (d < vpd_pg83->data + vpd_pg83->len) {
3478 /* Skip designators not referring to the LUN */
3479 if ((d[1] & 0x30) != 0x00)
3480 goto next_desig;
3481
3482 switch (d[1] & 0xf) {
3483 case 0x1:
3484 /* T10 Vendor ID */
3485 if (cur_id_size > d[3])
3486 break;
3487 /* Prefer anything */
3488 if (cur_id_type > 0x01 && cur_id_type != 0xff)
3489 break;
3490 cur_id_size = d[3];
3491 if (cur_id_size + 4 > id_len)
3492 cur_id_size = id_len - 4;
3493 cur_id_str = d + 4;
3494 cur_id_type = d[1] & 0xf;
3495 id_size = snprintf(id, id_len, "t10.%*pE",
3496 cur_id_size, cur_id_str);
3497 break;
3498 case 0x2:
3499 /* EUI-64 */
3500 if (cur_id_size > d[3])
3501 break;
3502 /* Prefer NAA IEEE Registered Extended */
3503 if (cur_id_type == 0x3 &&
3504 cur_id_size == d[3])
3505 break;
3506 cur_id_size = d[3];
3507 cur_id_str = d + 4;
3508 cur_id_type = d[1] & 0xf;
3509 switch (cur_id_size) {
3510 case 8:
3511 id_size = snprintf(id, id_len,
3512 "eui.%8phN",
3513 cur_id_str);
3514 break;
3515 case 12:
3516 id_size = snprintf(id, id_len,
3517 "eui.%12phN",
3518 cur_id_str);
3519 break;
3520 case 16:
3521 id_size = snprintf(id, id_len,
3522 "eui.%16phN",
3523 cur_id_str);
3524 break;
3525 default:
3526 cur_id_size = 0;
3527 break;
3528 }
3529 break;
3530 case 0x3:
3531 /* NAA */
3532 if (cur_id_size > d[3])
3533 break;
3534 cur_id_size = d[3];
3535 cur_id_str = d + 4;
3536 cur_id_type = d[1] & 0xf;
3537 switch (cur_id_size) {
3538 case 8:
3539 id_size = snprintf(id, id_len,
3540 "naa.%8phN",
3541 cur_id_str);
3542 break;
3543 case 16:
3544 id_size = snprintf(id, id_len,
3545 "naa.%16phN",
3546 cur_id_str);
3547 break;
3548 default:
3549 cur_id_size = 0;
3550 break;
3551 }
3552 break;
3553 case 0x8:
3554 /* SCSI name string */
3555 if (cur_id_size + 4 > d[3])
3556 break;
3557 /* Prefer others for truncated descriptor */
3558 if (cur_id_size && d[3] > id_len)
3559 break;
3560 cur_id_size = id_size = d[3];
3561 cur_id_str = d + 4;
3562 cur_id_type = d[1] & 0xf;
3563 if (cur_id_size >= id_len)
3564 cur_id_size = id_len - 1;
3565 memcpy(id, cur_id_str, cur_id_size);
3566 /* Decrease priority for truncated descriptor */
3567 if (cur_id_size != id_size)
3568 cur_id_size = 6;
3569 break;
3570 default:
3571 break;
3572 }
3573next_desig:
3574 d += d[3] + 4;
3575 }
3576 rcu_read_unlock();
3577
3578 return id_size;
3579}
3580EXPORT_SYMBOL(scsi_vpd_lun_id);
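
/*
 * Illustrative sketch (not code from this file): callers typically supply a
 * buffer comfortably larger than the 21-byte minimum and treat a return
 * value that is not smaller than the buffer as a truncated identifier.
 *
 *	char wwid[64];
 *	int len = scsi_vpd_lun_id(sdev, wwid, sizeof(wwid));
 *
 *	if (len > 0 && len < sizeof(wwid))
 *		... wwid now holds a SCSI name string such as "naa.6..." ...
 */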
3581
3582/*
3583 * scsi_vpd_tpg_id - return a target port group identifier
3584 * @sdev: SCSI device
3585 *
3586 * Returns the Target Port Group identifier from the information
3587 * in VPD page 0x83 of the device.
3588 *
3589 * Returns the identifier or error on failure.
3590 */
3591int scsi_vpd_tpg_id(struct scsi_device *sdev, int *rel_id)
3592{
3593 const unsigned char *d;
3594 const struct scsi_vpd *vpd_pg83;
3595 int group_id = -EAGAIN, rel_port = -1;
3596
3597 rcu_read_lock();
3598 vpd_pg83 = rcu_dereference(sdev->vpd_pg83);
3599 if (!vpd_pg83) {
3600 rcu_read_unlock();
3601 return -ENXIO;
3602 }
3603
3604 d = vpd_pg83->data + 4;
3605 while (d < vpd_pg83->data + vpd_pg83->len) {
3606 switch (d[1] & 0xf) {
3607 case 0x4:
3608 /* Relative target port */
3609 rel_port = get_unaligned_be16(&d[6]);
3610 break;
3611 case 0x5:
3612 /* Target port group */
3613 group_id = get_unaligned_be16(&d[6]);
3614 break;
3615 default:
3616 break;
3617 }
3618 d += d[3] + 4;
3619 }
3620 rcu_read_unlock();
3621
3622 if (group_id >= 0 && rel_id && rel_port != -1)
3623 *rel_id = rel_port;
3624
3625 return group_id;
3626}
3627EXPORT_SYMBOL(scsi_vpd_tpg_id);