/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_MQ_H
#define BLK_MQ_H

#include <linux/blkdev.h>
#include <linux/sbitmap.h>
#include <linux/srcu.h>

struct blk_mq_tags;
struct blk_flush_queue;

/**
 * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
 * block device
 */
struct blk_mq_hw_ctx {
	struct {
		/** @lock: Protects the dispatch list. */
		spinlock_t		lock;
		/**
		 * @dispatch: Used for requests that are ready to be
		 * dispatched to the hardware but for some reason (e.g. lack of
		 * resources) could not be sent to the hardware. As soon as the
		 * driver can send new requests, requests on this list will
		 * be sent first for a fairer dispatch.
		 */
		struct list_head	dispatch;
		/**
		 * @state: BLK_MQ_S_* flags. Defines the state of the hw
		 * queue (active, scheduled to restart, stopped).
		 */
		unsigned long		state;
	} ____cacheline_aligned_in_smp;

	/**
	 * @run_work: Used for scheduling a hardware queue run at a later time.
	 */
	struct delayed_work	run_work;
	/** @cpumask: Map of available CPUs where this hctx can run. */
	cpumask_var_t		cpumask;
	/**
	 * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU
	 * selection from @cpumask.
	 */
	int			next_cpu;
	/**
	 * @next_cpu_batch: Counter of how many work items are left in the
	 * batch before changing to the next CPU.
	 */
	int			next_cpu_batch;

	/** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */
	unsigned long		flags;

	/**
	 * @sched_data: Pointer owned by the IO scheduler attached to a request
	 * queue. It's up to the IO scheduler how to use this pointer.
	 */
	void			*sched_data;
	/**
	 * @queue: Pointer to the request queue that owns this hardware context.
	 */
	struct request_queue	*queue;
	/** @fq: Queue of requests that need to perform a flush operation. */
	struct blk_flush_queue	*fq;

	/**
	 * @driver_data: Pointer to data owned by the block driver that created
	 * this hctx.
	 */
	void			*driver_data;

	/**
	 * @ctx_map: Bitmap for each software queue. If a bit is set, there is
	 * a pending request in that software queue.
	 */
	struct sbitmap		ctx_map;

	/**
	 * @dispatch_from: Software queue to be used when no scheduler was
	 * selected.
	 */
	struct blk_mq_ctx	*dispatch_from;
	/**
	 * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to
	 * decide if the hw_queue is busy, using an Exponential Weighted Moving
	 * Average algorithm.
	 */
	unsigned int		dispatch_busy;

	/** @type: HCTX_TYPE_* flags. Type of hardware queue. */
	unsigned short		type;
	/** @nr_ctx: Number of software queues. */
	unsigned short		nr_ctx;
	/** @ctxs: Array of software queues. */
	struct blk_mq_ctx	**ctxs;

	/** @dispatch_wait_lock: Lock for dispatch_wait queue. */
	spinlock_t		dispatch_wait_lock;
	/**
	 * @dispatch_wait: Waitqueue on which requests wait when no tag is
	 * available at the moment, so they can be retried later.
	 */
	wait_queue_entry_t	dispatch_wait;

	/**
	 * @wait_index: Index of next available dispatch_wait queue to insert
	 * requests.
	 */
	atomic_t		wait_index;

	/**
	 * @tags: Tags owned by the block driver. A tag in this set is only
	 * assigned when a request is dispatched from a hardware queue.
	 */
	struct blk_mq_tags	*tags;
	/**
	 * @sched_tags: Tags owned by the I/O scheduler. If there is an I/O
	 * scheduler associated with a request queue, a tag is assigned when
	 * that request is allocated. Otherwise, this member is not used.
	 */
	struct blk_mq_tags	*sched_tags;

	/** @queued: Number of queued requests. */
	unsigned long		queued;
	/** @run: Number of dispatched requests. */
	unsigned long		run;
#define BLK_MQ_MAX_DISPATCH_ORDER	7
	/** @dispatched: Number of dispatched requests by queue. */
	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];

	/** @numa_node: NUMA node the storage adapter has been connected to. */
	unsigned int		numa_node;
	/** @queue_num: Index of this hardware queue. */
	unsigned int		queue_num;

	/**
	 * @nr_active: Number of active requests. Only used when a tag set is
	 * shared across request queues.
	 */
	atomic_t		nr_active;
	/**
	 * @elevator_queued: Number of requests queued on this hctx.
	 */
	atomic_t		elevator_queued;

	/** @cpuhp_online: List to store request if a CPU is going offline. */
	struct hlist_node	cpuhp_online;
	/** @cpuhp_dead: List to store request if some CPU dies. */
	struct hlist_node	cpuhp_dead;
	/** @kobj: Kernel object for sysfs. */
	struct kobject		kobj;

	/** @poll_considered: Count of how many times blk_poll() was called. */
	unsigned long		poll_considered;
	/** @poll_invoked: Count of how many requests blk_poll() polled. */
	unsigned long		poll_invoked;
	/** @poll_success: Count of how many polled requests were completed. */
	unsigned long		poll_success;

#ifdef CONFIG_BLK_DEBUG_FS
	/**
	 * @debugfs_dir: debugfs directory for this hardware queue. Named
	 * as cpu<cpu_number>.
	 */
	struct dentry		*debugfs_dir;
	/** @sched_debugfs_dir: debugfs directory for the scheduler. */
	struct dentry		*sched_debugfs_dir;
#endif

	/**
	 * @hctx_list: If this hctx is not in use, this is an entry in
	 * q->unused_hctx_list.
	 */
	struct list_head	hctx_list;

	/**
	 * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
	 * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
	 * blk_mq_hw_ctx_size().
	 */
	struct srcu_struct	srcu[];
};

/**
 * struct blk_mq_queue_map - Map software queues to hardware queues
 * @mq_map:       CPU ID to hardware queue index map. This is an array
 *	with nr_cpu_ids elements. Each element has a value in the range
 *	[@queue_offset, @queue_offset + @nr_queues).
 * @nr_queues:    Number of hardware queues to map CPU IDs onto.
 * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe
 *	driver to map each hardware queue type (enum hctx_type) onto a distinct
 *	set of hardware queues.
 */
struct blk_mq_queue_map {
	unsigned int *mq_map;
	unsigned int nr_queues;
	unsigned int queue_offset;
};

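/*
 * Illustrative sketch (not part of the API): a driver with no special CPU
 * affinity requirements can let the block layer build the default mapping.
 * "my_map_queues" is a hypothetical ->map_queues() callback (see struct
 * blk_mq_ops and blk_mq_map_queues() further down in this header); it only
 * fills in the HCTX_TYPE_DEFAULT map declared just below.
 *
 *	static int my_map_queues(struct blk_mq_tag_set *set)
 *	{
 *		return blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
 *	}
 */
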
/**
 * enum hctx_type - Type of hardware queue
 * @HCTX_TYPE_DEFAULT:	All I/O not otherwise accounted for.
 * @HCTX_TYPE_READ:	Just for READ I/O.
 * @HCTX_TYPE_POLL:	Polled I/O of any kind.
 * @HCTX_MAX_TYPES:	Number of types of hctx.
 */
enum hctx_type {
	HCTX_TYPE_DEFAULT,
	HCTX_TYPE_READ,
	HCTX_TYPE_POLL,

	HCTX_MAX_TYPES,
};

/**
 * struct blk_mq_tag_set - tag set that can be shared between request queues
 * @map:	   One or more ctx -> hctx mappings. One map exists for each
 *		   hardware queue type (enum hctx_type) that the driver wishes
 *		   to support. There are no restrictions on maps being of the
 *		   same size, and it's perfectly legal to share maps between
 *		   types.
 * @nr_maps:	   Number of elements in the @map array. A number in the range
 *		   [1, HCTX_MAX_TYPES].
 * @ops:	   Pointers to functions that implement block driver behavior.
 * @nr_hw_queues:  Number of hardware queues supported by the block driver that
 *		   owns this data structure.
 * @queue_depth:   Number of tags per hardware queue, reserved tags included.
 * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag
 *		   allocations.
 * @cmd_size:	   Number of additional bytes to allocate per request. The block
 *		   driver owns these additional bytes.
 * @numa_node:	   NUMA node the storage adapter has been connected to.
 * @timeout:	   Request processing timeout in jiffies.
 * @flags:	   Zero or more BLK_MQ_F_* flags.
 * @driver_data:   Pointer to data owned by the block driver that created this
 *		   tag set.
 * @active_queues_shared_sbitmap:
 *		   Number of active request queues per tag set.
 * @__bitmap_tags: A shared tags sbitmap, used over all hctxs.
 * @__breserved_tags:
 *		   A shared reserved tags sbitmap, used over all hctxs.
 * @tags:	   Tag sets. One tag set per hardware queue. Has @nr_hw_queues
 *		   elements.
 * @tag_list_lock: Serializes tag_list accesses.
 * @tag_list:	   List of the request queues that use this tag set. See also
 *		   request_queue.tag_set_list.
 */
struct blk_mq_tag_set {
	struct blk_mq_queue_map	map[HCTX_MAX_TYPES];
	unsigned int		nr_maps;
	const struct blk_mq_ops	*ops;
	unsigned int		nr_hw_queues;
	unsigned int		queue_depth;
	unsigned int		reserved_tags;
	unsigned int		cmd_size;
	int			numa_node;
	unsigned int		timeout;
	unsigned int		flags;
	void			*driver_data;
	atomic_t		active_queues_shared_sbitmap;

	struct sbitmap_queue	__bitmap_tags;
	struct sbitmap_queue	__breserved_tags;
	struct blk_mq_tags	**tags;

	struct mutex		tag_list_lock;
	struct list_head	tag_list;
};

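/*
 * Illustrative sketch, not a reference implementation: how a simple
 * single-queue driver might fill in a tag set and create a request queue.
 * "my_mq_ops", "struct my_cmd" and the numbers are hypothetical, and
 * IS_ERR()/error handling is trimmed for brevity.
 *
 *	static struct blk_mq_tag_set my_tag_set;
 *	struct request_queue *q;
 *
 *	memset(&my_tag_set, 0, sizeof(my_tag_set));
 *	my_tag_set.ops		= &my_mq_ops;
 *	my_tag_set.nr_hw_queues	= 1;
 *	my_tag_set.queue_depth	= 64;
 *	my_tag_set.numa_node	= NUMA_NO_NODE;
 *	my_tag_set.cmd_size	= sizeof(struct my_cmd);
 *	my_tag_set.flags	= BLK_MQ_F_SHOULD_MERGE;
 *
 *	if (!blk_mq_alloc_tag_set(&my_tag_set))
 *		q = blk_mq_init_queue(&my_tag_set);
 *
 * On teardown the queue is released first and the tag set is then freed
 * with blk_mq_free_tag_set().  blk_mq_init_sq_queue(), declared later in
 * this header, wraps the same steps for the single-queue case.
 */
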
/**
 * struct blk_mq_queue_data - Data about a request inserted in a queue
 *
 * @rq:   Request pointer.
 * @last: If it is the last request in the queue.
 */
struct blk_mq_queue_data {
	struct request *rq;
	bool last;
};

typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
		bool);
typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);

/**
 * struct blk_mq_ops - Callback functions that implement block driver
 * behaviour.
 */
struct blk_mq_ops {
	/**
	 * @queue_rq: Queue a new request from block IO.
	 */
	blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *,
				 const struct blk_mq_queue_data *);

	/**
	 * @commit_rqs: If a driver uses bd->last to judge when to submit
	 * requests to hardware, it must define this function. In case of errors
	 * that make us stop issuing further requests, this hook serves the
	 * purpose of kicking the hardware (which the last request otherwise
	 * would have done).
	 */
	void (*commit_rqs)(struct blk_mq_hw_ctx *);

	/**
	 * @get_budget: Reserve budget before queueing a request. Once
	 * .queue_rq is run, it is the driver's responsibility to release the
	 * reserved budget. The failure case of .get_budget must also be
	 * handled to avoid I/O deadlock.
	 */
	bool (*get_budget)(struct request_queue *);

	/**
	 * @put_budget: Release the reserved budget.
	 */
	void (*put_budget)(struct request_queue *);

	/**
	 * @timeout: Called on request timeout.
	 */
	enum blk_eh_timer_return (*timeout)(struct request *, bool);

	/**
	 * @poll: Called to poll for completion of a specific tag.
	 */
	int (*poll)(struct blk_mq_hw_ctx *);

	/**
	 * @complete: Mark the request as complete.
	 */
	void (*complete)(struct request *);

	/**
	 * @init_hctx: Called when the block layer side of a hardware queue has
	 * been set up, allowing the driver to allocate/init matching
	 * structures.
	 */
	int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int);
	/**
	 * @exit_hctx: Ditto for exit/teardown.
	 */
	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);

	/**
	 * @init_request: Called for every command allocated by the block layer
	 * to allow the driver to set up driver specific data.
	 *
	 * A tag greater than or equal to queue_depth is for setting up the
	 * flush request.
	 */
	int (*init_request)(struct blk_mq_tag_set *set, struct request *,
			    unsigned int, unsigned int);
	/**
	 * @exit_request: Ditto for exit/teardown.
	 */
	void (*exit_request)(struct blk_mq_tag_set *set, struct request *,
			     unsigned int);

	/**
	 * @initialize_rq_fn: Called from inside blk_get_request().
	 */
	void (*initialize_rq_fn)(struct request *rq);

	/**
	 * @cleanup_rq: Called before freeing a request that has not completed
	 * yet, usually to free driver-private data.
	 */
	void (*cleanup_rq)(struct request *);

	/**
	 * @busy: If set, returns whether or not this queue currently is busy.
	 */
	bool (*busy)(struct request_queue *);

	/**
	 * @map_queues: This allows drivers to specify their own queue mapping
	 * by overriding the setup-time function that builds the mq_map.
	 */
	int (*map_queues)(struct blk_mq_tag_set *set);

#ifdef CONFIG_BLK_DEBUG_FS
	/**
	 * @show_rq: Used by the debugfs implementation to show driver-specific
	 * information about a request.
	 */
	void (*show_rq)(struct seq_file *m, struct request *rq);
#endif
};

enum {
	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
	BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1,
	/*
	 * Set when this device requires underlying blk-mq device for
	 * completing IO:
	 */
	BLK_MQ_F_STACKING	= 1 << 2,
	BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
	BLK_MQ_F_BLOCKING	= 1 << 5,
	BLK_MQ_F_NO_SCHED	= 1 << 6,
	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
	BLK_MQ_F_ALLOC_POLICY_BITS = 1,

	BLK_MQ_S_STOPPED	= 0,
	BLK_MQ_S_TAG_ACTIVE	= 1,
	BLK_MQ_S_SCHED_RESTART	= 2,

	/* hw queue is inactive after all its CPUs become offline */
	BLK_MQ_S_INACTIVE	= 3,

	BLK_MQ_MAX_DEPTH	= 10240,

	BLK_MQ_CPU_WORK_BATCH	= 8,
};
#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)

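/*
 * Worked example (illustrative): a driver that wants round-robin tag
 * allocation encodes the policy into its tag set flags with
 * BLK_ALLOC_POLICY_TO_MQ_FLAG(); BLK_TAG_ALLOC_RR comes from
 * <linux/blkdev.h>.
 *
 *	set->flags = BLK_MQ_F_SHOULD_MERGE |
 *		     BLK_ALLOC_POLICY_TO_MQ_FLAG(BLK_TAG_ALLOC_RR);
 *
 * With BLK_MQ_F_ALLOC_POLICY_START_BIT == 8 and BLK_TAG_ALLOC_RR == 1 this
 * sets bit 8, and BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags) recovers
 * BLK_TAG_ALLOC_RR.
 */
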
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
		void *queuedata);
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
						  struct request_queue *q,
						  bool elevator_init);
struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
						const struct blk_mq_ops *ops,
						unsigned int queue_depth,
						unsigned int set_flags);
void blk_mq_unregister_dev(struct device *, struct request_queue *);

int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
void blk_mq_free_tag_set(struct blk_mq_tag_set *set);

void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);

void blk_mq_free_request(struct request *rq);

bool blk_mq_queue_inflight(struct request_queue *q);

enum {
	/* return when out of requests */
	BLK_MQ_REQ_NOWAIT	= (__force blk_mq_req_flags_t)(1 << 0),
	/* allocate from reserved pool */
	BLK_MQ_REQ_RESERVED	= (__force blk_mq_req_flags_t)(1 << 1),
	/* set RQF_PM */
	BLK_MQ_REQ_PM		= (__force blk_mq_req_flags_t)(1 << 2),
};

struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
		blk_mq_req_flags_t flags);
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
		unsigned int op, blk_mq_req_flags_t flags,
		unsigned int hctx_idx);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);

enum {
	BLK_MQ_UNIQUE_TAG_BITS = 16,
	BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1,
};

u32 blk_mq_unique_tag(struct request *rq);

static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag)
{
	return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS;
}

static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
{
	return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
}

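/*
 * Illustrative example: the unique tag packs the hardware queue index into
 * the upper 16 bits and the per-queue tag into the lower 16 bits, so an
 * error path can report both from a single value.
 *
 *	u32 unique = blk_mq_unique_tag(rq);
 *
 *	pr_err("timeout on hwq %u, tag %u\n",
 *	       blk_mq_unique_tag_to_hwq(unique),
 *	       blk_mq_unique_tag_to_tag(unique));
 */
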
/**
 * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
 * @rq: target request.
 */
static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
{
	return READ_ONCE(rq->state);
}

static inline int blk_mq_request_started(struct request *rq)
{
	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
}

static inline int blk_mq_request_completed(struct request *rq)
{
	return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
}

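/*
 * Illustrative sketch: the state helpers above are typically used together
 * with the tag iterators declared later in this header, e.g. to count how
 * many requests a device still has in flight.  "my_count_started" is a
 * hypothetical busy_tag_iter_fn; returning true keeps the iteration going.
 *
 *	static bool my_count_started(struct request *rq, void *data,
 *				     bool reserved)
 *	{
 *		if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq))
 *			(*(unsigned int *)data)++;
 *		return true;
 *	}
 *
 *	unsigned int in_flight = 0;
 *
 *	blk_mq_tagset_busy_iter(&my_tag_set, my_count_started, &in_flight);
 */
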
void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, blk_status_t error);
void __blk_mq_end_request(struct request *rq, blk_status_t error);

void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
void blk_mq_complete_request(struct request *rq);
bool blk_mq_complete_request_remote(struct request *rq);
bool blk_mq_queue_stopped(struct request_queue *q);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_stop_hw_queues(struct request_queue *q);
void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv);
void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset);
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_unfreeze_queue(struct request_queue *q);
void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
				     unsigned long timeout);

int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);

void blk_mq_quiesce_queue_nowait(struct request_queue *q);

unsigned int blk_mq_rq_cpu(struct request *rq);

bool __blk_should_fake_timeout(struct request_queue *q);
static inline bool blk_should_fake_timeout(struct request_queue *q)
{
	if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) &&
	    test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
		return __blk_should_fake_timeout(q);
	return false;
}

/**
 * blk_mq_rq_from_pdu - cast a PDU to a request
 * @pdu: the PDU (Protocol Data Unit) to be cast
 *
 * Return: request
 *
 * Driver command data is immediately after the request. So subtract the
 * request size to get back to the original request.
 */
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
{
	return pdu - sizeof(struct request);
}

/**
 * blk_mq_rq_to_pdu - cast a request to a PDU
 * @rq: the request to be cast
 *
 * Return: pointer to the PDU
 *
 * Driver command data is immediately after the request. So add the request
 * size to get the PDU.
 */
static inline void *blk_mq_rq_to_pdu(struct request *rq)
{
	return rq + 1;
}

#define queue_for_each_hw_ctx(q, hctx, i)				\
	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
	     ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)

#define hctx_for_each_ctx(hctx, ctx, i)					\
	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
	     ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)

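/*
 * Illustrative example: iterating every hardware queue of a request queue
 * "q" and summing the software queues mapped to each of them.
 *
 *	struct blk_mq_hw_ctx *hctx;
 *	unsigned int i, nr_ctx = 0;
 *
 *	queue_for_each_hw_ctx(q, hctx, i)
 *		nr_ctx += hctx->nr_ctx;
 */
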
static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx,
		struct request *rq)
{
	if (rq->tag != -1)
		return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT);

	return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) |
			BLK_QC_T_INTERNAL;
}

static inline void blk_mq_cleanup_rq(struct request *rq)
{
	if (rq->q->mq_ops->cleanup_rq)
		rq->q->mq_ops->cleanup_rq(rq);
}

blk_qc_t blk_mq_submit_bio(struct bio *bio);

#endif