/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_EXTENT_IO_H
#define BTRFS_EXTENT_IO_H

#include <linux/rbtree.h>
#include <linux/refcount.h>
#include <linux/fiemap.h>
#include "ulist.h"

/*
 * flags for bio submission. The high bits indicate the compression
 * type for this bio
 */
#define EXTENT_BIO_COMPRESSED 1
#define EXTENT_BIO_FLAG_SHIFT 16

enum {
	EXTENT_BUFFER_UPTODATE,
	EXTENT_BUFFER_DIRTY,
	EXTENT_BUFFER_CORRUPT,
	/* this got triggered by readahead */
	EXTENT_BUFFER_READAHEAD,
	EXTENT_BUFFER_TREE_REF,
	EXTENT_BUFFER_STALE,
	EXTENT_BUFFER_WRITEBACK,
	/* read IO error */
	EXTENT_BUFFER_READ_ERR,
	EXTENT_BUFFER_UNMAPPED,
	EXTENT_BUFFER_IN_TREE,
	/* write IO error */
	EXTENT_BUFFER_WRITE_ERR,
};

/* these are flags for __process_pages_contig */
#define PAGE_UNLOCK		(1 << 0)
#define PAGE_CLEAR_DIRTY	(1 << 1)
#define PAGE_SET_WRITEBACK	(1 << 2)
#define PAGE_END_WRITEBACK	(1 << 3)
#define PAGE_SET_PRIVATE2	(1 << 4)
#define PAGE_SET_ERROR		(1 << 5)
#define PAGE_LOCK		(1 << 6)

/*
 * page->private values. Every page that is controlled by the extent
 * map has page->private set to one.
 */
#define EXTENT_PAGE_PRIVATE 1
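
/*
 * Illustrative sketch only, not a definition from this file: the usual way a
 * page gets this tag is via set_page_extent_mapped(), declared below, which
 * is assumed to do roughly the following:
 *
 *	if (!PagePrivate(page)) {
 *		SetPagePrivate(page);
 *		get_page(page);
 *		set_page_private(page, EXTENT_PAGE_PRIVATE);
 *	}
 */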

/*
 * The extent buffer bitmap operations are done with byte granularity instead of
 * word granularity for two reasons:
 * 1. The bitmaps must be little-endian on disk.
 * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
 *    single word in a bitmap may straddle two pages in the extent buffer.
 */
#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
#define BITMAP_FIRST_BYTE_MASK(start) \
	((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
#define BITMAP_LAST_BYTE_MASK(nbits) \
	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
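
/*
 * Worked example (illustration only): for a bitmap range starting at bit 3,
 * BITMAP_FIRST_BYTE_MASK(3) = (0xff << 3) & 0xff = 0xf8, i.e. bits 3..7 of
 * the first byte. For an 11-bit range, BITMAP_LAST_BYTE_MASK(11) =
 * 0xff >> (-11 & 7) = 0xff >> 5 = 0x07, i.e. bits 0..2 of the last byte.
 */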

struct btrfs_root;
struct btrfs_inode;
struct btrfs_io_bio;
struct io_failure_record;
struct extent_io_tree;

typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
					 int mirror_num,
					 unsigned long bio_flags);

typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
		struct bio *bio, u64 bio_offset);

#define INLINE_EXTENT_BUFFER_PAGES 16
#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
struct extent_buffer {
	u64 start;
	unsigned long len;
	unsigned long bflags;
	struct btrfs_fs_info *fs_info;
	spinlock_t refs_lock;
	atomic_t refs;
	atomic_t io_pages;
	int read_mirror;
	struct rcu_head rcu_head;
	pid_t lock_owner;

	int blocking_writers;
	atomic_t blocking_readers;
	bool lock_recursed;
	/* >= 0 if eb belongs to a log tree, -1 otherwise */
	short log_index;

	/* protects write locks */
	rwlock_t lock;

	/* readers use write_lock_wq while they wait for the write
	 * lock holders to unlock
	 */
	wait_queue_head_t write_lock_wq;

	/* writers use read_lock_wq while they wait for readers
	 * to unlock
	 */
	wait_queue_head_t read_lock_wq;
	struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
	int spinning_writers;
	atomic_t spinning_readers;
	atomic_t read_locks;
	int write_locks;
	struct list_head leak_list;
#endif
};

/*
 * Structure to record how many bytes and which ranges are set/cleared
 */
struct extent_changeset {
	/* How many bytes are set/cleared in this operation */
	unsigned int bytes_changed;

	/* Changed ranges */
	struct ulist range_changed;
};

static inline void extent_changeset_init(struct extent_changeset *changeset)
{
	changeset->bytes_changed = 0;
	ulist_init(&changeset->range_changed);
}

static inline struct extent_changeset *extent_changeset_alloc(void)
{
	struct extent_changeset *ret;

	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret)
		return NULL;

	extent_changeset_init(ret);
	return ret;
}

static inline void extent_changeset_release(struct extent_changeset *changeset)
{
	if (!changeset)
		return;
	changeset->bytes_changed = 0;
	ulist_release(&changeset->range_changed);
}

static inline void extent_changeset_free(struct extent_changeset *changeset)
{
	if (!changeset)
		return;
	extent_changeset_release(changeset);
	kfree(changeset);
}
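
/*
 * Typical changeset lifecycle, sketched for illustration (the middle steps
 * are placeholders, not calls defined in this header):
 *
 *	struct extent_changeset *changeset = extent_changeset_alloc();
 *
 *	if (!changeset)
 *		return -ENOMEM;
 *	...	// pass changeset to an operation that records changed ranges
 *	...	// consume changeset->bytes_changed / changeset->range_changed
 *	extent_changeset_free(changeset);
 */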

static inline void extent_set_compress_type(unsigned long *bio_flags,
					    int compress_type)
{
	*bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT;
}

static inline int extent_compress_type(unsigned long bio_flags)
{
	return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
}
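
/*
 * Sketch of how the two halves of bio_flags combine (illustration only;
 * BTRFS_COMPRESS_ZLIB is assumed to come from compression.h):
 *
 *	unsigned long bio_flags = EXTENT_BIO_COMPRESSED;
 *	int compress_type;
 *
 *	extent_set_compress_type(&bio_flags, BTRFS_COMPRESS_ZLIB);
 *	...
 *	if (bio_flags & EXTENT_BIO_COMPRESSED)
 *		compress_type = extent_compress_type(bio_flags);
 */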

struct extent_map_tree;

typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
					  struct page *page, size_t pg_offset,
					  u64 start, u64 len);

int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);

int __must_check submit_one_bio(struct bio *bio, int mirror_num,
				unsigned long bio_flags);
int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
		      struct bio **bio, unsigned long *bio_flags,
		      unsigned int read_flags, u64 *prev_em_start);
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
			      int mode);
int extent_writepages(struct address_space *mapping,
		      struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
			    struct writeback_control *wbc);
void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
		  u64 start, u64 len);
void set_page_extent_mapped(struct page *page);

struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 start);
struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						  u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						u64 start);
struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
					 u64 start);
void free_extent_buffer(struct extent_buffer *eb);
void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_NONE	0
#define WAIT_COMPLETE	1
#define WAIT_PAGE_LOCK	2
int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
			     int mirror_num);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);

static inline int num_extent_pages(const struct extent_buffer *eb)
{
	return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) -
	       (eb->start >> PAGE_SHIFT);
}
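
/*
 * Worked example (illustration only, assuming 4K pages): an eb with
 * start == 4096 and len == 16384 spans bytes [4096, 20480), so
 * num_extent_pages() = (20480 >> 12) - (4096 >> 12) = 5 - 1 = 4 pages.
 */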

static inline int extent_buffer_uptodate(const struct extent_buffer *eb)
{
	return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}

int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len);
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
			unsigned long start,
			unsigned long len);
int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
				       void __user *dst, unsigned long start,
				       unsigned long len);
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
					 const void *src);
void write_extent_buffer(const struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len);
void copy_extent_buffer_full(const struct extent_buffer *dst,
			     const struct extent_buffer *src);
void copy_extent_buffer(const struct extent_buffer *dst,
			const struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len);
void memcpy_extent_buffer(const struct extent_buffer *dst,
			  unsigned long dst_offset, unsigned long src_offset,
			  unsigned long len);
void memmove_extent_buffer(const struct extent_buffer *dst,
			   unsigned long dst_offset, unsigned long src_offset,
			   unsigned long len);
void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
			   unsigned long len);
int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
			   unsigned long pos);
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
			      unsigned long pos, unsigned long len);
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
				unsigned long start, unsigned long pos,
				unsigned long len);
void clear_extent_buffer_dirty(const struct extent_buffer *eb);
bool set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(const struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
				  struct page *locked_page,
				  unsigned bits_to_clear,
				  unsigned long page_ops);
struct bio *btrfs_bio_alloc(u64 first_byte);
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct bio *bio);
struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size);

struct btrfs_fs_info;
struct btrfs_inode;

int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
		      u64 length, u64 logical, struct page *page,
		      unsigned int pg_offset, int mirror_num);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);

/*
 * When IO fails, either with EIO or csum verification fails, we
 * try other mirrors that might have a good copy of the data. This
 * io_failure_record is used to record state as we go through all the
 * mirrors. If another mirror has good data, the page is set up to date
 * and things continue. If a good mirror can't be found, the original
 * bio end_io callback is called to indicate things have failed.
 */
struct io_failure_record {
	struct page *page;
	u64 start;
	u64 len;
	u64 logical;
	unsigned long bio_flags;
	int this_mirror;
	int failed_mirror;
	int in_validation;
};
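
/*
 * Rough retry flow (a sketch, not the exact implementation): on a failed
 * read of mirror N the record is created with failed_mirror = N,
 * this_mirror is advanced to the next candidate copy, and the range is
 * resubmitted via btrfs_submit_read_repair() below until a mirror passes
 * csum verification or all mirrors are exhausted.
 */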

blk_status_t btrfs_submit_read_repair(struct inode *inode,
				      struct bio *failed_bio, u64 phy_offset,
				      struct page *page, unsigned int pgoff,
				      u64 start, u64 end, int failed_mirror,
				      submit_bio_hook_t *submit_bio_hook);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
			      struct page *locked_page, u64 *start,
			      u64 *end);
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
					       u64 start);

#ifdef CONFIG_BTRFS_DEBUG
void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info);
#else
#define btrfs_extent_buffer_leak_debug_check(fs_info)	do {} while (0)
#endif

#endif