Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7158b5b..0159100 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -27,7 +27,10 @@
#include <linux/uio.h>
#include <linux/magic.h>
#include <linux/iversion.h>
+#include <linux/swap.h>
+#include <linux/sched/mm.h>
#include <asm/unaligned.h>
+#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -44,7 +47,8 @@
#include "backref.h"
#include "props.h"
#include "qgroup.h"
-#include "dedupe.h"
+#include "delalloc-space.h"
+#include "block-group.h"
struct btrfs_iget_args {
struct btrfs_key *location;
@@ -64,7 +68,6 @@
static const struct inode_operations btrfs_special_inode_operations;
static const struct inode_operations btrfs_file_inode_operations;
static const struct address_space_operations btrfs_aops;
-static const struct address_space_operations btrfs_symlink_aops;
static const struct file_operations btrfs_dir_file_operations;
static const struct extent_io_ops btrfs_extent_io_ops;
@@ -72,26 +75,15 @@
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
-
-#define S_SHIFT 12
-static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
- [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
- [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
- [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
-};
+struct kmem_cache *btrfs_free_space_bitmap_cachep;
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
- u64 start, u64 end, u64 delalloc_end,
- int *page_started, unsigned long *nr_written,
- int unlock, struct btrfs_dedupe_hash *hash);
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written, int unlock);
static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
u64 orig_start, u64 block_start,
u64 block_len, u64 orig_block_len,
@@ -104,23 +96,23 @@
/*
* Cleanup all submitted ordered extents in specified range to handle errors
- * from the fill_dellaloc() callback.
+ * from the btrfs_run_delalloc_range() callback.
*
* NOTE: caller must ensure that when an error happens, it can not call
* extent_clear_unlock_delalloc() to clear both the bits EXTENT_DO_ACCOUNTING
* and EXTENT_DELALLOC simultaneously, because that causes the reserved metadata
* to be released, which we want to happen only when finishing the ordered
- * extent (btrfs_finish_ordered_io()). Also note that the caller of the
- * fill_delalloc() callback already does proper cleanup for the first page of
- * the range, that is, it invokes the callback writepage_end_io_hook() for the
- * range of the first page.
+ * extent (btrfs_finish_ordered_io()).
*/
static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
- const u64 offset,
- const u64 bytes)
+ struct page *locked_page,
+ u64 offset, u64 bytes)
{
unsigned long index = offset >> PAGE_SHIFT;
unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
+ u64 page_start = page_offset(locked_page);
+ u64 page_end = page_start + PAGE_SIZE - 1;
+
struct page *page;
while (index <= end_index) {
@@ -131,8 +123,18 @@
ClearPagePrivate2(page);
put_page(page);
}
- return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
- bytes - PAGE_SIZE, false);
+
+ /*
+ * In case this page belongs to the delalloc range being instantiated
+ * then skip it, since the first page of a range is going to be
+ * properly cleaned up by the caller of run_delalloc_range
+ */
+ if (page_start >= offset && page_end <= (offset + bytes - 1)) {
+ offset += PAGE_SIZE;
+ bytes -= PAGE_SIZE;
+ }
+
+ return __endio_write_update_ordered(inode, offset, bytes, false);
}
static int btrfs_dirty_inode(struct inode *inode);
@@ -177,6 +179,9 @@
size_t cur_size = size;
unsigned long offset;
+ ASSERT((compressed_size > 0 && compressed_pages) ||
+ (compressed_size == 0 && !compressed_pages));
+
if (compressed_size && compressed_pages)
cur_size = compressed_size;
@@ -230,7 +235,7 @@
start >> PAGE_SHIFT);
btrfs_set_file_extent_compression(leaf, ei, 0);
kaddr = kmap_atomic(page);
- offset = start & (PAGE_SIZE - 1);
+ offset = offset_in_page(start);
write_extent_buffer(leaf, kaddr + offset, ptr, size);
kunmap_atomic(kaddr);
put_page(page);
@@ -356,18 +361,24 @@
struct list_head list;
};
-struct async_cow {
+struct async_chunk {
struct inode *inode;
- struct btrfs_root *root;
struct page *locked_page;
u64 start;
u64 end;
unsigned int write_flags;
struct list_head extents;
struct btrfs_work work;
+ atomic_t *pending;
};
-static noinline int add_async_extent(struct async_cow *cow,
+struct async_cow {
+ /* Number of chunks in flight; must be first in the structure */
+ atomic_t num_chunks;
+ struct async_chunk chunks[];
+};
+
+static noinline int add_async_extent(struct async_chunk *cow,
u64 start, u64 ram_size,
u64 compressed_size,
struct page **pages,
@@ -388,10 +399,31 @@
return 0;
}
+/*
+ * Check if the inode has flags compatible with compression
+ */
+static inline bool inode_can_compress(struct inode *inode)
+{
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
+ BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+ return false;
+ return true;
+}
+
+/*
+ * Check if the inode needs to be submitted to compression, based on mount
+ * options, defragmentation, properties or heuristics.
+ */
static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ if (!inode_can_compress(inode)) {
+ WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+ KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
+ btrfs_ino(BTRFS_I(inode)));
+ return 0;
+ }
/* force compress */
if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
return 1;
@@ -434,16 +466,15 @@
* are written in the same order that the flusher thread sent them
* down.
*/
-static noinline void compress_file_range(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end,
- struct async_cow *async_cow,
- int *num_added)
+static noinline int compress_file_range(struct async_chunk *async_chunk)
{
+ struct inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 blocksize = fs_info->sectorsize;
+ u64 start = async_chunk->start;
+ u64 end = async_chunk->end;
u64 actual_end;
- u64 isize = i_size_read(inode);
+ u64 i_size;
int ret = 0;
struct page **pages = NULL;
unsigned long nr_pages;
@@ -452,12 +483,25 @@
int i;
int will_compress;
int compress_type = fs_info->compress_type;
+ int compressed_extents = 0;
int redirty = 0;
inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
SZ_16K);
- actual_end = min_t(u64, isize, end + 1);
+ /*
+ * We need to save i_size before now because it could change in between
+ * us evaluating the size and assigning it. This is because we lock and
+ * unlock the page in truncate and fallocate, and then modify the i_size
+ * later on.
+ *
+ * The barriers are to emulate READ_ONCE, remove that once i_size_read
+ * does that for us.
+ */
+ barrier();
+ i_size = i_size_read(inode);
+ barrier();
+ actual_end = min_t(u64, i_size, end + 1);
again:
will_compress = 0;
nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
@@ -539,8 +583,7 @@
&total_compressed);
if (!ret) {
- unsigned long offset = total_compressed &
- (PAGE_SIZE - 1);
+ unsigned long offset = offset_in_page(total_compressed);
struct page *page = pages[nr_pages - 1];
char *kaddr;
@@ -589,14 +632,21 @@
* our outstanding extent for clearing delalloc for this
* range.
*/
- extent_clear_unlock_delalloc(inode, start, end, end,
- NULL, clear_flags,
+ extent_clear_unlock_delalloc(inode, start, end, NULL,
+ clear_flags,
PAGE_UNLOCK |
PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK |
page_error_op |
PAGE_END_WRITEBACK);
- goto free_pages_out;
+
+ for (i = 0; i < nr_pages; i++) {
+ WARN_ON(pages[i]->mapping);
+ put_page(pages[i]);
+ }
+ kfree(pages);
+
+ return 0;
}
}
@@ -615,14 +665,14 @@
*/
total_in = ALIGN(total_in, PAGE_SIZE);
if (total_compressed + blocksize <= total_in) {
- *num_added += 1;
+ compressed_extents++;
/*
* The async work queues will take care of doing actual
* allocation on disk for these compressed pages, and
* will submit them to the elevator.
*/
- add_async_extent(async_cow, start, total_in,
+ add_async_extent(async_chunk, start, total_in,
total_compressed, pages, nr_pages,
compress_type);
@@ -632,7 +682,7 @@
cond_resched();
goto again;
}
- return;
+ return compressed_extents;
}
}
if (pages) {
@@ -662,25 +712,18 @@
* to our extent and set things up for the async work queue to run
* cow_file_range to do the normal delalloc dance.
*/
- if (page_offset(locked_page) >= start &&
- page_offset(locked_page) <= end)
- __set_page_dirty_nobuffers(locked_page);
+ if (page_offset(async_chunk->locked_page) >= start &&
+ page_offset(async_chunk->locked_page) <= end)
+ __set_page_dirty_nobuffers(async_chunk->locked_page);
/* unlocked later on in the async handlers */
if (redirty)
extent_range_redirty_for_io(inode, start, end);
- add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0,
+ add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
BTRFS_COMPRESS_NONE);
- *num_added += 1;
+ compressed_extents++;
- return;
-
-free_pages_out:
- for (i = 0; i < nr_pages; i++) {
- WARN_ON(pages[i]->mapping);
- put_page(pages[i]);
- }
- kfree(pages);
+ return compressed_extents;
}
static void free_async_extent_pages(struct async_extent *async_extent)
@@ -705,45 +748,38 @@
* queued. We walk all the async extents created by compress_file_range
* and send them down to the disk.
*/
-static noinline void submit_compressed_extents(struct inode *inode,
- struct async_cow *async_cow)
+static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
{
+ struct inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct async_extent *async_extent;
u64 alloc_hint = 0;
struct btrfs_key ins;
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_io_tree *io_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
int ret = 0;
again:
- while (!list_empty(&async_cow->extents)) {
- async_extent = list_entry(async_cow->extents.next,
+ while (!list_empty(&async_chunk->extents)) {
+ async_extent = list_entry(async_chunk->extents.next,
struct async_extent, list);
list_del(&async_extent->list);
- io_tree = &BTRFS_I(inode)->io_tree;
-
retry:
+ lock_extent(io_tree, async_extent->start,
+ async_extent->start + async_extent->ram_size - 1);
/* did the compression code fall back to uncompressed IO? */
if (!async_extent->pages) {
int page_started = 0;
unsigned long nr_written = 0;
- lock_extent(io_tree, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1);
-
/* allocate blocks */
- ret = cow_file_range(inode, async_cow->locked_page,
+ ret = cow_file_range(inode, async_chunk->locked_page,
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- async_extent->start +
- async_extent->ram_size - 1,
- &page_started, &nr_written, 0,
- NULL);
+ &page_started, &nr_written, 0);
/* JDM XXX */
@@ -760,15 +796,12 @@
async_extent->ram_size - 1,
WB_SYNC_ALL);
else if (ret)
- unlock_page(async_cow->locked_page);
+ unlock_page(async_chunk->locked_page);
kfree(async_extent);
cond_resched();
continue;
}
- lock_extent(io_tree, async_extent->start,
- async_extent->start + async_extent->ram_size - 1);
-
ret = btrfs_reserve_extent(root, async_extent->ram_size,
async_extent->compressed_size,
async_extent->compressed_size,
@@ -836,8 +869,6 @@
extent_clear_unlock_delalloc(inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- async_extent->start +
- async_extent->ram_size - 1,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK);
@@ -847,17 +878,16 @@
ins.objectid,
ins.offset, async_extent->pages,
async_extent->nr_pages,
- async_cow->write_flags)) {
- struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+ async_chunk->write_flags)) {
struct page *p = async_extent->pages[0];
const u64 start = async_extent->start;
const u64 end = start + async_extent->ram_size - 1;
p->mapping = inode->i_mapping;
- tree->ops->writepage_end_io_hook(p, start, end,
- NULL, 0);
+ btrfs_writepage_endio_finish_ordered(p, start, end, 0);
+
p->mapping = NULL;
- extent_clear_unlock_delalloc(inode, start, end, end,
+ extent_clear_unlock_delalloc(inode, start, end,
NULL, 0,
PAGE_END_WRITEBACK |
PAGE_SET_ERROR);
@@ -875,8 +905,6 @@
extent_clear_unlock_delalloc(inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- async_extent->start +
- async_extent->ram_size - 1,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
@@ -935,9 +963,8 @@
*/
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
- u64 start, u64 end, u64 delalloc_end,
- int *page_started, unsigned long *nr_written,
- int unlock, struct btrfs_dedupe_hash *hash)
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written, int unlock)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -976,8 +1003,7 @@
* our outstanding extent for clearing delalloc for this
* range.
*/
- extent_clear_unlock_delalloc(inode, start, end,
- delalloc_end, NULL,
+ extent_clear_unlock_delalloc(inode, start, end, NULL,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
@@ -1060,7 +1086,7 @@
extent_clear_unlock_delalloc(inode, start,
start + ram_size - 1,
- delalloc_end, locked_page,
+ locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
page_ops);
if (num_bytes < cur_alloc_size)
@@ -1105,7 +1131,6 @@
if (extent_reserved) {
extent_clear_unlock_delalloc(inode, start,
start + cur_alloc_size,
- start + cur_alloc_size,
locked_page,
clear_bits,
page_ops);
@@ -1113,8 +1138,7 @@
if (start >= end)
goto out;
}
- extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
- locked_page,
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
clear_bits | EXTENT_CLEAR_DATA_RESV,
page_ops);
goto out;
@@ -1125,16 +1149,15 @@
*/
static noinline void async_cow_start(struct btrfs_work *work)
{
- struct async_cow *async_cow;
- int num_added = 0;
- async_cow = container_of(work, struct async_cow, work);
+ struct async_chunk *async_chunk;
+ int compressed_extents;
- compress_file_range(async_cow->inode, async_cow->locked_page,
- async_cow->start, async_cow->end, async_cow,
- &num_added);
- if (num_added == 0) {
- btrfs_add_delayed_iput(async_cow->inode);
- async_cow->inode = NULL;
+ async_chunk = container_of(work, struct async_chunk, work);
+
+ compressed_extents = compress_file_range(async_chunk);
+ if (compressed_extents == 0) {
+ btrfs_add_delayed_iput(async_chunk->inode);
+ async_chunk->inode = NULL;
}
}
@@ -1143,16 +1166,12 @@
*/
static noinline void async_cow_submit(struct btrfs_work *work)
{
- struct btrfs_fs_info *fs_info;
- struct async_cow *async_cow;
- struct btrfs_root *root;
+ struct async_chunk *async_chunk = container_of(work, struct async_chunk,
+ work);
+ struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
unsigned long nr_pages;
- async_cow = container_of(work, struct async_cow, work);
-
- root = async_cow->root;
- fs_info = root->fs_info;
- nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
+ nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
PAGE_SHIFT;
/* atomic_sub_return implies a barrier */
@@ -1160,17 +1179,29 @@
5 * SZ_1M)
cond_wake_up_nomb(&fs_info->async_submit_wait);
- if (async_cow->inode)
- submit_compressed_extents(async_cow->inode, async_cow);
+ /*
+ * ->inode could be NULL if async_cow_start has failed to compress,
+ * in which case we don't have anything to submit, yet we need to
+ * always adjust ->async_delalloc_pages as it's paired with the init
+ * happening in cow_file_range_async
+ */
+ if (async_chunk->inode)
+ submit_compressed_extents(async_chunk);
}
static noinline void async_cow_free(struct btrfs_work *work)
{
- struct async_cow *async_cow;
- async_cow = container_of(work, struct async_cow, work);
- if (async_cow->inode)
- btrfs_add_delayed_iput(async_cow->inode);
- kfree(async_cow);
+ struct async_chunk *async_chunk;
+
+ async_chunk = container_of(work, struct async_chunk, work);
+ if (async_chunk->inode)
+ btrfs_add_delayed_iput(async_chunk->inode);
+ /*
+ * Since the pointer to 'pending' is at the beginning of the array of
+ * async_chunk's, freeing it ensures the whole array has been freed.
+ */
+ if (atomic_dec_and_test(async_chunk->pending))
+ kvfree(async_chunk->pending);
}
static int cow_file_range_async(struct inode *inode, struct page *locked_page,
@@ -1179,41 +1210,73 @@
unsigned int write_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct async_cow *async_cow;
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct async_cow *ctx;
+ struct async_chunk *async_chunk;
unsigned long nr_pages;
u64 cur_end;
+ u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
+ int i;
+ bool should_compress;
+ unsigned nofs_flag;
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
- 1, 0, NULL);
- while (start < end) {
- async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
- BUG_ON(!async_cow); /* -ENOMEM */
- async_cow->inode = igrab(inode);
- async_cow->root = root;
- async_cow->locked_page = locked_page;
- async_cow->start = start;
- async_cow->write_flags = write_flags;
+ unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
- !btrfs_test_opt(fs_info, FORCE_COMPRESS))
- cur_end = end;
- else
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
+ !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
+ num_chunks = 1;
+ should_compress = false;
+ } else {
+ should_compress = true;
+ }
+
+ nofs_flag = memalloc_nofs_save();
+ ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
+
+ if (!ctx) {
+ unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
+ EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
+ EXTENT_DO_ACCOUNTING;
+ unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+ PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
+ PAGE_SET_ERROR;
+
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
+ clear_bits, page_ops);
+ return -ENOMEM;
+ }
+
+ async_chunk = ctx->chunks;
+ atomic_set(&ctx->num_chunks, num_chunks);
+
+ for (i = 0; i < num_chunks; i++) {
+ if (should_compress)
cur_end = min(end, start + SZ_512K - 1);
+ else
+ cur_end = end;
- async_cow->end = cur_end;
- INIT_LIST_HEAD(&async_cow->extents);
+ /*
+ * igrab is called higher up in the call chain, take only the
+ * lightweight reference for the callback lifetime
+ */
+ ihold(inode);
+ async_chunk[i].pending = &ctx->num_chunks;
+ async_chunk[i].inode = inode;
+ async_chunk[i].start = start;
+ async_chunk[i].end = cur_end;
+ async_chunk[i].locked_page = locked_page;
+ async_chunk[i].write_flags = write_flags;
+ INIT_LIST_HEAD(&async_chunk[i].extents);
- btrfs_init_work(&async_cow->work,
+ btrfs_init_work(&async_chunk[i].work,
btrfs_delalloc_helper,
async_cow_start, async_cow_submit,
async_cow_free);
- nr_pages = (cur_end - start + PAGE_SIZE) >>
- PAGE_SHIFT;
+ nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
atomic_add(nr_pages, &fs_info->async_delalloc_pages);
- btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
+ btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
*nr_written += nr_pages;
start = cur_end + 1;
@@ -1253,36 +1316,25 @@
*/
static noinline int run_delalloc_nocow(struct inode *inode,
struct page *locked_page,
- u64 start, u64 end, int *page_started, int force,
- unsigned long *nr_written)
+ const u64 start, const u64 end,
+ int *page_started, int force,
+ unsigned long *nr_written)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_buffer *leaf;
struct btrfs_path *path;
- struct btrfs_file_extent_item *fi;
- struct btrfs_key found_key;
- struct extent_map *em;
- u64 cow_start;
- u64 cur_offset;
- u64 extent_end;
- u64 extent_offset;
- u64 disk_bytenr;
- u64 num_bytes;
- u64 disk_num_bytes;
- u64 ram_bytes;
- int extent_type;
+ u64 cow_start = (u64)-1;
+ u64 cur_offset = start;
int ret;
- int type;
- int nocow;
- int check_prev = 1;
- bool nolock;
+ bool check_prev = true;
+ const bool freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
u64 ino = btrfs_ino(BTRFS_I(inode));
+ bool nocow = false;
+ u64 disk_bytenr = 0;
path = btrfs_alloc_path();
if (!path) {
- extent_clear_unlock_delalloc(inode, start, end, end,
- locked_page,
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, PAGE_UNLOCK |
@@ -1292,15 +1344,29 @@
return -ENOMEM;
}
- nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
-
- cow_start = (u64)-1;
- cur_offset = start;
while (1) {
+ struct btrfs_key found_key;
+ struct btrfs_file_extent_item *fi;
+ struct extent_buffer *leaf;
+ u64 extent_end;
+ u64 extent_offset;
+ u64 num_bytes = 0;
+ u64 disk_num_bytes;
+ u64 ram_bytes;
+ int extent_type;
+
+ nocow = false;
+
ret = btrfs_lookup_file_extent(NULL, root, path, ino,
cur_offset, 0);
if (ret < 0)
goto error;
+
+ /*
+ * If there is no extent for our range when doing the initial
+ * search, then go back to the previous slot as it will be the
+ * one containing the search offset
+ */
if (ret > 0 && path->slots[0] > 0 && check_prev) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1309,8 +1375,9 @@
found_key.type == BTRFS_EXTENT_DATA_KEY)
path->slots[0]--;
}
- check_prev = 0;
+ check_prev = false;
next_slot:
+ /* Go to next leaf if we have exhausted the current one */
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
@@ -1324,28 +1391,40 @@
leaf = path->nodes[0];
}
- nocow = 0;
- disk_bytenr = 0;
- num_bytes = 0;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ /* Didn't find anything for our INO */
if (found_key.objectid > ino)
break;
+ /*
+ * Keep searching until we find an EXTENT_ITEM or there are no
+ * more extents for this inode
+ */
if (WARN_ON_ONCE(found_key.objectid < ino) ||
found_key.type < BTRFS_EXTENT_DATA_KEY) {
path->slots[0]++;
goto next_slot;
}
+
+ /* Found key is not EXTENT_DATA_KEY or starts after req range */
if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
found_key.offset > end)
break;
+ /*
+ * If the found extent starts after requested offset, then
+ * adjust extent_end to be right before this extent begins
+ */
if (found_key.offset > cur_offset) {
extent_end = found_key.offset;
extent_type = 0;
goto out_check;
}
+ /*
+ * Found extent which begins before our range and potentially
+ * intersects it
+ */
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(leaf, fi);
@@ -1359,25 +1438,36 @@
btrfs_file_extent_num_bytes(leaf, fi);
disk_num_bytes =
btrfs_file_extent_disk_num_bytes(leaf, fi);
+ /*
+ * If extent we got ends before our range starts, skip
+ * to next extent
+ */
if (extent_end <= start) {
path->slots[0]++;
goto next_slot;
}
+ /* Skip holes */
if (disk_bytenr == 0)
goto out_check;
+ /* Skip compressed/encrypted/encoded extents */
if (btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
goto out_check;
/*
- * Do the same check as in btrfs_cross_ref_exist but
- * without the unnecessary search.
+ * If extent is created before the last volume's snapshot
+ * this implies the extent is shared, hence we can't do
+ * nocow. This is the same check as in
+ * btrfs_cross_ref_exist but without calling
+ * btrfs_search_slot.
*/
- if (btrfs_file_extent_generation(leaf, fi) <=
+ if (!freespace_inode &&
+ btrfs_file_extent_generation(leaf, fi) <=
btrfs_root_last_snapshot(&root->root_item))
goto out_check;
if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
goto out_check;
+ /* If extent is RO, we must COW it */
if (btrfs_extent_readonly(fs_info, disk_bytenr))
goto out_check;
ret = btrfs_cross_ref_exist(root, ino,
@@ -1394,17 +1484,17 @@
goto error;
}
- WARN_ON_ONCE(nolock);
+ WARN_ON_ONCE(freespace_inode);
goto out_check;
}
disk_bytenr += extent_offset;
disk_bytenr += cur_offset - found_key.offset;
num_bytes = min(end + 1, extent_end) - cur_offset;
/*
- * if there are pending snapshots for this root,
- * we fall into common COW way.
+ * If there are pending snapshots for this root, we
+ * fall into common COW way
*/
- if (!nolock && atomic_read(&root->snapshot_force_cow))
+ if (!freespace_inode && atomic_read(&root->snapshot_force_cow))
goto out_check;
/*
* force cow if csum exists in the range.
@@ -1423,27 +1513,29 @@
cur_offset = cow_start;
goto error;
}
- WARN_ON_ONCE(nolock);
+ WARN_ON_ONCE(freespace_inode);
goto out_check;
}
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
goto out_check;
- nocow = 1;
+ nocow = true;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
- extent_end = found_key.offset +
- btrfs_file_extent_ram_bytes(leaf, fi);
- extent_end = ALIGN(extent_end,
- fs_info->sectorsize);
+ extent_end = found_key.offset + ram_bytes;
+ extent_end = ALIGN(extent_end, fs_info->sectorsize);
+ /* Skip extents outside of our requested range */
+ if (extent_end <= start) {
+ path->slots[0]++;
+ goto next_slot;
+ }
} else {
- BUG_ON(1);
+ /* If this triggers then we have a memory corruption */
+ BUG();
}
out_check:
- if (extent_end <= start) {
- path->slots[0]++;
- if (nocow)
- btrfs_dec_nocow_writers(fs_info, disk_bytenr);
- goto next_slot;
- }
+ /*
+ * If nocow is false then record the beginning of the range
+ * that needs to be COWed
+ */
if (!nocow) {
if (cow_start == (u64)-1)
cow_start = cur_offset;
@@ -1455,11 +1547,16 @@
}
btrfs_release_path(path);
+
+ /*
+ * COW range from cow_start to found_key.offset - 1. The key
+ * marks the beginning of the first extent that can be
+ * NOCOW, which follows the range that needs to be COWed
+ */
if (cow_start != (u64)-1) {
ret = cow_file_range(inode, locked_page,
cow_start, found_key.offset - 1,
- end, page_started, nr_written, 1,
- NULL);
+ page_started, nr_written, 1);
if (ret) {
if (nocow)
btrfs_dec_nocow_writers(fs_info,
@@ -1471,6 +1568,7 @@
if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
u64 orig_start = found_key.offset - extent_offset;
+ struct extent_map *em;
em = create_io_em(inode, cur_offset, num_bytes,
orig_start,
@@ -1487,19 +1585,29 @@
goto error;
}
free_extent_map(em);
- }
-
- if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
- type = BTRFS_ORDERED_PREALLOC;
+ ret = btrfs_add_ordered_extent(inode, cur_offset,
+ disk_bytenr, num_bytes,
+ num_bytes,
+ BTRFS_ORDERED_PREALLOC);
+ if (ret) {
+ btrfs_drop_extent_cache(BTRFS_I(inode),
+ cur_offset,
+ cur_offset + num_bytes - 1,
+ 0);
+ goto error;
+ }
} else {
- type = BTRFS_ORDERED_NOCOW;
+ ret = btrfs_add_ordered_extent(inode, cur_offset,
+ disk_bytenr, num_bytes,
+ num_bytes,
+ BTRFS_ORDERED_NOCOW);
+ if (ret)
+ goto error;
}
- ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
- num_bytes, num_bytes, type);
if (nocow)
btrfs_dec_nocow_writers(fs_info, disk_bytenr);
- BUG_ON(ret); /* -ENOMEM */
+ nocow = false;
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID)
@@ -1512,7 +1620,7 @@
num_bytes);
extent_clear_unlock_delalloc(inode, cur_offset,
- cur_offset + num_bytes - 1, end,
+ cur_offset + num_bytes - 1,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC |
EXTENT_CLEAR_DATA_RESV,
@@ -1537,15 +1645,18 @@
if (cow_start != (u64)-1) {
cur_offset = end;
- ret = cow_file_range(inode, locked_page, cow_start, end, end,
- page_started, nr_written, 1, NULL);
+ ret = cow_file_range(inode, locked_page, cow_start, end,
+ page_started, nr_written, 1);
if (ret)
goto error;
}
error:
+ if (nocow)
+ btrfs_dec_nocow_writers(fs_info, disk_bytenr);
+
if (ret && cur_offset < end)
- extent_clear_unlock_delalloc(inode, cur_offset, end, end,
+ extent_clear_unlock_delalloc(inode, cur_offset, end,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
@@ -1577,14 +1688,13 @@
}
/*
- * extent_io.c call back to do delayed allocation processing
+ * Function to process delayed allocation (create CoW) for ranges which are
+ * being touched for the first time.
*/
-static int run_delalloc_range(void *private_data, struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written,
- struct writeback_control *wbc)
+int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
+ u64 start, u64 end, int *page_started, unsigned long *nr_written,
+ struct writeback_control *wbc)
{
- struct inode *inode = private_data;
int ret;
int force_cow = need_force_cow(inode, start, end);
unsigned int write_flags = wbc_to_write_flags(wbc);
@@ -1595,9 +1705,10 @@
} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
- } else if (!inode_need_compress(inode, start, end)) {
- ret = cow_file_range(inode, locked_page, start, end, end,
- page_started, nr_written, 1, NULL);
+ } else if (!inode_can_compress(inode) ||
+ !inode_need_compress(inode, start, end)) {
+ ret = cow_file_range(inode, locked_page, start, end,
+ page_started, nr_written, 1);
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags);
@@ -1606,14 +1717,14 @@
write_flags);
}
if (ret)
- btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
+ btrfs_cleanup_ordered_extents(inode, locked_page, start,
+ end - start + 1);
return ret;
}
-static void btrfs_split_extent_hook(void *private_data,
- struct extent_state *orig, u64 split)
+void btrfs_split_delalloc_extent(struct inode *inode,
+ struct extent_state *orig, u64 split)
{
- struct inode *inode = private_data;
u64 size;
/* not delalloc, ignore it */
@@ -1626,7 +1737,7 @@
u64 new_size;
/*
- * See the explanation in btrfs_merge_extent_hook, the same
+ * See the explanation in btrfs_merge_delalloc_extent, the same
* applies here, just in reverse.
*/
new_size = orig->end - split + 1;
@@ -1643,16 +1754,13 @@
}
/*
- * extent_io.c merge_extent_hook, used to track merged delayed allocation
- * extents so we can keep track of new extents that are just merged onto old
- * extents, such as when we are doing sequential writes, so we can properly
- * account for the metadata space we'll need.
+ * Handle merged delayed allocation extents so we can keep track of new extents
+ * that are just merged onto old extents, such as when we are doing sequential
+ * writes, so we can properly account for the metadata space we'll need.
*/
-static void btrfs_merge_extent_hook(void *private_data,
- struct extent_state *new,
- struct extent_state *other)
+void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+ struct extent_state *other)
{
- struct inode *inode = private_data;
u64 new_size, old_size;
u32 num_extents;
@@ -1756,15 +1864,12 @@
}
/*
- * extent_io.c set_bit_hook, used to track delayed allocation
- * bytes in this file, and to maintain the list of inodes that
- * have pending delalloc work to be done.
+ * Properly track delayed allocation bytes in the inode and maintain the
+ * list of inodes that have pending delalloc work to be done.
*/
-static void btrfs_set_bit_hook(void *private_data,
- struct extent_state *state, unsigned *bits)
+void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
+ unsigned *bits)
{
- struct inode *inode = private_data;
-
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
@@ -1810,14 +1915,14 @@
}
/*
- * extent_io.c clear_bit_hook, see set_bit_hook for why
+ * Once a range is no longer delalloc this function ensures that proper
+ * accounting happens.
*/
-static void btrfs_clear_bit_hook(void *private_data,
- struct extent_state *state,
- unsigned *bits)
+void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
+ struct extent_state *state, unsigned *bits)
{
- struct btrfs_inode *inode = BTRFS_I((struct inode *)private_data);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_inode *inode = BTRFS_I(vfs_inode);
+ struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
u64 len = state->end + 1 - state->start;
u32 num_extents = count_max_extents(len);
@@ -1842,7 +1947,7 @@
/*
* We don't reserve metadata space for space cache inodes so we
- * don't need to call dellalloc_release_metadata if there is an
+ * don't need to call delalloc_release_metadata if there is an
* error.
*/
if (*bits & EXTENT_CLEAR_META_RESV &&
@@ -1881,16 +1986,21 @@
}
/*
- * Merge bio hook, this must check the chunk tree to make sure we don't create
- * bios that span stripes or chunks
+ * btrfs_bio_fits_in_stripe - Checks whether the size of the given bio will fit
+ * in a chunk's stripe. This function ensures that bios do not span a
+ * stripe/chunk
*
- * return 1 if page cannot be merged to bio
- * return 0 if page can be merged to bio
+ * @page - The page we are about to add to the bio
+ * @size - size we want to add to the bio
+ * @bio - bio we want to ensure is smaller than a stripe
+ * @bio_flags - flags of the bio
+ *
+ * return 1 if page cannot be added to the bio
+ * return 0 if page can be added to the bio
* return error otherwise
*/
-int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
- size_t size, struct bio *bio,
- unsigned long bio_flags)
+int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
+ unsigned long bio_flags)
{
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -1898,17 +2008,19 @@
u64 length = 0;
u64 map_length;
int ret;
+ struct btrfs_io_geometry geom;
if (bio_flags & EXTENT_BIO_COMPRESSED)
return 0;
length = bio->bi_iter.bi_size;
map_length = length;
- ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
- NULL, 0);
+ ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
+ &geom);
if (ret < 0)
return ret;
- if (map_length < length + size)
+
+ if (geom.len < length + size)
return 1;
return 0;
}
@@ -1933,29 +2045,6 @@
}
/*
- * in order to insert checksums into the metadata in large chunks,
- * we wait until bio submission time. All the pages in the bio are
- * checksummed and sums are attached onto the ordered extent record.
- *
- * At IO completion time the cums attached on the ordered extent record
- * are inserted into the btree
- */
-blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
- int mirror_num)
-{
- struct inode *inode = private_data;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- blk_status_t ret;
-
- ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
- if (ret) {
- bio->bi_status = ret;
- bio_endio(bio);
- }
- return ret;
-}
-
-/*
* extent_io.c submission hook. This does the right thing for csum calculation
* on write, or reading the csums from the tree before a read.
*
@@ -1973,11 +2062,11 @@
*
* c-3) otherwise: async submit
*/
-static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+ int mirror_num,
+ unsigned long bio_flags)
+
{
- struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
@@ -2012,8 +2101,7 @@
goto mapit;
/* we're doing a write, do the async checksumming */
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
- bio_offset, inode,
- btrfs_submit_bio_start);
+ 0, inode, btrfs_submit_bio_start);
goto out;
} else if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@@ -2055,9 +2143,9 @@
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
unsigned int extra_bits,
- struct extent_state **cached_state, int dedupe)
+ struct extent_state **cached_state)
{
- WARN_ON((end & (PAGE_SIZE - 1)) == 0);
+ WARN_ON(PAGE_ALIGNED(end));
return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
extra_bits, cached_state);
}
@@ -2121,7 +2209,7 @@
}
ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
- &cached_state, 0);
+ &cached_state);
if (ret) {
mapping_set_error(page->mapping, ret);
end_extent_writepage(page, ret, page_start, page_end);
@@ -2131,7 +2219,7 @@
ClearPageChecked(page);
set_page_dirty(page);
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
@@ -2153,7 +2241,7 @@
* to fix it up. The async helper will wait for ordered extents, set
* the delalloc bit and make it safe to write the page.
*/
-static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
+int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
{
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2540,6 +2628,7 @@
struct btrfs_file_extent_item *item;
struct btrfs_ordered_extent *ordered;
struct btrfs_trans_handle *trans;
+ struct btrfs_ref ref = { 0 };
struct btrfs_root *root;
struct btrfs_key key;
struct extent_buffer *leaf;
@@ -2710,10 +2799,11 @@
inode_add_bytes(inode, len);
btrfs_release_path(path);
- ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
- new->disk_len, 0,
- backref->root_id, backref->inum,
- new->file_pos); /* start - extent_offset */
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new->bytenr,
+ new->disk_len, 0);
+ btrfs_init_data_ref(&ref, backref->root_id, backref->inum,
+ new->file_pos); /* start - extent_offset */
+ ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
@@ -2750,12 +2840,9 @@
struct btrfs_path *path;
struct sa_defrag_extent_backref *backref;
struct sa_defrag_extent_backref *prev = NULL;
- struct inode *inode;
struct rb_node *node;
int ret;
- inode = new->inode;
-
path = btrfs_alloc_path();
if (!path)
return;
@@ -3150,9 +3237,6 @@
/* once for the tree */
btrfs_put_ordered_extent(ordered_extent);
- /* Try to release some metadata so we don't get an OOM but don't wait */
- btrfs_btree_balance_dirty_nodelay(fs_info);
-
return ret;
}
@@ -3163,8 +3247,8 @@
btrfs_finish_ordered_io(ordered_extent);
}
-static void btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
- struct extent_state *state, int uptodate)
+void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
+ u64 end, int uptodate)
{
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -3197,16 +3281,23 @@
int icsum, struct page *page,
int pgoff, u64 start, size_t len)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
char *kaddr;
- u32 csum_expected;
- u32 csum = ~(u32)0;
+ u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ u8 *csum_expected;
+ u8 csum[BTRFS_CSUM_SIZE];
- csum_expected = *(((u32 *)io_bio->csum) + icsum);
+ csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size;
kaddr = kmap_atomic(page);
- csum = btrfs_csum_data(kaddr + pgoff, csum, len);
- btrfs_csum_final(csum, (u8 *)&csum);
- if (csum != csum_expected)
+ shash->tfm = fs_info->csum_shash;
+
+ crypto_shash_init(shash);
+ crypto_shash_update(shash, kaddr + pgoff, len);
+ crypto_shash_final(shash, csum);
+
+ if (memcmp(csum, csum_expected, csum_size))
goto zeroit;
kunmap_atomic(kaddr);
@@ -3271,10 +3362,35 @@
if (atomic_add_unless(&inode->i_count, -1, 1))
return;
+ atomic_inc(&fs_info->nr_delayed_iputs);
spin_lock(&fs_info->delayed_iput_lock);
ASSERT(list_empty(&binode->delayed_iput));
list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
spin_unlock(&fs_info->delayed_iput_lock);
+ if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
+ wake_up_process(fs_info->cleaner_kthread);
+}
+
+static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode)
+{
+ list_del_init(&inode->delayed_iput);
+ spin_unlock(&fs_info->delayed_iput_lock);
+ iput(&inode->vfs_inode);
+ if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
+ wake_up(&fs_info->delayed_iputs_wait);
+ spin_lock(&fs_info->delayed_iput_lock);
+}
+
+static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode)
+{
+ if (!list_empty(&inode->delayed_iput)) {
+ spin_lock(&fs_info->delayed_iput_lock);
+ if (!list_empty(&inode->delayed_iput))
+ run_delayed_iput_locked(fs_info, inode);
+ spin_unlock(&fs_info->delayed_iput_lock);
+ }
}
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
@@ -3286,14 +3402,30 @@
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
- list_del_init(&inode->delayed_iput);
- spin_unlock(&fs_info->delayed_iput_lock);
- iput(&inode->vfs_inode);
- spin_lock(&fs_info->delayed_iput_lock);
+ run_delayed_iput_locked(fs_info, inode);
}
spin_unlock(&fs_info->delayed_iput_lock);
}
+/**
+ * btrfs_wait_on_delayed_iputs - wait on the delayed iputs to be done running
+ * @fs_info - the fs_info for this fs
+ * @return - EINTR if we were killed, 0 if nothing's pending
+ *
+ * This will wait on any delayed iputs that are currently running with KILLABLE
+ * set. Once they are all done running we will return, unless we are killed in
+ * which case we return EINTR. This helps in user operations like fallocate etc
+ * that might get blocked on the iputs.
+ */
+int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
+{
+ int ret = wait_event_killable(fs_info->delayed_iputs_wait,
+ atomic_read(&fs_info->nr_delayed_iputs) == 0);
+ if (ret)
+ return -EINTR;
+ return 0;
+}
+
/*
* This creates an orphan entry for the given inode in case something goes wrong
* in the middle of an unlink.
@@ -3479,8 +3611,6 @@
/* this will do delete_inode and everything for us */
iput(inode);
- if (ret)
- goto out;
}
/* release the path since we're done with it */
btrfs_release_path(path);
@@ -3575,10 +3705,11 @@
/*
* read an inode from the btree into the in-memory inode
*/
-static int btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode,
+ struct btrfs_path *in_path)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_path *path;
+ struct btrfs_path *path = in_path;
struct extent_buffer *leaf;
struct btrfs_inode_item *inode_item;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3594,15 +3725,18 @@
if (!ret)
filled = true;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ if (!path) {
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ }
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
if (ret) {
- btrfs_free_path(path);
+ if (path != in_path)
+ btrfs_free_path(path);
return ret;
}
@@ -3727,7 +3861,8 @@
btrfs_ino(BTRFS_I(inode)),
root->root_key.objectid, ret);
}
- btrfs_free_path(path);
+ if (path != in_path)
+ btrfs_free_path(path);
if (!maybe_acls)
cache_no_acl(inode);
@@ -3746,7 +3881,7 @@
case S_IFLNK:
inode->i_op = &btrfs_symlink_inode_operations;
inode_nohighmem(inode);
- inode->i_mapping->a_ops = &btrfs_symlink_aops;
+ inode->i_mapping->a_ops = &btrfs_aops;
break;
default:
inode->i_op = &btrfs_special_inode_operations;
@@ -3768,7 +3903,7 @@
{
struct btrfs_map_token token;
- btrfs_init_map_token(&token);
+ btrfs_init_map_token(&token, leaf);
btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
@@ -3902,9 +4037,7 @@
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
int ret = 0;
- struct extent_buffer *leaf;
struct btrfs_dir_item *di;
- struct btrfs_key key;
u64 index;
u64 ino = btrfs_ino(inode);
u64 dir_ino = btrfs_ino(dir);
@@ -3918,16 +4051,10 @@
path->leave_spinning = 1;
di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
name, name_len, -1);
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
+ if (IS_ERR_OR_NULL(di)) {
+ ret = di ? PTR_ERR(di) : -ENOENT;
goto err;
}
- if (!di) {
- ret = -ENOENT;
- goto err;
- }
- leaf = path->nodes[0];
- btrfs_dir_item_key_to_cpu(leaf, di, &key);
ret = btrfs_delete_one_dir_name(trans, root, path, di);
if (ret)
goto err;
@@ -3980,6 +4107,17 @@
ret = 0;
else if (ret)
btrfs_abort_transaction(trans, ret);
+
+ /*
+ * If we have a pending delayed iput we could end up with the final iput
+ * being run in btrfs-cleaner context. If we have enough of these built
+ * up we can end up burning a lot of time in btrfs-cleaner without any
+ * way to throttle the unlinks. Since we're currently holding a ref on
+ * the inode we can run the delayed iput here without any issues as the
+ * final iput won't be done until after we drop the ref we're currently
+ * holding.
+ */
+ btrfs_run_delayed_iput(fs_info, inode);
err:
btrfs_free_path(path);
if (ret)
@@ -4083,10 +4221,7 @@
di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
name, name_len, -1);
if (IS_ERR_OR_NULL(di)) {
- if (!di)
- ret = -ENOENT;
- else
- ret = PTR_ERR(di);
+ ret = di ? PTR_ERR(di) : -ENOENT;
goto out;
}
@@ -4278,18 +4413,17 @@
* again is not run concurrently.
*/
spin_lock(&dest->root_item_lock);
- root_flags = btrfs_root_flags(&dest->root_item);
- if (dest->send_in_progress == 0) {
- btrfs_set_root_flags(&dest->root_item,
- root_flags | BTRFS_ROOT_SUBVOL_DEAD);
- spin_unlock(&dest->root_item_lock);
- } else {
+ if (dest->send_in_progress) {
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu during send",
dest->root_key.objectid);
return -EPERM;
}
+ root_flags = btrfs_root_flags(&dest->root_item);
+ btrfs_set_root_flags(&dest->root_item,
+ root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+ spin_unlock(&dest->root_item_lock);
down_write(&fs_info->subvol_sem);
@@ -4453,31 +4587,6 @@
return err;
}
-static int truncate_space_check(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytes_deleted)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- int ret;
-
- /*
- * This is only used to apply pressure to the enospc system, we don't
- * intend to use this reservation at all.
- */
- bytes_deleted = btrfs_csum_bytes_to_leaves(fs_info, bytes_deleted);
- bytes_deleted *= fs_info->nodesize;
- ret = btrfs_block_rsv_add(root, &fs_info->trans_block_rsv,
- bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
- if (!ret) {
- trace_btrfs_space_reservation(fs_info, "transaction",
- trans->transid,
- bytes_deleted, 1);
- trans->bytes_reserved += bytes_deleted;
- }
- return ret;
-
-}
-
/*
* Return this if we need to call truncate_block for the last bit of the
* truncate.
@@ -4522,7 +4631,6 @@
u64 bytes_deleted = 0;
bool be_nice = false;
bool should_throttle = false;
- bool should_end = false;
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
@@ -4553,7 +4661,7 @@
/*
* This function is also used to drop the items in the log tree before
* we relog the inode, so if root != BTRFS_I(inode)->root, it means
- * it is used to drop the loged items. So we shouldn't kill the delayed
+ * it is used to drop the logged items. So we shouldn't kill the delayed
* items.
*/
if (min_type == 0 && root == BTRFS_I(inode)->root)
@@ -4686,7 +4794,7 @@
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
size = btrfs_file_extent_calc_inline_size(size);
- btrfs_truncate_item(root->fs_info, path, size, 1);
+ btrfs_truncate_item(path, size, 1);
} else if (!del_item) {
/*
* We have to bail so the last_size is set to
@@ -4725,27 +4833,23 @@
if (found_extent &&
(test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
root == fs_info->tree_root)) {
+ struct btrfs_ref ref = { 0 };
+
btrfs_set_path_blocking(path);
bytes_deleted += extent_num_bytes;
- ret = btrfs_free_extent(trans, root, extent_start,
- extent_num_bytes, 0,
- btrfs_header_owner(leaf),
- ino, extent_offset);
+
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
+ extent_start, extent_num_bytes, 0);
+ ref.real_root = root->root_key.objectid;
+ btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+ ino, extent_offset);
+ ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
break;
}
- if (btrfs_should_throttle_delayed_refs(trans, fs_info))
- btrfs_async_run_delayed_refs(fs_info,
- trans->delayed_ref_updates * 2,
- trans->transid, 0);
if (be_nice) {
- if (truncate_space_check(trans, root,
- extent_num_bytes)) {
- should_end = true;
- }
- if (btrfs_should_throttle_delayed_refs(trans,
- fs_info))
+ if (btrfs_should_throttle_delayed_refs(trans))
should_throttle = true;
}
}
@@ -4755,7 +4859,7 @@
if (path->slots[0] == 0 ||
path->slots[0] != pending_del_slot ||
- should_throttle || should_end) {
+ should_throttle) {
if (pending_del_nr) {
ret = btrfs_del_items(trans, root, path,
pending_del_slot,
@@ -4767,23 +4871,24 @@
pending_del_nr = 0;
}
btrfs_release_path(path);
- if (should_throttle) {
- unsigned long updates = trans->delayed_ref_updates;
- if (updates) {
- trans->delayed_ref_updates = 0;
- ret = btrfs_run_delayed_refs(trans,
- updates * 2);
- if (ret)
- break;
- }
- }
+
/*
- * if we failed to refill our space rsv, bail out
- * and let the transaction restart
+ * We can generate a lot of delayed refs, so we need to
+ * throttle every once in a while and make sure we're
+ * adding enough space to keep up with the work we are
+ * generating. Since we hold a transaction here we
+ * can't flush, and we don't want to FLUSH_LIMIT because
+ * we could have generated too many delayed refs to
+ * actually allocate, so just bail if we're short and
+ * let the normal reservation dance happen higher up.
*/
- if (should_end) {
- ret = -EAGAIN;
- break;
+ if (should_throttle) {
+ ret = btrfs_delayed_refs_rsv_refill(fs_info,
+ BTRFS_RESERVE_NO_FLUSH);
+ if (ret) {
+ ret = -EAGAIN;
+ break;
+ }
}
goto search_again;
} else {
@@ -4809,18 +4914,6 @@
}
btrfs_free_path(path);
-
- if (be_nice && bytes_deleted > SZ_32M && (ret >= 0 || ret == -EAGAIN)) {
- unsigned long updates = trans->delayed_ref_updates;
- int err;
-
- if (updates) {
- trans->delayed_ref_updates = 0;
- err = btrfs_run_delayed_refs(trans, updates * 2);
- if (err)
- ret = err;
- }
- }
return ret;
}
@@ -4871,7 +4964,7 @@
if (!page) {
btrfs_delalloc_release_space(inode, data_reserved,
block_start, blocksize, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
ret = -ENOMEM;
goto out;
}
@@ -4906,12 +4999,11 @@
}
clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- 0, 0, &cached_state);
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
+ 0, 0, &cached_state);
ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
- &cached_state, 0);
+ &cached_state);
if (ret) {
unlock_extent_cached(io_tree, block_start, block_end,
&cached_state);
@@ -4939,7 +5031,7 @@
if (ret)
btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
unlock_page(page);
put_page(page);
out:
@@ -5024,21 +5116,8 @@
if (size <= hole_start)
return 0;
- while (1) {
- struct btrfs_ordered_extent *ordered;
-
- lock_extent_bits(io_tree, hole_start, block_end - 1,
- &cached_state);
- ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start,
- block_end - hole_start);
- if (!ordered)
- break;
- unlock_extent_cached(io_tree, hole_start, block_end - 1,
- &cached_state);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
-
+ btrfs_lock_and_flush_ordered_range(io_tree, BTRFS_I(inode), hole_start,
+ block_end - 1, &cached_state);
cur_offset = hole_start;
while (1) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
@@ -5165,7 +5244,7 @@
truncate_setsize(inode, newsize);
- /* Disable nonlocked read DIO to avoid the end less truncate */
+ /* Disable nonlocked read DIO to avoid the endless truncate */
btrfs_inode_block_unlocked_dio(BTRFS_I(inode));
inode_dio_wait(inode);
btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));
@@ -5243,10 +5322,10 @@
truncate_inode_pages_final(&inode->i_data);
write_lock(&map_tree->lock);
- while (!RB_EMPTY_ROOT(&map_tree->map)) {
+ while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) {
struct extent_map *em;
- node = rb_first(&map_tree->map);
+ node = rb_first_cached(&map_tree->map);
em = rb_entry(node, struct extent_map, rb_node);
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
@@ -5305,9 +5384,9 @@
btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
clear_extent_bit(io_tree, start, end,
- EXTENT_LOCKED | EXTENT_DIRTY |
- EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, 1, 1, &cached_state);
+ EXTENT_LOCKED | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
+ &cached_state);
cond_resched();
spin_lock(&io_tree->lock);
@@ -5316,43 +5395,54 @@
}
static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv,
- u64 min_size)
+ struct btrfs_block_rsv *rsv)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- int failures = 0;
+ struct btrfs_trans_handle *trans;
+ u64 delayed_refs_extra = btrfs_calc_insert_metadata_size(fs_info, 1);
+ int ret;
- for (;;) {
- struct btrfs_trans_handle *trans;
- int ret;
-
- ret = btrfs_block_rsv_refill(root, rsv, min_size,
- BTRFS_RESERVE_FLUSH_LIMIT);
-
- if (ret && ++failures > 2) {
- btrfs_warn(fs_info,
- "could not allocate space for a delete; will truncate on mount");
- return ERR_PTR(-ENOSPC);
- }
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans) || !ret)
- return trans;
-
+ /*
+ * Eviction should be taking place somewhere safe because of our
+ * delayed iputs. However the normal flushing code will run delayed
+ * iputs, so we cannot use FLUSH_ALL otherwise we'll deadlock.
+ *
+ * We reserve the delayed_refs_extra here again because we can't use
+ * btrfs_start_transaction(root, 0) for the same deadlocky reason as
+ * above. We reserve our extra bit here because we generate a ton of
+ * delayed refs activity by truncating.
+ *
+ * If we cannot make our reservation we'll attempt to steal from the
+ * global reserve, because we really want to be able to free up space.
+ */
+ ret = btrfs_block_rsv_refill(root, rsv, rsv->size + delayed_refs_extra,
+ BTRFS_RESERVE_FLUSH_EVICT);
+ if (ret) {
/*
* Try to steal from the global reserve if there is space for
* it.
*/
- if (!btrfs_check_space_for_delayed_refs(trans, fs_info) &&
- !btrfs_block_rsv_migrate(global_rsv, rsv, min_size, 0))
- return trans;
-
- /* If not, commit and try again. */
- ret = btrfs_commit_transaction(trans);
- if (ret)
- return ERR_PTR(ret);
+ if (btrfs_check_space_for_delayed_refs(fs_info) ||
+ btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0)) {
+ btrfs_warn(fs_info,
+ "could not allocate space for delete; will truncate on mount");
+ return ERR_PTR(-ENOSPC);
+ }
+ delayed_refs_extra = 0;
}
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans))
+ return trans;
+
+ if (delayed_refs_extra) {
+ trans->block_rsv = &fs_info->trans_block_rsv;
+ trans->bytes_reserved = delayed_refs_extra;
+ btrfs_block_rsv_migrate(rsv, trans->block_rsv,
+ delayed_refs_extra, 1);
+ }
+ return trans;
}
void btrfs_evict_inode(struct inode *inode)
@@ -5361,7 +5451,6 @@
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv;
- u64 min_size;
int ret;
trace_btrfs_inode_evict(inode);
@@ -5371,8 +5460,6 @@
return;
}
- min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);
-
evict_inode_truncate_pages(inode);
if (inode->i_nlink &&
@@ -5383,9 +5470,6 @@
if (is_bad_inode(inode))
goto no_delete;
- /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
- if (!special_file(inode->i_mode))
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);
@@ -5405,13 +5489,13 @@
rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
if (!rsv)
goto no_delete;
- rsv->size = min_size;
+ rsv->size = btrfs_calc_metadata_size(fs_info, 1);
rsv->failfast = 1;
btrfs_i_size_write(BTRFS_I(inode), 0);
while (1) {
- trans = evict_refill_and_join(root, rsv, min_size);
+ trans = evict_refill_and_join(root, rsv);
if (IS_ERR(trans))
goto free_rsv;
@@ -5436,7 +5520,7 @@
* If it turns out that we are dropping too many of these, we might want
* to add a mechanism for retrying these after a commit.
*/
- trans = evict_refill_and_join(root, rsv, min_size);
+ trans = evict_refill_and_join(root, rsv);
if (!IS_ERR(trans)) {
trans->block_rsv = rsv;
btrfs_orphan_del(trans, BTRFS_I(inode));
@@ -5461,12 +5545,14 @@
}
/*
- * this returns the key found in the dir entry in the location pointer.
+ * Copy the key found in the dir entry to the location pointer, fill @type
+ * with BTRFS_FT_*, and return 0 on success.
+ *
* If no dir entries were found, returns -ENOENT.
* If found a corrupted location in dir entry, returns -EUCLEAN.
*/
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
- struct btrfs_key *location)
+ struct btrfs_key *location, u8 *type)
{
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
@@ -5481,12 +5567,8 @@
di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
name, namelen, 0);
- if (!di) {
- ret = -ENOENT;
- goto out;
- }
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
+ if (IS_ERR_OR_NULL(di)) {
+ ret = di ? PTR_ERR(di) : -ENOENT;
goto out;
}
@@ -5499,6 +5581,8 @@
__func__, name, btrfs_ino(BTRFS_I(dir)),
location->objectid, location->type, location->offset);
}
+ if (!ret)
+ *type = btrfs_dir_type(path->nodes[0], di);
out:
btrfs_free_path(path);
return ret;
@@ -5669,8 +5753,9 @@
/* Get an inode object given its location and corresponding root.
* Sets *new to 1 if the inode had to be read from disk.
*/
-struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
- struct btrfs_root *root, int *new)
+struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+ struct btrfs_root *root, int *new,
+ struct btrfs_path *path)
{
struct inode *inode;
@@ -5681,7 +5766,7 @@
if (inode->i_state & I_NEW) {
int ret;
- ret = btrfs_read_locked_inode(inode);
+ ret = btrfs_read_locked_inode(inode, path);
if (!ret) {
inode_tree_add(inode);
unlock_new_inode(inode);
@@ -5703,6 +5788,12 @@
return inode;
}
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+ struct btrfs_root *root, int *new)
+{
+ return btrfs_iget_path(s, location, root, new, NULL);
+}
+
static struct inode *new_simple_dir(struct super_block *s,
struct btrfs_key *key,
struct btrfs_root *root)
@@ -5729,6 +5820,24 @@
return inode;
}
+static inline u8 btrfs_inode_type(struct inode *inode)
+{
+ /*
+ * Compile-time asserts that generic FT_* types still match
+ * BTRFS_FT_* types
+ */
+ BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
+ BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
+ BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
+ BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
+ BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
+ BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
+ BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
+ BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
+
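+ /*
+ * The generic helper maps i_mode to an FT_* value, which the
+ * asserts above guarantee matches the on-disk BTRFS_FT_* value.
+ */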
+ return fs_umode_to_ftype(inode->i_mode);
+}
+
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -5736,18 +5845,31 @@
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
+ u8 di_type = 0;
int index;
int ret = 0;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- ret = btrfs_inode_by_name(dir, dentry, &location);
+ ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
if (ret < 0)
return ERR_PTR(ret);
if (location.type == BTRFS_INODE_ITEM_KEY) {
inode = btrfs_iget(dir->i_sb, &location, root, NULL);
+ if (IS_ERR(inode))
+ return inode;
+
+ /* Do extra check against inode mode with di_type */
+ if (btrfs_inode_type(inode) != di_type) {
+ btrfs_crit(fs_info,
+"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
+ inode->i_mode, btrfs_inode_type(inode),
+ di_type);
+ iput(inode);
+ return ERR_PTR(-EUCLEAN);
+ }
return inode;
}
@@ -5800,23 +5922,13 @@
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
- struct inode *inode;
+ struct inode *inode = btrfs_lookup_dentry(dir, dentry);
- inode = btrfs_lookup_dentry(dir, dentry);
- if (IS_ERR(inode)) {
- if (PTR_ERR(inode) == -ENOENT)
- inode = NULL;
- else
- return ERR_CAST(inode);
- }
-
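+ /* A missing entry becomes a negative dentry via d_splice_alias(NULL). */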
+ if (inode == ERR_PTR(-ENOENT))
+ inode = NULL;
return d_splice_alias(inode, dentry);
}
-unsigned char btrfs_filetype_table[] = {
- DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-};
-
/*
* All this infrastructure exists because dir_emit can fault, and we are holding
* the tree lock when doing readdir. For now just allocate a buffer and copy
@@ -5955,7 +6067,7 @@
name_ptr = (char *)(entry + 1);
read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
name_len);
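+ /* Map the on-disk BTRFS_FT_* type to the VFS DT_* value. */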
- put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)],
+ put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
&entry->type);
btrfs_dir_item_key_to_cpu(leaf, di, &location);
put_unaligned(location.objectid, &entry->ino);
@@ -6206,13 +6318,16 @@
u32 sizes[2];
int nitems = name ? 2 : 1;
unsigned long ptr;
+ unsigned int nofs_flag;
int ret;
path = btrfs_alloc_path();
if (!path)
return ERR_PTR(-ENOMEM);
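+ /*
+ * new_inode() allocates with GFP_KERNEL; scope NOFS here since the
+ * caller usually holds a transaction handle and reclaim must not
+ * re-enter the filesystem.
+ */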
+ nofs_flag = memalloc_nofs_save();
inode = new_inode(fs_info->sb);
+ memalloc_nofs_restore(nofs_flag);
if (!inode) {
btrfs_free_path(path);
return ERR_PTR(-ENOMEM);
@@ -6358,11 +6473,6 @@
return ERR_PTR(ret);
}
-static inline u8 btrfs_inode_type(struct inode *inode)
-{
- return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
-}
-
/*
* utility function to add 'inode' into 'parent_inode' with
* a give name and a given sequence number.
@@ -6400,8 +6510,7 @@
if (ret)
return ret;
- ret = btrfs_insert_dir_item(trans, root, name, name_len,
- parent_inode, &key,
+ ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
btrfs_inode_type(&inode->vfs_inode), index);
if (ret == -EEXIST || ret == -EOVERFLOW)
goto fail_dir_item;
@@ -6413,8 +6522,18 @@
btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
name_len * 2);
inode_inc_iversion(&parent_inode->vfs_inode);
- parent_inode->vfs_inode.i_mtime = parent_inode->vfs_inode.i_ctime =
- current_time(&parent_inode->vfs_inode);
+ /*
+ * If we are replaying a log tree, we do not want to update the mtime
+ * and ctime of the parent directory with the current time, since the
+ * log replay procedure is responsible for setting them to their correct
+ * values (the ones it had when the fsync was done).
+ */
+ if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
+ struct timespec64 now = current_time(&parent_inode->vfs_inode);
+
+ parent_inode->vfs_inode.i_mtime = now;
+ parent_inode->vfs_inode.i_ctime = now;
+ }
ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -6427,14 +6546,19 @@
err = btrfs_del_root_ref(trans, key.objectid,
root->root_key.objectid, parent_ino,
&local_index, name, name_len);
-
+ if (err)
+ btrfs_abort_transaction(trans, err);
} else if (add_backref) {
u64 local_index;
int err;
err = btrfs_del_inode_ref(trans, root, name, name_len,
ino, parent_ino, &local_index);
+ if (err)
+ btrfs_abort_transaction(trans, err);
}
+
+ /* Return the original error code */
return ret;
}
@@ -6594,7 +6718,7 @@
int drop_inode = 0;
/* do not allow sys_link() across subvolumes of the same device */
- if (root->objectid != BTRFS_I(inode)->root->objectid)
+ if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid)
return -EXDEV;
if (inode->i_nlink >= BTRFS_LINK_MAX)
@@ -6673,7 +6797,6 @@
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
int err = 0;
- int drop_on_err = 0;
u64 objectid = 0;
u64 index = 0;
@@ -6699,7 +6822,6 @@
goto out_fail;
}
- drop_on_err = 1;
/* these must be set before we unlock the inode */
inode->i_op = &btrfs_dir_inode_operations;
inode->i_fop = &btrfs_dir_file_operations;
@@ -6720,7 +6842,6 @@
goto out_fail;
d_instantiate_new(dentry, inode);
- drop_on_err = 0;
out_fail:
btrfs_end_transaction(trans);
@@ -6787,9 +6908,9 @@
* This also copies inline extents directly into the page.
*/
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
- struct page *page,
- size_t pg_offset, u64 start, u64 len,
- int create)
+ struct page *page,
+ size_t pg_offset, u64 start, u64 len,
+ int create)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret;
@@ -6797,7 +6918,7 @@
u64 extent_start = 0;
u64 extent_end = 0;
u64 objectid = btrfs_ino(inode);
- u32 found_type;
+ int extent_type = -1;
struct btrfs_path *path = NULL;
struct btrfs_root *root = inode->root;
struct btrfs_file_extent_item *item;
@@ -6833,26 +6954,26 @@
em->len = (u64)-1;
em->block_len = (u64)-1;
+ path = btrfs_alloc_path();
if (!path) {
- path = btrfs_alloc_path();
- if (!path) {
- err = -ENOMEM;
- goto out;
- }
- /*
- * Chances are we'll be called again, so go ahead and do
- * readahead
- */
- path->reada = READA_FORWARD;
+ err = -ENOMEM;
+ goto out;
}
+ /* Chances are we'll be called again, so go ahead and do readahead */
+ path->reada = READA_FORWARD;
+
+ /*
+ * Unless we have to uncompress an inline extent, nothing here should
+ * sleep, so the path can be left spinning.
+ */
+ path->leave_spinning = 1;
+
ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
if (ret < 0) {
err = ret;
goto out;
- }
-
- if (ret != 0) {
+ } else if (ret > 0) {
if (path->slots[0] == 0)
goto not_found;
path->slots[0]--;
@@ -6861,11 +6982,9 @@
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- /* are we inside the extent that was found? */
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- found_type = found_key.type;
if (found_key.objectid != objectid ||
- found_type != BTRFS_EXTENT_DATA_KEY) {
+ found_key.type != BTRFS_EXTENT_DATA_KEY) {
/*
* If we back up past the first extent we want to move forward
* and see if there is an extent in front of us, otherwise we'll
@@ -6876,16 +6995,24 @@
goto next;
}
- found_type = btrfs_file_extent_type(leaf, item);
+ extent_type = btrfs_file_extent_type(leaf, item);
extent_start = found_key.offset;
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ if (extent_type == BTRFS_FILE_EXTENT_REG ||
+ extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ /* Only a regular file can have regular/prealloc extents */
+ if (!S_ISREG(inode->vfs_inode.i_mode)) {
+ ret = -EUCLEAN;
+ btrfs_crit(fs_info,
+ "regular/prealloc extent found for non-regular inode %llu",
+ btrfs_ino(inode));
+ goto out;
+ }
extent_end = extent_start +
btrfs_file_extent_num_bytes(leaf, item);
trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
extent_start);
- } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+ } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
size = btrfs_file_extent_ram_bytes(leaf, item);
@@ -6904,9 +7031,9 @@
if (ret < 0) {
err = ret;
goto out;
- }
- if (ret > 0)
+ } else if (ret > 0) {
goto not_found;
+ }
leaf = path->nodes[0];
}
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
@@ -6917,19 +7044,22 @@
goto not_found;
if (start > found_key.offset)
goto next;
+
+ /* New extent overlaps with existing one */
em->start = start;
em->orig_start = start;
em->len = found_key.offset - start;
- goto not_found_em;
+ em->block_start = EXTENT_MAP_HOLE;
+ goto insert;
}
btrfs_extent_item_to_extent_map(inode, path, item,
new_inline, em);
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ if (extent_type == BTRFS_FILE_EXTENT_REG ||
+ extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
goto insert;
- } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+ } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
unsigned long ptr;
char *map;
size_t size;
@@ -6948,6 +7078,8 @@
em->orig_block_len = em->len;
em->orig_start = em->start;
ptr = btrfs_file_extent_inline_start(item) + extent_offset;
+
+ btrfs_set_path_blocking(path);
if (!PageUptodate(page)) {
if (btrfs_file_extent_compression(leaf, item) !=
BTRFS_COMPRESS_NONE) {
@@ -6978,7 +7110,6 @@
em->start = start;
em->orig_start = start;
em->len = len;
-not_found_em:
em->block_start = EXTENT_MAP_HOLE;
insert:
btrfs_release_path(path);
@@ -6995,10 +7126,10 @@
err = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
write_unlock(&em_tree->lock);
out:
+ btrfs_free_path(path);
trace_btrfs_get_extent(root, inode, em);
- btrfs_free_path(path);
if (err) {
free_extent_map(em);
return ERR_PTR(err);
@@ -7008,19 +7139,17 @@
}
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
- struct page *page,
- size_t pg_offset, u64 start, u64 len,
- int create)
+ u64 start, u64 len)
{
struct extent_map *em;
struct extent_map *hole_em = NULL;
- u64 range_start = start;
+ u64 delalloc_start = start;
u64 end;
- u64 found;
- u64 found_end;
+ u64 delalloc_len;
+ u64 delalloc_end;
int err = 0;
- em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+ em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
if (IS_ERR(em))
return em;
/*
@@ -7045,80 +7174,84 @@
em = NULL;
/* ok, we didn't find anything, lets look for delalloc */
- found = count_range_bits(&inode->io_tree, &range_start,
+ delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start,
end, len, EXTENT_DELALLOC, 1);
- found_end = range_start + found;
- if (found_end < range_start)
- found_end = (u64)-1;
+ delalloc_end = delalloc_start + delalloc_len;
+ if (delalloc_end < delalloc_start)
+ delalloc_end = (u64)-1;
/*
- * we didn't find anything useful, return
- * the original results from get_extent()
+ * We didn't find anything useful, return the original results from
+ * get_extent()
*/
- if (range_start > end || found_end <= start) {
+ if (delalloc_start > end || delalloc_end <= start) {
em = hole_em;
hole_em = NULL;
goto out;
}
- /* adjust the range_start to make sure it doesn't
- * go backwards from the start they passed in
+ /*
+ * Adjust the delalloc_start to make sure it doesn't go backwards from
+ * the start they passed in
*/
- range_start = max(start, range_start);
- found = found_end - range_start;
+ delalloc_start = max(start, delalloc_start);
+ delalloc_len = delalloc_end - delalloc_start;
- if (found > 0) {
- u64 hole_start = start;
- u64 hole_len = len;
+ if (delalloc_len > 0) {
+ u64 hole_start;
+ u64 hole_len;
+ const u64 hole_end = extent_map_end(hole_em);
em = alloc_extent_map();
if (!em) {
err = -ENOMEM;
goto out;
}
- /*
- * when btrfs_get_extent can't find anything it
- * returns one huge hole
- *
- * make sure what it found really fits our range, and
- * adjust to make sure it is based on the start from
- * the caller
- */
- if (hole_em) {
- u64 calc_end = extent_map_end(hole_em);
-
- if (calc_end <= start || (hole_em->start > end)) {
- free_extent_map(hole_em);
- hole_em = NULL;
- } else {
- hole_start = max(hole_em->start, start);
- hole_len = calc_end - hole_start;
- }
- }
em->bdev = NULL;
- if (hole_em && range_start > hole_start) {
- /* our hole starts before our delalloc, so we
- * have to return just the parts of the hole
- * that go until the delalloc starts
+
+ ASSERT(hole_em);
+ /*
+ * When btrfs_get_extent can't find anything it returns one
+ * huge hole
+ *
+ * Make sure what it found really fits our range, and adjust it to
+ * be based on the start from the caller
+ */
+ if (hole_end <= start || hole_em->start > end) {
+ free_extent_map(hole_em);
+ hole_em = NULL;
+ } else {
+ hole_start = max(hole_em->start, start);
+ hole_len = hole_end - hole_start;
+ }
+
+ if (hole_em && delalloc_start > hole_start) {
+ /*
+ * Our hole starts before our delalloc, so we have to
+ * return just the part of the hole that extends up to
+ * where the delalloc starts
*/
- em->len = min(hole_len,
- range_start - hole_start);
+ em->len = min(hole_len, delalloc_start - hole_start);
em->start = hole_start;
em->orig_start = hole_start;
/*
- * don't adjust block start at all,
- * it is fixed at EXTENT_MAP_HOLE
+ * Don't adjust block start at all, it is fixed at
+ * EXTENT_MAP_HOLE
*/
em->block_start = hole_em->block_start;
em->block_len = hole_len;
if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
} else {
- em->start = range_start;
- em->len = found;
- em->orig_start = range_start;
+ /*
+ * The hole is outside the passed range or it starts
+ * after the delalloc range
+ */
+ em->start = delalloc_start;
+ em->len = delalloc_len;
+ em->orig_start = delalloc_start;
em->block_start = EXTENT_MAP_DELALLOC;
- em->block_len = found;
+ em->block_len = delalloc_len;
}
} else {
return hole_em;
@@ -7614,12 +7747,9 @@
u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
u64 len = bh_result->b_size;
- int unlock_bits = EXTENT_LOCKED;
int ret = 0;
- if (create)
- unlock_bits |= EXTENT_DIRTY;
- else
+ if (!create)
len = min_t(u64, len, fs_info->sectorsize);
lockstart = start;
@@ -7678,9 +7808,8 @@
if (ret < 0)
goto unlock_err;
- /* clear and unlock the entire range */
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- unlock_bits, 1, 0, &cached_state);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, &cached_state);
} else {
ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
start, len);
@@ -7696,9 +7825,8 @@
*/
lockstart = start + bh_result->b_size;
if (lockstart < lockend) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, unlock_bits, 1, 0,
- &cached_state);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+ lockstart, lockend, &cached_state);
} else {
free_extent_state(cached_state);
}
@@ -7709,8 +7837,8 @@
return 0;
unlock_err:
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- unlock_bits, 1, 0, &cached_state);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ &cached_state);
err:
if (dio_data)
current->journal_info = dio_data;
@@ -7838,7 +7966,7 @@
struct inode *inode = done->inode;
struct bio_vec *bvec;
struct extent_io_tree *io_tree, *failure_tree;
- int i;
+ struct bvec_iter_all iter_all;
if (bio->bi_status)
goto end;
@@ -7850,7 +7978,7 @@
done->uptodate = 1;
ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, i)
+ bio_for_each_segment_all(bvec, bio, iter_all)
clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree,
io_tree, done->start, bvec->bv_page,
btrfs_ino(BTRFS_I(inode)), 0);
@@ -7928,7 +8056,8 @@
struct bio_vec *bvec;
int uptodate;
int ret;
- int i;
+ int i = 0;
+ struct bvec_iter_all iter_all;
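+ /*
+ * Track the per-sector csum index by hand, since
+ * bio_for_each_segment_all() no longer provides one.
+ */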
if (bio->bi_status)
goto end;
@@ -7942,7 +8071,7 @@
failure_tree = &BTRFS_I(inode)->io_failure_tree;
ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
bvec->bv_offset, done->start,
bvec->bv_len);
@@ -7954,6 +8083,7 @@
bvec->bv_offset);
else
uptodate = 0;
+ i++;
}
done->uptodate = uptodate;
@@ -8070,9 +8200,7 @@
dio_bio->bi_status = err;
dio_end_io(dio_bio);
-
- if (io_bio->end_io)
- io_bio->end_io(io_bio, blk_status_to_errno(err));
+ btrfs_io_bio_free_csum(io_bio);
bio_put(bio);
}
@@ -8115,7 +8243,7 @@
return;
/*
* Our bio might span multiple ordered extents. In this case
- * we keep goin until we have accounted the whole dio.
+ * we keep going until we have accounted the whole dio.
*/
if (ordered_offset < offset + bytes) {
ordered_bytes = offset + bytes - ordered_offset;
@@ -8274,22 +8402,21 @@
struct bio *orig_bio = dip->orig_bio;
u64 start_sector = orig_bio->bi_iter.bi_sector;
u64 file_offset = dip->logical_offset;
- u64 map_length;
int async_submit = 0;
u64 submit_len;
int clone_offset = 0;
int clone_len;
int ret;
blk_status_t status;
+ struct btrfs_io_geometry geom;
- map_length = orig_bio->bi_iter.bi_size;
- submit_len = map_length;
- ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9,
- &map_length, NULL, 0);
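+ /*
+ * Ask the io geometry helper how many bytes of this bio fit into
+ * the current stripe before the bio has to be split.
+ */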
+ submit_len = orig_bio->bi_iter.bi_size;
+ ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
+ start_sector << 9, submit_len, &geom);
if (ret)
return -EIO;
- if (map_length >= submit_len) {
+ if (geom.len >= submit_len) {
bio = orig_bio;
dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
goto submit;
@@ -8302,10 +8429,10 @@
async_submit = 1;
/* bio split */
- ASSERT(map_length <= INT_MAX);
+ ASSERT(geom.len <= INT_MAX);
atomic_inc(&dip->pending_bios);
do {
- clone_len = min_t(int, submit_len, map_length);
+ clone_len = min_t(int, submit_len, geom.len);
/*
* This will never fail as it's passing GFP_NOFS and
@@ -8342,9 +8469,8 @@
start_sector += clone_len >> 9;
file_offset += clone_len;
- map_length = submit_len;
- ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
- start_sector << 9, &map_length, NULL, 0);
+ ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
+ start_sector << 9, submit_len, &geom);
if (ret)
goto out_err;
} while (submit_len > 0);
@@ -8425,8 +8551,7 @@
if (!ret)
return;
- if (io_bio->end_io)
- io_bio->end_io(io_bio, ret);
+ btrfs_io_bio_free_csum(io_bio);
free_ordered:
/*
@@ -8597,7 +8722,7 @@
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, data_reserved,
offset, count - (size_t)ret, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), count);
}
out:
if (wakeup)
@@ -8728,8 +8853,7 @@
*/
if (!inode_evicting)
clear_extent_bit(tree, start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DELALLOC_NEW |
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 0, &cached_state);
/*
@@ -8784,8 +8908,7 @@
if (PageDirty(page))
btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
- clear_extent_bit(tree, page_start, page_end,
- EXTENT_LOCKED | EXTENT_DIRTY |
+ clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
&cached_state);
@@ -8913,12 +9036,11 @@
* reserve data&meta space before lock_page() (see above comments).
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- 0, 0, &cached_state);
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 0, 0, &cached_state);
ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0,
- &cached_state, 0);
+ &cached_state);
if (ret2) {
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state);
@@ -8929,7 +9051,7 @@
/* page is wholly or partially inside EOF */
if (page_start + PAGE_SIZE > size)
- zero_start = size & ~PAGE_MASK;
+ zero_start = offset_in_page(size);
else
zero_start = PAGE_SIZE;
@@ -8950,7 +9072,7 @@
unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
if (!ret2) {
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
return VM_FAULT_LOCKED;
@@ -8959,7 +9081,7 @@
out_unlock:
unlock_page(page);
out:
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
btrfs_delalloc_release_space(inode, data_reserved, page_start,
reserved_space, (ret != 0));
out_noreserve:
@@ -8976,7 +9098,7 @@
int ret;
struct btrfs_trans_handle *trans;
u64 mask = fs_info->sectorsize - 1;
- u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);
+ u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
if (!skip_writeback) {
ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
@@ -9031,7 +9153,7 @@
/* Migrate the slack space for the truncate to our reserve */
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
- min_size, 0);
+ min_size, false);
BUG_ON(ret);
/*
@@ -9068,7 +9190,7 @@
btrfs_block_rsv_release(fs_info, rsv, -1);
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
- rsv, min_size, 0);
+ rsv, min_size, false);
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
}
@@ -9192,10 +9314,11 @@
inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree);
- extent_io_tree_init(&ei->io_tree, inode);
- extent_io_tree_init(&ei->io_failure_tree, inode);
- ei->io_tree.track_uptodate = 1;
- ei->io_failure_tree.track_uptodate = 1;
+ extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
+ extent_io_tree_init(fs_info, &ei->io_failure_tree,
+ IO_TREE_INODE_IO_FAILURE, inode);
+ ei->io_tree.track_uptodate = true;
+ ei->io_failure_tree.track_uptodate = true;
atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
mutex_init(&ei->delalloc_mutex);
@@ -9216,9 +9339,8 @@
}
#endif
-static void btrfs_i_callback(struct rcu_head *head)
+void btrfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
@@ -9244,7 +9366,7 @@
* created.
*/
if (!root)
- goto free;
+ return;
while (1) {
ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -9262,8 +9384,6 @@
btrfs_qgroup_check_reserved_leak(inode);
inode_tree_del(inode);
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
-free:
- call_rcu(&inode->i_rcu, btrfs_i_callback);
}
int btrfs_drop_inode(struct inode *inode)
@@ -9298,6 +9418,7 @@
kmem_cache_destroy(btrfs_trans_handle_cachep);
kmem_cache_destroy(btrfs_path_cachep);
kmem_cache_destroy(btrfs_free_space_cachep);
+ kmem_cache_destroy(btrfs_free_space_bitmap_cachep);
}
int __init btrfs_init_cachep(void)
@@ -9327,6 +9448,12 @@
if (!btrfs_free_space_cachep)
goto fail;
+ btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
+ PAGE_SIZE, PAGE_SIZE,
+ SLAB_RED_ZONE, NULL);
+ if (!btrfs_free_space_bitmap_cachep)
+ goto fail;
+
return 0;
fail:
btrfs_destroy_cachep();
@@ -9440,7 +9567,7 @@
/* Reference for the source. */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
/* force full log commit if subvolume involved. */
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
} else {
btrfs_pin_log_trans(root);
root_log_pinned = true;
@@ -9457,7 +9584,7 @@
/* And now for the dest. */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
/* force full log commit if subvolume involved. */
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
} else {
btrfs_pin_log_trans(dest);
dest_log_pinned = true;
@@ -9593,7 +9720,7 @@
btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
(new_inode &&
btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
if (root_log_pinned) {
btrfs_end_log_trans(root);
@@ -9617,6 +9744,18 @@
commit_transaction = true;
}
if (commit_transaction) {
+ /*
+ * We may have set commit_transaction when logging the new name
+ * in the destination root, in which case we left the source
+ * root context in the list of log contexts. So make sure we
+ * remove it to avoid invalid memory accesses, since the context
+ * was allocated in our stack frame.
+ */
+ if (sync_log_root) {
+ mutex_lock(&root->log_mutex);
+ list_del_init(&ctx_root.list);
+ mutex_unlock(&root->log_mutex);
+ }
ret = btrfs_commit_transaction(trans);
} else {
int ret2;
@@ -9630,6 +9769,9 @@
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
+ ASSERT(list_empty(&ctx_root.list));
+ ASSERT(list_empty(&ctx_dest.list));
+
return ret;
}
@@ -9779,7 +9921,7 @@
BTRFS_I(old_inode)->dir_index = 0ULL;
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
/* force full log commit if subvolume involved. */
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
} else {
btrfs_pin_log_trans(root);
log_pinned = true;
@@ -9900,7 +10042,7 @@
btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
(new_inode &&
btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
btrfs_end_log_trans(root);
log_pinned = false;
@@ -9974,7 +10116,6 @@
init_completion(&work->completion);
INIT_LIST_HEAD(&work->list);
work->inode = inode;
- WARN_ON_ONCE(!inode);
btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
btrfs_run_delalloc_work, NULL, NULL);
@@ -9985,7 +10126,7 @@
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
-static int start_delalloc_inodes(struct btrfs_root *root, int nr)
+static int start_delalloc_inodes(struct btrfs_root *root, int nr, bool snapshot)
{
struct btrfs_inode *binode;
struct inode *inode;
@@ -10013,6 +10154,9 @@
}
spin_unlock(&root->delalloc_lock);
+ if (snapshot)
+ set_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
+ &binode->runtime_flags);
work = btrfs_alloc_delalloc_work(inode);
if (!work) {
iput(inode);
@@ -10046,7 +10190,7 @@
return ret;
}
-int btrfs_start_delalloc_inodes(struct btrfs_root *root)
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@@ -10054,7 +10198,7 @@
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
- ret = start_delalloc_inodes(root, -1);
+ ret = start_delalloc_inodes(root, -1, true);
if (ret > 0)
ret = 0;
return ret;
@@ -10083,7 +10227,7 @@
&fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
- ret = start_delalloc_inodes(root, nr);
+ ret = start_delalloc_inodes(root, nr, false);
btrfs_put_fs_root(root);
if (ret < 0)
goto out;
@@ -10201,7 +10345,6 @@
inode->i_op = &btrfs_symlink_inode_operations;
inode_nohighmem(inode);
- inode->i_mapping->a_ops = &btrfs_symlink_aops;
inode_set_bytes(inode, name_len);
btrfs_i_size_write(BTRFS_I(inode), name_len);
err = btrfs_update_inode(trans, root, inode);
@@ -10462,26 +10605,6 @@
return ret;
}
-__attribute__((const))
-static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror)
-{
- return -EAGAIN;
-}
-
-static void btrfs_check_extent_io_range(void *private_data, const char *caller,
- u64 start, u64 end)
-{
- struct inode *inode = private_data;
- u64 isize;
-
- isize = i_size_read(inode);
- if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
- btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
- "%s: ino %llu isize %llu odd range [%llu,%llu]",
- caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
- }
-}
-
void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
struct inode *inode = tree->private_data;
@@ -10498,6 +10621,343 @@
}
}
+#ifdef CONFIG_SWAP
+/*
+ * Add an entry indicating a block group or device which is pinned by a
+ * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
+ * negative errno on failure.
+ */
+static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
+ bool is_block_group)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_swapfile_pin *sp, *entry;
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+
+ sp = kmalloc(sizeof(*sp), GFP_NOFS);
+ if (!sp)
+ return -ENOMEM;
+ sp->ptr = ptr;
+ sp->inode = inode;
+ sp->is_block_group = is_block_group;
+
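+ /*
+ * The tree is ordered by (ptr, inode), so a given block group or
+ * device is pinned at most once per swapfile.
+ */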
+ spin_lock(&fs_info->swapfile_pins_lock);
+ p = &fs_info->swapfile_pins.rb_node;
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
+ if (sp->ptr < entry->ptr ||
+ (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
+ p = &(*p)->rb_left;
+ } else if (sp->ptr > entry->ptr ||
+ (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
+ p = &(*p)->rb_right;
+ } else {
+ spin_unlock(&fs_info->swapfile_pins_lock);
+ kfree(sp);
+ return 1;
+ }
+ }
+ rb_link_node(&sp->node, parent, p);
+ rb_insert_color(&sp->node, &fs_info->swapfile_pins);
+ spin_unlock(&fs_info->swapfile_pins_lock);
+ return 0;
+}
+
+/* Free all of the entries pinned by this swapfile. */
+static void btrfs_free_swapfile_pins(struct inode *inode)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_swapfile_pin *sp;
+ struct rb_node *node, *next;
+
+ spin_lock(&fs_info->swapfile_pins_lock);
+ node = rb_first(&fs_info->swapfile_pins);
+ while (node) {
+ next = rb_next(node);
+ sp = rb_entry(node, struct btrfs_swapfile_pin, node);
+ if (sp->inode == inode) {
+ rb_erase(&sp->node, &fs_info->swapfile_pins);
+ if (sp->is_block_group)
+ btrfs_put_block_group(sp->ptr);
+ kfree(sp);
+ }
+ node = next;
+ }
+ spin_unlock(&fs_info->swapfile_pins_lock);
+}
+
+struct btrfs_swap_info {
+ u64 start;
+ u64 block_start;
+ u64 block_len;
+ u64 lowest_ppage;
+ u64 highest_ppage;
+ unsigned long nr_pages;
+ int nr_extents;
+};
+
+static int btrfs_add_swap_extent(struct swap_info_struct *sis,
+ struct btrfs_swap_info *bsi)
+{
+ unsigned long nr_pages;
+ u64 first_ppage, first_ppage_reported, next_ppage;
+ int ret;
+
+ first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
+ next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
+ PAGE_SIZE) >> PAGE_SHIFT;
+
+ if (first_ppage >= next_ppage)
+ return 0;
+ nr_pages = next_ppage - first_ppage;
+
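+ /*
+ * The first page of the file holds the swap header, so it must
+ * never be reported as a usable swap page.
+ */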
+ first_ppage_reported = first_ppage;
+ if (bsi->start == 0)
+ first_ppage_reported++;
+ if (bsi->lowest_ppage > first_ppage_reported)
+ bsi->lowest_ppage = first_ppage_reported;
+ if (bsi->highest_ppage < (next_ppage - 1))
+ bsi->highest_ppage = next_ppage - 1;
+
+ ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
+ if (ret < 0)
+ return ret;
+ bsi->nr_extents += ret;
+ bsi->nr_pages += nr_pages;
+ return 0;
+}
+
+static void btrfs_swap_deactivate(struct file *file)
+{
+ struct inode *inode = file_inode(file);
+
+ btrfs_free_swapfile_pins(inode);
+ atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
+}
+
+static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct extent_state *cached_state = NULL;
+ struct extent_map *em = NULL;
+ struct btrfs_device *device = NULL;
+ struct btrfs_swap_info bsi = {
+ .lowest_ppage = (sector_t)-1ULL,
+ };
+ int ret = 0;
+ u64 isize;
+ u64 start;
+
+ /*
+ * If the swap file was just created, make sure delalloc is done. If the
+ * file changes again after this, the user is doing something stupid and
+ * we don't really care.
+ */
+ ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (ret)
+ return ret;
+
+ /*
+ * The inode is locked, so these flags won't change after we check them.
+ */
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
+ btrfs_warn(fs_info, "swapfile must not be compressed");
+ return -EINVAL;
+ }
+ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
+ btrfs_warn(fs_info, "swapfile must not be copy-on-write");
+ return -EINVAL;
+ }
+ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+ btrfs_warn(fs_info, "swapfile must not be checksummed");
+ return -EINVAL;
+ }
+
+ /*
+ * Balance or device remove/replace/resize can move stuff around from
+ * under us. The EXCL_OP flag makes sure they aren't running/won't run
+ * concurrently while we are mapping the swap extents, and
+ * fs_info->swapfile_pins prevents them from running while the swap file
+ * is active and moving the extents. Note that this also prevents a
+ * concurrent device add which isn't actually necessary, but it's not
+ * really worth the trouble to allow it.
+ */
+ if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
+ btrfs_warn(fs_info,
+ "cannot activate swapfile while exclusive operation is running");
+ return -EBUSY;
+ }
+ /*
+ * Snapshots can create extents which require COW even if NODATACOW is
+ * set. We use this counter to prevent snapshots. We must increment it
+ * before walking the extents because we don't want a concurrent
+ * snapshot to run after we've already checked the extents.
+ */
+ atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
+
+ isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
+
+ lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
+ start = 0;
+ while (start < isize) {
+ u64 logical_block_start, physical_block_start;
+ struct btrfs_block_group_cache *bg;
+ u64 len = isize - start;
+
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+
+ if (em->block_start == EXTENT_MAP_HOLE) {
+ btrfs_warn(fs_info, "swapfile must not have holes");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (em->block_start == EXTENT_MAP_INLINE) {
+ /*
+ * It's unlikely we'll ever actually find ourselves
+ * here, as a file small enough to fit inline won't be
+ * big enough to store more than the swap header, but in
+ * case something changes in the future, let's catch it
+ * here rather than later.
+ */
+ btrfs_warn(fs_info, "swapfile must not be inline");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
+ btrfs_warn(fs_info, "swapfile must not be compressed");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ logical_block_start = em->block_start + (start - em->start);
+ len = min(len, em->len - (start - em->start));
+ free_extent_map(em);
+ em = NULL;
+
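+ /*
+ * can_nocow_extent() returns > 0 when the range can be written
+ * in place; anything else means the swapfile would be COWed.
+ */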
+ ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
+ if (ret < 0) {
+ goto out;
+ } else if (ret) {
+ ret = 0;
+ } else {
+ btrfs_warn(fs_info,
+ "swapfile must not be copy-on-write");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+
+ if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ btrfs_warn(fs_info,
+ "swapfile must have single data profile");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (device == NULL) {
+ device = em->map_lookup->stripes[0].dev;
+ ret = btrfs_add_swapfile_pin(inode, device, false);
+ if (ret == 1)
+ ret = 0;
+ else if (ret)
+ goto out;
+ } else if (device != em->map_lookup->stripes[0].dev) {
+ btrfs_warn(fs_info, "swapfile must be on one device");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ physical_block_start = (em->map_lookup->stripes[0].physical +
+ (logical_block_start - em->start));
+ len = min(len, em->len - (logical_block_start - em->start));
+ free_extent_map(em);
+ em = NULL;
+
+ bg = btrfs_lookup_block_group(fs_info, logical_block_start);
+ if (!bg) {
+ btrfs_warn(fs_info,
+ "could not find block group containing swapfile");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = btrfs_add_swapfile_pin(inode, bg, true);
+ if (ret) {
+ btrfs_put_block_group(bg);
+ if (ret == 1)
+ ret = 0;
+ else
+ goto out;
+ }
+
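+ /*
+ * Extend the current run while the extents remain physically
+ * contiguous, otherwise flush it to the swap map and start a
+ * new run.
+ */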
+ if (bsi.block_len &&
+ bsi.block_start + bsi.block_len == physical_block_start) {
+ bsi.block_len += len;
+ } else {
+ if (bsi.block_len) {
+ ret = btrfs_add_swap_extent(sis, &bsi);
+ if (ret)
+ goto out;
+ }
+ bsi.start = start;
+ bsi.block_start = physical_block_start;
+ bsi.block_len = len;
+ }
+
+ start += len;
+ }
+
+ if (bsi.block_len)
+ ret = btrfs_add_swap_extent(sis, &bsi);
+
+out:
+ if (!IS_ERR_OR_NULL(em))
+ free_extent_map(em);
+
+ unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
+
+ if (ret)
+ btrfs_swap_deactivate(file);
+
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+
+ if (ret)
+ return ret;
+
+ if (device)
+ sis->bdev = device->bdev;
+ *span = bsi.highest_ppage - bsi.lowest_ppage + 1;
+ sis->max = bsi.nr_pages;
+ sis->pages = bsi.nr_pages - 1;
+ sis->highest_bit = bsi.nr_pages - 1;
+ return bsi.nr_extents;
+}
+#else
+static void btrfs_swap_deactivate(struct file *file)
+{
+}
+
+static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
static const struct inode_operations btrfs_dir_inode_operations = {
.getattr = btrfs_getattr,
.lookup = btrfs_lookup,
@@ -10540,17 +11000,6 @@
/* mandatory callbacks */
.submit_bio_hook = btrfs_submit_bio_hook,
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
- .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
-
- /* optional callbacks */
- .fill_delalloc = run_delalloc_range,
- .writepage_end_io_hook = btrfs_writepage_end_io_hook,
- .writepage_start_hook = btrfs_writepage_start_hook,
- .set_bit_hook = btrfs_set_bit_hook,
- .clear_bit_hook = btrfs_clear_bit_hook,
- .merge_extent_hook = btrfs_merge_extent_hook,
- .split_extent_hook = btrfs_split_extent_hook,
- .check_extent_io_range = btrfs_check_extent_io_range,
};
/*
@@ -10575,13 +11024,8 @@
.releasepage = btrfs_releasepage,
.set_page_dirty = btrfs_set_page_dirty,
.error_remove_page = generic_error_remove_page,
-};
-
-static const struct address_space_operations btrfs_symlink_aops = {
- .readpage = btrfs_readpage,
- .writepage = btrfs_writepage,
- .invalidatepage = btrfs_invalidatepage,
- .releasepage = btrfs_releasepage,
+ .swap_activate = btrfs_swap_activate,
+ .swap_deactivate = btrfs_swap_deactivate,
};
static const struct inode_operations btrfs_file_inode_operations = {