Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4dd6faa..cceaf05 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -89,9 +89,18 @@
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
struct extent_io_tree *tree, u64 start, u64 end)
{
- if (tree->ops && tree->ops->check_extent_io_range)
- tree->ops->check_extent_io_range(tree->private_data, caller,
- start, end);
+ struct inode *inode = tree->private_data;
+ u64 isize;
+
+ if (!inode || !is_data_inode(inode))
+ return;
+
+ isize = i_size_read(inode);
+ if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
+ btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
+ "%s: ino %llu isize %llu odd range [%llu,%llu]",
+ caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
+ }
}
#else
#define btrfs_leak_debug_add(new, head) do {} while (0)
@@ -100,8 +109,6 @@
#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
#endif
-#define BUFFER_LRU_MAX 64
-
struct tree_entry {
u64 start;
u64 end;
@@ -138,7 +145,56 @@
return ret;
}
-static void flush_write_bio(struct extent_page_data *epd);
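+/*
+ * Submit the bio through the tree's submit_bio_hook if the tree has ops set,
+ * otherwise hand it straight to the block layer, and convert the resulting
+ * block status to a negative errno.
+ */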
+static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
+ unsigned long bio_flags)
+{
+ blk_status_t ret = 0;
+ struct extent_io_tree *tree = bio->bi_private;
+
+ bio->bi_private = NULL;
+
+ if (tree->ops)
+ ret = tree->ops->submit_bio_hook(tree->private_data, bio,
+ mirror_num, bio_flags);
+ else
+ btrfsic_submit_bio(bio);
+
+ return blk_status_to_errno(ret);
+}
+
+/* Cleanup unsubmitted bios */
+static void end_write_bio(struct extent_page_data *epd, int ret)
+{
+ if (epd->bio) {
+ epd->bio->bi_status = errno_to_blk_status(ret);
+ bio_endio(epd->bio);
+ epd->bio = NULL;
+ }
+}
+
+/*
+ * Submit bio from extent page data via submit_one_bio
+ *
+ * Return 0 if everything is OK.
+ * Return <0 for error.
+ */
+static int __must_check flush_write_bio(struct extent_page_data *epd)
+{
+ int ret = 0;
+
+ if (epd->bio) {
+ ret = submit_one_bio(epd->bio, 0, 0);
+		/*
+		 * Cleanup of epd->bio is handled by its endio function, which
+		 * is triggered either by successful bio execution or by the
+		 * error handler of the submit bio hook. So no matter what
+		 * happened, we don't need to clean up epd->bio here.
+		 */
+ epd->bio = NULL;
+ }
+ return ret;
+}
int __init extent_io_init(void)
{
@@ -191,14 +247,46 @@
bioset_exit(&btrfs_bioset);
}
-void extent_io_tree_init(struct extent_io_tree *tree,
+void extent_io_tree_init(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *tree, unsigned int owner,
void *private_data)
{
+ tree->fs_info = fs_info;
tree->state = RB_ROOT;
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
tree->private_data = private_data;
+ tree->owner = owner;
+}
+
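+/*
+ * Remove and free every extent state left in the tree. Intended for tree
+ * teardown, so nothing is expected to be waiting on the states being freed.
+ */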
+void extent_io_tree_release(struct extent_io_tree *tree)
+{
+ spin_lock(&tree->lock);
+ /*
+	 * Do a single barrier for the waitqueue_active check here; the state
+ * of the waitqueue should not change once extent_io_tree_release is
+ * called.
+ */
+ smp_mb();
+ while (!RB_EMPTY_ROOT(&tree->state)) {
+ struct rb_node *node;
+ struct extent_state *state;
+
+ node = rb_first(&tree->state);
+ state = rb_entry(node, struct extent_state, rb_node);
+ rb_erase(&state->rb_node, &tree->state);
+ RB_CLEAR_NODE(&state->rb_node);
+ /*
+ * btree io trees aren't supposed to have tasks waiting for
+ * changes in the flags of extent states ever.
+ */
+ ASSERT(!waitqueue_active(&state->wq));
+ free_extent_state(state);
+
+ cond_resched_lock(&tree->lock);
+ }
+ spin_unlock(&tree->lock);
}
static struct extent_state *alloc_extent_state(gfp_t mask)
@@ -271,9 +359,27 @@
return NULL;
}
+/**
+ * __etree_search - search @tree for an entry that contains @offset. Such an
+ * entry would have entry->start <= offset && entry->end >= offset.
+ *
+ * @tree - the tree to search
+ * @offset - offset that should fall within an entry in @tree
+ * @next_ret - pointer to the first entry whose range ends after @offset
+ * @prev_ret - pointer to the first entry whose range begins before @offset
+ * @p_ret - pointer where the new node should be anchored (used when inserting
+ *	    an entry in the tree)
+ * @parent_ret - points to the entry which would have been the parent of the
+ *               entry containing @offset
+ *
+ * This function returns a pointer to the entry that contains @offset byte
+ * address. If no such entry exists, then NULL is returned and the other
+ * pointer arguments to the function are filled, otherwise the found entry is
+ * returned and other pointers are left untouched.
+ */
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
- struct rb_node **prev_ret,
struct rb_node **next_ret,
+ struct rb_node **prev_ret,
struct rb_node ***p_ret,
struct rb_node **parent_ret)
{
@@ -302,23 +408,23 @@
if (parent_ret)
*parent_ret = prev;
- if (prev_ret) {
+ if (next_ret) {
orig_prev = prev;
while (prev && offset > prev_entry->end) {
prev = rb_next(prev);
prev_entry = rb_entry(prev, struct tree_entry, rb_node);
}
- *prev_ret = prev;
+ *next_ret = prev;
prev = orig_prev;
}
- if (next_ret) {
+ if (prev_ret) {
prev_entry = rb_entry(prev, struct tree_entry, rb_node);
while (prev && offset < prev_entry->start) {
prev = rb_prev(prev);
prev_entry = rb_entry(prev, struct tree_entry, rb_node);
}
- *next_ret = prev;
+ *prev_ret = prev;
}
return NULL;
}
@@ -329,12 +435,12 @@
struct rb_node ***p_ret,
struct rb_node **parent_ret)
{
- struct rb_node *prev = NULL;
+	struct rb_node *next = NULL;
struct rb_node *ret;
- ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
+ ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret);
if (!ret)
- return prev;
+ return next;
return ret;
}
@@ -344,13 +450,6 @@
return tree_search_for_insert(tree, offset, NULL, NULL);
}
-static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
- struct extent_state *other)
-{
- if (tree->ops && tree->ops->merge_extent_hook)
- tree->ops->merge_extent_hook(tree->private_data, new, other);
-}
-
/*
* utility function to look for merge candidates inside a given range.
* Any extents with matching state are merged together into a single
@@ -366,7 +465,7 @@
struct extent_state *other;
struct rb_node *other_node;
- if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
+ if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
return;
other_node = rb_prev(&state->rb_node);
@@ -374,7 +473,10 @@
other = rb_entry(other_node, struct extent_state, rb_node);
if (other->end == state->start - 1 &&
other->state == state->state) {
- merge_cb(tree, state, other);
+ if (tree->private_data &&
+ is_data_inode(tree->private_data))
+ btrfs_merge_delalloc_extent(tree->private_data,
+ state, other);
state->start = other->start;
rb_erase(&other->rb_node, &tree->state);
RB_CLEAR_NODE(&other->rb_node);
@@ -386,7 +488,10 @@
other = rb_entry(other_node, struct extent_state, rb_node);
if (other->start == state->end + 1 &&
other->state == state->state) {
- merge_cb(tree, state, other);
+ if (tree->private_data &&
+ is_data_inode(tree->private_data))
+ btrfs_merge_delalloc_extent(tree->private_data,
+ state, other);
state->end = other->end;
rb_erase(&other->rb_node, &tree->state);
RB_CLEAR_NODE(&other->rb_node);
@@ -395,20 +500,6 @@
}
}
-static void set_state_cb(struct extent_io_tree *tree,
- struct extent_state *state, unsigned *bits)
-{
- if (tree->ops && tree->ops->set_bit_hook)
- tree->ops->set_bit_hook(tree->private_data, state, bits);
-}
-
-static void clear_state_cb(struct extent_io_tree *tree,
- struct extent_state *state, unsigned *bits)
-{
- if (tree->ops && tree->ops->clear_bit_hook)
- tree->ops->clear_bit_hook(tree->private_data, state, bits);
-}
-
static void set_state_bits(struct extent_io_tree *tree,
struct extent_state *state, unsigned *bits,
struct extent_changeset *changeset);
@@ -431,9 +522,11 @@
{
struct rb_node *node;
- if (end < start)
- WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
- end, start);
+ if (end < start) {
+ btrfs_err(tree->fs_info,
+ "insert state: end < start %llu %llu", end, start);
+ WARN_ON(1);
+ }
state->start = start;
state->end = end;
@@ -443,7 +536,8 @@
if (node) {
struct extent_state *found;
found = rb_entry(node, struct extent_state, rb_node);
- pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
+ btrfs_err(tree->fs_info,
+ "found node %llu %llu on insert of %llu %llu",
found->start, found->end, start, end);
return -EEXIST;
}
@@ -451,13 +545,6 @@
return 0;
}
-static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
- u64 split)
-{
- if (tree->ops && tree->ops->split_extent_hook)
- tree->ops->split_extent_hook(tree->private_data, orig, split);
-}
-
/*
* split a given extent state struct in two, inserting the preallocated
* struct 'prealloc' as the newly created second half. 'split' indicates an
@@ -477,7 +564,8 @@
{
struct rb_node *node;
- split_cb(tree, orig, split);
+ if (tree->private_data && is_data_inode(tree->private_data))
+ btrfs_split_delalloc_extent(tree->private_data, orig, split);
prealloc->start = orig->start;
prealloc->end = split - 1;
@@ -504,7 +592,7 @@
/*
* utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1).
+ * it will optionally wake up anyone waiting on this state (wake == 1).
*
* If no bits are set on the state struct after clearing things, the
* struct is freed and removed from the tree
@@ -523,7 +611,10 @@
WARN_ON(range > tree->dirty_bytes);
tree->dirty_bytes -= range;
}
- clear_state_cb(tree, state, bits);
+
+ if (tree->private_data && is_data_inode(tree->private_data))
+ btrfs_clear_delalloc_extent(tree->private_data, state, bits);
+
ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
BUG_ON(ret < 0);
state->state &= ~bits_to_clear;
@@ -588,15 +679,15 @@
int clear = 0;
btrfs_debug_check_extent_io_range(tree, start, end);
+ trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
if (bits & EXTENT_DELALLOC)
bits |= EXTENT_NORESERVE;
if (delete)
bits |= ~EXTENT_CTLBITS;
- bits |= EXTENT_FIRST_DELALLOC;
- if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
+ if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
clear = 1;
again:
if (!prealloc && gfpflags_allow_blocking(mask)) {
@@ -800,7 +891,9 @@
unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
int ret;
- set_state_cb(tree, state, bits);
+ if (tree->private_data && is_data_inode(tree->private_data))
+ btrfs_set_delalloc_extent(tree->private_data, state, bits);
+
if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes += range;
@@ -826,7 +919,7 @@
struct extent_state **cached_ptr)
{
return cache_state_if_flags(state, cached_ptr,
- EXTENT_IOBITS | EXTENT_BOUNDARY);
+ EXTENT_LOCKED | EXTENT_BOUNDARY);
}
/*
@@ -856,8 +949,8 @@
u64 last_end;
btrfs_debug_check_extent_io_range(tree, start, end);
+ trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
- bits |= EXTENT_FIRST_DELALLOC;
again:
if (!prealloc && gfpflags_allow_blocking(mask)) {
/*
@@ -1089,6 +1182,8 @@
bool first_iteration = true;
btrfs_debug_check_extent_io_range(tree, start, end);
+ trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
+ clear_bits);
again:
if (!prealloc) {
@@ -1288,6 +1383,13 @@
changeset);
}
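+
+/* Set bits in the given range without sleeping for allocations (GFP_NOWAIT). */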
+int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits)
+{
+ return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
+ GFP_NOWAIT, NULL);
+}
+
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached)
@@ -1424,20 +1526,15 @@
struct extent_state **cached_state)
{
struct extent_state *state;
- struct rb_node *n;
int ret = 1;
spin_lock(&tree->lock);
if (cached_state && *cached_state) {
state = *cached_state;
if (state->end == start - 1 && extent_state_in_tree(state)) {
- n = rb_next(&state->rb_node);
- while (n) {
- state = rb_entry(n, struct extent_state,
- rb_node);
+ while ((state = next_state(state)) != NULL) {
if (state->state & bits)
goto got_it;
- n = rb_next(n);
}
free_extent_state(*cached_state);
*cached_state = NULL;
@@ -1460,20 +1557,133 @@
return ret;
}
+/**
+ * find_first_clear_extent_bit - find the first range that has @bits not set.
+ * This range could start before @start.
+ *
+ * @tree - the tree to search
+ * @start - the offset at/after which the found extent should start
+ * @start_ret - records the beginning of the range
+ * @end_ret - records the end of the range (inclusive)
+ * @bits - the set of bits which must be unset
+ *
+ * Since an unallocated range is also considered one which doesn't have the
+ * bits set, it's possible that @end_ret contains -1; this happens when the
+ * range spans (last_range_end, end of device]. In this case it's up to the
+ * caller to trim @end_ret to the appropriate size.
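+ *
+ * Example (illustrative): with @bits set on the ranges [0, 1M - 1] and
+ * [2M, 3M - 1], a search starting at 1M returns [1M, 2M - 1], while a search
+ * starting at 3M returns [3M, -1].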
+ */
+void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, unsigned bits)
+{
+ struct extent_state *state;
+ struct rb_node *node, *prev = NULL, *next;
+
+ spin_lock(&tree->lock);
+
+ /* Find first extent with bits cleared */
+ while (1) {
+ node = __etree_search(tree, start, &next, &prev, NULL, NULL);
+ if (!node) {
+ node = next;
+ if (!node) {
+ /*
+ * We are past the last allocated chunk,
+ * set start at the end of the last extent. The
+ * device alloc tree should never be empty so
+ * prev is always set.
+ */
+ ASSERT(prev);
+ state = rb_entry(prev, struct extent_state, rb_node);
+ *start_ret = state->end + 1;
+ *end_ret = -1;
+ goto out;
+ }
+ }
+ /*
+ * At this point 'node' either contains 'start' or start is
+ * before 'node'
+ */
+ state = rb_entry(node, struct extent_state, rb_node);
+
+ if (in_range(start, state->start, state->end - state->start + 1)) {
+ if (state->state & bits) {
+ /*
+ * |--range with bits sets--|
+ * |
+ * start
+ */
+ start = state->end + 1;
+ } else {
+ /*
+ * 'start' falls within a range that doesn't
+ * have the bits set, so take its start as
+ * the beginning of the desired range
+ *
+ * |--range with bits cleared----|
+ * |
+ * start
+ */
+ *start_ret = state->start;
+ break;
+ }
+ } else {
+ /*
+ * |---prev range---|---hole/unset---|---node range---|
+ * |
+ * start
+ *
+ * or
+ *
+ * |---hole/unset--||--first node--|
+ * 0 |
+ * start
+ */
+ if (prev) {
+ state = rb_entry(prev, struct extent_state,
+ rb_node);
+ *start_ret = state->end + 1;
+ } else {
+ *start_ret = 0;
+ }
+ break;
+ }
+ }
+
+ /*
+ * Find the longest stretch from start until an entry which has the
+ * bits set
+ */
+ while (1) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->end >= start && !(state->state & bits)) {
+ *end_ret = state->end;
+ } else {
+ *end_ret = state->start - 1;
+ break;
+ }
+
+ node = rb_next(node);
+ if (!node)
+ break;
+ }
+out:
+ spin_unlock(&tree->lock);
+}
+
/*
* find a contiguous range of bytes in the file marked as delalloc, not
* more than 'max_bytes'. start and end are used to return the range,
*
- * 1 is returned if we find something, 0 if nothing was in the tree
+ * true is returned if we find something, false if nothing was in the tree
*/
-static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
+static noinline bool find_delalloc_range(struct extent_io_tree *tree,
u64 *start, u64 *end, u64 max_bytes,
struct extent_state **cached_state)
{
struct rb_node *node;
struct extent_state *state;
u64 cur_start = *start;
- u64 found = 0;
+ bool found = false;
u64 total_bytes = 0;
spin_lock(&tree->lock);
@@ -1484,8 +1694,7 @@
*/
node = tree_search(tree, cur_start);
if (!node) {
- if (!found)
- *end = (u64)-1;
+ *end = (u64)-1;
goto out;
}
@@ -1505,7 +1714,7 @@
*cached_state = state;
refcount_inc(&state->refs);
}
- found++;
+ found = true;
*end = state->end;
cur_start = state->end + 1;
node = rb_next(node);
@@ -1563,19 +1772,22 @@
}
/*
- * find a contiguous range of bytes in the file marked as delalloc, not
- * more than 'max_bytes'. start and end are used to return the range,
+ * Find and lock a contiguous range of bytes in the file marked as delalloc,
+ * no more than @max_bytes. @start and @end are used to return the range.
*
- * 1 is returned if we find something, 0 if nothing was in the tree
+ * Return: true if we find something
+ * false if nothing was in the tree
*/
-STATIC u64 find_lock_delalloc_range(struct inode *inode,
- struct extent_io_tree *tree,
+EXPORT_FOR_TESTS
+noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
- u64 *end, u64 max_bytes)
+ u64 *end)
{
+ struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+ u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
u64 delalloc_start;
u64 delalloc_end;
- u64 found;
+ bool found;
struct extent_state *cached_state = NULL;
int ret;
int loops = 0;
@@ -1590,7 +1802,7 @@
*start = delalloc_start;
*end = delalloc_end;
free_extent_state(cached_state);
- return 0;
+ return false;
}
/*
@@ -1610,6 +1822,7 @@
/* step two, lock all the pages after the page that has start */
ret = lock_delalloc_pages(inode, locked_page,
delalloc_start, delalloc_end);
+ ASSERT(!ret || ret == -EAGAIN);
if (ret == -EAGAIN) {
/* some of the pages are gone, lets avoid looping by
* shortening the size of the delalloc range we're searching
@@ -1621,11 +1834,10 @@
loops = 1;
goto again;
} else {
- found = 0;
+ found = false;
goto out_failed;
}
}
- BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */
/* step three, lock the state bits for the whole range */
lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
@@ -1726,9 +1938,9 @@
}
void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
- u64 delalloc_end, struct page *locked_page,
- unsigned clear_bits,
- unsigned long page_ops)
+ struct page *locked_page,
+ unsigned clear_bits,
+ unsigned long page_ops)
{
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
NULL);
@@ -2041,9 +2253,9 @@
return 0;
}
-int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int mirror_num)
+int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
u64 start = eb->start;
int i, num_pages = num_extent_pages(eb);
int ret = 0;
@@ -2343,13 +2555,11 @@
}
/*
- * this is a generic handler for readpage errors (default
- * readpage_io_failed_hook). if other copies exist, read those and write back
- * good data to the failed position. does not investigate in remapping the
- * failed extent elsewhere, hoping the device will be smart enough to do this as
- * needed
+ * This is a generic handler for readpage errors. If other copies exist, read
+ * those and write back good data to the failed position. It does not attempt
+ * to remap the failed extent elsewhere, hoping the device will be smart
+ * enough to do this as needed.
*/
-
static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int failed_mirror)
@@ -2362,7 +2572,7 @@
int read_mode = 0;
blk_status_t status;
int ret;
- unsigned failed_bio_pages = bio_pages_all(failed_bio);
+ unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2391,7 +2601,7 @@
read_mode, failrec->this_mirror, failrec->in_validation);
status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
- failrec->bio_flags, 0);
+ failrec->bio_flags);
if (status) {
free_io_failure(failure_tree, tree, failrec);
bio_put(bio);
@@ -2406,14 +2616,9 @@
void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
{
int uptodate = (err == 0);
- struct extent_io_tree *tree;
int ret = 0;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
-
- if (tree->ops && tree->ops->writepage_end_io_hook)
- tree->ops->writepage_end_io_hook(page, start, end, NULL,
- uptodate);
+ btrfs_writepage_endio_finish_ordered(page, start, end, uptodate);
if (!uptodate) {
ClearPageUptodate(page);
@@ -2438,10 +2643,10 @@
struct bio_vec *bvec;
u64 start;
u64 end;
- int i;
+ struct bvec_iter_all iter_all;
ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2509,13 +2714,15 @@
u64 extent_len = 0;
int mirror;
int ret;
- int i;
+ struct bvec_iter_all iter_all;
ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ bool data_inode = btrfs_ino(BTRFS_I(inode))
+ != BTRFS_BTREE_INODE_OBJECTID;
btrfs_debug(fs_info,
"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
@@ -2545,7 +2752,7 @@
len = bvec->bv_len;
mirror = io_bio->mirror_num;
- if (likely(uptodate && tree->ops)) {
+ if (likely(uptodate)) {
ret = tree->ops->readpage_end_io_hook(io_bio, offset,
page, start, end,
mirror);
@@ -2561,38 +2768,35 @@
if (likely(uptodate))
goto readpage_ok;
- if (tree->ops) {
- ret = tree->ops->readpage_io_failed_hook(page, mirror);
- if (ret == -EAGAIN) {
- /*
- * Data inode's readpage_io_failed_hook() always
- * returns -EAGAIN.
- *
- * The generic bio_readpage_error handles errors
- * the following way: If possible, new read
- * requests are created and submitted and will
- * end up in end_bio_extent_readpage as well (if
- * we're lucky, not in the !uptodate case). In
- * that case it returns 0 and we just go on with
- * the next page in our bio. If it can't handle
- * the error it will return -EIO and we remain
- * responsible for that page.
- */
- ret = bio_readpage_error(bio, offset, page,
- start, end, mirror);
- if (ret == 0) {
- uptodate = !bio->bi_status;
- offset += len;
- continue;
- }
- }
+ if (data_inode) {
/*
- * metadata's readpage_io_failed_hook() always returns
- * -EIO and fixes nothing. -EIO is also returned if
- * data inode error could not be fixed.
+ * The generic bio_readpage_error handles errors the
+ * following way: If possible, new read requests are
+ * created and submitted and will end up in
+ * end_bio_extent_readpage as well (if we're lucky,
+ * not in the !uptodate case). In that case it returns
+ * 0 and we just go on with the next page in our bio.
+ * If it can't handle the error it will return -EIO and
+ * we remain responsible for that page.
*/
- ASSERT(ret == -EIO);
+ ret = bio_readpage_error(bio, offset, page, start, end,
+ mirror);
+ if (ret == 0) {
+ uptodate = !bio->bi_status;
+ offset += len;
+ continue;
+ }
+ } else {
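+			/*
+			 * Metadata read error: flag the extent buffer and
+			 * record the failed mirror, drop our io_pages
+			 * reference and notify readahead of the failure.
+			 */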
+ struct extent_buffer *eb;
+
+ eb = (struct extent_buffer *)page->private;
+ set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
+ eb->read_mirror = mirror;
+ atomic_dec(&eb->io_pages);
+ if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
+ &eb->bflags))
+ btree_readahead_hook(eb, -EIO);
}
readpage_ok:
if (likely(uptodate)) {
@@ -2601,7 +2805,7 @@
unsigned off;
/* Zero out the end if this page straddles i_size */
- off = i_size & (PAGE_SIZE-1);
+ off = offset_in_page(i_size);
if (page->index == end_index && off)
zero_user_segment(page, off, PAGE_SIZE);
SetPageUptodate(page);
@@ -2638,8 +2842,7 @@
if (extent_len)
endio_readpage_release_extent(tree, extent_start, extent_len,
uptodate);
- if (io_bio->end_io)
- io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
+ btrfs_io_bio_free_csum(io_bio);
bio_put(bio);
}
@@ -2658,12 +2861,11 @@
* never fail. We're returning a bio right now but you can call btrfs_io_bio
* for the appropriate container_of magic
*/
-struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
+struct bio *btrfs_bio_alloc(u64 first_byte)
{
struct bio *bio;
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
- bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_byte >> 9;
btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
@@ -2709,28 +2911,6 @@
return bio;
}
-static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
- unsigned long bio_flags)
-{
- blk_status_t ret = 0;
- struct bio_vec *bvec = bio_last_bvec_all(bio);
- struct page *page = bvec->bv_page;
- struct extent_io_tree *tree = bio->bi_private;
- u64 start;
-
- start = page_offset(page) + bvec->bv_offset;
-
- bio->bi_private = NULL;
-
- if (tree->ops)
- ret = tree->ops->submit_bio_hook(tree->private_data, bio,
- mirror_num, bio_flags, start);
- else
- btrfsic_submit_bio(bio);
-
- return blk_status_to_errno(ret);
-}
-
/*
* @opf: bio REQ_OP_* and REQ_* flags as one value
* @tree: tree so we can call our merge_bio hook
@@ -2776,8 +2956,8 @@
else
contig = bio_end_sector(bio) == sector;
- if (tree->ops && btrfs_merge_bio_hook(page, offset, page_size,
- bio, bio_flags))
+ ASSERT(tree->ops);
+ if (btrfs_bio_fits_in_stripe(page, page_size, bio, bio_flags))
can_merge = false;
if (prev_bio_flags != bio_flags || !contig || !can_merge ||
@@ -2791,12 +2971,13 @@
bio = NULL;
} else {
if (wbc)
- wbc_account_io(wbc, page, page_size);
+ wbc_account_cgroup_owner(wbc, page, page_size);
return 0;
}
}
- bio = btrfs_bio_alloc(bdev, offset);
+ bio = btrfs_bio_alloc(offset);
+ bio_set_dev(bio, bdev);
bio_add_page(bio, page, page_size, pg_offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
@@ -2804,7 +2985,7 @@
bio->bi_opf = opf;
if (wbc) {
wbc_init_bio(wbc, bio);
- wbc_account_io(wbc, page, page_size);
+ wbc_account_cgroup_owner(wbc, page, page_size);
}
*bio_ret = bio;
@@ -2905,7 +3086,7 @@
if (page->index == last_byte >> PAGE_SHIFT) {
char *userpage;
- size_t zero_offset = last_byte & (PAGE_SIZE - 1);
+ size_t zero_offset = offset_in_page(last_byte);
if (zero_offset) {
iosize = PAGE_SIZE - zero_offset;
@@ -3002,11 +3183,11 @@
*/
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
prev_em_start && *prev_em_start != (u64)-1 &&
- *prev_em_start != em->orig_start)
+ *prev_em_start != em->start)
force_bio_submit = true;
if (prev_em_start)
- *prev_em_start = em->orig_start;
+ *prev_em_start = em->start;
free_extent_map(em);
em = NULL;
@@ -3076,7 +3257,7 @@
return ret;
}
-static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
+static inline void contiguous_readpages(struct extent_io_tree *tree,
struct page *pages[], int nr_pages,
u64 start, u64 end,
struct extent_map **em_cached,
@@ -3084,21 +3265,10 @@
unsigned long *bio_flags,
u64 *prev_em_start)
{
- struct inode *inode;
- struct btrfs_ordered_extent *ordered;
+ struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
int index;
- inode = pages[0]->mapping->host;
- while (1) {
- lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
- end - start + 1);
- if (!ordered)
- break;
- unlock_extent(tree, start, end);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
+ btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
for (index = 0; index < nr_pages; index++) {
__do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
@@ -3107,46 +3277,6 @@
}
}
-static void __extent_readpages(struct extent_io_tree *tree,
- struct page *pages[],
- int nr_pages,
- struct extent_map **em_cached,
- struct bio **bio, unsigned long *bio_flags,
- u64 *prev_em_start)
-{
- u64 start = 0;
- u64 end = 0;
- u64 page_start;
- int index;
- int first_index = 0;
-
- for (index = 0; index < nr_pages; index++) {
- page_start = page_offset(pages[index]);
- if (!end) {
- start = page_start;
- end = start + PAGE_SIZE - 1;
- first_index = index;
- } else if (end + 1 == page_start) {
- end += PAGE_SIZE;
- } else {
- __do_contiguous_readpages(tree, &pages[first_index],
- index - first_index, start,
- end, em_cached,
- bio, bio_flags,
- prev_em_start);
- start = page_start;
- end = start + PAGE_SIZE - 1;
- first_index = index;
- }
- }
-
- if (end)
- __do_contiguous_readpages(tree, &pages[first_index],
- index - first_index, start,
- end, em_cached, bio,
- bio_flags, prev_em_start);
-}
-
static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
@@ -3154,22 +3284,12 @@
unsigned long *bio_flags,
unsigned int read_flags)
{
- struct inode *inode = page->mapping->host;
- struct btrfs_ordered_extent *ordered;
+ struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
int ret;
- while (1) {
- lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
- PAGE_SIZE);
- if (!ordered)
- break;
- unlock_extent(tree, start, end);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
+ btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
bio_flags, read_flags, NULL);
@@ -3199,7 +3319,7 @@
/*
* helper for __extent_writepage, doing all of the delayed allocation setup.
*
- * This returns 1 if our fill_delalloc function did all the work required
+ * This returns 1 if btrfs_run_delalloc_range did all the work required
* to write the page (copy into inline extent). In this case the IO has
* been started and the page is already unlocked.
*
@@ -3207,44 +3327,34 @@
* This returns < 0 if there were errors (page still locked)
*/
static noinline_for_stack int writepage_delalloc(struct inode *inode,
- struct page *page, struct writeback_control *wbc,
- struct extent_page_data *epd,
- u64 delalloc_start,
- unsigned long *nr_written)
+ struct page *page, struct writeback_control *wbc,
+ u64 delalloc_start, unsigned long *nr_written)
{
- struct extent_io_tree *tree = epd->tree;
u64 page_end = delalloc_start + PAGE_SIZE - 1;
- u64 nr_delalloc;
+ bool found;
u64 delalloc_to_write = 0;
u64 delalloc_end = 0;
int ret;
int page_started = 0;
- if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
- return 0;
while (delalloc_end < page_end) {
- nr_delalloc = find_lock_delalloc_range(inode, tree,
- page,
+ found = find_lock_delalloc_range(inode, page,
&delalloc_start,
- &delalloc_end,
- BTRFS_MAX_EXTENT_SIZE);
- if (nr_delalloc == 0) {
+ &delalloc_end);
+ if (!found) {
delalloc_start = delalloc_end + 1;
continue;
}
- ret = tree->ops->fill_delalloc(inode, page,
- delalloc_start,
- delalloc_end,
- &page_started,
- nr_written, wbc);
- /* File system has been set read-only */
+ ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
+ delalloc_end, &page_started, nr_written, wbc);
if (ret) {
SetPageError(page);
- /* fill_delalloc should be return < 0 for error
- * but just in case, we use > 0 here meaning the
- * IO is started, so we don't want to return > 0
- * unless things are going well.
+ /*
+			 * btrfs_run_delalloc_range should return < 0 on error,
+			 * but just in case: a return > 0 means the IO has been
+			 * started, so we don't want to return > 0 unless
+			 * things are going well.
*/
ret = ret < 0 ? ret : -EIO;
goto done;
@@ -3317,20 +3427,17 @@
int nr = 0;
bool compressed;
- if (tree->ops && tree->ops->writepage_start_hook) {
- ret = tree->ops->writepage_start_hook(page, start,
- page_end);
- if (ret) {
- /* Fixup worker will requeue */
- if (ret == -EBUSY)
- wbc->pages_skipped++;
- else
- redirty_page_for_writepage(wbc, page);
+ ret = btrfs_writepage_cow_fixup(page, start, page_end);
+ if (ret) {
+ /* Fixup worker will requeue */
+ if (ret == -EBUSY)
+ wbc->pages_skipped++;
+ else
+ redirty_page_for_writepage(wbc, page);
- update_nr_written(wbc, nr_written);
- unlock_page(page);
- return 1;
- }
+ update_nr_written(wbc, nr_written);
+ unlock_page(page);
+ return 1;
}
/*
@@ -3341,9 +3448,7 @@
end = page_end;
if (i_size <= start) {
- if (tree->ops && tree->ops->writepage_end_io_hook)
- tree->ops->writepage_end_io_hook(page, start,
- page_end, NULL, 1);
+ btrfs_writepage_endio_finish_ordered(page, start, page_end, 1);
goto done;
}
@@ -3354,9 +3459,8 @@
u64 offset;
if (cur >= i_size) {
- if (tree->ops && tree->ops->writepage_end_io_hook)
- tree->ops->writepage_end_io_hook(page, cur,
- page_end, NULL, 1);
+ btrfs_writepage_endio_finish_ordered(page, cur,
+ page_end, 1);
break;
}
em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
@@ -3390,11 +3494,10 @@
* end_io notification does not happen here for
* compressed extents
*/
- if (!compressed && tree->ops &&
- tree->ops->writepage_end_io_hook)
- tree->ops->writepage_end_io_hook(page, cur,
- cur + iosize - 1,
- NULL, 1);
+ if (!compressed)
+ btrfs_writepage_endio_finish_ordered(page, cur,
+ cur + iosize - 1,
+ 1);
else if (compressed) {
/* we don't want to end_page_writeback on
* a compressed extent. this happens
@@ -3440,6 +3543,9 @@
* records are inserted to lock ranges in the tree, and as dirty areas
* are found, they are marked writeback. Then the lock bits are removed
* and the end_io handler clears the writeback ranges
+ *
+ * Return 0 if everything goes well.
+ * Return <0 for error.
*/
static int __extent_writepage(struct page *page, struct writeback_control *wbc,
struct extent_page_data *epd)
@@ -3463,7 +3569,7 @@
ClearPageError(page);
- pg_offset = i_size & (PAGE_SIZE - 1);
+ pg_offset = offset_in_page(i_size);
if (page->index > end_index ||
(page->index == end_index && !pg_offset)) {
page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
@@ -3485,11 +3591,13 @@
set_page_extent_mapped(page);
- ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
- if (ret == 1)
- goto done_unlocked;
- if (ret)
- goto done;
+ if (!epd->extent_locked) {
+ ret = writepage_delalloc(inode, page, wbc, start, &nr_written);
+ if (ret == 1)
+ goto done_unlocked;
+ if (ret)
+ goto done;
+ }
ret = __extent_writepage_io(inode, page, wbc, epd,
i_size, nr_written, write_flags, &nr);
@@ -3507,6 +3615,7 @@
end_extent_writepage(page, ret, start, page_end);
}
unlock_page(page);
+ ASSERT(ret <= 0);
return ret;
done_unlocked:
@@ -3519,18 +3628,33 @@
TASK_UNINTERRUPTIBLE);
}
-static noinline_for_stack int
-lock_extent_buffer_for_io(struct extent_buffer *eb,
- struct btrfs_fs_info *fs_info,
+static void end_extent_buffer_writeback(struct extent_buffer *eb)
+{
+ clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
+}
+
+/*
+ * Lock eb pages and flush the bio if we can't get the locks
+ *
+ * Return 0 if nothing went wrong
+ * Return >0 is the same as 0, except the bio is not submitted
+ * Return <0 if something went wrong; no page is locked
+ */
+static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
struct extent_page_data *epd)
{
- int i, num_pages;
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ int i, num_pages, failed_page_nr;
int flush = 0;
int ret = 0;
if (!btrfs_try_tree_write_lock(eb)) {
+ ret = flush_write_bio(epd);
+ if (ret < 0)
+ return ret;
flush = 1;
- flush_write_bio(epd);
btrfs_tree_lock(eb);
}
@@ -3539,7 +3663,9 @@
if (!epd->sync_io)
return 0;
if (!flush) {
- flush_write_bio(epd);
+ ret = flush_write_bio(epd);
+ if (ret < 0)
+ return ret;
flush = 1;
}
while (1) {
@@ -3580,7 +3706,14 @@
if (!trylock_page(p)) {
if (!flush) {
- flush_write_bio(epd);
+ int err;
+
+ err = flush_write_bio(epd);
+ if (err < 0) {
+ ret = err;
+ failed_page_nr = i;
+ goto err_unlock;
+ }
flush = 1;
}
lock_page(p);
@@ -3588,24 +3721,45 @@
}
return ret;
-}
-
-static void end_extent_buffer_writeback(struct extent_buffer *eb)
-{
- clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
- smp_mb__after_atomic();
- wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
+err_unlock:
+ /* Unlock already locked pages */
+ for (i = 0; i < failed_page_nr; i++)
+ unlock_page(eb->pages[i]);
+ /*
+ * Clear EXTENT_BUFFER_WRITEBACK and wake up anyone waiting on it.
+	 * Also set back EXTENT_BUFFER_DIRTY so future attempts to write this
+	 * eb can be made, and undo everything done before.
+ */
+ btrfs_tree_lock(eb);
+ spin_lock(&eb->refs_lock);
+ set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
+ end_extent_buffer_writeback(eb);
+ spin_unlock(&eb->refs_lock);
+ percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len,
+ fs_info->dirty_metadata_batch);
+ btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+ btrfs_tree_unlock(eb);
+ return ret;
}
static void set_btree_ioerr(struct page *page)
{
struct extent_buffer *eb = (struct extent_buffer *)page->private;
+ struct btrfs_fs_info *fs_info;
SetPageError(page);
if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
return;
/*
+ * If we error out, we should add back the dirty_metadata_bytes
+ * to make it consistent.
+ */
+ fs_info = eb->fs_info;
+ percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+ eb->len, fs_info->dirty_metadata_batch);
+
+ /*
* If writeback for a btree extent that doesn't belong to a log tree
* failed, increment the counter transaction->eb_write_errors.
* We do this because while the transaction is running and before it's
@@ -3662,10 +3816,11 @@
{
struct bio_vec *bvec;
struct extent_buffer *eb;
- int i, done;
+ int done;
+ struct bvec_iter_all iter_all;
ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
eb = (struct extent_buffer *)page->private;
@@ -3690,10 +3845,10 @@
}
static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
- struct btrfs_fs_info *fs_info,
struct writeback_control *wbc,
struct extent_page_data *epd)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
struct block_device *bdev = fs_info->fs_devices->latest_bdev;
struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
u64 offset = eb->start;
@@ -3719,7 +3874,7 @@
* header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
*/
start = btrfs_item_nr_offset(nritems);
- end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb);
+ end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
memzero_extent_buffer(eb, start, end - start);
}
@@ -3762,7 +3917,6 @@
struct writeback_control *wbc)
{
struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
- struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
struct extent_buffer *eb, *prev_eb = NULL;
struct extent_page_data epd = {
.bio = NULL,
@@ -3778,7 +3932,7 @@
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
- int tag;
+ xa_mark_t tag;
pagevec_init(&pvec);
if (wbc->range_cyclic) {
@@ -3837,13 +3991,17 @@
continue;
prev_eb = eb;
- ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
+ ret = lock_extent_buffer_for_io(eb, &epd);
if (!ret) {
free_extent_buffer(eb);
continue;
+ } else if (ret < 0) {
+ done = 1;
+ free_extent_buffer(eb);
+ break;
}
- ret = write_one_eb(eb, fs_info, wbc, &epd);
+ ret = write_one_eb(eb, wbc, &epd);
if (ret) {
done = 1;
free_extent_buffer(eb);
@@ -3870,7 +4028,12 @@
index = 0;
goto retry;
}
- flush_write_bio(&epd);
+ ASSERT(ret <= 0);
+ if (ret < 0) {
+ end_write_bio(&epd, ret);
+ return ret;
+ }
+ ret = flush_write_bio(&epd);
return ret;
}
@@ -3903,7 +4066,7 @@
pgoff_t done_index;
int range_whole = 0;
int scanned = 0;
- int tag;
+ xa_mark_t tag;
/*
* We have to hold onto the inode so that ordered extents can do their
@@ -3928,12 +4091,25 @@
range_whole = 1;
scanned = 1;
}
- if (wbc->sync_mode == WB_SYNC_ALL)
+
+ /*
+ * We do the tagged writepage as long as the snapshot flush bit is set
+ * and we are the first one who do the filemap_flush() on this inode.
+	 * and we are the first one to do the filemap_flush() on this inode.
+ * The nr_to_write == LONG_MAX is needed to make sure other flushers do
+ * not race in and drop the bit.
+ */
+ if (range_whole && wbc->nr_to_write == LONG_MAX &&
+ test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
+ &BTRFS_I(inode)->runtime_flags))
+ wbc->tagged_writepages = 1;
+
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
retry:
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && !nr_to_write_done && (index <= end) &&
@@ -3954,7 +4130,8 @@
* tmpfs file mapping
*/
if (!trylock_page(page)) {
- flush_write_bio(epd);
+ ret = flush_write_bio(epd);
+ BUG_ON(ret < 0);
lock_page(page);
}
@@ -3964,8 +4141,10 @@
}
if (wbc->sync_mode != WB_SYNC_NONE) {
- if (PageWriteback(page))
- flush_write_bio(epd);
+ if (PageWriteback(page)) {
+ ret = flush_write_bio(epd);
+ BUG_ON(ret < 0);
+ }
wait_on_page_writeback(page);
}
@@ -3976,11 +4155,6 @@
}
ret = __extent_writepage(page, wbc, epd);
-
- if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
- unlock_page(page);
- ret = 0;
- }
if (ret < 0) {
/*
* done_index is set past this page,
@@ -4023,17 +4197,6 @@
return ret;
}
-static void flush_write_bio(struct extent_page_data *epd)
-{
- if (epd->bio) {
- int ret;
-
- ret = submit_one_bio(epd->bio, 0, 0);
- BUG_ON(ret < 0); /* -ENOMEM */
- epd->bio = NULL;
- }
-}
-
int extent_write_full_page(struct page *page, struct writeback_control *wbc)
{
int ret;
@@ -4045,8 +4208,14 @@
};
ret = __extent_writepage(page, wbc, &epd);
+ ASSERT(ret <= 0);
+ if (ret < 0) {
+ end_write_bio(&epd, ret);
+ return ret;
+ }
- flush_write_bio(&epd);
+ ret = flush_write_bio(&epd);
+ ASSERT(ret <= 0);
return ret;
}
@@ -4078,17 +4247,20 @@
if (clear_page_dirty_for_io(page))
ret = __extent_writepage(page, &wbc_writepages, &epd);
else {
- if (tree->ops && tree->ops->writepage_end_io_hook)
- tree->ops->writepage_end_io_hook(page, start,
- start + PAGE_SIZE - 1,
- NULL, 1);
+ btrfs_writepage_endio_finish_ordered(page, start,
+ start + PAGE_SIZE - 1, 1);
unlock_page(page);
}
put_page(page);
start += PAGE_SIZE;
}
- flush_write_bio(&epd);
+ ASSERT(ret <= 0);
+ if (ret < 0) {
+ end_write_bio(&epd, ret);
+ return ret;
+ }
+ ret = flush_write_bio(&epd);
return ret;
}
@@ -4104,7 +4276,12 @@
};
ret = extent_write_cache_pages(mapping, wbc, &epd);
- flush_write_bio(&epd);
+ ASSERT(ret <= 0);
+ if (ret < 0) {
+ end_write_bio(&epd, ret);
+ return ret;
+ }
+ ret = flush_write_bio(&epd);
return ret;
}
@@ -4112,42 +4289,45 @@
unsigned nr_pages)
{
struct bio *bio = NULL;
- unsigned page_idx;
unsigned long bio_flags = 0;
struct page *pagepool[16];
- struct page *page;
struct extent_map *em_cached = NULL;
struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
int nr = 0;
u64 prev_em_start = (u64)-1;
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- page = list_entry(pages->prev, struct page, lru);
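+	/*
+	 * Gather up to ARRAY_SIZE(pagepool) pages from the readahead list,
+	 * which are expected to be contiguous in the file, and read each
+	 * batch with contiguous_readpages().
+	 */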
+ while (!list_empty(pages)) {
+ u64 contig_end = 0;
- prefetchw(&page->flags);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping,
- page->index,
- readahead_gfp_mask(mapping))) {
- put_page(page);
- continue;
+ for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
+ struct page *page = lru_to_page(pages);
+
+ prefetchw(&page->flags);
+ list_del(&page->lru);
+ if (add_to_page_cache_lru(page, mapping, page->index,
+ readahead_gfp_mask(mapping))) {
+ put_page(page);
+ break;
+ }
+
+ pagepool[nr++] = page;
+ contig_end = page_offset(page) + PAGE_SIZE - 1;
}
- pagepool[nr++] = page;
- if (nr < ARRAY_SIZE(pagepool))
- continue;
- __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
- &bio_flags, &prev_em_start);
- nr = 0;
+ if (nr) {
+ u64 contig_start = page_offset(pagepool[0]);
+
+ ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
+
+ contiguous_readpages(tree, pagepool, nr, contig_start,
+ contig_end, &em_cached, &bio, &bio_flags,
+ &prev_em_start);
+ }
}
- if (nr)
- __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
- &bio_flags, &prev_em_start);
if (em_cached)
free_extent_map(em_cached);
- BUG_ON(!list_empty(pages));
if (bio)
return submit_one_bio(bio, 0, bio_flags);
return 0;
@@ -4172,10 +4352,8 @@
lock_extent_bits(tree, start, end, &cached_state);
wait_on_page_writeback(page);
- clear_extent_bit(tree, start, end,
- EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING,
- 1, 1, &cached_state);
+ clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 1, 1, &cached_state);
return 0;
}
@@ -4191,10 +4369,9 @@
u64 end = start + PAGE_SIZE - 1;
int ret = 1;
- if (test_range_bit(tree, start, end,
- EXTENT_IOBITS, 0, NULL))
+ if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
ret = 0;
- else {
+ } else {
/*
* at this point we can safely clear everything except the
* locked bit and the nodatasum bit
@@ -4247,8 +4424,7 @@
}
if (!test_range_bit(tree, em->start,
extent_map_end(em) - 1,
- EXTENT_LOCKED | EXTENT_WRITEBACK,
- 0, NULL)) {
+ EXTENT_LOCKED, 0, NULL)) {
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&btrfs_inode->runtime_flags);
remove_extent_mapping(map, em);
@@ -4284,8 +4460,7 @@
if (len == 0)
break;
len = ALIGN(len, sectorsize);
- em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, offset,
- len, 0);
+ em = btrfs_get_extent_fiemap(BTRFS_I(inode), offset, len);
if (IS_ERR_OR_NULL(em))
return em;
@@ -4336,7 +4511,7 @@
/*
* Sanity check, extent_fiemap() should have ensured that new
- * fiemap extent won't overlap with cahced one.
+ * fiemap extent won't overlap with cached one.
* Not recoverable.
*
* NOTE: Physical address can overlap, due to compression
@@ -4398,8 +4573,7 @@
* In this case, the first extent range will be cached but not emitted.
* So we must emit it before ending extent_fiemap().
*/
-static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
- struct fiemap_extent_info *fieinfo,
+static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
struct fiemap_cache *cache)
{
int ret;
@@ -4433,6 +4607,8 @@
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct fiemap_cache cache = { 0 };
+ struct ulist *roots;
+ struct ulist *tmp_ulist;
int end = 0;
u64 em_start = 0;
u64 em_len = 0;
@@ -4446,6 +4622,13 @@
return -ENOMEM;
path->leave_spinning = 1;
+ roots = ulist_alloc(GFP_KERNEL);
+ tmp_ulist = ulist_alloc(GFP_KERNEL);
+ if (!roots || !tmp_ulist) {
+ ret = -ENOMEM;
+ goto out_free_ulist;
+ }
+
start = round_down(start, btrfs_inode_sectorsize(inode));
len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
@@ -4456,8 +4639,7 @@
ret = btrfs_lookup_file_extent(NULL, root, path,
btrfs_ino(BTRFS_I(inode)), -1, 0);
if (ret < 0) {
- btrfs_free_path(path);
- return ret;
+ goto out_free_ulist;
} else {
WARN_ON(!ret);
if (ret == 1)
@@ -4566,7 +4748,7 @@
*/
ret = btrfs_check_shared(root,
btrfs_ino(BTRFS_I(inode)),
- bytenr);
+ bytenr, roots, tmp_ulist);
if (ret < 0)
goto out_free;
if (ret)
@@ -4606,12 +4788,16 @@
}
out_free:
if (!ret)
- ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache);
+ ret = emit_last_fiemap_cache(fieinfo, &cache);
free_extent_map(em);
out:
- btrfs_free_path(path);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state);
+
+out_free_ulist:
+ btrfs_free_path(path);
+ ulist_free(roots);
+ ulist_free(tmp_ulist);
return ret;
}
@@ -4698,13 +4884,9 @@
eb->fs_info = fs_info;
eb->bflags = 0;
rwlock_init(&eb->lock);
- atomic_set(&eb->write_locks, 0);
- atomic_set(&eb->read_locks, 0);
atomic_set(&eb->blocking_readers, 0);
- atomic_set(&eb->blocking_writers, 0);
- atomic_set(&eb->spinning_readers, 0);
- atomic_set(&eb->spinning_writers, 0);
- eb->lock_nested = 0;
+ eb->blocking_writers = 0;
+ eb->lock_nested = false;
init_waitqueue_head(&eb->write_lock_wq);
init_waitqueue_head(&eb->read_lock_wq);
@@ -4721,6 +4903,13 @@
> MAX_INLINE_EXTENT_BUFFER_SIZE);
BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
+#ifdef CONFIG_BTRFS_DEBUG
+ eb->spinning_writers = 0;
+ atomic_set(&eb->spinning_readers, 0);
+ atomic_set(&eb->read_locks, 0);
+ eb->write_locks = 0;
+#endif
+
return eb;
}
@@ -4908,13 +5097,6 @@
check_buffer_tree_ref(eb);
set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
- /*
- * We will free dummy extent buffer's if they come into
- * free_extent_buffer with a ref count of 2, but if we are using this we
- * want the buffers to stay in memory until we're done with them, so
- * bump the ref count again.
- */
- atomic_inc(&eb->refs);
return eb;
free_eb:
btrfs_release_extent_buffer(eb);
@@ -5096,7 +5278,9 @@
while (1) {
refs = atomic_read(&eb->refs);
- if (refs <= 3)
+ if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
+ || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
+ refs == 1))
break;
old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
if (old == refs)
@@ -5105,10 +5289,6 @@
spin_lock(&eb->refs_lock);
if (atomic_read(&eb->refs) == 2 &&
- test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))
- atomic_dec(&eb->refs);
-
- if (atomic_read(&eb->refs) == 2 &&
test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
!extent_buffer_under_io(eb) &&
test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -5153,11 +5333,9 @@
clear_page_dirty_for_io(page);
xa_lock_irq(&page->mapping->i_pages);
- if (!PageDirty(page)) {
- radix_tree_tag_clear(&page->mapping->i_pages,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- }
+ if (!PageDirty(page))
+ __xa_clear_mark(&page->mapping->i_pages,
+ page_index(page), PAGECACHE_TAG_DIRTY);
xa_unlock_irq(&page->mapping->i_pages);
ClearPageError(page);
unlock_page(page);
@@ -5165,11 +5343,11 @@
WARN_ON(atomic_read(&eb->refs) == 0);
}
-int set_extent_buffer_dirty(struct extent_buffer *eb)
+bool set_extent_buffer_dirty(struct extent_buffer *eb)
{
int i;
int num_pages;
- int was_dirty = 0;
+ bool was_dirty;
check_buffer_tree_ref(eb);
@@ -5179,8 +5357,15 @@
WARN_ON(atomic_read(&eb->refs) == 0);
WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
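+	/* The pages only need to be marked dirty on the first transition */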
+ if (!was_dirty)
+ for (i = 0; i < num_pages; i++)
+ set_page_dirty(eb->pages[i]);
+
+#ifdef CONFIG_BTRFS_DEBUG
for (i = 0; i < num_pages; i++)
- set_page_dirty(eb->pages[i]);
+ ASSERT(PageDirty(eb->pages[i]));
+#endif
+
return was_dirty;
}
@@ -5213,8 +5398,7 @@
}
}
-int read_extent_buffer_pages(struct extent_io_tree *tree,
- struct extent_buffer *eb, int wait, int mirror_num)
+int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
{
int i;
struct page *page;
@@ -5226,6 +5410,7 @@
unsigned long num_reads = 0;
struct bio *bio = NULL;
unsigned long bio_flags = 0;
+ struct extent_io_tree *tree = &BTRFS_I(eb->fs_info->btree_inode)->io_tree;
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
@@ -5329,7 +5514,7 @@
struct page *page;
char *kaddr;
char *dst = (char *)dstv;
- size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
if (start + len > eb->len) {
@@ -5339,7 +5524,7 @@
return;
}
- offset = (start_offset + start) & (PAGE_SIZE - 1);
+ offset = offset_in_page(start_offset + start);
while (len > 0) {
page = eb->pages[i];
@@ -5364,14 +5549,14 @@
struct page *page;
char *kaddr;
char __user *dst = (char __user *)dstv;
- size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
int ret = 0;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & (PAGE_SIZE - 1);
+ offset = offset_in_page(start_offset + start);
while (len > 0) {
page = eb->pages[i];
@@ -5402,10 +5587,10 @@
char **map, unsigned long *map_start,
unsigned long *map_len)
{
- size_t offset = start & (PAGE_SIZE - 1);
+ size_t offset;
char *kaddr;
struct page *p;
- size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
unsigned long end_i = (start_offset + start + min_len - 1) >>
PAGE_SHIFT;
@@ -5442,14 +5627,14 @@
struct page *page;
char *kaddr;
char *ptr = (char *)ptrv;
- size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
int ret = 0;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & (PAGE_SIZE - 1);
+ offset = offset_in_page(start_offset + start);
while (len > 0) {
page = eb->pages[i];
@@ -5498,13 +5683,13 @@
struct page *page;
char *kaddr;
char *src = (char *)srcv;
- size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & (PAGE_SIZE - 1);
+ offset = offset_in_page(start_offset + start);
while (len > 0) {
page = eb->pages[i];
@@ -5528,13 +5713,13 @@
size_t offset;
struct page *page;
char *kaddr;
- size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & (PAGE_SIZE - 1);
+ offset = offset_in_page(start_offset + start);
while (len > 0) {
page = eb->pages[i];
@@ -5573,13 +5758,12 @@
size_t offset;
struct page *page;
char *kaddr;
- size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
+ size_t start_offset = offset_in_page(dst->start);
unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
WARN_ON(src->len != dst_len);
- offset = (start_offset + dst_offset) &
- (PAGE_SIZE - 1);
+ offset = offset_in_page(start_offset + dst_offset);
while (len > 0) {
page = dst->pages[i];
@@ -5615,7 +5799,7 @@
unsigned long *page_index,
size_t *page_offset)
{
- size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ size_t start_offset = offset_in_page(eb->start);
size_t byte_offset = BIT_BYTE(nr);
size_t offset;
@@ -5627,7 +5811,7 @@
offset = start_offset + start + byte_offset;
*page_index = offset >> PAGE_SHIFT;
- *page_offset = offset & (PAGE_SIZE - 1);
+ *page_offset = offset_in_page(offset);
}
/**
@@ -5769,7 +5953,7 @@
size_t cur;
size_t dst_off_in_page;
size_t src_off_in_page;
- size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
+ size_t start_offset = offset_in_page(dst->start);
unsigned long dst_i;
unsigned long src_i;
@@ -5777,20 +5961,18 @@
btrfs_err(fs_info,
"memmove bogus src_offset %lu move len %lu dst len %lu",
src_offset, len, dst->len);
- BUG_ON(1);
+ BUG();
}
if (dst_offset + len > dst->len) {
btrfs_err(fs_info,
"memmove bogus dst_offset %lu move len %lu dst len %lu",
dst_offset, len, dst->len);
- BUG_ON(1);
+ BUG();
}
while (len > 0) {
- dst_off_in_page = (start_offset + dst_offset) &
- (PAGE_SIZE - 1);
- src_off_in_page = (start_offset + src_offset) &
- (PAGE_SIZE - 1);
+ dst_off_in_page = offset_in_page(start_offset + dst_offset);
+ src_off_in_page = offset_in_page(start_offset + src_offset);
dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
src_i = (start_offset + src_offset) >> PAGE_SHIFT;
@@ -5818,7 +6000,7 @@
size_t src_off_in_page;
unsigned long dst_end = dst_offset + len - 1;
unsigned long src_end = src_offset + len - 1;
- size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
+ size_t start_offset = offset_in_page(dst->start);
unsigned long dst_i;
unsigned long src_i;
@@ -5826,13 +6008,13 @@
btrfs_err(fs_info,
"memmove bogus src_offset %lu move len %lu len %lu",
src_offset, len, dst->len);
- BUG_ON(1);
+ BUG();
}
if (dst_offset + len > dst->len) {
btrfs_err(fs_info,
"memmove bogus dst_offset %lu move len %lu len %lu",
dst_offset, len, dst->len);
- BUG_ON(1);
+ BUG();
}
if (dst_offset < src_offset) {
memcpy_extent_buffer(dst, dst_offset, src_offset, len);
@@ -5842,10 +6024,8 @@
dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
src_i = (start_offset + src_end) >> PAGE_SHIFT;
- dst_off_in_page = (start_offset + dst_end) &
- (PAGE_SIZE - 1);
- src_off_in_page = (start_offset + src_end) &
- (PAGE_SIZE - 1);
+ dst_off_in_page = offset_in_page(start_offset + dst_end);
+ src_off_in_page = offset_in_page(start_offset + src_end);
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);