Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0159100..b859ed5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -49,6 +49,7 @@
#include "qgroup.h"
#include "delalloc-space.h"
#include "block-group.h"
+#include "space-info.h"
struct btrfs_iget_args {
struct btrfs_key *location;
@@ -640,12 +641,18 @@
page_error_op |
PAGE_END_WRITEBACK);
- for (i = 0; i < nr_pages; i++) {
- WARN_ON(pages[i]->mapping);
- put_page(pages[i]);
+ /*
+ * Ensure we only free the compressed pages if we have
+ * them allocated, as we can still reach here with
+ * inode_need_compress() == false.
+ */
+ if (pages) {
+ for (i = 0; i < nr_pages; i++) {
+ WARN_ON(pages[i]->mapping);
+ put_page(pages[i]);
+ }
+ kfree(pages);
}
- kfree(pages);
-
return 0;
}
}
@@ -712,10 +719,12 @@
* to our extent and set things up for the async work queue to run
* cow_file_range to do the normal delalloc dance.
*/
- if (page_offset(async_chunk->locked_page) >= start &&
- page_offset(async_chunk->locked_page) <= end)
+ if (async_chunk->locked_page &&
+ (page_offset(async_chunk->locked_page) >= start &&
+ page_offset(async_chunk->locked_page)) <= end) {
__set_page_dirty_nobuffers(async_chunk->locked_page);
/* unlocked later on in the async handlers */
+ }
if (redirty)
extent_range_redirty_for_io(inode, start, end);
@@ -795,7 +804,7 @@
async_extent->start +
async_extent->ram_size - 1,
WB_SYNC_ALL);
- else if (ret)
+ else if (ret && async_chunk->locked_page)
unlock_page(async_chunk->locked_page);
kfree(async_extent);
cond_resched();
@@ -972,6 +981,7 @@
u64 num_bytes;
unsigned long ram_size;
u64 cur_alloc_size = 0;
+ u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
@@ -1022,10 +1032,26 @@
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
+
while (num_bytes > 0) {
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- fs_info->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
@@ -1130,7 +1156,7 @@
*/
if (extent_reserved) {
extent_clear_unlock_delalloc(inode, start,
- start + cur_alloc_size,
+ start + cur_alloc_size - 1,
locked_page,
clear_bits,
page_ops);
@@ -1174,11 +1200,6 @@
nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
PAGE_SHIFT;
- /* atomic_sub_return implies a barrier */
- if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
- 5 * SZ_1M)
- cond_wake_up_nomb(&fs_info->async_submit_wait);
-
/*
* ->inode could be NULL if async_chunk_start has failed to compress,
* in which case we don't have anything to submit, yet we need to
@@ -1187,6 +1208,11 @@
*/
if (async_chunk->inode)
submit_compressed_extents(async_chunk);
+
+ /* atomic_sub_return implies a barrier */
+ if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
+ 5 * SZ_1M)
+ cond_wake_up_nomb(&fs_info->async_submit_wait);
}
static noinline void async_cow_free(struct btrfs_work *work)
@@ -1264,14 +1290,27 @@
async_chunk[i].inode = inode;
async_chunk[i].start = start;
async_chunk[i].end = cur_end;
- async_chunk[i].locked_page = locked_page;
async_chunk[i].write_flags = write_flags;
INIT_LIST_HEAD(&async_chunk[i].extents);
- btrfs_init_work(&async_chunk[i].work,
- btrfs_delalloc_helper,
- async_cow_start, async_cow_submit,
- async_cow_free);
+ /*
+ * The locked_page comes all the way from writepage and its
+ * the original page we were actually given. As we spread
+ * this large delalloc region across multiple async_chunk
+ * structs, only the first struct needs a pointer to locked_page
+ *
+ * This way we don't need racey decisions about who is supposed
+ * to unlock it.
+ */
+ if (locked_page) {
+ async_chunk[i].locked_page = locked_page;
+ locked_page = NULL;
+ } else {
+ async_chunk[i].locked_page = NULL;
+ }
+
+ btrfs_init_work(&async_chunk[i].work, async_cow_start,
+ async_cow_submit, async_cow_free);
nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
atomic_add(nr_pages, &fs_info->async_delalloc_pages);
@@ -1307,6 +1346,73 @@
return 1;
}
+static int fallback_to_cow(struct inode *inode, struct page *locked_page,
+ const u64 start, const u64 end,
+ int *page_started, unsigned long *nr_written)
+{
+ const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid ==
+ BTRFS_DATA_RELOC_TREE_OBJECTID);
+ const u64 range_bytes = end + 1 - start;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ u64 range_start = start;
+ u64 count;
+
+ /*
+ * If EXTENT_NORESERVE is set it means that when the buffered write was
+ * made we had not enough available data space and therefore we did not
+ * reserve data space for it, since we though we could do NOCOW for the
+ * respective file range (either there is prealloc extent or the inode
+ * has the NOCOW bit set).
+ *
+ * However when we need to fallback to COW mode (because for example the
+ * block group for the corresponding extent was turned to RO mode by a
+ * scrub or relocation) we need to do the following:
+ *
+ * 1) We increment the bytes_may_use counter of the data space info.
+ * If COW succeeds, it allocates a new data extent and after doing
+ * that it decrements the space info's bytes_may_use counter and
+ * increments its bytes_reserved counter by the same amount (we do
+ * this at btrfs_add_reserved_bytes()). So we need to increment the
+ * bytes_may_use counter to compensate (when space is reserved at
+ * buffered write time, the bytes_may_use counter is incremented);
+ *
+ * 2) We clear the EXTENT_NORESERVE bit from the range. We do this so
+ * that if the COW path fails for any reason, it decrements (through
+ * extent_clear_unlock_delalloc()) the bytes_may_use counter of the
+ * data space info, which we incremented in the step above.
+ *
+ * If we need to fallback to cow and the inode corresponds to a free
+ * space cache inode or an inode of the data relocation tree, we must
+ * also increment bytes_may_use of the data space_info for the same
+ * reason. Space caches and relocated data extents always get a prealloc
+ * extent for them, however scrub or balance may have set the block
+ * group that contains that extent to RO mode and therefore force COW
+ * when starting writeback.
+ */
+ count = count_range_bits(io_tree, &range_start, end, range_bytes,
+ EXTENT_NORESERVE, 0);
+ if (count > 0 || is_space_ino || is_reloc_ino) {
+ u64 bytes = count;
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+
+ if (is_space_ino || is_reloc_ino)
+ bytes = range_bytes;
+
+ spin_lock(&sinfo->lock);
+ btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
+ spin_unlock(&sinfo->lock);
+
+ if (count > 0)
+ clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE,
+ 0, 0, NULL);
+ }
+
+ return cow_file_range(inode, locked_page, start, end, page_started,
+ nr_written, 1);
+}
+
/*
* when nowcow writeback call back. This checks for snapshots or COW copies
* of the extents that exist in the file, and COWs the file as required.
@@ -1439,10 +1545,10 @@
disk_num_bytes =
btrfs_file_extent_disk_num_bytes(leaf, fi);
/*
- * If extent we got ends before our range starts, skip
- * to next extent
+ * If the extent we got ends before our current offset,
+ * skip to the next extent.
*/
- if (extent_end <= start) {
+ if (extent_end <= cur_offset) {
path->slots[0]++;
goto next_slot;
}
@@ -1472,7 +1578,7 @@
goto out_check;
ret = btrfs_cross_ref_exist(root, ino,
found_key.offset -
- extent_offset, disk_bytenr);
+ extent_offset, disk_bytenr, false);
if (ret) {
/*
* ret could be -EIO if the above fails to read
@@ -1554,15 +1660,11 @@
* NOCOW, following one which needs to be COW'ed
*/
if (cow_start != (u64)-1) {
- ret = cow_file_range(inode, locked_page,
- cow_start, found_key.offset - 1,
- page_started, nr_written, 1);
- if (ret) {
- if (nocow)
- btrfs_dec_nocow_writers(fs_info,
- disk_bytenr);
+ ret = fallback_to_cow(inode, locked_page, cow_start,
+ found_key.offset - 1,
+ page_started, nr_written);
+ if (ret)
goto error;
- }
cow_start = (u64)-1;
}
@@ -1578,9 +1680,6 @@
ram_bytes, BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
- if (nocow)
- btrfs_dec_nocow_writers(fs_info,
- disk_bytenr);
ret = PTR_ERR(em);
goto error;
}
@@ -1645,8 +1744,8 @@
if (cow_start != (u64)-1) {
cur_offset = end;
- ret = cow_file_range(inode, locked_page, cow_start, end,
- page_started, nr_written, 1);
+ ret = fallback_to_cow(inode, locked_page, cow_start, end,
+ page_started, nr_written);
if (ret)
goto error;
}
@@ -2153,6 +2252,7 @@
/* see btrfs_writepage_start_hook for details on why this is required */
struct btrfs_writepage_fixup {
struct page *page;
+ struct inode *inode;
struct btrfs_work work;
};
@@ -2166,27 +2266,71 @@
struct inode *inode;
u64 page_start;
u64 page_end;
- int ret;
+ int ret = 0;
+ bool free_delalloc_space = true;
fixup = container_of(work, struct btrfs_writepage_fixup, work);
page = fixup->page;
+ inode = fixup->inode;
+ page_start = page_offset(page);
+ page_end = page_offset(page) + PAGE_SIZE - 1;
+
+ /*
+ * This is similar to page_mkwrite, we need to reserve the space before
+ * we take the page lock.
+ */
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
+ PAGE_SIZE);
again:
lock_page(page);
+
+ /*
+ * Before we queued this fixup, we took a reference on the page.
+ * page->mapping may go NULL, but it shouldn't be moved to a different
+ * address space.
+ */
if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
- ClearPageChecked(page);
+ /*
+ * Unfortunately this is a little tricky, either
+ *
+ * 1) We got here and our page had already been dealt with and
+ * we reserved our space, thus ret == 0, so we need to just
+ * drop our space reservation and bail. This can happen the
+ * first time we come into the fixup worker, or could happen
+ * while waiting for the ordered extent.
+ * 2) Our page was already dealt with, but we happened to get an
+ * ENOSPC above from the btrfs_delalloc_reserve_space. In
+ * this case we obviously don't have anything to release, but
+ * because the page was already dealt with we don't want to
+ * mark the page with an error, so make sure we're resetting
+ * ret to 0. This is why we have this check _before_ the ret
+ * check, because we do not want to have a surprise ENOSPC
+ * when the page was already properly dealt with.
+ */
+ if (!ret) {
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ PAGE_SIZE);
+ btrfs_delalloc_release_space(inode, data_reserved,
+ page_start, PAGE_SIZE,
+ true);
+ }
+ ret = 0;
goto out_page;
}
- inode = page->mapping->host;
- page_start = page_offset(page);
- page_end = page_offset(page) + PAGE_SIZE - 1;
+ /*
+ * We can't mess with the page state unless it is locked, so now that
+ * it is locked bail if we failed to make our space reservation.
+ */
+ if (ret)
+ goto out_page;
lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
/* already ordered? We're done */
if (PagePrivate2(page))
- goto out;
+ goto out_reserved;
ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
PAGE_SIZE);
@@ -2199,35 +2343,49 @@
goto again;
}
- ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
- PAGE_SIZE);
- if (ret) {
- mapping_set_error(page->mapping, ret);
- end_extent_writepage(page, ret, page_start, page_end);
- ClearPageChecked(page);
- goto out;
- }
-
ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
&cached_state);
- if (ret) {
- mapping_set_error(page->mapping, ret);
- end_extent_writepage(page, ret, page_start, page_end);
- ClearPageChecked(page);
- goto out;
- }
+ if (ret)
+ goto out_reserved;
- ClearPageChecked(page);
- set_page_dirty(page);
+ /*
+ * Everything went as planned, we're now the owner of a dirty page with
+ * delayed allocation bits set and space reserved for our COW
+ * destination.
+ *
+ * The page was dirty when we started, nothing should have cleaned it.
+ */
+ BUG_ON(!PageDirty(page));
+ free_delalloc_space = false;
+out_reserved:
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
-out:
+ if (free_delalloc_space)
+ btrfs_delalloc_release_space(inode, data_reserved, page_start,
+ PAGE_SIZE, true);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
out_page:
+ if (ret) {
+ /*
+ * We hit ENOSPC or other errors. Update the mapping and page
+ * to reflect the errors and clean the page.
+ */
+ mapping_set_error(page->mapping, ret);
+ end_extent_writepage(page, ret, page_start, page_end);
+ clear_page_dirty_for_io(page);
+ SetPageError(page);
+ }
+ ClearPageChecked(page);
unlock_page(page);
put_page(page);
kfree(fixup);
extent_changeset_free(data_reserved);
+ /*
+ * As a precaution, do a delayed iput in case it would be the last iput
+ * that could need flushing space. Recursing back to fixup worker would
+ * deadlock.
+ */
+ btrfs_add_delayed_iput(inode);
}
/*
@@ -2251,6 +2409,13 @@
if (TestClearPagePrivate2(page))
return 0;
+ /*
+ * PageChecked is set below when we create a fixup worker for this page,
+ * don't try to create another one if we're already PageChecked()
+ *
+ * The extent_io writepage code will redirty the page if we send back
+ * EAGAIN.
+ */
if (PageChecked(page))
return -EAGAIN;
@@ -2258,13 +2423,21 @@
if (!fixup)
return -EAGAIN;
+ /*
+ * We are already holding a reference to this inode from
+ * write_cache_pages. We need to hold it because the space reservation
+ * takes place outside of the page lock, and we can't trust
+ * page->mapping outside of the page lock.
+ */
+ ihold(inode);
SetPageChecked(page);
get_page(page);
- btrfs_init_work(&fixup->work, btrfs_fixup_helper,
- btrfs_writepage_fixup_worker, NULL, NULL);
+ btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
fixup->page = page;
+ fixup->inode = inode;
btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
- return -EBUSY;
+
+ return -EAGAIN;
}
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -3186,6 +3359,18 @@
if (ret || truncated) {
u64 start, end;
+ /*
+ * If we failed to finish this ordered extent for any reason we
+ * need to make sure BTRFS_ORDERED_IOERR is set on the ordered
+ * extent, and mark the inode with the error if it wasn't
+ * already set. Any error during writeback would have already
+ * set the mapping error, so we need to set it if we're the ones
+ * marking this ordered extent as failed.
+ */
+ if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR,
+ &ordered_extent->flags))
+ mapping_set_error(ordered_extent->inode->i_mapping, -EIO);
+
if (truncated)
start = ordered_extent->file_offset + logical_len;
else
@@ -3254,7 +3439,6 @@
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_extent *ordered_extent = NULL;
struct btrfs_workqueue *wq;
- btrfs_work_func_t func;
trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
@@ -3263,16 +3447,12 @@
end - start + 1, uptodate))
return;
- if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+ if (btrfs_is_free_space_inode(BTRFS_I(inode)))
wq = fs_info->endio_freespace_worker;
- func = btrfs_freespace_write_helper;
- } else {
+ else
wq = fs_info->endio_write_workers;
- func = btrfs_endio_write_helper;
- }
- btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
- NULL);
+ btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
btrfs_queue_work(wq, &ordered_extent->work);
}
@@ -3403,6 +3583,7 @@
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
run_delayed_iput_locked(fs_info, inode);
+ cond_resched_lock(&fs_info->delayed_iput_lock);
}
spin_unlock(&fs_info->delayed_iput_lock);
}
@@ -4166,7 +4347,7 @@
* 1 for the inode ref
* 1 for the inode
*/
- return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
+ return btrfs_start_transaction_fallback_global_rsv(root, 5);
}
static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
@@ -4202,18 +4383,30 @@
}
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
- struct inode *dir, u64 objectid,
- const char *name, int name_len)
+ struct inode *dir, struct dentry *dentry)
{
struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
struct btrfs_key key;
+ const char *name = dentry->d_name.name;
+ int name_len = dentry->d_name.len;
u64 index;
int ret;
+ u64 objectid;
u64 dir_ino = btrfs_ino(BTRFS_I(dir));
+ if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
+ objectid = inode->root->root_key.objectid;
+ } else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
+ objectid = inode->location.objectid;
+ } else {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -4235,13 +4428,16 @@
}
btrfs_release_path(path);
- ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
- dir_ino, &index, name, name_len);
- if (ret < 0) {
- if (ret != -ENOENT) {
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
+ /*
+ * This is a placeholder inode for a subvolume we didn't have a
+ * reference to at the time of the snapshot creation. In the meantime
+ * we could have renamed the real subvol link into our snapshot, so
+ * depending on btrfs_del_root_ref to return -ENOENT here is incorret.
+ * Instead simply lookup the dir_index_item for this entry so we can
+ * remove it. Otherwise we know we have a ref to the root and we can
+ * call btrfs_del_root_ref, and it _shouldn't_ fail.
+ */
+ if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
di = btrfs_search_dir_index_item(root, path, dir_ino,
name, name_len);
if (IS_ERR_OR_NULL(di)) {
@@ -4256,8 +4452,16 @@
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
index = key.offset;
+ btrfs_release_path(path);
+ } else {
+ ret = btrfs_del_root_ref(trans, objectid,
+ root->root_key.objectid, dir_ino,
+ &index, name, name_len);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
}
- btrfs_release_path(path);
ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
if (ret) {
@@ -4451,8 +4655,7 @@
btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
- ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
- dentry->d_name.name, dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, dir, dentry);
if (ret) {
err = ret;
btrfs_abort_transaction(trans, ret);
@@ -4497,6 +4700,8 @@
}
}
+ free_anon_bdev(dest->anon_dev);
+ dest->anon_dev = 0;
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
@@ -4547,10 +4752,7 @@
return PTR_ERR(trans);
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
- err = btrfs_unlink_subvol(trans, dir,
- BTRFS_I(inode)->location.objectid,
- dentry->d_name.name,
- dentry->d_name.len);
+ err = btrfs_unlink_subvol(trans, dir, dentry);
goto out;
}
@@ -4631,6 +4833,8 @@
u64 bytes_deleted = 0;
bool be_nice = false;
bool should_throttle = false;
+ const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize);
+ struct extent_state *cached_state = NULL;
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
@@ -4647,6 +4851,10 @@
return -ENOMEM;
path->reada = READA_BACK;
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
+ &cached_state);
+
/*
* We want to drop from the next block forward in case this new size is
* not block aligned since we will be keeping the last block of the
@@ -4683,7 +4891,6 @@
goto out;
}
- path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
goto out;
@@ -4835,7 +5042,6 @@
root == fs_info->tree_root)) {
struct btrfs_ref ref = { 0 };
- btrfs_set_path_blocking(path);
bytes_deleted += extent_num_bytes;
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
@@ -4911,6 +5117,8 @@
if (!ret && last_size > new_size)
last_size = new_size;
btrfs_ordered_update_i_size(inode, last_size, NULL);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start,
+ (u64)-1, &cached_state);
}
btrfs_free_path(path);
@@ -4938,11 +5146,13 @@
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
char *kaddr;
+ bool only_release_metadata = false;
u32 blocksize = fs_info->sectorsize;
pgoff_t index = from >> PAGE_SHIFT;
unsigned offset = from & (blocksize - 1);
struct page *page;
gfp_t mask = btrfs_alloc_write_mask(mapping);
+ size_t write_bytes = blocksize;
int ret = 0;
u64 block_start;
u64 block_end;
@@ -4954,11 +5164,27 @@
block_start = round_down(from, blocksize);
block_end = block_start + blocksize - 1;
- ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
- block_start, blocksize);
- if (ret)
- goto out;
+ ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
+ blocksize);
+ if (ret < 0) {
+ if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) &&
+ btrfs_check_can_nocow(BTRFS_I(inode), block_start,
+ &write_bytes) > 0) {
+ /* For nocow case, no need to reserve data space */
+ only_release_metadata = true;
+ } else {
+ goto out;
+ }
+ }
+ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
+ if (ret < 0) {
+ if (!only_release_metadata)
+ btrfs_free_reserved_data_space(inode, data_reserved,
+ block_start, blocksize);
+ goto out;
+ }
again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
@@ -5027,14 +5253,26 @@
set_page_dirty(page);
unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
+ if (only_release_metadata)
+ set_extent_bit(&BTRFS_I(inode)->io_tree, block_start,
+ block_end, EXTENT_NORESERVE, NULL, NULL,
+ GFP_NOFS);
+
out_unlock:
- if (ret)
- btrfs_delalloc_release_space(inode, data_reserved, block_start,
- blocksize, true);
+ if (ret) {
+ if (only_release_metadata)
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
+ blocksize, true);
+ else
+ btrfs_delalloc_release_space(inode, data_reserved,
+ block_start, blocksize, true);
+ }
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
unlock_page(page);
put_page(page);
out:
+ if (only_release_metadata)
+ btrfs_end_write_no_snapshotting(BTRFS_I(inode)->root);
extent_changeset_free(data_reserved);
return ret;
}
@@ -5693,7 +5931,6 @@
static void inode_tree_del(struct inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
int empty = 0;
@@ -5706,7 +5943,6 @@
spin_unlock(&root->inode_lock);
if (empty && btrfs_root_refs(&root->root_item) == 0) {
- synchronize_srcu(&fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
spin_unlock(&root->inode_lock);
@@ -6139,7 +6375,7 @@
return PTR_ERR(trans);
ret = btrfs_update_inode(trans, root, inode);
- if (ret && ret == -ENOSPC) {
+ if (ret && (ret == -ENOSPC || ret == -EDQUOT)) {
/* whoops, lets try again with the full transaction */
btrfs_end_transaction(trans);
trans = btrfs_start_transaction(root, 1);
@@ -6756,7 +6992,6 @@
drop_inode = 1;
} else {
struct dentry *parent = dentry->d_parent;
- int ret;
err = btrfs_update_inode(trans, root, inode);
if (err)
@@ -6771,12 +7006,7 @@
goto fail;
}
d_instantiate(dentry, inode);
- ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
- true, NULL);
- if (ret == BTRFS_NEED_TRANS_COMMIT) {
- err = btrfs_commit_transaction(trans);
- trans = NULL;
- }
+ btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
}
fail:
@@ -7001,7 +7231,7 @@
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
/* Only regular file could have regular/prealloc extent */
if (!S_ISREG(inode->vfs_inode.i_mode)) {
- ret = -EUCLEAN;
+ err = -EUCLEAN;
btrfs_crit(fs_info,
"regular/prealloc extent found for non-regular inode %llu",
btrfs_ino(inode));
@@ -7336,7 +7566,7 @@
*/
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
- u64 *ram_bytes)
+ u64 *ram_bytes, bool strict)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_path *path;
@@ -7414,8 +7644,9 @@
* Do the same check as in btrfs_cross_ref_exist but without the
* unnecessary search.
*/
- if (btrfs_file_extent_generation(leaf, fi) <=
- btrfs_root_last_snapshot(&root->root_item))
+ if (!strict &&
+ (btrfs_file_extent_generation(leaf, fi) <=
+ btrfs_root_last_snapshot(&root->root_item)))
goto out;
backref_offset = btrfs_file_extent_offset(leaf, fi);
@@ -7451,7 +7682,8 @@
*/
ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
- key.offset - backref_offset, disk_bytenr);
+ key.offset - backref_offset, disk_bytenr,
+ strict);
if (ret) {
ret = 0;
goto out;
@@ -7672,7 +7904,7 @@
block_start = em->block_start + (start - em->start);
if (can_nocow_extent(inode, start, &len, &orig_start,
- &orig_block_len, &ram_bytes) == 1 &&
+ &orig_block_len, &ram_bytes, false) == 1 &&
btrfs_inc_nocow_writers(fs_info, block_start)) {
struct extent_map *em2;
@@ -8211,18 +8443,14 @@
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_extent *ordered = NULL;
struct btrfs_workqueue *wq;
- btrfs_work_func_t func;
u64 ordered_offset = offset;
u64 ordered_bytes = bytes;
u64 last_offset;
- if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+ if (btrfs_is_free_space_inode(BTRFS_I(inode)))
wq = fs_info->endio_freespace_worker;
- func = btrfs_freespace_write_helper;
- } else {
+ else
wq = fs_info->endio_write_workers;
- func = btrfs_endio_write_helper;
- }
while (ordered_offset < offset + bytes) {
last_offset = ordered_offset;
@@ -8230,9 +8458,8 @@
&ordered_offset,
ordered_bytes,
uptodate)) {
- btrfs_init_work(&ordered->work, func,
- finish_ordered_fn,
- NULL, NULL);
+ btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
+ NULL);
btrfs_queue_work(wq, &ordered->work);
}
/*
@@ -8324,6 +8551,7 @@
{
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
+ u16 csum_size;
blk_status_t ret;
/*
@@ -8343,7 +8571,8 @@
file_offset -= dip->logical_offset;
file_offset >>= inode->i_sb->s_blocksize_bits;
- io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset);
+ csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy);
+ io_bio->csum = orig_io_bio->csum + csum_size * file_offset;
return 0;
}
@@ -8394,14 +8623,64 @@
return ret;
}
-static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
+/*
+ * If this succeeds, the btrfs_dio_private is responsible for cleaning up locked
+ * or ordered extents whether or not we submit any bios.
+ */
+static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
+ struct inode *inode,
+ loff_t file_offset)
{
- struct inode *inode = dip->inode;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
+ struct btrfs_dio_private *dip;
struct bio *bio;
- struct bio *orig_bio = dip->orig_bio;
- u64 start_sector = orig_bio->bi_iter.bi_sector;
- u64 file_offset = dip->logical_offset;
+
+ dip = kzalloc(sizeof(*dip), GFP_NOFS);
+ if (!dip)
+ return NULL;
+
+ bio = btrfs_bio_clone(dio_bio);
+ bio->bi_private = dip;
+ btrfs_io_bio(bio)->logical = file_offset;
+
+ dip->private = dio_bio->bi_private;
+ dip->inode = inode;
+ dip->logical_offset = file_offset;
+ dip->bytes = dio_bio->bi_iter.bi_size;
+ dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
+ dip->orig_bio = bio;
+ dip->dio_bio = dio_bio;
+ atomic_set(&dip->pending_bios, 1);
+
+ if (write) {
+ struct btrfs_dio_data *dio_data = current->journal_info;
+
+ /*
+ * Setting range start and end to the same value means that
+ * no cleanup will happen in btrfs_direct_IO
+ */
+ dio_data->unsubmitted_oe_range_end = dip->logical_offset +
+ dip->bytes;
+ dio_data->unsubmitted_oe_range_start =
+ dio_data->unsubmitted_oe_range_end;
+
+ bio->bi_end_io = btrfs_endio_direct_write;
+ } else {
+ bio->bi_end_io = btrfs_endio_direct_read;
+ dip->subio_endio = btrfs_subio_endio_read;
+ }
+ return dip;
+}
+
+static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
+ loff_t file_offset)
+{
+ const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_dio_private *dip;
+ struct bio *bio;
+ struct bio *orig_bio;
+ u64 start_sector;
int async_submit = 0;
u64 submit_len;
int clone_offset = 0;
@@ -8410,11 +8689,24 @@
blk_status_t status;
struct btrfs_io_geometry geom;
+ dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
+ if (!dip) {
+ if (!write) {
+ unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
+ file_offset + dio_bio->bi_iter.bi_size - 1);
+ }
+ dio_bio->bi_status = BLK_STS_RESOURCE;
+ dio_end_io(dio_bio);
+ return;
+ }
+
+ orig_bio = dip->orig_bio;
+ start_sector = orig_bio->bi_iter.bi_sector;
submit_len = orig_bio->bi_iter.bi_size;
ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
start_sector << 9, submit_len, &geom);
if (ret)
- return -EIO;
+ goto out_err;
if (geom.len >= submit_len) {
bio = orig_bio;
@@ -8430,7 +8722,6 @@
/* bio split */
ASSERT(geom.len <= INT_MAX);
- atomic_inc(&dip->pending_bios);
do {
clone_len = min_t(int, submit_len, geom.len);
@@ -8478,9 +8769,10 @@
submit:
status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
if (!status)
- return 0;
+ return;
- bio_put(bio);
+ if (bio != orig_bio)
+ bio_put(bio);
out_err:
dip->errors = 1;
/*
@@ -8491,107 +8783,6 @@
*/
if (atomic_dec_and_test(&dip->pending_bios))
bio_io_error(dip->orig_bio);
-
- /* bio_end_io() will handle error, so we needn't return it */
- return 0;
-}
-
-static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
- loff_t file_offset)
-{
- struct btrfs_dio_private *dip = NULL;
- struct bio *bio = NULL;
- struct btrfs_io_bio *io_bio;
- bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
- int ret = 0;
-
- bio = btrfs_bio_clone(dio_bio);
-
- dip = kzalloc(sizeof(*dip), GFP_NOFS);
- if (!dip) {
- ret = -ENOMEM;
- goto free_ordered;
- }
-
- dip->private = dio_bio->bi_private;
- dip->inode = inode;
- dip->logical_offset = file_offset;
- dip->bytes = dio_bio->bi_iter.bi_size;
- dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
- bio->bi_private = dip;
- dip->orig_bio = bio;
- dip->dio_bio = dio_bio;
- atomic_set(&dip->pending_bios, 0);
- io_bio = btrfs_io_bio(bio);
- io_bio->logical = file_offset;
-
- if (write) {
- bio->bi_end_io = btrfs_endio_direct_write;
- } else {
- bio->bi_end_io = btrfs_endio_direct_read;
- dip->subio_endio = btrfs_subio_endio_read;
- }
-
- /*
- * Reset the range for unsubmitted ordered extents (to a 0 length range)
- * even if we fail to submit a bio, because in such case we do the
- * corresponding error handling below and it must not be done a second
- * time by btrfs_direct_IO().
- */
- if (write) {
- struct btrfs_dio_data *dio_data = current->journal_info;
-
- dio_data->unsubmitted_oe_range_end = dip->logical_offset +
- dip->bytes;
- dio_data->unsubmitted_oe_range_start =
- dio_data->unsubmitted_oe_range_end;
- }
-
- ret = btrfs_submit_direct_hook(dip);
- if (!ret)
- return;
-
- btrfs_io_bio_free_csum(io_bio);
-
-free_ordered:
- /*
- * If we arrived here it means either we failed to submit the dip
- * or we either failed to clone the dio_bio or failed to allocate the
- * dip. If we cloned the dio_bio and allocated the dip, we can just
- * call bio_endio against our io_bio so that we get proper resource
- * cleanup if we fail to submit the dip, otherwise, we must do the
- * same as btrfs_endio_direct_[write|read] because we can't call these
- * callbacks - they require an allocated dip and a clone of dio_bio.
- */
- if (bio && dip) {
- bio_io_error(bio);
- /*
- * The end io callbacks free our dip, do the final put on bio
- * and all the cleanup and final put for dio_bio (through
- * dio_end_io()).
- */
- dip = NULL;
- bio = NULL;
- } else {
- if (write)
- __endio_write_update_ordered(inode,
- file_offset,
- dio_bio->bi_iter.bi_size,
- false);
- else
- unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
- file_offset + dio_bio->bi_iter.bi_size - 1);
-
- dio_bio->bi_status = BLK_STS_IOERR;
- /*
- * Releases and cleans up our dio_bio, no need to bio_put()
- * nor bio_endio()/bio_io_error() against dio_bio.
- */
- dio_end_io(dio_bio);
- }
- if (bio)
- bio_put(bio);
- kfree(dip);
}
static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
@@ -8668,9 +8859,6 @@
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
- } else if (iocb->ki_flags & IOCB_NOWAIT) {
- ret = -EAGAIN;
- goto out;
}
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
offset, count);
@@ -8893,20 +9081,17 @@
/*
* Qgroup reserved space handler
* Page here will be either
- * 1) Already written to disk
- * In this case, its reserved space is released from data rsv map
- * and will be freed by delayed_ref handler finally.
- * So even we call qgroup_free_data(), it won't decrease reserved
- * space.
- * 2) Not written to disk
- * This means the reserved space should be freed here. However,
- * if a truncate invalidates the page (by clearing PageDirty)
- * and the page is accounted for while allocating extent
- * in btrfs_check_data_free_space() we let delayed_ref to
- * free the entire extent.
+ * 1) Already written to disk or ordered extent already submitted
+ * Then its QGROUP_RESERVED bit in io_tree is already cleaned.
+ * Qgroup will be handled by its qgroup_record then.
+ * btrfs_qgroup_free_data() call will do nothing here.
+ *
+ * 2) Not written to disk yet
+ * Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
+ * bit of its io_tree, and free the qgroup reserved data space.
+ * Since the IO will never happen for this page.
*/
- if (PageDirty(page))
- btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
+ btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
@@ -9065,9 +9250,7 @@
set_page_dirty(page);
SetPageUptodate(page);
- BTRFS_I(inode)->last_trans = fs_info->generation;
- BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
- BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
+ btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
@@ -9381,7 +9564,7 @@
btrfs_put_ordered_extent(ordered);
}
}
- btrfs_qgroup_check_reserved_leak(inode);
+ btrfs_qgroup_check_reserved_leak(BTRFS_I(inode));
inode_tree_del(inode);
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
}
@@ -9450,7 +9633,7 @@
btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
PAGE_SIZE, PAGE_SIZE,
- SLAB_RED_ZONE, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_free_space_bitmap_cachep)
goto fail;
@@ -9508,32 +9691,28 @@
struct inode *new_inode = new_dentry->d_inode;
struct inode *old_inode = old_dentry->d_inode;
struct timespec64 ctime = current_time(old_inode);
- struct dentry *parent;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
u64 old_idx = 0;
u64 new_idx = 0;
- u64 root_objectid;
int ret;
+ int ret2;
bool root_log_pinned = false;
bool dest_log_pinned = false;
- struct btrfs_log_ctx ctx_root;
- struct btrfs_log_ctx ctx_dest;
- bool sync_log_root = false;
- bool sync_log_dest = false;
- bool commit_transaction = false;
- /* we only allow rename subvolume link between subvolumes */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
+ /*
+ * For non-subvolumes allow exchange only within one subvolume, in the
+ * same inode namespace. Two subvolumes (represented as directory) can
+ * be exchanged as they're a logical link and have a fixed inode number.
+ */
+ if (root != dest &&
+ (old_ino != BTRFS_FIRST_FREE_OBJECTID ||
+ new_ino != BTRFS_FIRST_FREE_OBJECTID))
return -EXDEV;
- btrfs_init_log_ctx(&ctx_root, old_inode);
- btrfs_init_log_ctx(&ctx_dest, new_inode);
-
/* close the race window with snapshot create/destroy ioctl */
- if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
- down_read(&fs_info->subvol_sem);
- if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
+ if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
+ new_ino == BTRFS_FIRST_FREE_OBJECTID)
down_read(&fs_info->subvol_sem);
/*
@@ -9550,6 +9729,9 @@
goto out_notrans;
}
+ if (dest != root)
+ btrfs_record_root_in_trans(trans, dest);
+
/*
* We need to find a free sequence number both in the source and
* in the destination directory for the exchange.
@@ -9617,10 +9799,7 @@
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
- root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
- old_dentry->d_name.name,
- old_dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else { /* src is an inode */
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(old_dentry->d_inode),
@@ -9636,10 +9815,7 @@
/* dest is a subvolume */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
- root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
- new_dentry->d_name.name,
- new_dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
} else { /* dest is an inode */
ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
BTRFS_I(new_dentry->d_inode),
@@ -9675,30 +9851,14 @@
BTRFS_I(new_inode)->dir_index = new_idx;
if (root_log_pinned) {
- parent = new_dentry->d_parent;
- ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
- BTRFS_I(old_dir), parent,
- false, &ctx_root);
- if (ret == BTRFS_NEED_LOG_SYNC)
- sync_log_root = true;
- else if (ret == BTRFS_NEED_TRANS_COMMIT)
- commit_transaction = true;
- ret = 0;
+ btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
+ new_dentry->d_parent);
btrfs_end_log_trans(root);
root_log_pinned = false;
}
if (dest_log_pinned) {
- if (!commit_transaction) {
- parent = old_dentry->d_parent;
- ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
- BTRFS_I(new_dir), parent,
- false, &ctx_dest);
- if (ret == BTRFS_NEED_LOG_SYNC)
- sync_log_dest = true;
- else if (ret == BTRFS_NEED_TRANS_COMMIT)
- commit_transaction = true;
- ret = 0;
- }
+ btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
+ old_dentry->d_parent);
btrfs_end_log_trans(dest);
dest_log_pinned = false;
}
@@ -9731,46 +9891,12 @@
dest_log_pinned = false;
}
}
- if (!ret && sync_log_root && !commit_transaction) {
- ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
- &ctx_root);
- if (ret)
- commit_transaction = true;
- }
- if (!ret && sync_log_dest && !commit_transaction) {
- ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
- &ctx_dest);
- if (ret)
- commit_transaction = true;
- }
- if (commit_transaction) {
- /*
- * We may have set commit_transaction when logging the new name
- * in the destination root, in which case we left the source
- * root context in the list of log contextes. So make sure we
- * remove it to avoid invalid memory accesses, since the context
- * was allocated in our stack frame.
- */
- if (sync_log_root) {
- mutex_lock(&root->log_mutex);
- list_del_init(&ctx_root.list);
- mutex_unlock(&root->log_mutex);
- }
- ret = btrfs_commit_transaction(trans);
- } else {
- int ret2;
-
- ret2 = btrfs_end_transaction(trans);
- ret = ret ? ret : ret2;
- }
+ ret2 = btrfs_end_transaction(trans);
+ ret = ret ? ret : ret2;
out_notrans:
- if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
+ if (new_ino == BTRFS_FIRST_FREE_OBJECTID ||
+ old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
- if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
- up_read(&fs_info->subvol_sem);
-
- ASSERT(list_empty(&ctx_root.list));
- ASSERT(list_empty(&ctx_dest.list));
return ret;
}
@@ -9838,13 +9964,10 @@
struct inode *new_inode = d_inode(new_dentry);
struct inode *old_inode = d_inode(old_dentry);
u64 index = 0;
- u64 root_objectid;
int ret;
+ int ret2;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
bool log_pinned = false;
- struct btrfs_log_ctx ctx;
- bool sync_log = false;
- bool commit_transaction = false;
if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
@@ -9946,10 +10069,7 @@
BTRFS_I(old_inode), 1);
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
- root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
- old_dentry->d_name.name,
- old_dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else {
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(d_inode(old_dentry)),
@@ -9968,10 +10088,7 @@
new_inode->i_ctime = current_time(new_inode);
if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
- root_objectid = BTRFS_I(new_inode)->location.objectid;
- ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
- new_dentry->d_name.name,
- new_dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
BUG_ON(new_inode->i_nlink == 0);
} else {
ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
@@ -10000,17 +10117,8 @@
BTRFS_I(old_inode)->dir_index = index;
if (log_pinned) {
- struct dentry *parent = new_dentry->d_parent;
-
- btrfs_init_log_ctx(&ctx, old_inode);
- ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
- BTRFS_I(old_dir), parent,
- false, &ctx);
- if (ret == BTRFS_NEED_LOG_SYNC)
- sync_log = true;
- else if (ret == BTRFS_NEED_TRANS_COMMIT)
- commit_transaction = true;
- ret = 0;
+ btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
+ new_dentry->d_parent);
btrfs_end_log_trans(root);
log_pinned = false;
}
@@ -10047,19 +10155,8 @@
btrfs_end_log_trans(root);
log_pinned = false;
}
- if (!ret && sync_log) {
- ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
- if (ret)
- commit_transaction = true;
- }
- if (commit_transaction) {
- ret = btrfs_commit_transaction(trans);
- } else {
- int ret2;
-
- ret2 = btrfs_end_transaction(trans);
- ret = ret ? ret : ret2;
- }
+ ret2 = btrfs_end_transaction(trans);
+ ret = ret ? ret : ret2;
out_notrans:
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
@@ -10116,8 +10213,7 @@
init_completion(&work->completion);
INIT_LIST_HEAD(&work->list);
work->inode = inode;
- btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
- btrfs_run_delalloc_work, NULL, NULL);
+ btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
return work;
}
@@ -10382,6 +10478,7 @@
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 cur_offset = start;
+ u64 clear_offset = start;
u64 i_size;
u64 cur_bytes;
u64 last_alloc = (u64)-1;
@@ -10416,6 +10513,15 @@
btrfs_end_transaction(trans);
break;
}
+
+ /*
+ * We've reserved this space, and thus converted it from
+ * ->bytes_may_use to ->bytes_reserved. Any error that happens
+ * from here on out we will only need to clear our reservation
+ * for the remaining unreserved area, so advance our
+ * clear_offset by our extent size.
+ */
+ clear_offset += ins.offset;
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
last_alloc = ins.offset;
@@ -10496,9 +10602,9 @@
if (own_trans)
btrfs_end_transaction(trans);
}
- if (cur_offset < end)
- btrfs_free_reserved_data_space(inode, NULL, cur_offset,
- end - cur_offset + 1);
+ if (clear_offset < end)
+ btrfs_free_reserved_data_space(inode, NULL, clear_offset,
+ end - clear_offset + 1);
return ret;
}
@@ -10843,7 +10949,7 @@
free_extent_map(em);
em = NULL;
- ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
+ ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, true);
if (ret < 0) {
goto out;
} else if (ret) {