Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1279359..f59ec55 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -27,6 +27,7 @@
#include "qgroup.h"
#include "compression.h"
#include "delalloc-space.h"
+#include "reflink.h"
static struct kmem_cache *btrfs_inode_defrag_cachep;
/*
@@ -274,34 +275,23 @@
{
struct btrfs_root *inode_root;
struct inode *inode;
- struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range;
int num_defrag;
- int index;
int ret;
/* get the inode */
- key.objectid = defrag->root;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- index = srcu_read_lock(&fs_info->subvol_srcu);
-
- inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
+ inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
if (IS_ERR(inode_root)) {
ret = PTR_ERR(inode_root);
goto cleanup;
}
- key.objectid = defrag->ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
+ inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root);
+ btrfs_put_root(inode_root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
goto cleanup;
}
- srcu_read_unlock(&fs_info->subvol_srcu, index);
/* do a chunk of defrag */
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
@@ -337,7 +327,6 @@
iput(inode);
return 0;
cleanup:
- srcu_read_unlock(&fs_info->subvol_srcu, index);
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
return ret;
}
@@ -463,47 +452,6 @@
}
}
-static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
- const u64 start,
- const u64 len,
- struct extent_state **cached_state)
-{
- u64 search_start = start;
- const u64 end = start + len - 1;
-
- while (search_start < end) {
- const u64 search_len = end - search_start + 1;
- struct extent_map *em;
- u64 em_len;
- int ret = 0;
-
- em = btrfs_get_extent(inode, NULL, 0, search_start,
- search_len, 0);
- if (IS_ERR(em))
- return PTR_ERR(em);
-
- if (em->block_start != EXTENT_MAP_HOLE)
- goto next;
-
- em_len = em->len;
- if (em->start < search_start)
- em_len -= search_start - em->start;
- if (em_len > search_len)
- em_len = search_len;
-
- ret = set_extent_bit(&inode->io_tree, search_start,
- search_start + em_len - 1,
- EXTENT_DELALLOC_NEW,
- NULL, cached_state, GFP_NOFS);
-next:
- search_start = extent_map_end(em);
- free_extent_map(em);
- if (ret)
- return ret;
- }
- return 0;
-}
-
/*
* after copy_from_user, pages need to be dirtied and we need to make
* sure holes are created between the current EOF and the start of
@@ -512,18 +460,18 @@
* this also makes the decision about creating an inline extent vs
* doing real data extents, marking pages dirty and delalloc as required.
*/
-int btrfs_dirty_pages(struct inode *inode, struct page **pages,
+int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
int err = 0;
int i;
u64 num_bytes;
u64 start_pos;
u64 end_of_last_block;
u64 end_pos = pos + write_bytes;
- loff_t isize = i_size_read(inode);
+ loff_t isize = i_size_read(&inode->vfs_inode);
unsigned int extra_bits = 0;
start_pos = pos & ~((u64) fs_info->sectorsize - 1);
@@ -536,28 +484,10 @@
* The pages may have already been dirty, clear out old accounting so
* we can set things up properly
*/
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, end_of_last_block,
+ clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, cached);
- if (!btrfs_is_free_space_inode(BTRFS_I(inode))) {
- if (start_pos >= isize &&
- !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) {
- /*
- * There can't be any extents following eof in this case
- * so just set the delalloc new bit for the range
- * directly.
- */
- extra_bits |= EXTENT_DELALLOC_NEW;
- } else {
- err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode),
- start_pos,
- num_bytes, cached);
- if (err)
- return err;
- }
- }
-
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
extra_bits, cached);
if (err)
@@ -576,7 +506,7 @@
* at this time.
*/
if (end_pos > isize)
- i_size_write(inode, end_pos);
+ i_size_write(&inode->vfs_inode, end_pos);
return 0;
}
@@ -667,7 +597,6 @@
}
split->generation = gen;
- split->bdev = em->bdev;
split->flags = flags;
split->compress_type = em->compress_type;
replace_extent_mapping(em_tree, em, split, modified);
@@ -680,7 +609,6 @@
split->start = start + len;
split->len = em->start + em->len - (start + len);
- split->bdev = em->bdev;
split->flags = flags;
split->compress_type = em->compress_type;
split->generation = gen;
@@ -745,7 +673,7 @@
* is deleted from the tree.
*/
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
struct btrfs_path *path, u64 start, u64 end,
u64 *drop_end, int drop_cache,
int replace_extent,
@@ -758,7 +686,8 @@
struct btrfs_ref ref = { 0 };
struct btrfs_key key;
struct btrfs_key new_key;
- u64 ino = btrfs_ino(BTRFS_I(inode));
+ struct inode *vfs_inode = &inode->vfs_inode;
+ u64 ino = btrfs_ino(inode);
u64 search_start = start;
u64 disk_bytenr = 0;
u64 num_bytes = 0;
@@ -776,13 +705,12 @@
int leafs_visited = 0;
if (drop_cache)
- btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0);
+ btrfs_drop_extent_cache(inode, start, end - 1, 0);
- if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
+ if (start >= inode->disk_i_size && !replace_extent)
modify_tree = 0;
- update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
- root == fs_info->tree_root);
+ update_refs = (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID);
while (1) {
recow = 0;
ret = btrfs_lookup_file_extent(trans, root, path, ino,
@@ -949,7 +877,7 @@
extent_end - end);
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0)
- inode_sub_bytes(inode, end - key.offset);
+ inode_sub_bytes(vfs_inode, end - key.offset);
break;
}
@@ -969,7 +897,7 @@
start - key.offset);
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0)
- inode_sub_bytes(inode, extent_end - start);
+ inode_sub_bytes(vfs_inode, extent_end - start);
if (end == extent_end)
break;
@@ -993,7 +921,7 @@
if (update_refs &&
extent_type == BTRFS_FILE_EXTENT_INLINE) {
- inode_sub_bytes(inode,
+ inode_sub_bytes(vfs_inode,
extent_end - key.offset);
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
@@ -1007,7 +935,7 @@
key.offset - extent_offset);
ret = btrfs_free_extent(trans, &ref);
BUG_ON(ret); /* -ENOMEM */
- inode_sub_bytes(inode,
+ inode_sub_bytes(vfs_inode,
extent_end - key.offset);
}
@@ -1071,11 +999,7 @@
if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
path->slots[0]++;
}
- setup_items_for_insert(root, path, &key,
- &extent_item_size,
- extent_item_size,
- sizeof(struct btrfs_item) +
- extent_item_size, 1);
+ setup_items_for_insert(root, path, &key, &extent_item_size, 1);
*key_inserted = 1;
}
@@ -1096,8 +1020,8 @@
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
- drop_cache, 0, 0, NULL);
+ ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, start,
+ end, NULL, drop_cache, 0, 0, NULL);
btrfs_free_path(path);
return ret;
}
@@ -1491,9 +1415,7 @@
int ret = 0;
start_pos = round_down(pos, fs_info->sectorsize);
- last_pos = start_pos
- + round_up(pos + write_bytes - start_pos,
- fs_info->sectorsize) - 1;
+ last_pos = round_up(pos + write_bytes, fs_info->sectorsize) - 1;
if (start_pos < inode->vfs_inode.i_size) {
struct btrfs_ordered_extent *ordered;
@@ -1503,7 +1425,7 @@
ordered = btrfs_lookup_ordered_range(inode, start_pos,
last_pos - start_pos + 1);
if (ordered &&
- ordered->file_offset + ordered->len > start_pos &&
+ ordered->file_offset + ordered->num_bytes > start_pos &&
ordered->file_offset <= last_pos) {
unlock_extent_cached(&inode->io_tree, start_pos,
last_pos, cached_state);
@@ -1511,8 +1433,7 @@
unlock_page(pages[i]);
put_page(pages[i]);
}
- btrfs_start_ordered_extent(&inode->vfs_inode,
- ordered, 1);
+ btrfs_start_ordered_extent(ordered, 1);
btrfs_put_ordered_extent(ordered);
return -EAGAIN;
}
@@ -1546,8 +1467,8 @@
return ret;
}
-int btrfs_check_can_nocow(struct btrfs_inode *inode, loff_t pos,
- size_t *write_bytes)
+static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
+ size_t *write_bytes, bool nowait)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
@@ -1555,33 +1476,87 @@
u64 num_bytes;
int ret;
- ret = btrfs_start_write_no_snapshotting(root);
- if (!ret)
+ if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
+ return 0;
+
+ if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
return -EAGAIN;
lockstart = round_down(pos, fs_info->sectorsize);
lockend = round_up(pos + *write_bytes,
fs_info->sectorsize) - 1;
-
- btrfs_lock_and_flush_ordered_range(&inode->io_tree, inode, lockstart,
- lockend, NULL);
-
num_bytes = lockend - lockstart + 1;
+
+ if (nowait) {
+ struct btrfs_ordered_extent *ordered;
+
+ if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
+ return -EAGAIN;
+
+ ordered = btrfs_lookup_ordered_range(inode, lockstart,
+ num_bytes);
+ if (ordered) {
+ btrfs_put_ordered_extent(ordered);
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+ } else {
+ btrfs_lock_and_flush_ordered_range(inode, lockstart,
+ lockend, NULL);
+ }
+
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
NULL, NULL, NULL, false);
if (ret <= 0) {
ret = 0;
- btrfs_end_write_no_snapshotting(root);
+ if (!nowait)
+ btrfs_drew_write_unlock(&root->snapshot_lock);
} else {
*write_bytes = min_t(size_t, *write_bytes ,
num_bytes - pos + lockstart);
}
-
+out_unlock:
unlock_extent(&inode->io_tree, lockstart, lockend);
return ret;
}
+static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos,
+ size_t *write_bytes)
+{
+ return check_can_nocow(inode, pos, write_bytes, true);
+}
+
+/*
+ * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
+ *
+ * @pos: File offset
+ * @write_bytes: The length to write, will be updated to the nocow writeable
+ * range
+ *
+ * This function will flush ordered extents in the range to ensure proper
+ * nocow checks.
+ *
+ * Return:
+ * >0 and update @write_bytes if we can do nocow write
+ * 0 if we can't do nocow write
+ * -EAGAIN if we can't get the needed lock or there are ordered extents
+ * for * (nowait == true) case
+ * <0 if other error happened
+ *
+ * NOTE: Callers need to release the lock by btrfs_check_nocow_unlock().
+ */
+int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
+ size_t *write_bytes)
+{
+ return check_can_nocow(inode, pos, write_bytes, false);
+}
+
+void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
+{
+ btrfs_drew_write_unlock(&inode->root->snapshot_lock);
+}
+
static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
struct iov_iter *i)
{
@@ -1589,7 +1564,6 @@
loff_t pos = iocb->ki_pos;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
struct extent_changeset *data_reserved = NULL;
u64 release_bytes = 0;
@@ -1642,13 +1616,12 @@
fs_info->sectorsize);
extent_changeset_release(data_reserved);
- ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
+ ret = btrfs_check_data_free_space(BTRFS_I(inode),
+ &data_reserved, pos,
write_bytes);
if (ret < 0) {
- if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
- BTRFS_INODE_PREALLOC)) &&
- btrfs_check_can_nocow(BTRFS_I(inode), pos,
- &write_bytes) > 0) {
+ if (btrfs_check_nocow_lock(BTRFS_I(inode), pos,
+ &write_bytes) > 0) {
/*
* For nodata cow case, no need to reserve
* data space.
@@ -1673,11 +1646,11 @@
reserve_bytes);
if (ret) {
if (!only_release_metadata)
- btrfs_free_reserved_data_space(inode,
+ btrfs_free_reserved_data_space(BTRFS_I(inode),
data_reserved, pos,
write_bytes);
else
- btrfs_end_write_no_snapshotting(root);
+ btrfs_check_nocow_unlock(BTRFS_I(inode));
break;
}
@@ -1747,7 +1720,7 @@
__pos = round_down(pos,
fs_info->sectorsize) +
(dirty_pages << PAGE_SHIFT);
- btrfs_delalloc_release_space(inode,
+ btrfs_delalloc_release_space(BTRFS_I(inode),
data_reserved, __pos,
release_bytes, true);
}
@@ -1757,8 +1730,9 @@
fs_info->sectorsize);
if (copied > 0)
- ret = btrfs_dirty_pages(inode, pages, dirty_pages,
- pos, copied, &cached_state);
+ ret = btrfs_dirty_pages(BTRFS_I(inode), pages,
+ dirty_pages, pos, copied,
+ &cached_state);
/*
* If we have not locked the extent range, because the range's
@@ -1781,7 +1755,7 @@
release_bytes = 0;
if (only_release_metadata)
- btrfs_end_write_no_snapshotting(root);
+ btrfs_check_nocow_unlock(BTRFS_I(inode));
if (only_release_metadata && copied > 0) {
lockstart = round_down(pos,
@@ -1799,8 +1773,6 @@
cond_resched();
balance_dirty_pages_ratelimited(inode->i_mapping);
- if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1)
- btrfs_btree_balance_dirty(fs_info);
pos += copied;
num_written += copied;
@@ -1810,11 +1782,12 @@
if (release_bytes) {
if (only_release_metadata) {
- btrfs_end_write_no_snapshotting(root);
+ btrfs_check_nocow_unlock(BTRFS_I(inode));
btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes, true);
} else {
- btrfs_delalloc_release_space(inode, data_reserved,
+ btrfs_delalloc_release_space(BTRFS_I(inode),
+ data_reserved,
round_down(pos, fs_info->sectorsize),
release_bytes, true);
}
@@ -1834,7 +1807,7 @@
loff_t endbyte;
int err;
- written = generic_file_direct_write(iocb, from);
+ written = btrfs_direct_IO(iocb, from);
if (written < 0 || !iov_iter_count(from))
return written;
@@ -1888,7 +1861,6 @@
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start_pos;
u64 end_pos;
ssize_t num_written = 0;
@@ -1925,16 +1897,11 @@
* We will allocate space in case nodatacow is not set,
* so bail
*/
- if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
- BTRFS_INODE_PREALLOC)) ||
- btrfs_check_can_nocow(BTRFS_I(inode), pos,
- &nocow_bytes) <= 0) {
+ if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes)
+ <= 0) {
inode_unlock(inode);
return -EAGAIN;
}
-
- /* check_can_nocow() locks the snapshot lock on success */
- btrfs_end_write_no_snapshotting(root);
/*
* There are holes in the range or parts of the range that must
* be COWed (shared extents, RO block groups, etc), so just bail
@@ -1992,7 +1959,40 @@
atomic_inc(&BTRFS_I(inode)->sync_writers);
if (iocb->ki_flags & IOCB_DIRECT) {
+ /*
+ * 1. We must always clear IOCB_DSYNC in order to not deadlock
+ * in iomap, as it calls generic_write_sync() in this case.
+ * 2. If we are async, we can call iomap_dio_complete() either
+ * in
+ *
+ * 2.1. A worker thread from the last bio completed. In this
+ * case we need to mark the btrfs_dio_data that it is
+ * async in order to call generic_write_sync() properly.
+ * This is handled by setting BTRFS_DIO_SYNC_STUB in the
+ * current->journal_info.
+ * 2.2 The submitter context, because all IO completed
+ * before we exited iomap_dio_rw(). In this case we can
+ * just re-set the IOCB_DSYNC on the iocb and we'll do
+ * the sync below. If our ->end_io() gets called and
+ * current->journal_info is set, then we know we're in
+ * our current context and we will clear
+ * current->journal_info to indicate that we need to
+ * sync below.
+ */
+ if (sync) {
+ ASSERT(current->journal_info == NULL);
+ iocb->ki_flags &= ~IOCB_DSYNC;
+ current->journal_info = BTRFS_DIO_SYNC_STUB;
+ }
num_written = __btrfs_direct_write(iocb, from);
+
+ /*
+ * As stated above, we cleared journal_info, so we need to do
+ * the sync ourselves.
+ */
+ if (sync && current->journal_info == NULL)
+ iocb->ki_flags |= IOCB_DSYNC;
+ current->journal_info = NULL;
} else {
num_written = btrfs_buffered_write(iocb, from);
if (num_written > 0)
@@ -2026,12 +2026,12 @@
filp->private_data = NULL;
/*
- * ordered_data_close is set by setattr when we are about to truncate
- * a file from a non-zero size to a zero size. This tries to
- * flush down new bytes that may have been written if the
- * application were using truncate to replace a file in place.
+ * Set by setattr when we are about to truncate a file from a non-zero
+ * size to a zero size. This tries to flush down new bytes that may
+ * have been written if the application were using truncate to replace
+ * a file in place.
*/
- if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
+ if (test_and_clear_bit(BTRFS_INODE_FLUSH_ON_CLOSE,
&BTRFS_I(inode)->runtime_flags))
filemap_flush(inode->i_mapping);
return 0;
@@ -2057,6 +2057,30 @@
return ret;
}
+static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_inode *inode = BTRFS_I(ctx->inode);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+ if (btrfs_inode_in_log(inode, fs_info->generation) &&
+ list_empty(&ctx->ordered_extents))
+ return true;
+
+ /*
+ * If we are doing a fast fsync we can not bail out if the inode's
+ * last_trans is <= then the last committed transaction, because we only
+ * update the last_trans of the inode during ordered extent completion,
+ * and for a fast fsync we don't wait for that, we only wait for the
+ * writeback to complete.
+ */
+ if (inode->last_trans <= fs_info->last_trans_committed &&
+ (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) ||
+ list_empty(&ctx->ordered_extents)))
+ return true;
+
+ return false;
+}
+
/*
* fsync call for both files and directories. This logs the inode into
* the tree log instead of forcing full commits whenever possible.
@@ -2072,25 +2096,28 @@
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
int ret = 0, err;
+ u64 len;
+ bool full_sync;
trace_btrfs_sync_file(file, datasync);
btrfs_init_log_ctx(&ctx, inode);
/*
- * Set the range to full if the NO_HOLES feature is not enabled.
- * This is to avoid missing file extent items representing holes after
- * replaying the log.
+ * Always set the range to a full range, otherwise we can get into
+ * several problems, from missing file extent items to represent holes
+ * when not using the NO_HOLES feature, to log tree corruption due to
+ * races between hole detection during logging and completion of ordered
+ * extents outside the range, to missing checksums due to ordered extents
+ * for which we flushed only a subset of their pages.
*/
- if (!btrfs_fs_incompat(fs_info, NO_HOLES)) {
- start = 0;
- end = LLONG_MAX;
- }
+ start = 0;
+ end = LLONG_MAX;
+ len = (u64)LLONG_MAX + 1;
/*
* We write the dirty pages in the range and wait until they complete
@@ -2114,17 +2141,12 @@
atomic_inc(&root->log_batch);
/*
- * If the inode needs a full sync, make sure we use a full range to
- * avoid log tree corruption, due to hole detection racing with ordered
- * extent completion for adjacent ranges, and assertion failures during
- * hole detection. Do this while holding the inode lock, to avoid races
- * with other tasks.
+ * Always check for the full sync flag while holding the inode's lock,
+ * to avoid races with other tasks. The flag must be either set all the
+ * time during logging or always off all the time while logging.
*/
- if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags)) {
- start = 0;
- end = LLONG_MAX;
- }
+ full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags);
/*
* Before we acquired the inode's lock, someone may have dirtied more
@@ -2155,20 +2177,33 @@
* We have to do this here to avoid the priority inversion of waiting on
* IO of a lower priority task while holding a transaction open.
*
- * Also, the range length can be represented by u64, we have to do the
- * typecasts to avoid signed overflow if it's [0, LLONG_MAX].
+ * For a full fsync we wait for the ordered extents to complete while
+ * for a fast fsync we wait just for writeback to complete, and then
+ * attach the ordered extents to the transaction so that a transaction
+ * commit waits for their completion, to avoid data loss if we fsync,
+ * the current transaction commits before the ordered extents complete
+ * and a power failure happens right after that.
*/
- ret = btrfs_wait_ordered_range(inode, start, (u64)end - (u64)start + 1);
- if (ret) {
- up_write(&BTRFS_I(inode)->dio_sem);
- inode_unlock(inode);
- goto out;
+ if (full_sync) {
+ ret = btrfs_wait_ordered_range(inode, start, len);
+ } else {
+ /*
+ * Get our ordered extents as soon as possible to avoid doing
+ * checksum lookups in the csum tree, and use instead the
+ * checksums attached to the ordered extents.
+ */
+ btrfs_get_ordered_extents_for_logging(BTRFS_I(inode),
+ &ctx.ordered_extents);
+ ret = filemap_fdatawait_range(inode->i_mapping, start, end);
}
+
+ if (ret)
+ goto out_release_extents;
+
atomic_inc(&root->log_batch);
smp_mb();
- if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
- BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) {
+ if (skip_inode_logging(&ctx)) {
/*
* We've had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
@@ -2184,9 +2219,7 @@
* checked called fsync.
*/
ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err);
- up_write(&BTRFS_I(inode)->dio_sem);
- inode_unlock(inode);
- goto out;
+ goto out_release_extents;
}
/*
@@ -2203,12 +2236,11 @@
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- up_write(&BTRFS_I(inode)->dio_sem);
- inode_unlock(inode);
- goto out;
+ goto out_release_extents;
}
- ret = btrfs_log_dentry_safe(trans, dentry, start, end, &ctx);
+ ret = btrfs_log_dentry_safe(trans, dentry, &ctx);
+ btrfs_release_log_ctx_extents(&ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
ret = 1;
@@ -2235,6 +2267,13 @@
goto out;
}
}
+ if (!full_sync) {
+ ret = btrfs_wait_ordered_range(inode, start, len);
+ if (ret) {
+ btrfs_end_transaction(trans);
+ goto out;
+ }
+ }
ret = btrfs_commit_transaction(trans);
} else {
ret = btrfs_end_transaction(trans);
@@ -2245,6 +2284,12 @@
if (!ret)
ret = err;
return ret > 0 ? -EIO : ret;
+
+out_release_extents:
+ btrfs_release_log_ctx_extents(&ctx);
+ up_write(&BTRFS_I(inode)->dio_sem);
+ inode_unlock(inode);
+ goto out;
}
static const struct vm_operations_struct btrfs_file_vm_ops = {
@@ -2380,7 +2425,6 @@
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->orig_block_len = 0;
- hole_em->bdev = fs_info->fs_devices->latest_bdev;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
hole_em->generation = trans->transid;
@@ -2413,7 +2457,7 @@
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
round_down(*start, fs_info->sectorsize),
- round_up(*len, fs_info->sectorsize), 0);
+ round_up(*len, fs_info->sectorsize));
if (IS_ERR(em))
return PTR_ERR(em);
@@ -2441,7 +2485,8 @@
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
cached_state);
- ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
+ ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode),
+ lockend);
/*
* We need to make sure we have no ordered extents in this range
@@ -2449,7 +2494,7 @@
* we need to try again.
*/
if ((!ordered ||
- (ordered->file_offset + ordered->len <= lockstart ||
+ (ordered->file_offset + ordered->num_bytes <= lockstart ||
ordered->file_offset > lockend)) &&
!filemap_range_has_page(inode->i_mapping,
lockstart, lockend)) {
@@ -2469,11 +2514,11 @@
return 0;
}
-static int btrfs_insert_clone_extent(struct btrfs_trans_handle *trans,
+static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_path *path,
- struct btrfs_clone_extent_info *clone_info,
- const u64 clone_len)
+ struct btrfs_replace_extent_info *extent_info,
+ const u64 replace_len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -2482,46 +2527,69 @@
struct btrfs_key key;
int slot;
struct btrfs_ref ref = { 0 };
- u64 ref_offset;
int ret;
- if (clone_len == 0)
+ if (replace_len == 0)
return 0;
- if (clone_info->disk_offset == 0 &&
+ if (extent_info->disk_offset == 0 &&
btrfs_fs_incompat(fs_info, NO_HOLES))
return 0;
key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = clone_info->file_offset;
+ key.offset = extent_info->file_offset;
ret = btrfs_insert_empty_item(trans, root, path, &key,
- clone_info->item_size);
+ sizeof(struct btrfs_file_extent_item));
if (ret)
return ret;
leaf = path->nodes[0];
slot = path->slots[0];
- write_extent_buffer(leaf, clone_info->extent_buf,
+ write_extent_buffer(leaf, extent_info->extent_buf,
btrfs_item_ptr_offset(leaf, slot),
- clone_info->item_size);
+ sizeof(struct btrfs_file_extent_item));
extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
- btrfs_set_file_extent_offset(leaf, extent, clone_info->data_offset);
- btrfs_set_file_extent_num_bytes(leaf, extent, clone_len);
+ ASSERT(btrfs_file_extent_type(leaf, extent) != BTRFS_FILE_EXTENT_INLINE);
+ btrfs_set_file_extent_offset(leaf, extent, extent_info->data_offset);
+ btrfs_set_file_extent_num_bytes(leaf, extent, replace_len);
+ if (extent_info->is_new_extent)
+ btrfs_set_file_extent_generation(leaf, extent, trans->transid);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
+ ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
+ extent_info->file_offset, replace_len);
+ if (ret)
+ return ret;
+
/* If it's a hole, nothing more needs to be done. */
- if (clone_info->disk_offset == 0)
+ if (extent_info->disk_offset == 0)
return 0;
- inode_add_bytes(inode, clone_len);
- btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
- clone_info->disk_offset,
- clone_info->disk_len, 0);
- ref_offset = clone_info->file_offset - clone_info->data_offset;
- btrfs_init_data_ref(&ref, root->root_key.objectid,
- btrfs_ino(BTRFS_I(inode)), ref_offset);
- ret = btrfs_inc_extent_ref(trans, &ref);
+ inode_add_bytes(inode, replace_len);
+
+ if (extent_info->is_new_extent && extent_info->insertions == 0) {
+ key.objectid = extent_info->disk_offset;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = extent_info->disk_len;
+ ret = btrfs_alloc_reserved_file_extent(trans, root,
+ btrfs_ino(BTRFS_I(inode)),
+ extent_info->file_offset,
+ extent_info->qgroup_reserved,
+ &key);
+ } else {
+ u64 ref_offset;
+
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
+ extent_info->disk_offset,
+ extent_info->disk_len, 0);
+ ref_offset = extent_info->file_offset - extent_info->data_offset;
+ btrfs_init_data_ref(&ref, root->root_key.objectid,
+ btrfs_ino(BTRFS_I(inode)), ref_offset);
+ ret = btrfs_inc_extent_ref(trans, &ref);
+ }
+
+ extent_info->insertions++;
return ret;
}
@@ -2529,15 +2597,15 @@
/*
* The respective range must have been previously locked, as well as the inode.
* The end offset is inclusive (last byte of the range).
- * @clone_info is NULL for fallocate's hole punching and non-NULL for extent
- * cloning.
- * When cloning, we don't want to end up in a state where we dropped extents
- * without inserting a new one, so we must abort the transaction to avoid a
- * corruption.
+ * @extent_info is NULL for fallocate's hole punching and non-NULL when replacing
+ * the file range with an extent.
+ * When not punching a hole, we don't want to end up in a state where we dropped
+ * extents without inserting a new one, so we must abort the transaction to avoid
+ * a corruption.
*/
-int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
+int btrfs_replace_file_extents(struct inode *inode, struct btrfs_path *path,
const u64 start, const u64 end,
- struct btrfs_clone_extent_info *clone_info,
+ struct btrfs_replace_extent_info *extent_info,
struct btrfs_trans_handle **trans_out)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2566,10 +2634,10 @@
/*
* 1 - update the inode
* 1 - removing the extents in the range
- * 1 - adding the hole extent if no_holes isn't set or if we are cloning
- * an extent
+ * 1 - adding the hole extent if no_holes isn't set or if we are
+ * replacing the range with a new extent
*/
- if (!btrfs_fs_incompat(fs_info, NO_HOLES) || clone_info)
+ if (!btrfs_fs_incompat(fs_info, NO_HOLES) || extent_info)
rsv_count = 3;
else
rsv_count = 2;
@@ -2588,25 +2656,28 @@
cur_offset = start;
while (cur_offset < end) {
- ret = __btrfs_drop_extents(trans, root, inode, path,
+ ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path,
cur_offset, end + 1, &drop_end,
1, 0, 0, NULL);
if (ret != -ENOSPC) {
/*
- * When cloning we want to avoid transaction aborts when
- * nothing was done and we are attempting to clone parts
- * of inline extents, in such cases -EOPNOTSUPP is
- * returned by __btrfs_drop_extents() without having
- * changed anything in the file.
+ * The only time we don't want to abort is if we are
+ * attempting to clone a partial inline extent, in which
+ * case we'll get EOPNOTSUPP. However if we aren't
+ * clone we need to abort no matter what, because if we
+ * got EOPNOTSUPP via prealloc then we messed up and
+ * need to abort.
*/
- if (clone_info && ret && ret != -EOPNOTSUPP)
+ if (ret &&
+ (ret != -EOPNOTSUPP ||
+ (extent_info && extent_info->is_new_extent)))
btrfs_abort_transaction(trans, ret);
break;
}
trans->block_rsv = &fs_info->trans_block_rsv;
- if (!clone_info && cur_offset < drop_end &&
+ if (!extent_info && cur_offset < drop_end &&
cur_offset < ino_size) {
ret = fill_holes(trans, BTRFS_I(inode), path,
cur_offset, drop_end);
@@ -2620,20 +2691,38 @@
btrfs_abort_transaction(trans, ret);
break;
}
+ } else if (!extent_info && cur_offset < drop_end) {
+ /*
+ * We are past the i_size here, but since we didn't
+ * insert holes we need to clear the mapped area so we
+ * know to not set disk_i_size in this area until a new
+ * file extent is inserted here.
+ */
+ ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
+ cur_offset, drop_end - cur_offset);
+ if (ret) {
+ /*
+ * We couldn't clear our area, so we could
+ * presumably adjust up and corrupt the fs, so
+ * we need to abort.
+ */
+ btrfs_abort_transaction(trans, ret);
+ break;
+ }
}
- if (clone_info && drop_end > clone_info->file_offset) {
- u64 clone_len = drop_end - clone_info->file_offset;
+ if (extent_info && drop_end > extent_info->file_offset) {
+ u64 replace_len = drop_end - extent_info->file_offset;
- ret = btrfs_insert_clone_extent(trans, inode, path,
- clone_info, clone_len);
+ ret = btrfs_insert_replace_extent(trans, inode, path,
+ extent_info, replace_len);
if (ret) {
btrfs_abort_transaction(trans, ret);
break;
}
- clone_info->data_len -= clone_len;
- clone_info->data_offset += clone_len;
- clone_info->file_offset += clone_len;
+ extent_info->data_len -= replace_len;
+ extent_info->data_offset += replace_len;
+ extent_info->file_offset += replace_len;
}
cur_offset = drop_end;
@@ -2657,7 +2746,7 @@
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
- if (!clone_info) {
+ if (!extent_info) {
ret = find_first_non_hole(inode, &cur_offset, &len);
if (unlikely(ret < 0))
break;
@@ -2676,7 +2765,7 @@
* than 16Mb would force the full fsync any way (when
* try_release_extent_mapping() is invoked during page cache truncation.
*/
- if (clone_info)
+ if (extent_info && !extent_info->is_new_extent)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
@@ -2702,7 +2791,7 @@
* (because it's useless) or if it represents a 0 bytes range (when
* cur_offset == drop_end).
*/
- if (!clone_info && cur_offset < ino_size && cur_offset < drop_end) {
+ if (!extent_info && cur_offset < ino_size && cur_offset < drop_end) {
ret = fill_holes(trans, BTRFS_I(inode), path,
cur_offset, drop_end);
if (ret) {
@@ -2710,10 +2799,19 @@
btrfs_abort_transaction(trans, ret);
goto out_trans;
}
+ } else if (!extent_info && cur_offset < drop_end) {
+ /* See the comment in the loop above for the reasoning here. */
+ ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
+ cur_offset, drop_end - cur_offset);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_trans;
+ }
+
}
- if (clone_info) {
- ret = btrfs_insert_clone_extent(trans, inode, path, clone_info,
- clone_info->data_len);
+ if (extent_info) {
+ ret = btrfs_insert_replace_extent(trans, inode, path, extent_info,
+ extent_info->data_len);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_trans;
@@ -2768,9 +2866,9 @@
goto out_only_mutex;
}
- lockstart = round_up(offset, btrfs_inode_sectorsize(inode));
+ lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode)));
lockend = round_down(offset + len,
- btrfs_inode_sectorsize(inode)) - 1;
+ btrfs_inode_sectorsize(BTRFS_I(inode))) - 1;
same_block = (BTRFS_BYTES_TO_BLKS(fs_info, offset))
== (BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1));
/*
@@ -2855,7 +2953,7 @@
goto out;
}
- ret = btrfs_punch_hole_range(inode, path, lockstart, lockend, NULL,
+ ret = btrfs_replace_file_extents(inode, path, lockstart, lockend, NULL,
&trans);
btrfs_free_path(path);
if (ret)
@@ -2959,7 +3057,7 @@
inode->i_ctime = current_time(inode);
i_size_write(inode, end);
- btrfs_ordered_update_i_size(inode, end, NULL);
+ btrfs_inode_safe_disk_i_size_write(inode, 0);
ret = btrfs_update_inode(trans, root, inode);
ret2 = btrfs_end_transaction(trans);
@@ -2972,7 +3070,7 @@
RANGE_BOUNDARY_HOLE,
};
-static int btrfs_zero_range_check_range_boundary(struct inode *inode,
+static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode,
u64 offset)
{
const u64 sectorsize = btrfs_inode_sectorsize(inode);
@@ -2980,7 +3078,7 @@
int ret;
offset = round_down(offset, sectorsize);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize);
if (IS_ERR(em))
return PTR_ERR(em);
@@ -3005,7 +3103,7 @@
struct extent_changeset *data_reserved = NULL;
int ret;
u64 alloc_hint = 0;
- const u64 sectorsize = btrfs_inode_sectorsize(inode);
+ const u64 sectorsize = btrfs_inode_sectorsize(BTRFS_I(inode));
u64 alloc_start = round_down(offset, sectorsize);
u64 alloc_end = round_up(offset + len, sectorsize);
u64 bytes_to_reserve = 0;
@@ -3013,8 +3111,8 @@
inode_dio_wait(inode);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
- alloc_start, alloc_end - alloc_start, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
+ alloc_end - alloc_start);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
@@ -3057,8 +3155,8 @@
if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
- alloc_start, sectorsize, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
+ sectorsize);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
@@ -3095,7 +3193,8 @@
* to cover them.
*/
if (!IS_ALIGNED(offset, sectorsize)) {
- ret = btrfs_zero_range_check_range_boundary(inode, offset);
+ ret = btrfs_zero_range_check_range_boundary(BTRFS_I(inode),
+ offset);
if (ret < 0)
goto out;
if (ret == RANGE_BOUNDARY_HOLE) {
@@ -3111,7 +3210,7 @@
}
if (!IS_ALIGNED(offset + len, sectorsize)) {
- ret = btrfs_zero_range_check_range_boundary(inode,
+ ret = btrfs_zero_range_check_range_boundary(BTRFS_I(inode),
offset + len);
if (ret < 0)
goto out;
@@ -3165,7 +3264,7 @@
ret = btrfs_fallocate_update_isize(inode, offset + len, mode);
out:
if (ret && space_reserved)
- btrfs_free_reserved_data_space(inode, data_reserved,
+ btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
alloc_start, bytes_to_reserve);
extent_changeset_free(data_reserved);
@@ -3189,7 +3288,7 @@
u64 locked_end;
u64 actual_end = 0;
struct extent_map *em;
- int blocksize = btrfs_inode_sectorsize(inode);
+ int blocksize = btrfs_inode_sectorsize(BTRFS_I(inode));
int ret;
alloc_start = round_down(offset, blocksize);
@@ -3271,10 +3370,11 @@
*/
lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
locked_end, &cached_state);
- ordered = btrfs_lookup_first_ordered_extent(inode, locked_end);
+ ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode),
+ locked_end);
if (ordered &&
- ordered->file_offset + ordered->len > alloc_start &&
+ ordered->file_offset + ordered->num_bytes > alloc_start &&
ordered->file_offset < alloc_end) {
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
@@ -3299,7 +3399,7 @@
INIT_LIST_HEAD(&reserve_list);
while (cur_offset < alloc_end) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
- alloc_end - cur_offset, 0);
+ alloc_end - cur_offset);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
break;
@@ -3330,8 +3430,9 @@
* range, free reserved data space first, otherwise
* it'll result in false ENOSPC error.
*/
- btrfs_free_reserved_data_space(inode, data_reserved,
- cur_offset, last_byte - cur_offset);
+ btrfs_free_reserved_data_space(BTRFS_I(inode),
+ data_reserved, cur_offset,
+ last_byte - cur_offset);
}
free_extent_map(em);
cur_offset = last_byte;
@@ -3348,7 +3449,7 @@
range->len, i_blocksize(inode),
offset + len, &alloc_hint);
else
- btrfs_free_reserved_data_space(inode,
+ btrfs_free_reserved_data_space(BTRFS_I(inode),
data_reserved, range->start,
range->len);
list_del(&range->list);
@@ -3369,35 +3470,36 @@
inode_unlock(inode);
/* Let go of our reservation. */
if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
- btrfs_free_reserved_data_space(inode, data_reserved,
+ btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
cur_offset, alloc_end - cur_offset);
extent_changeset_free(data_reserved);
return ret;
}
-static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
+static loff_t find_desired_extent(struct inode *inode, loff_t offset,
+ int whence)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
+ loff_t i_size = inode->i_size;
u64 lockstart;
u64 lockend;
u64 start;
u64 len;
int ret = 0;
- if (inode->i_size == 0)
+ if (i_size == 0 || offset >= i_size)
return -ENXIO;
/*
- * *offset can be negative, in this case we start finding DATA/HOLE from
+ * offset can be negative, in this case we start finding DATA/HOLE from
* the very start of the file.
*/
- start = max_t(loff_t, 0, *offset);
+ start = max_t(loff_t, 0, offset);
lockstart = round_down(start, fs_info->sectorsize);
- lockend = round_up(i_size_read(inode),
- fs_info->sectorsize);
+ lockend = round_up(i_size, fs_info->sectorsize);
if (lockend <= lockstart)
lockend = lockstart + fs_info->sectorsize;
lockend--;
@@ -3406,7 +3508,7 @@
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
- while (start < inode->i_size) {
+ while (start < i_size) {
em = btrfs_get_extent_fiemap(BTRFS_I(inode), start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@@ -3429,59 +3531,71 @@
cond_resched();
}
free_extent_map(em);
- if (!ret) {
- if (whence == SEEK_DATA && start >= inode->i_size)
- ret = -ENXIO;
- else
- *offset = min_t(loff_t, start, inode->i_size);
- }
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
- return ret;
+ if (ret) {
+ offset = ret;
+ } else {
+ if (whence == SEEK_DATA && start >= i_size)
+ offset = -ENXIO;
+ else
+ offset = min_t(loff_t, start, i_size);
+ }
+
+ return offset;
}
static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
- int ret;
- inode_lock(inode);
switch (whence) {
- case SEEK_END:
- case SEEK_CUR:
- offset = generic_file_llseek(file, offset, whence);
- goto out;
+ default:
+ return generic_file_llseek(file, offset, whence);
case SEEK_DATA:
case SEEK_HOLE:
- if (offset >= i_size_read(inode)) {
- inode_unlock(inode);
- return -ENXIO;
- }
-
- ret = find_desired_extent(inode, &offset, whence);
- if (ret) {
- inode_unlock(inode);
- return ret;
- }
+ inode_lock_shared(inode);
+ offset = find_desired_extent(inode, offset, whence);
+ inode_unlock_shared(inode);
+ break;
}
- offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
-out:
- inode_unlock(inode);
- return offset;
+ if (offset < 0)
+ return offset;
+
+ return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}
static int btrfs_file_open(struct inode *inode, struct file *filp)
{
- filp->f_mode |= FMODE_NOWAIT;
+ filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
return generic_file_open(inode, filp);
}
+static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ ssize_t ret = 0;
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ inode_lock_shared(inode);
+ ret = btrfs_direct_IO(iocb, to);
+ inode_unlock_shared(inode);
+ if (ret < 0 || !iov_iter_count(to) ||
+ iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
+ return ret;
+ }
+
+ return generic_file_buffered_read(iocb, to, ret);
+}
+
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
- .read_iter = generic_file_read_iter,
+ .read_iter = btrfs_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
+ .splice_write = iter_file_splice_write,
.mmap = btrfs_file_mmap,
.open = btrfs_file_open,
.release = btrfs_release_file,