Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 5cd42b6..ba68b0b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -517,6 +517,34 @@
return 1;
}
+static bool reloc_root_is_dead(struct btrfs_root *root)
+{
+ /*
+ * Pair with set_bit/clear_bit in clean_dirty_subvols and
+ * btrfs_update_reloc_root. We need to see the updated bit before
+ * trying to access reloc_root
+ */
+ smp_rmb();
+ if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
+ return true;
+ return false;
+}
+
+/*
+ * Check if this subvolume tree has valid reloc tree.
+ *
+ * Reloc tree after swap is considered dead, thus not considered as valid.
+ * This is enough for most callers, as they don't distinguish dead reloc root
+ * from no reloc root. But should_ignore_root() below is a special case.
+ */
+static bool have_reloc_root(struct btrfs_root *root)
+{
+ if (reloc_root_is_dead(root))
+ return false;
+ if (!root->reloc_root)
+ return false;
+ return true;
+}
static int should_ignore_root(struct btrfs_root *root)
{
@@ -525,12 +553,16 @@
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
return 0;
+ /* This root has been merged with its reloc tree, we can ignore it */
+ if (reloc_root_is_dead(root))
+ return 1;
+
reloc_root = root->reloc_root;
if (!reloc_root)
return 0;
- if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
- root->fs_info->running_transaction->transid - 1)
+ if (btrfs_header_generation(reloc_root->commit_root) ==
+ root->fs_info->running_transaction->transid)
return 0;
/*
* if there is reloc tree and it was created in previous
@@ -1154,7 +1186,7 @@
free_backref_node(cache, lower);
}
- free_backref_node(cache, node);
+ remove_backref_node(cache, node);
return ERR_PTR(err);
}
ASSERT(!node || !node->detached);
@@ -1266,7 +1298,7 @@
if (!node)
return -ENOMEM;
- node->bytenr = root->node->start;
+ node->bytenr = root->commit_root->start;
node->data = root;
spin_lock(&rc->reloc_root_tree.lock);
@@ -1297,15 +1329,14 @@
if (rc && root->node) {
spin_lock(&rc->reloc_root_tree.lock);
rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
+ root->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+ RB_CLEAR_NODE(&node->rb_node);
}
spin_unlock(&rc->reloc_root_tree.lock);
- if (!node)
- return;
- BUG_ON((struct btrfs_root *)node->data != root);
+ ASSERT(!node || (struct btrfs_root *)node->data == root);
}
spin_lock(&fs_info->trans_lock);
@@ -1318,7 +1349,7 @@
* helper to update the 'address of tree root -> reloc tree'
* mapping
*/
-static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
+static int __update_reloc_root(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct rb_node *rb_node;
@@ -1327,7 +1358,7 @@
spin_lock(&rc->reloc_root_tree.lock);
rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
+ root->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
@@ -1339,7 +1370,7 @@
BUG_ON((struct btrfs_root *)node->data != root);
spin_lock(&rc->reloc_root_tree.lock);
- node->bytenr = new_bytenr;
+ node->bytenr = root->node->start;
rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
@@ -1435,20 +1466,35 @@
int clear_rsv = 0;
int ret;
+ if (!rc)
+ return 0;
+
/*
* The subvolume has reloc tree but the swap is finished, no need to
* create/update the dead reloc tree
*/
- if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
+ if (reloc_root_is_dead(root))
return 0;
+ /*
+ * This is subtle but important. We do not do
+ * record_root_in_transaction for reloc roots, instead we record their
+ * corresponding fs root, and then here we update the last trans for the
+ * reloc root. This means that we have to do this for the entire life
+ * of the reloc root, regardless of which stage of the relocation we are
+ * in.
+ */
if (root->reloc_root) {
reloc_root = root->reloc_root;
reloc_root->last_trans = trans->transid;
return 0;
}
- if (!rc || !rc->create_reloc_tree ||
+ /*
+ * We are merging reloc roots, we do not need new reloc trees. Also
+ * reloc trees never need their own reloc tree.
+ */
+ if (!rc->create_reloc_tree ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
return 0;
@@ -1478,8 +1524,7 @@
struct btrfs_root_item *root_item;
int ret;
- if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state) ||
- !root->reloc_root)
+ if (!have_reloc_root(root))
goto out;
reloc_root = root->reloc_root;
@@ -1489,10 +1534,16 @@
if (fs_info->reloc_ctl->merge_reloc_tree &&
btrfs_root_refs(root_item) == 0) {
set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
+ /*
+ * Mark the tree as dead before we change reloc_root so
+ * have_reloc_root will not touch it from now on.
+ */
+ smp_wmb();
__del_reloc_root(reloc_root);
}
if (reloc_root->commit_root != reloc_root->node) {
+ __update_reloc_root(reloc_root);
btrfs_set_root_node(root_item, reloc_root->node);
free_extent_buffer(reloc_root->commit_root);
reloc_root->commit_root = btrfs_root_node(reloc_root);
@@ -1785,8 +1836,8 @@
int ret;
int slot;
- BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
- BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
+ ASSERT(src->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
+ ASSERT(dest->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
last_snapshot = btrfs_root_last_snapshot(&src->root_item);
again:
@@ -1820,7 +1871,7 @@
struct btrfs_key first_key;
level = btrfs_header_level(parent);
- BUG_ON(level < lowest_level);
+ ASSERT(level >= lowest_level);
ret = btrfs_bin_search(parent, &key, level, &slot);
if (ret < 0)
@@ -2202,6 +2253,11 @@
if (ret2 < 0 && !ret)
ret = ret2;
}
+ /*
+ * Need barrier to ensure clear_bit() only happens after
+ * root->reloc_root = NULL. Pairs with have_reloc_root.
+ */
+ smp_wmb();
clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
btrfs_put_fs_root(root);
} else {
@@ -2229,6 +2285,7 @@
struct btrfs_root_item *root_item;
struct btrfs_path *path;
struct extent_buffer *leaf;
+ int reserve_level;
int level;
int max_level;
int replaced = 0;
@@ -2269,12 +2326,21 @@
btrfs_unlock_up_safe(path, 0);
}
- min_reserved = fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
+ /*
+ * In merge_reloc_root(), we modify the upper level pointer to swap the
+ * tree blocks between reloc tree and subvolume tree. Thus for tree
+ * block COW, we COW at most from level 1 to root level for each tree.
+ *
+ * Thus the needed metadata size is at most root_level * nodesize,
+ * and * 2 since we have two trees to COW.
+ */
+ reserve_level = max_t(int, 1, btrfs_root_level(root_item));
+ min_reserved = fs_info->nodesize * reserve_level * 2;
memset(&next_key, 0, sizeof(next_key));
while (1) {
ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
- BTRFS_RESERVE_FLUSH_ALL);
+ BTRFS_RESERVE_FLUSH_LIMIT);
if (ret) {
err = ret;
goto out;
@@ -2285,6 +2351,18 @@
trans = NULL;
goto out;
}
+
+ /*
+ * At this point we no longer have a reloc_control, so we can't
+ * depend on btrfs_init_reloc_root to update our last_trans.
+ *
+ * But that's ok, we started the trans handle on our
+ * corresponding fs_root, which means it's been added to the
+ * dirty list. At commit time we'll still call
+ * btrfs_update_reloc_root() and update our root item
+ * appropriately.
+ */
+ reloc_root->last_trans = trans->transid;
trans->block_rsv = rc->block_rsv;
replaced = 0;
@@ -2482,12 +2560,10 @@
reloc_root = list_entry(reloc_roots.next,
struct btrfs_root, root_list);
+ root = read_fs_root(fs_info, reloc_root->root_key.offset);
if (btrfs_root_refs(&reloc_root->root_item) > 0) {
- root = read_fs_root(fs_info,
- reloc_root->root_key.offset);
BUG_ON(IS_ERR(root));
BUG_ON(root->reloc_root != reloc_root);
-
ret = merge_reloc_root(rc, root);
if (ret) {
if (list_empty(&reloc_root->root_list))
@@ -2496,6 +2572,13 @@
goto out;
}
} else {
+ if (!IS_ERR(root)) {
+ if (root->reloc_root == reloc_root)
+ root->reloc_root = NULL;
+ clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE,
+ &root->state);
+ }
+
list_del_init(&reloc_root->root_list);
/* Don't forget to queue this reloc root for cleanup */
list_add_tail(&reloc_root->reloc_dirty_list,
@@ -2521,7 +2604,21 @@
free_reloc_roots(&reloc_roots);
}
- BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+ /*
+ * We used to have
+ *
+ * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+ *
+ * here, but it's wrong. If we fail to start the transaction in
+ * prepare_to_merge() we will have only 0 ref reloc roots, none of which
+ * have actually been removed from the reloc_root_tree rb tree. This is
+ * fine because we're bailing here, and we hold a reference on the root
+ * for the list that holds it, so these roots will be cleaned up when we
+ * do the reloc_dirty_list afterwards. Meanwhile the root->reloc_root
+ * will be cleaned up on unmount.
+ *
+ * The remaining nodes will be cleaned up by free_reloc_control.
+ */
}
static void free_block_list(struct rb_root *blocks)
@@ -3121,9 +3218,8 @@
ret = relocate_tree_block(trans, rc, node, &block->key,
path);
if (ret < 0) {
- if (ret != -EAGAIN || &block->rb_node == rb_first(blocks))
- err = ret;
- goto out;
+ err = ret;
+ break;
}
}
out:
@@ -4099,12 +4195,6 @@
if (!RB_EMPTY_ROOT(&blocks)) {
ret = relocate_tree_blocks(trans, rc, &blocks);
if (ret < 0) {
- /*
- * if we fail to relocate tree blocks, force to update
- * backref cache when committing transaction.
- */
- rc->backref_cache.last_trans = trans->transid - 1;
-
if (ret != -EAGAIN) {
err = ret;
break;
@@ -4174,10 +4264,10 @@
goto out_free;
}
btrfs_commit_transaction(trans);
+out_free:
ret = clean_dirty_subvols(rc);
if (ret < 0 && !err)
err = ret;
-out_free:
btrfs_free_block_rsv(fs_info, rc->block_rsv);
btrfs_free_path(path);
return err;
@@ -4279,6 +4369,18 @@
return rc;
}
+static void free_reloc_control(struct reloc_control *rc)
+{
+ struct mapping_node *node, *tmp;
+
+ free_reloc_roots(&rc->reloc_roots);
+ rbtree_postorder_for_each_entry_safe(node, tmp,
+ &rc->reloc_root_tree.rb_root, rb_node)
+ kfree(node);
+
+ kfree(rc);
+}
+
/*
* Print the block group being relocated
*/
@@ -4326,7 +4428,7 @@
rc->extent_root = extent_root;
rc->block_group = bg;
- ret = btrfs_inc_block_group_ro(rc->block_group);
+ ret = btrfs_inc_block_group_ro(rc->block_group, true);
if (ret) {
err = ret;
goto out;
@@ -4411,7 +4513,7 @@
btrfs_dec_block_group_ro(rc->block_group);
iput(rc->data_inode);
btrfs_put_block_group(rc->block_group);
- kfree(rc);
+ free_reloc_control(rc);
return err;
}
@@ -4534,9 +4636,8 @@
trans = btrfs_join_transaction(rc->extent_root);
if (IS_ERR(trans)) {
- unset_reloc_control(rc);
err = PTR_ERR(trans);
- goto out_free;
+ goto out_unset;
}
rc->merge_reloc_tree = 1;
@@ -4555,7 +4656,9 @@
fs_root = read_fs_root(fs_info, reloc_root->root_key.offset);
if (IS_ERR(fs_root)) {
err = PTR_ERR(fs_root);
- goto out_free;
+ list_add_tail(&reloc_root->root_list, &reloc_roots);
+ btrfs_end_transaction(trans);
+ goto out_unset;
}
err = __add_reloc_root(reloc_root);
@@ -4565,7 +4668,7 @@
err = btrfs_commit_transaction(trans);
if (err)
- goto out_free;
+ goto out_unset;
merge_reloc_roots(rc);
@@ -4574,15 +4677,16 @@
trans = btrfs_join_transaction(rc->extent_root);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto out_free;
+ goto out_clean;
}
err = btrfs_commit_transaction(trans);
-
+out_clean:
ret = clean_dirty_subvols(rc);
if (ret < 0 && !err)
err = ret;
-out_free:
- kfree(rc);
+out_unset:
+ unset_reloc_control(rc);
+ free_reloc_control(rc);
out:
if (!list_empty(&reloc_roots))
free_reloc_roots(&reloc_roots);
@@ -4669,11 +4773,6 @@
BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
- if (buf == root->node)
- __update_reloc_root(root, cow->start);
- }
-
level = btrfs_header_level(buf);
if (btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item))
@@ -4720,7 +4819,7 @@
struct btrfs_root *root = pending->root;
struct reloc_control *rc = root->fs_info->reloc_ctl;
- if (!root->reloc_root || !rc)
+ if (!rc || !have_reloc_root(root))
return;
if (!rc->merge_reloc_tree)
@@ -4754,7 +4853,7 @@
struct reloc_control *rc = root->fs_info->reloc_ctl;
int ret;
- if (!root->reloc_root || !rc)
+ if (!rc || !have_reloc_root(root))
return 0;
rc = root->fs_info->reloc_ctl;