Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a342f00..f9baefc 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
@@ -6,21 +7,6 @@
* Extent allocs and frees
*
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
*/
#include <linux/fs.h>
@@ -5106,8 +5092,6 @@
* rightmost extent list.
*/
if (path->p_tree_depth) {
- struct ocfs2_extent_block *eb;
-
ret = ocfs2_read_extent_block(et->et_ci,
ocfs2_et_get_last_eb_blk(et),
&last_eb_bh);
@@ -5115,8 +5099,6 @@
mlog_errno(ret);
goto out;
}
-
- eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
}
if (rec->e_cpos == split_rec->e_cpos &&
@@ -6011,6 +5993,7 @@
struct buffer_head *data_alloc_bh = NULL;
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
+ struct ocfs2_journal *journal = osb->journal;
BUG_ON(inode_trylock(tl_inode));
@@ -6031,6 +6014,20 @@
goto out;
}
+ /* Appending to the truncate log (TA) and flushing the truncate log (TF)
+ * are two separate transactions. Both can be committed but not yet
+ * checkpointed. If a crash occurs then, both transactions will be
+ * replayed with several clusters already released to the global bitmap.
+ * Replaying the truncate log then results in a cluster double free.
+ */
+ jbd2_journal_lock_updates(journal->j_journal);
+ status = jbd2_journal_flush(journal->j_journal);
+ jbd2_journal_unlock_updates(journal->j_journal);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
data_alloc_inode = ocfs2_get_system_file_inode(osb,
GLOBAL_BITMAP_SYSTEM_INODE,
OCFS2_INVALID_SLOT);
@@ -6209,17 +6206,17 @@
if (le16_to_cpu(tl->tl_used)) {
trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));
- *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
+ /*
+ * Assuming the write-out below goes well, this copy will be
+ * passed back to recovery for processing.
+ */
+ *tl_copy = kmemdup(tl_bh->b_data, tl_bh->b_size, GFP_KERNEL);
if (!(*tl_copy)) {
status = -ENOMEM;
mlog_errno(status);
goto bail;
}
- /* Assuming the write-out below goes well, this copy
- * will be passed back to recovery for processing. */
- memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
-
/* All we need to do to clear the truncate log is set
* tl_used. */
tl->tl_used = 0;
@@ -6810,6 +6807,8 @@
struct page *page, int zero, u64 *phys)
{
int ret, partial = 0;
+ loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
+ loff_t length = to - from;
ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
if (ret)
@@ -6829,7 +6828,8 @@
if (ret < 0)
mlog_errno(ret);
else if (ocfs2_should_order_data(inode)) {
- ret = ocfs2_jbd2_file_inode(handle, inode);
+ ret = ocfs2_jbd2_inode_add_write(handle, inode,
+ start_byte, length);
if (ret < 0)
mlog_errno(ret);
}
@@ -7536,10 +7536,11 @@
return count;
}
-int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
+static
+int ocfs2_trim_mainbm(struct super_block *sb, struct fstrim_range *range)
{
struct ocfs2_super *osb = OCFS2_SB(sb);
- u64 start, len, trimmed, first_group, last_group, group;
+ u64 start, len, trimmed = 0, first_group, last_group = 0, group = 0;
int ret, cnt;
u32 first_bit, last_bit, minlen;
struct buffer_head *main_bm_bh = NULL;
@@ -7547,7 +7548,6 @@
struct buffer_head *gd_bh = NULL;
struct ocfs2_dinode *main_bm;
struct ocfs2_group_desc *gd = NULL;
- struct ocfs2_trim_fs_info info, *pinfo = NULL;
start = range->start >> osb->s_clustersize_bits;
len = range->len >> osb->s_clustersize_bits;
@@ -7556,6 +7556,9 @@
if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
return -EINVAL;
+ trace_ocfs2_trim_mainbm(start, len, minlen);
+
+next_group:
main_bm_inode = ocfs2_get_system_file_inode(osb,
GLOBAL_BITMAP_SYSTEM_INODE,
OCFS2_INVALID_SLOT);
@@ -7574,64 +7577,34 @@
}
main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
- if (start >= le32_to_cpu(main_bm->i_clusters)) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
- len = range->len >> osb->s_clustersize_bits;
- if (start + len > le32_to_cpu(main_bm->i_clusters))
- len = le32_to_cpu(main_bm->i_clusters) - start;
-
- trace_ocfs2_trim_fs(start, len, minlen);
-
- ocfs2_trim_fs_lock_res_init(osb);
- ret = ocfs2_trim_fs_lock(osb, NULL, 1);
- if (ret < 0) {
- if (ret != -EAGAIN) {
- mlog_errno(ret);
- ocfs2_trim_fs_lock_res_uninit(osb);
+ /*
+ * Do some checks before trimming the first group.
+ */
+ if (!group) {
+ if (start >= le32_to_cpu(main_bm->i_clusters)) {
+ ret = -EINVAL;
goto out_unlock;
}
- mlog(ML_NOTICE, "Wait for trim on device (%s) to "
- "finish, which is running from another node.\n",
- osb->dev_str);
- ret = ocfs2_trim_fs_lock(osb, &info, 0);
- if (ret < 0) {
- mlog_errno(ret);
- ocfs2_trim_fs_lock_res_uninit(osb);
- goto out_unlock;
- }
+ if (start + len > le32_to_cpu(main_bm->i_clusters))
+ len = le32_to_cpu(main_bm->i_clusters) - start;
- if (info.tf_valid && info.tf_success &&
- info.tf_start == start && info.tf_len == len &&
- info.tf_minlen == minlen) {
- /* Avoid sending duplicated trim to a shared device */
- mlog(ML_NOTICE, "The same trim on device (%s) was "
- "just done from node (%u), return.\n",
- osb->dev_str, info.tf_nodenum);
- range->len = info.tf_trimlen;
- goto out_trimunlock;
- }
+ /*
+ * Determine first and last group to examine based on
+ * start and len
+ */
+ first_group = ocfs2_which_cluster_group(main_bm_inode, start);
+ if (first_group == osb->first_cluster_group_blkno)
+ first_bit = start;
+ else
+ first_bit = start - ocfs2_blocks_to_clusters(sb,
+ first_group);
+ last_group = ocfs2_which_cluster_group(main_bm_inode,
+ start + len - 1);
+ group = first_group;
}
- info.tf_nodenum = osb->node_num;
- info.tf_start = start;
- info.tf_len = len;
- info.tf_minlen = minlen;
-
- /* Determine first and last group to examine based on start and len */
- first_group = ocfs2_which_cluster_group(main_bm_inode, start);
- if (first_group == osb->first_cluster_group_blkno)
- first_bit = start;
- else
- first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
- last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
- last_bit = osb->bitmap_cpg;
-
- trimmed = 0;
- for (group = first_group; group <= last_group;) {
+ do {
if (first_bit + len >= osb->bitmap_cpg)
last_bit = osb->bitmap_cpg;
else
@@ -7663,21 +7636,81 @@
group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
else
group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
- }
- range->len = trimmed * sb->s_blocksize;
+ } while (0);
- info.tf_trimlen = range->len;
- info.tf_success = (ret ? 0 : 1);
- pinfo = &info;
-out_trimunlock:
- ocfs2_trim_fs_unlock(osb, pinfo);
- ocfs2_trim_fs_lock_res_uninit(osb);
out_unlock:
ocfs2_inode_unlock(main_bm_inode, 0);
brelse(main_bm_bh);
+ main_bm_bh = NULL;
out_mutex:
inode_unlock(main_bm_inode);
iput(main_bm_inode);
+
+ /*
+ * If the trim has not failed and there are groups left to trim, the
+ * main_bm related locks have been released above to avoid starving
+ * the current IO; go back and trim the next group.
+ */
+ if (ret >= 0 && group <= last_group)
+ goto next_group;
out:
+ range->len = trimmed * sb->s_blocksize;
+ return ret;
+}
+
+int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+ int ret;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+ struct ocfs2_trim_fs_info info, *pinfo = NULL;
+
+ ocfs2_trim_fs_lock_res_init(osb);
+
+ trace_ocfs2_trim_fs(range->start, range->len, range->minlen);
+
+ ret = ocfs2_trim_fs_lock(osb, NULL, 1);
+ if (ret < 0) {
+ if (ret != -EAGAIN) {
+ mlog_errno(ret);
+ ocfs2_trim_fs_lock_res_uninit(osb);
+ return ret;
+ }
+
+ mlog(ML_NOTICE, "Wait for trim on device (%s) to "
+ "finish, which is running from another node.\n",
+ osb->dev_str);
+ ret = ocfs2_trim_fs_lock(osb, &info, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ ocfs2_trim_fs_lock_res_uninit(osb);
+ return ret;
+ }
+
+ if (info.tf_valid && info.tf_success &&
+ info.tf_start == range->start &&
+ info.tf_len == range->len &&
+ info.tf_minlen == range->minlen) {
+ /* Avoid sending duplicated trim to a shared device */
+ mlog(ML_NOTICE, "The same trim on device (%s) was "
+ "just done from node (%u), return.\n",
+ osb->dev_str, info.tf_nodenum);
+ range->len = info.tf_trimlen;
+ goto out;
+ }
+ }
+
+ info.tf_nodenum = osb->node_num;
+ info.tf_start = range->start;
+ info.tf_len = range->len;
+ info.tf_minlen = range->minlen;
+
+ ret = ocfs2_trim_mainbm(sb, range);
+
+ info.tf_trimlen = range->len;
+ info.tf_success = (ret < 0 ? 0 : 1);
+ pinfo = &info;
+out:
+ ocfs2_trim_fs_unlock(osb, pinfo);
+ ocfs2_trim_fs_lock_res_uninit(osb);
return ret;
}