Update Linux to v5.4.2

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a342f00..f9baefc 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* -*- mode: c; c-basic-offset: 8; -*-
  * vim: noexpandtab sw=8 ts=8 sts=0:
  *
@@ -6,21 +7,6 @@
  * Extent allocs and frees
  *
  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
  */
 
 #include <linux/fs.h>
@@ -5106,8 +5092,6 @@
 	 * rightmost extent list.
 	 */
 	if (path->p_tree_depth) {
-		struct ocfs2_extent_block *eb;
-
 		ret = ocfs2_read_extent_block(et->et_ci,
 					      ocfs2_et_get_last_eb_blk(et),
 					      &last_eb_bh);
@@ -5115,8 +5099,6 @@
 			mlog_errno(ret);
 			goto out;
 		}
-
-		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
 	}
 
 	if (rec->e_cpos == split_rec->e_cpos &&
@@ -6011,6 +5993,7 @@
 	struct buffer_head *data_alloc_bh = NULL;
 	struct ocfs2_dinode *di;
 	struct ocfs2_truncate_log *tl;
+	struct ocfs2_journal *journal = osb->journal;
 
 	BUG_ON(inode_trylock(tl_inode));
 
@@ -6031,6 +6014,20 @@
 		goto out;
 	}
 
+	/* Appending truncate log(TA) and and flushing truncate log(TF) are
+	 * two separated transactions. They can be both committed but not
+	 * checkpointed. If crash occurs then, both two transaction will be
+	 * replayed with several already released to global bitmap clusters.
+	 * Then truncate log will be replayed resulting in cluster double free.
+	 */
+	jbd2_journal_lock_updates(journal->j_journal);
+	status = jbd2_journal_flush(journal->j_journal);
+	jbd2_journal_unlock_updates(journal->j_journal);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out;
+	}
+
 	data_alloc_inode = ocfs2_get_system_file_inode(osb,
 						       GLOBAL_BITMAP_SYSTEM_INODE,
 						       OCFS2_INVALID_SLOT);
@@ -6209,17 +6206,17 @@
 	if (le16_to_cpu(tl->tl_used)) {
 		trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));
 
-		*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
+		/*
+		 * Assuming the write-out below goes well, this copy will be
+		 * passed back to recovery for processing.
+		 */
+		*tl_copy = kmemdup(tl_bh->b_data, tl_bh->b_size, GFP_KERNEL);
 		if (!(*tl_copy)) {
 			status = -ENOMEM;
 			mlog_errno(status);
 			goto bail;
 		}
 
-		/* Assuming the write-out below goes well, this copy
-		 * will be passed back to recovery for processing. */
-		memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
-
 		/* All we need to do to clear the truncate log is set
 		 * tl_used. */
 		tl->tl_used = 0;
@@ -6810,6 +6807,8 @@
 			      struct page *page, int zero, u64 *phys)
 {
 	int ret, partial = 0;
+	loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
+	loff_t length = to - from;
 
 	ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
 	if (ret)
@@ -6829,7 +6828,8 @@
 	if (ret < 0)
 		mlog_errno(ret);
 	else if (ocfs2_should_order_data(inode)) {
-		ret = ocfs2_jbd2_file_inode(handle, inode);
+		ret = ocfs2_jbd2_inode_add_write(handle, inode,
+						 start_byte, length);
 		if (ret < 0)
 			mlog_errno(ret);
 	}
@@ -7536,10 +7536,11 @@
 	return count;
 }
 
-int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
+static
+int ocfs2_trim_mainbm(struct super_block *sb, struct fstrim_range *range)
 {
 	struct ocfs2_super *osb = OCFS2_SB(sb);
-	u64 start, len, trimmed, first_group, last_group, group;
+	u64 start, len, trimmed = 0, first_group, last_group = 0, group = 0;
 	int ret, cnt;
 	u32 first_bit, last_bit, minlen;
 	struct buffer_head *main_bm_bh = NULL;
@@ -7547,7 +7548,6 @@
 	struct buffer_head *gd_bh = NULL;
 	struct ocfs2_dinode *main_bm;
 	struct ocfs2_group_desc *gd = NULL;
-	struct ocfs2_trim_fs_info info, *pinfo = NULL;
 
 	start = range->start >> osb->s_clustersize_bits;
 	len = range->len >> osb->s_clustersize_bits;
@@ -7556,6 +7556,9 @@
 	if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
 		return -EINVAL;
 
+	trace_ocfs2_trim_mainbm(start, len, minlen);
+
+next_group:
 	main_bm_inode = ocfs2_get_system_file_inode(osb,
 						    GLOBAL_BITMAP_SYSTEM_INODE,
 						    OCFS2_INVALID_SLOT);
@@ -7574,64 +7577,34 @@
 	}
 	main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
 
-	if (start >= le32_to_cpu(main_bm->i_clusters)) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-
-	len = range->len >> osb->s_clustersize_bits;
-	if (start + len > le32_to_cpu(main_bm->i_clusters))
-		len = le32_to_cpu(main_bm->i_clusters) - start;
-
-	trace_ocfs2_trim_fs(start, len, minlen);
-
-	ocfs2_trim_fs_lock_res_init(osb);
-	ret = ocfs2_trim_fs_lock(osb, NULL, 1);
-	if (ret < 0) {
-		if (ret != -EAGAIN) {
-			mlog_errno(ret);
-			ocfs2_trim_fs_lock_res_uninit(osb);
+	/*
+	 * Do some check before trim the first group.
+	 */
+	if (!group) {
+		if (start >= le32_to_cpu(main_bm->i_clusters)) {
+			ret = -EINVAL;
 			goto out_unlock;
 		}
 
-		mlog(ML_NOTICE, "Wait for trim on device (%s) to "
-		     "finish, which is running from another node.\n",
-		     osb->dev_str);
-		ret = ocfs2_trim_fs_lock(osb, &info, 0);
-		if (ret < 0) {
-			mlog_errno(ret);
-			ocfs2_trim_fs_lock_res_uninit(osb);
-			goto out_unlock;
-		}
+		if (start + len > le32_to_cpu(main_bm->i_clusters))
+			len = le32_to_cpu(main_bm->i_clusters) - start;
 
-		if (info.tf_valid && info.tf_success &&
-		    info.tf_start == start && info.tf_len == len &&
-		    info.tf_minlen == minlen) {
-			/* Avoid sending duplicated trim to a shared device */
-			mlog(ML_NOTICE, "The same trim on device (%s) was "
-			     "just done from node (%u), return.\n",
-			     osb->dev_str, info.tf_nodenum);
-			range->len = info.tf_trimlen;
-			goto out_trimunlock;
-		}
+		/*
+		 * Determine first and last group to examine based on
+		 * start and len
+		 */
+		first_group = ocfs2_which_cluster_group(main_bm_inode, start);
+		if (first_group == osb->first_cluster_group_blkno)
+			first_bit = start;
+		else
+			first_bit = start - ocfs2_blocks_to_clusters(sb,
+								first_group);
+		last_group = ocfs2_which_cluster_group(main_bm_inode,
+						       start + len - 1);
+		group = first_group;
 	}
 
-	info.tf_nodenum = osb->node_num;
-	info.tf_start = start;
-	info.tf_len = len;
-	info.tf_minlen = minlen;
-
-	/* Determine first and last group to examine based on start and len */
-	first_group = ocfs2_which_cluster_group(main_bm_inode, start);
-	if (first_group == osb->first_cluster_group_blkno)
-		first_bit = start;
-	else
-		first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
-	last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
-	last_bit = osb->bitmap_cpg;
-
-	trimmed = 0;
-	for (group = first_group; group <= last_group;) {
+	do {
 		if (first_bit + len >= osb->bitmap_cpg)
 			last_bit = osb->bitmap_cpg;
 		else
@@ -7663,21 +7636,81 @@
 			group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
 		else
 			group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
-	}
-	range->len = trimmed * sb->s_blocksize;
+	} while (0);
 
-	info.tf_trimlen = range->len;
-	info.tf_success = (ret ? 0 : 1);
-	pinfo = &info;
-out_trimunlock:
-	ocfs2_trim_fs_unlock(osb, pinfo);
-	ocfs2_trim_fs_lock_res_uninit(osb);
 out_unlock:
 	ocfs2_inode_unlock(main_bm_inode, 0);
 	brelse(main_bm_bh);
+	main_bm_bh = NULL;
 out_mutex:
 	inode_unlock(main_bm_inode);
 	iput(main_bm_inode);
+
+	/*
+	 * If all the groups trim are not done or failed, but we should release
+	 * main_bm related locks for avoiding the current IO starve, then go to
+	 * trim the next group
+	 */
+	if (ret >= 0 && group <= last_group)
+		goto next_group;
 out:
+	range->len = trimmed * sb->s_blocksize;
+	return ret;
+}
+
+int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+	int ret;
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+	struct ocfs2_trim_fs_info info, *pinfo = NULL;
+
+	ocfs2_trim_fs_lock_res_init(osb);
+
+	trace_ocfs2_trim_fs(range->start, range->len, range->minlen);
+
+	ret = ocfs2_trim_fs_lock(osb, NULL, 1);
+	if (ret < 0) {
+		if (ret != -EAGAIN) {
+			mlog_errno(ret);
+			ocfs2_trim_fs_lock_res_uninit(osb);
+			return ret;
+		}
+
+		mlog(ML_NOTICE, "Wait for trim on device (%s) to "
+		     "finish, which is running from another node.\n",
+		     osb->dev_str);
+		ret = ocfs2_trim_fs_lock(osb, &info, 0);
+		if (ret < 0) {
+			mlog_errno(ret);
+			ocfs2_trim_fs_lock_res_uninit(osb);
+			return ret;
+		}
+
+		if (info.tf_valid && info.tf_success &&
+		    info.tf_start == range->start &&
+		    info.tf_len == range->len &&
+		    info.tf_minlen == range->minlen) {
+			/* Avoid sending duplicated trim to a shared device */
+			mlog(ML_NOTICE, "The same trim on device (%s) was "
+			     "just done from node (%u), return.\n",
+			     osb->dev_str, info.tf_nodenum);
+			range->len = info.tf_trimlen;
+			goto out;
+		}
+	}
+
+	info.tf_nodenum = osb->node_num;
+	info.tf_start = range->start;
+	info.tf_len = range->len;
+	info.tf_minlen = range->minlen;
+
+	ret = ocfs2_trim_mainbm(sb, range);
+
+	info.tf_trimlen = range->len;
+	info.tf_success = (ret < 0 ? 0 : 1);
+	pinfo = &info;
+out:
+	ocfs2_trim_fs_unlock(osb, pinfo);
+	ocfs2_trim_fs_lock_res_uninit(osb);
 	return ret;
 }