Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index 5a9f553..4ad2c67 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
config JBD2
tristate
select CRC32
diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile
index 802a341..126b4da 100644
--- a/fs/jbd2/Makefile
+++ b/fs/jbd2/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for the linux journaling routines.
#
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 26f8d7e..a190906 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -113,7 +113,7 @@
nblocks = jbd2_space_needed(journal);
while (jbd2_log_space_left(journal) < nblocks) {
write_unlock(&journal->j_state_lock);
- mutex_lock(&journal->j_checkpoint_mutex);
+ mutex_lock_io(&journal->j_checkpoint_mutex);
/*
* Test again, another process may have checkpointed while we
@@ -132,7 +132,6 @@
return;
}
spin_lock(&journal->j_list_lock);
- nblocks = jbd2_space_needed(journal);
space_left = jbd2_log_space_left(journal);
if (space_left < nblocks) {
int chkpt = journal->j_checkpoint_transactions != NULL;
@@ -276,9 +275,22 @@
"JBD2: %s: Waiting for Godot: block %llu\n",
journal->j_devname, (unsigned long long) bh->b_blocknr);
+ if (batch_count)
+ __flush_batch(journal, &batch_count);
jbd2_log_start_commit(journal, tid);
+ /*
+ * jbd2_journal_commit_transaction() may want
+ * to take the checkpoint_mutex if JBD2_FLUSHED
+ * is set, and jbd2_update_log_tail(), called by
+ * jbd2_journal_commit_transaction(), may also take
+ * checkpoint_mutex. So we need to temporarily
+ * drop it.
+ */
+ mutex_unlock(&journal->j_checkpoint_mutex);
jbd2_log_wait_commit(journal, tid);
- goto retry;
+ mutex_lock_io(&journal->j_checkpoint_mutex);
+ spin_lock(&journal->j_list_lock);
+ goto restart;
}
if (!buffer_dirty(bh)) {
if (unlikely(buffer_write_io_error(bh)) && !result)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 150cc03..132fb92 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -184,17 +184,18 @@
/*
* write the filemap data using writepage() address_space_operations.
* We don't do block allocation here even for delalloc. We don't
- * use writepages() because with dealyed allocation we may be doing
+ * use writepages() because with delayed allocation we may be doing
* block allocation in writepages().
*/
-static int journal_submit_inode_data_buffers(struct address_space *mapping)
+static int journal_submit_inode_data_buffers(struct address_space *mapping,
+ loff_t dirty_start, loff_t dirty_end)
{
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = mapping->nrpages * 2,
- .range_start = 0,
- .range_end = i_size_read(mapping->host),
+ .range_start = dirty_start,
+ .range_end = dirty_end,
};
ret = generic_writepages(mapping, &wbc);
@@ -218,6 +219,9 @@
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+ loff_t dirty_start = jinode->i_dirty_start;
+ loff_t dirty_end = jinode->i_dirty_end;
+
if (!(jinode->i_flags & JI_WRITE_DATA))
continue;
mapping = jinode->i_vfs_inode->i_mapping;
@@ -230,7 +234,8 @@
* only allocated blocks here.
*/
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
- err = journal_submit_inode_data_buffers(mapping);
+ err = journal_submit_inode_data_buffers(mapping, dirty_start,
+ dirty_end);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
@@ -257,12 +262,16 @@
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+ loff_t dirty_start = jinode->i_dirty_start;
+ loff_t dirty_end = jinode->i_dirty_end;
+
if (!(jinode->i_flags & JI_WAIT_DATA))
continue;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
- err = filemap_fdatawait_keep_errors(
- jinode->i_vfs_inode->i_mapping);
+ err = filemap_fdatawait_range_keep_errors(
+ jinode->i_vfs_inode->i_mapping, dirty_start,
+ dirty_end);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
@@ -282,6 +291,8 @@
&jinode->i_transaction->t_inode_list);
} else {
jinode->i_transaction = NULL;
+ jinode->i_dirty_start = 0;
+ jinode->i_dirty_end = 0;
}
}
spin_unlock(&journal->j_list_lock);
@@ -439,6 +450,8 @@
finish_wait(&journal->j_wait_updates, &wait);
}
spin_unlock(&commit_transaction->t_handle_lock);
+ commit_transaction->t_state = T_SWITCH;
+ write_unlock(&journal->j_state_lock);
J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
journal->j_max_transaction_buffers);
@@ -505,6 +518,7 @@
atomic_sub(atomic_read(&journal->j_reserved_credits),
&commit_transaction->t_outstanding_credits);
+ write_lock(&journal->j_state_lock);
trace_jbd2_commit_flushing(journal, commit_transaction);
stats.run.rs_flushing = jiffies;
stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
@@ -691,9 +705,11 @@
the last tag we set up. */
tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
-
- jbd2_descriptor_block_csum_set(journal, descriptor);
start_journal_io:
+ if (descriptor)
+ jbd2_descriptor_block_csum_set(journal,
+ descriptor);
+
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
/*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8ef6b6d..1c58859 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -66,9 +66,6 @@
EXPORT_SYMBOL(jbd2_journal_set_triggers);
EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
EXPORT_SYMBOL(jbd2_journal_forget);
-#if 0
-EXPORT_SYMBOL(journal_sync_buffer);
-#endif
EXPORT_SYMBOL(jbd2_journal_flush);
EXPORT_SYMBOL(jbd2_journal_revoke);
@@ -92,8 +89,8 @@
EXPORT_SYMBOL(jbd2_journal_invalidatepage);
EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
-EXPORT_SYMBOL(jbd2_journal_inode_add_write);
-EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
+EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
+EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
@@ -142,22 +139,6 @@
return cpu_to_be32(csum);
}
-static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
-{
- if (!jbd2_journal_has_csum_v2or3(j))
- return 1;
-
- return sb->s_checksum == jbd2_superblock_csum(j, sb);
-}
-
-static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
-{
- if (!jbd2_journal_has_csum_v2or3(j))
- return;
-
- sb->s_checksum = jbd2_superblock_csum(j, sb);
-}
-
/*
* Helper function used to manage commit timeouts
*/
@@ -219,7 +200,7 @@
if (journal->j_flags & JBD2_UNMOUNT)
goto end_loop;
- jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
+ jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
journal->j_commit_sequence, journal->j_commit_request);
if (journal->j_commit_sequence != journal->j_commit_request) {
@@ -340,7 +321,7 @@
* IO is in progress. do_get_write_access() handles this.
*
* The function returns a pointer to the buffer_head to be used for IO.
- *
+ *
*
* Return value:
* <0: Error
@@ -516,7 +497,7 @@
*/
journal->j_commit_request = target;
- jbd_debug(1, "JBD2: requesting commit %d/%d\n",
+ jbd_debug(1, "JBD2: requesting commit %u/%u\n",
journal->j_commit_request,
journal->j_commit_sequence);
journal->j_running_transaction->t_requested = jiffies;
@@ -529,7 +510,7 @@
WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
journal->j_commit_request,
journal->j_commit_sequence,
- target, journal->j_running_transaction ?
+ target, journal->j_running_transaction ?
journal->j_running_transaction->t_tid : 0);
return 0;
}
@@ -714,12 +695,12 @@
#ifdef CONFIG_JBD2_DEBUG
if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_ERR
- "%s: error: j_commit_request=%d, tid=%d\n",
+ "%s: error: j_commit_request=%u, tid=%u\n",
__func__, journal->j_commit_request, tid);
}
#endif
while (tid_gt(tid, journal->j_commit_sequence)) {
- jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
+ jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
tid, journal->j_commit_sequence);
read_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_commit);
@@ -960,7 +941,7 @@
trace_jbd2_update_log_tail(journal, tid, block, freed);
jbd_debug(1,
- "Cleaning journal tail from %d to %d (offset %lu), "
+ "Cleaning journal tail from %u to %u (offset %lu), "
"freeing %lu\n",
journal->j_tail_sequence, tid, block, freed);
@@ -1334,7 +1315,7 @@
*/
if (sb->s_start == 0) {
jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
- "(start %ld, seq %d, errno %d)\n",
+ "(start %ld, seq %u, errno %d)\n",
journal->j_tail, journal->j_tail_sequence,
journal->j_errno);
journal->j_flags |= JBD2_FLUSHED;
@@ -1356,16 +1337,23 @@
return jbd2_journal_start_thread(journal);
}
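+/*
+ * This function expects that the caller will have locked the journal
+ * buffer head, and will return with it unlocked.
+ * NOTE(review): the early -EIO return for an unmapped buffer below does
+ * not appear to unlock the buffer head -- confirm against the callers.
+ */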
static int jbd2_write_superblock(journal_t *journal, int write_flags)
{
struct buffer_head *bh = journal->j_sb_buffer;
journal_superblock_t *sb = journal->j_superblock;
int ret;
+ /* Buffer got discarded which means block device got invalidated */
+ if (!buffer_mapped(bh))
+ return -EIO;
+
trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
- lock_buffer(bh);
if (buffer_write_io_error(bh)) {
/*
* Oh, dear. A previous attempt to write the journal
@@ -1381,7 +1369,8 @@
clear_buffer_write_io_error(bh);
set_buffer_uptodate(bh);
}
- jbd2_superblock_csum_set(journal, sb);
+ if (jbd2_journal_has_csum_v2or3(journal))
+ sb->s_checksum = jbd2_superblock_csum(journal, sb);
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
@@ -1424,6 +1413,7 @@
jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
tail_block, tail_tid);
+ lock_buffer(journal->j_sb_buffer);
sb->s_sequence = cpu_to_be32(tail_tid);
sb->s_start = cpu_to_be32(tail_block);
@@ -1454,18 +1444,17 @@
journal_superblock_t *sb = journal->j_superblock;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
- read_lock(&journal->j_state_lock);
- /* Is it already empty? */
- if (sb->s_start == 0) {
- read_unlock(&journal->j_state_lock);
+ lock_buffer(journal->j_sb_buffer);
+ if (sb->s_start == 0) { /* Is it already empty? */
+ unlock_buffer(journal->j_sb_buffer);
return;
}
- jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
+
+ jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
journal->j_tail_sequence);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
sb->s_start = cpu_to_be32(0);
- read_unlock(&journal->j_state_lock);
jbd2_write_superblock(journal, write_op);
@@ -1488,9 +1477,8 @@
journal_superblock_t *sb = journal->j_superblock;
int errcode;
- read_lock(&journal->j_state_lock);
+ lock_buffer(journal->j_sb_buffer);
errcode = journal->j_errno;
- read_unlock(&journal->j_state_lock);
if (errcode == -ESHUTDOWN)
errcode = 0;
jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
@@ -1595,17 +1583,18 @@
}
}
- /* Check superblock checksum */
- if (!jbd2_superblock_csum_verify(journal, sb)) {
- printk(KERN_ERR "JBD2: journal checksum error\n");
- err = -EFSBADCRC;
- goto out;
- }
+ if (jbd2_journal_has_csum_v2or3(journal)) {
+ /* Check superblock checksum */
+ if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
+ printk(KERN_ERR "JBD2: journal checksum error\n");
+ err = -EFSBADCRC;
+ goto out;
+ }
- /* Precompute checksum seed for all metadata */
- if (jbd2_journal_has_csum_v2or3(journal))
+ /* Precompute checksum seed for all metadata */
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
sizeof(sb->s_uuid));
+ }
set_buffer_verified(bh);
@@ -1894,28 +1883,27 @@
sb = journal->j_superblock;
+ /* Load the checksum driver if necessary */
+ if ((journal->j_chksum_driver == NULL) &&
+ INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
+ journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+ if (IS_ERR(journal->j_chksum_driver)) {
+ printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
+ journal->j_chksum_driver = NULL;
+ return 0;
+ }
+ /* Precompute checksum seed for all metadata */
+ journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
+ sizeof(sb->s_uuid));
+ }
+
+ lock_buffer(journal->j_sb_buffer);
+
/* If enabling v3 checksums, update superblock */
if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
sb->s_feature_compat &=
~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
-
- /* Load the checksum driver */
- if (journal->j_chksum_driver == NULL) {
- journal->j_chksum_driver = crypto_alloc_shash("crc32c",
- 0, 0);
- if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD2: Cannot load crc32c "
- "driver.\n");
- journal->j_chksum_driver = NULL;
- return 0;
- }
-
- /* Precompute checksum seed for all metadata */
- journal->j_csum_seed = jbd2_chksum(journal, ~0,
- sb->s_uuid,
- sizeof(sb->s_uuid));
- }
}
/* If enabling v1 checksums, downgrade superblock */
@@ -1927,6 +1915,7 @@
sb->s_feature_compat |= cpu_to_be32(compat);
sb->s_feature_ro_compat |= cpu_to_be32(ro);
sb->s_feature_incompat |= cpu_to_be32(incompat);
+ unlock_buffer(journal->j_sb_buffer);
return 1;
#undef COMPAT_FEATURE_ON
@@ -2067,7 +2056,7 @@
err = jbd2_journal_skip_recovery(journal);
if (write) {
/* Lock to make assertions happy... */
- mutex_lock(&journal->j_checkpoint_mutex);
+ mutex_lock_io(&journal->j_checkpoint_mutex);
jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
}
@@ -2383,22 +2372,19 @@
static atomic_t nr_journal_heads = ATOMIC_INIT(0);
#endif
-static int jbd2_journal_init_journal_head_cache(void)
+static int __init jbd2_journal_init_journal_head_cache(void)
{
- int retval;
-
- J_ASSERT(jbd2_journal_head_cache == NULL);
+ J_ASSERT(!jbd2_journal_head_cache);
jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
sizeof(struct journal_head),
0, /* offset */
SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU,
NULL); /* ctor */
- retval = 0;
if (!jbd2_journal_head_cache) {
- retval = -ENOMEM;
printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
+ return -ENOMEM;
}
- return retval;
+ return 0;
}
static void jbd2_journal_destroy_journal_head_cache(void)
@@ -2585,6 +2571,8 @@
jinode->i_next_transaction = NULL;
jinode->i_vfs_inode = inode;
jinode->i_flags = 0;
+ jinode->i_dirty_start = 0;
+ jinode->i_dirty_end = 0;
INIT_LIST_HEAD(&jinode->i_list);
}
@@ -2644,28 +2632,38 @@
struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
-static int __init jbd2_journal_init_handle_cache(void)
+static int __init jbd2_journal_init_inode_cache(void)
{
- jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
- if (jbd2_handle_cache == NULL) {
- printk(KERN_EMERG "JBD2: failed to create handle cache\n");
- return -ENOMEM;
- }
+ J_ASSERT(!jbd2_inode_cache);
jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
- if (jbd2_inode_cache == NULL) {
- printk(KERN_EMERG "JBD2: failed to create inode cache\n");
- kmem_cache_destroy(jbd2_handle_cache);
+ if (!jbd2_inode_cache) {
+ pr_emerg("JBD2: failed to create inode cache\n");
return -ENOMEM;
}
return 0;
}
+static int __init jbd2_journal_init_handle_cache(void)
+{
+ J_ASSERT(!jbd2_handle_cache);
+ jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
+ if (!jbd2_handle_cache) {
+ printk(KERN_EMERG "JBD2: failed to create handle cache\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void jbd2_journal_destroy_inode_cache(void)
+{
+ kmem_cache_destroy(jbd2_inode_cache);
+ jbd2_inode_cache = NULL;
+}
+
static void jbd2_journal_destroy_handle_cache(void)
{
kmem_cache_destroy(jbd2_handle_cache);
jbd2_handle_cache = NULL;
- kmem_cache_destroy(jbd2_inode_cache);
- jbd2_inode_cache = NULL;
}
/*
@@ -2676,21 +2674,27 @@
{
int ret;
- ret = jbd2_journal_init_revoke_caches();
+ ret = jbd2_journal_init_revoke_record_cache();
+ if (ret == 0)
+ ret = jbd2_journal_init_revoke_table_cache();
if (ret == 0)
ret = jbd2_journal_init_journal_head_cache();
if (ret == 0)
ret = jbd2_journal_init_handle_cache();
if (ret == 0)
+ ret = jbd2_journal_init_inode_cache();
+ if (ret == 0)
ret = jbd2_journal_init_transaction_cache();
return ret;
}
static void jbd2_journal_destroy_caches(void)
{
- jbd2_journal_destroy_revoke_caches();
+ jbd2_journal_destroy_revoke_record_cache();
+ jbd2_journal_destroy_revoke_table_cache();
jbd2_journal_destroy_journal_head_cache();
jbd2_journal_destroy_handle_cache();
+ jbd2_journal_destroy_inode_cache();
jbd2_journal_destroy_transaction_cache();
jbd2_journal_destroy_slabs();
}
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index a1143e5..f08073d 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -178,33 +178,41 @@
return NULL;
}
-void jbd2_journal_destroy_revoke_caches(void)
+void jbd2_journal_destroy_revoke_record_cache(void)
{
kmem_cache_destroy(jbd2_revoke_record_cache);
jbd2_revoke_record_cache = NULL;
+}
+
+void jbd2_journal_destroy_revoke_table_cache(void)
+{
kmem_cache_destroy(jbd2_revoke_table_cache);
jbd2_revoke_table_cache = NULL;
}
-int __init jbd2_journal_init_revoke_caches(void)
+int __init jbd2_journal_init_revoke_record_cache(void)
{
J_ASSERT(!jbd2_revoke_record_cache);
- J_ASSERT(!jbd2_revoke_table_cache);
-
jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY);
- if (!jbd2_revoke_record_cache)
- goto record_cache_failure;
+ if (!jbd2_revoke_record_cache) {
+ pr_emerg("JBD2: failed to create revoke_record cache\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int __init jbd2_journal_init_revoke_table_cache(void)
+{
+ J_ASSERT(!jbd2_revoke_table_cache);
jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s,
SLAB_TEMPORARY);
- if (!jbd2_revoke_table_cache)
- goto table_cache_failure;
- return 0;
-table_cache_failure:
- jbd2_journal_destroy_revoke_caches();
-record_cache_failure:
+ if (!jbd2_revoke_table_cache) {
+ pr_emerg("JBD2: failed to create revoke_table cache\n");
return -ENOMEM;
+ }
+ return 0;
}
static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
@@ -630,10 +638,8 @@
{
jbd2_journal_revoke_header_t *header;
- if (is_journal_aborted(journal)) {
- put_bh(descriptor);
+ if (is_journal_aborted(journal))
return;
- }
header = (jbd2_journal_revoke_header_t *)descriptor->b_data;
header->r_count = cpu_to_be32(offset);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index c0b66a7..bee8498 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -42,9 +42,11 @@
0,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
NULL);
- if (transaction_cache)
- return 0;
- return -ENOMEM;
+ if (!transaction_cache) {
+ pr_emerg("JBD2: failed to create transaction cache\n");
+ return -ENOMEM;
+ }
+ return 0;
}
void jbd2_journal_destroy_transaction_cache(void)
@@ -63,7 +65,7 @@
/*
* jbd2_get_transaction: obtain a new transaction_t object.
*
- * Simply allocate and initialise a new transaction. Create it in
+ * Simply initialise a new transaction. Initialize it in
* RUNNING state and add it to the current journal (which should not
* have an existing running transaction: we only make a new transaction
* once we have started to commit the old one).
@@ -75,8 +77,8 @@
*
*/
-static transaction_t *
-jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
+static void jbd2_get_transaction(journal_t *journal,
+ transaction_t *transaction)
{
transaction->t_journal = journal;
transaction->t_state = T_RUNNING;
@@ -100,8 +102,6 @@
transaction->t_max_wait = 0;
transaction->t_start = jiffies;
transaction->t_requested = 0;
-
- return transaction;
}
/*
@@ -138,9 +138,9 @@
}
/*
- * Wait until running transaction passes T_LOCKED state. Also starts the commit
- * if needed. The function expects running transaction to exist and releases
- * j_state_lock.
+ * Wait until running transaction passes to T_FLUSH state and new transaction
+ * can thus be started. Also starts the commit if needed. The function expects
+ * running transaction to exist and releases j_state_lock.
*/
static void wait_transaction_locked(journal_t *journal)
__releases(journal->j_state_lock)
@@ -160,6 +160,32 @@
finish_wait(&journal->j_wait_transaction_locked, &wait);
}
+/*
+ * Wait until running transaction transitions from T_SWITCH to T_FLUSH
+ * state and new transaction can thus be started. The function releases
+ * j_state_lock.
+ */
+static void wait_transaction_switching(journal_t *journal)
+ __releases(journal->j_state_lock)
+{
+ DEFINE_WAIT(wait);
+
+ if (WARN_ON(!journal->j_running_transaction ||
+ journal->j_running_transaction->t_state != T_SWITCH))
+ return;
+ prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
+ TASK_UNINTERRUPTIBLE);
+ read_unlock(&journal->j_state_lock);
+ /*
+ * We don't call jbd2_might_wait_for_commit() here as there's no
+ * waiting for outstanding handles happening anymore in T_SWITCH state
+ * and handling of reserved handles actually relies on that for
+ * correctness.
+ */
+ schedule();
+ finish_wait(&journal->j_wait_transaction_locked, &wait);
+}
+
static void sub_reserved_credits(journal_t *journal, int blocks)
{
atomic_sub(blocks, &journal->j_reserved_credits);
@@ -183,7 +209,8 @@
* If the current transaction is locked down for commit, wait
* for the lock to be released.
*/
- if (t->t_state == T_LOCKED) {
+ if (t->t_state != T_RUNNING) {
+ WARN_ON_ONCE(t->t_state >= T_FLUSH);
wait_transaction_locked(journal);
return 1;
}
@@ -360,8 +387,14 @@
/*
* We have handle reserved so we are allowed to join T_LOCKED
* transaction and we don't have to check for transaction size
- * and journal space.
+ * and journal space. But we still have to wait while running
+ * transaction is being switched to a committing one as it
+ * won't wait for any handles anymore.
*/
+ if (transaction->t_state == T_SWITCH) {
+ wait_transaction_switching(journal);
+ goto repeat;
+ }
sub_reserved_credits(journal, blocks);
handle->h_reserved = 0;
}
@@ -536,6 +569,9 @@
}
handle->h_type = type;
handle->h_line_no = line_no;
+ trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
+ handle->h_transaction->t_tid, type,
+ line_no, handle->h_buffer_credits);
return 0;
}
EXPORT_SYMBOL(jbd2_journal_start_reserved);
@@ -910,7 +946,7 @@
* this is the first time this transaction is touching this buffer,
* reset the modified flag
*/
- jh->b_modified = 0;
+ jh->b_modified = 0;
/*
* If the buffer is not journaled right now, we need to make sure it
@@ -1219,11 +1255,12 @@
struct journal_head *jh;
char *committed_data = NULL;
- JBUFFER_TRACE(jh, "entry");
if (jbd2_write_access_granted(handle, bh, true))
return 0;
jh = jbd2_journal_add_journal_head(bh);
+ JBUFFER_TRACE(jh, "entry");
+
/*
* Do this first --- it can drop the journal lock, so we want to
* make sure that obtaining the committed_data is done
@@ -1334,15 +1371,17 @@
if (is_handle_aborted(handle))
return -EROFS;
- if (!buffer_jbd(bh)) {
- ret = -EUCLEAN;
- goto out;
- }
+ if (!buffer_jbd(bh))
+ return -EUCLEAN;
+
/*
* We don't grab jh reference here since the buffer must be part
* of the running transaction.
*/
jh = bh2jh(bh);
+ jbd_debug(5, "journal_head %p\n", jh);
+ JBUFFER_TRACE(jh, "entry");
+
/*
* This and the following assertions are unreliable since we may see jh
* in inconsistent state unless we grab bh_state lock. But this is
@@ -1376,9 +1415,6 @@
}
journal = transaction->t_journal;
- jbd_debug(5, "journal_head %p\n", jh);
- JBUFFER_TRACE(jh, "entry");
-
jbd_lock_bh_state(bh);
if (jh->b_modified == 0) {
@@ -1564,9 +1600,7 @@
__jbd2_journal_unfile_buffer(jh);
if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
- __bforget(bh);
- goto drop;
+ goto not_jbd;
}
}
spin_unlock(&journal->j_list_lock);
@@ -1576,14 +1610,21 @@
/* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */
JBUFFER_TRACE(jh, "belongs to older transaction");
- /* ... but we CAN drop it from the new transaction if we
- * have also modified it since the original commit. */
+ /* ... but we CAN drop it from the new transaction by
+ * marking the buffer as freed and setting b_next_transaction
+ * to the new transaction, so that the commit code knows it
+ * should clear the dirty bits when it is done with the
+ * buffer, and so that the buffer can be checkpointed only
+ * after the new transaction commits. */
- if (jh->b_next_transaction) {
- J_ASSERT(jh->b_next_transaction == transaction);
+ set_buffer_freed(bh);
+
+ if (!jh->b_next_transaction) {
spin_lock(&journal->j_list_lock);
- jh->b_next_transaction = NULL;
+ jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock);
+ } else {
+ J_ASSERT(jh->b_next_transaction == transaction);
/*
* only drop a reference if this transaction modified
@@ -1592,9 +1633,40 @@
if (was_modified)
drop_reserve = 1;
}
+ } else {
+ /*
+ * Finally, if the buffer does not belong to any
+ * transaction, we can just drop it now if it has no
+ * checkpoint.
+ */
+ spin_lock(&journal->j_list_lock);
+ if (!jh->b_cp_transaction) {
+ JBUFFER_TRACE(jh, "belongs to none transaction");
+ spin_unlock(&journal->j_list_lock);
+ goto not_jbd;
+ }
+
+ /*
+ * Otherwise, if the buffer has been written to disk,
+ * it is safe to remove the checkpoint and drop it.
+ */
+ if (!buffer_dirty(bh)) {
+ __jbd2_journal_remove_checkpoint(jh);
+ spin_unlock(&journal->j_list_lock);
+ goto not_jbd;
+ }
+
+ /*
+ * The buffer has still not been written to disk, so we
+ * attach it to the current transaction so that the
+ * buffer can be checkpointed only after the current
+ * transaction commits.
+ */
+ clear_buffer_dirty(bh);
+ __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
+ spin_unlock(&journal->j_list_lock);
}
-not_jbd:
jbd_unlock_bh_state(bh);
__brelse(bh);
drop:
@@ -1603,6 +1675,11 @@
handle->h_buffer_credits++;
}
return err;
+
+not_jbd:
+ jbd_unlock_bh_state(bh);
+ __bforget(bh);
+ goto drop;
}
/**
@@ -2491,7 +2568,7 @@
* File inode in the inode list of the handle's transaction
*/
static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
- unsigned long flags)
+ unsigned long flags, loff_t start_byte, loff_t end_byte)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
@@ -2503,26 +2580,17 @@
jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
transaction->t_tid);
- /*
- * First check whether inode isn't already on the transaction's
- * lists without taking the lock. Note that this check is safe
- * without the lock as we cannot race with somebody removing inode
- * from the transaction. The reason is that we remove inode from the
- * transaction only in journal_release_jbd_inode() and when we commit
- * the transaction. We are guarded from the first case by holding
- * a reference to the inode. We are safe against the second case
- * because if jinode->i_transaction == transaction, commit code
- * cannot touch the transaction because we hold reference to it,
- * and if jinode->i_next_transaction == transaction, commit code
- * will only file the inode where we want it.
- */
- if ((jinode->i_transaction == transaction ||
- jinode->i_next_transaction == transaction) &&
- (jinode->i_flags & flags) == flags)
- return 0;
-
spin_lock(&journal->j_list_lock);
jinode->i_flags |= flags;
+
+ if (jinode->i_dirty_end) {
+ jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
+ jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
+ } else {
+ jinode->i_dirty_start = start_byte;
+ jinode->i_dirty_end = end_byte;
+ }
+
/* Is inode already attached where we need it? */
if (jinode->i_transaction == transaction ||
jinode->i_next_transaction == transaction)
@@ -2554,15 +2622,19 @@
return 0;
}
-int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
+int jbd2_journal_inode_ranged_write(handle_t *handle,
+ struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
{
return jbd2_journal_file_inode(handle, jinode,
- JI_WRITE_DATA | JI_WAIT_DATA);
+ JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
+ start_byte + length - 1);
}
-int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
+int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
+ loff_t start_byte, loff_t length)
{
- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA);
+ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
+ start_byte, start_byte + length - 1);
}
/*