Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 244531d..d691d17 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -391,7 +391,7 @@
* inode's preallocations.
*/
if ((ei->i_reserved_data_blocks == 0) &&
- (atomic_read(&inode->i_writecount) == 0))
+ !inode_is_open_for_write(inode))
ext4_discard_preallocations(inode);
}
@@ -399,6 +399,10 @@
unsigned int line,
struct ext4_map_blocks *map)
{
+ if (ext4_has_feature_journal(inode->i_sb) &&
+ (inode->i_ino ==
+ le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ return 0;
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
@@ -415,7 +419,7 @@
{
int ret;
- if (ext4_encrypted_inode(inode))
+ if (IS_ENCRYPTED(inode))
return fscrypt_zeroout_range(inode, lblk, pblk, len);
ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
@@ -523,7 +527,7 @@
return -EFSCORRUPTED;
/* Lookup extent status tree firstly */
- if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+ if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk;
@@ -541,7 +545,7 @@
map->m_len = retval;
retval = 0;
} else {
- BUG_ON(1);
+ BUG();
}
#ifdef ES_AGGRESSIVE_TEST
ext4_map_blocks_es_recheck(handle, inode, map,
@@ -577,8 +581,8 @@
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
!(status & EXTENT_STATUS_WRITTEN) &&
- ext4_find_delalloc_range(inode, map->m_lblk,
- map->m_lblk + map->m_len - 1))
+ ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+ map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
ret = ext4_es_insert_extent(inode, map->m_lblk,
map->m_len, map->m_pblk, status);
@@ -678,8 +682,6 @@
if (flags & EXT4_GET_BLOCKS_ZERO &&
map->m_flags & EXT4_MAP_MAPPED &&
map->m_flags & EXT4_MAP_NEW) {
- clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
- map->m_len);
ret = ext4_issue_zeroout(inode, map->m_lblk,
map->m_pblk, map->m_len);
if (ret) {
@@ -693,7 +695,7 @@
* extent status tree.
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
- ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+ ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
if (ext4_es_is_written(&es))
goto out_sem;
}
@@ -701,8 +703,8 @@
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
!(status & EXTENT_STATUS_WRITTEN) &&
- ext4_find_delalloc_range(inode, map->m_lblk,
- map->m_lblk + map->m_len - 1))
+ ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+ map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
@@ -729,10 +731,16 @@
!(flags & EXT4_GET_BLOCKS_ZERO) &&
!ext4_is_quota_file(inode) &&
ext4_should_order_data(inode)) {
+ loff_t start_byte =
+ (loff_t)map->m_lblk << inode->i_blkbits;
+ loff_t length = (loff_t)map->m_len << inode->i_blkbits;
+
if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
- ret = ext4_jbd2_inode_add_wait(handle, inode);
+ ret = ext4_jbd2_inode_add_wait(handle, inode,
+ start_byte, length);
else
- ret = ext4_jbd2_inode_add_write(handle, inode);
+ ret = ext4_jbd2_inode_add_write(handle, inode,
+ start_byte, length);
if (ret)
return ret;
}
@@ -1016,7 +1024,7 @@
bh = ext4_getblk(handle, inode, block, map_flags);
if (IS_ERR(bh))
return bh;
- if (!bh || buffer_uptodate(bh))
+ if (!bh || ext4_buffer_uptodate(bh))
return bh;
ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh);
wait_on_buffer(bh);
@@ -1043,7 +1051,7 @@
for (i = 0; i < bh_count; i++)
/* Note that NULL bhs[i] is valid because of holes. */
- if (bhs[i] && !buffer_uptodate(bhs[i]))
+ if (bhs[i] && !ext4_buffer_uptodate(bhs[i]))
ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
&bhs[i]);
@@ -1150,7 +1158,7 @@
return ret;
}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block)
{
@@ -1162,8 +1170,9 @@
int err = 0;
unsigned blocksize = inode->i_sb->s_blocksize;
unsigned bbits;
- struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
- bool decrypt = false;
+ struct buffer_head *bh, *head, *wait[2];
+ int nr_wait = 0;
+ int i;
BUG_ON(!PageLocked(page));
BUG_ON(from > PAGE_SIZE);
@@ -1194,7 +1203,6 @@
if (err)
break;
if (buffer_new(bh)) {
- clean_bdev_bh_alias(bh);
if (PageUptodate(page)) {
clear_buffer_new(bh);
set_buffer_uptodate(bh);
@@ -1216,24 +1224,32 @@
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
ll_rw_block(REQ_OP_READ, 0, 1, &bh);
- *wait_bh++ = bh;
- decrypt = ext4_encrypted_inode(inode) &&
- S_ISREG(inode->i_mode);
+ wait[nr_wait++] = bh;
}
}
/*
* If we issued read requests, let them complete.
*/
- while (wait_bh > wait) {
- wait_on_buffer(*--wait_bh);
- if (!buffer_uptodate(*wait_bh))
+ for (i = 0; i < nr_wait; i++) {
+ wait_on_buffer(wait[i]);
+ if (!buffer_uptodate(wait[i]))
err = -EIO;
}
- if (unlikely(err))
+ if (unlikely(err)) {
page_zero_new_buffers(page, from, to);
- else if (decrypt)
- err = fscrypt_decrypt_page(page->mapping->host, page,
- PAGE_SIZE, 0, page->index);
+ } else if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
+ for (i = 0; i < nr_wait; i++) {
+ int err2;
+
+ err2 = fscrypt_decrypt_pagecache_blocks(page, blocksize,
+ bh_offset(wait[i]));
+ if (err2) {
+ clear_buffer_uptodate(wait[i]);
+ err = err2;
+ }
+ }
+ }
+
return err;
}
#endif
@@ -1303,7 +1319,7 @@
/* In case writeback began while the page was unlocked */
wait_for_stable_page(page);
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
if (ext4_should_dioread_nolock(inode))
ret = ext4_block_write_begin(page, pos, len,
ext4_get_block_unwritten);
@@ -1324,6 +1340,9 @@
}
if (ret) {
+ bool extended = (pos + len > inode->i_size) &&
+ !ext4_verity_in_progress(inode);
+
unlock_page(page);
/*
* __block_write_begin may have instantiated a few blocks
@@ -1333,11 +1352,11 @@
* Add inode to orphan list in case we crash before
* truncate finishes
*/
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ if (extended && ext4_can_truncate(inode))
ext4_orphan_add(handle, inode);
ext4_journal_stop(handle);
- if (pos + len > inode->i_size) {
+ if (extended) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might
@@ -1390,6 +1409,7 @@
int ret = 0, ret2;
int i_size_changed = 0;
int inline_data = ext4_has_inline_data(inode);
+ bool verity = ext4_verity_in_progress(inode);
trace_ext4_write_end(inode, pos, len, copied);
if (inline_data) {
@@ -1407,12 +1427,16 @@
/*
* it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size.
+ *
+ * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree
+ * blocks are being written past EOF, so skip the i_size update.
*/
- i_size_changed = ext4_update_inode_size(inode, pos + copied);
+ if (!verity)
+ i_size_changed = ext4_update_inode_size(inode, pos + copied);
unlock_page(page);
put_page(page);
- if (old_size < pos)
+ if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
@@ -1423,7 +1447,7 @@
if (i_size_changed || inline_data)
ext4_mark_inode_dirty(handle, inode);
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1434,7 +1458,7 @@
if (!ret)
ret = ret2;
- if (pos + len > inode->i_size) {
+ if (pos + len > inode->i_size && !verity) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
@@ -1495,6 +1519,7 @@
unsigned from, to;
int size_changed = 0;
int inline_data = ext4_has_inline_data(inode);
+ bool verity = ext4_verity_in_progress(inode);
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_SIZE - 1);
@@ -1524,13 +1549,14 @@
if (!partial)
SetPageUptodate(page);
}
- size_changed = ext4_update_inode_size(inode, pos + copied);
+ if (!verity)
+ size_changed = ext4_update_inode_size(inode, pos + copied);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
unlock_page(page);
put_page(page);
- if (old_size < pos)
+ if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos);
if (size_changed || inline_data) {
@@ -1539,7 +1565,7 @@
ret = ret2;
}
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1550,7 +1576,7 @@
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
- if (pos + len > inode->i_size) {
+ if (pos + len > inode->i_size && !verity) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
@@ -1595,7 +1621,7 @@
return 0; /* success */
}
-static void ext4_da_release_space(struct inode *inode, int to_free)
+void ext4_da_release_space(struct inode *inode, int to_free)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -1630,64 +1656,6 @@
dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
}
-static void ext4_da_page_release_reservation(struct page *page,
- unsigned int offset,
- unsigned int length)
-{
- int to_release = 0, contiguous_blks = 0;
- struct buffer_head *head, *bh;
- unsigned int curr_off = 0;
- struct inode *inode = page->mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned int stop = offset + length;
- int num_clusters;
- ext4_fsblk_t lblk;
-
- BUG_ON(stop > PAGE_SIZE || stop < length);
-
- head = page_buffers(page);
- bh = head;
- do {
- unsigned int next_off = curr_off + bh->b_size;
-
- if (next_off > stop)
- break;
-
- if ((offset <= curr_off) && (buffer_delay(bh))) {
- to_release++;
- contiguous_blks++;
- clear_buffer_delay(bh);
- } else if (contiguous_blks) {
- lblk = page->index <<
- (PAGE_SHIFT - inode->i_blkbits);
- lblk += (curr_off >> inode->i_blkbits) -
- contiguous_blks;
- ext4_es_remove_extent(inode, lblk, contiguous_blks);
- contiguous_blks = 0;
- }
- curr_off = next_off;
- } while ((bh = bh->b_this_page) != head);
-
- if (contiguous_blks) {
- lblk = page->index << (PAGE_SHIFT - inode->i_blkbits);
- lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
- ext4_es_remove_extent(inode, lblk, contiguous_blks);
- }
-
- /* If we have released all the blocks belonging to a cluster, then we
- * need to release the reserved space for that cluster. */
- num_clusters = EXT4_NUM_B2C(sbi, to_release);
- while (num_clusters > 0) {
- lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) +
- ((num_clusters - 1) << sbi->s_cluster_bits);
- if (sbi->s_cluster_ratio == 1 ||
- !ext4_find_delalloc_cluster(inode, lblk))
- ext4_da_release_space(inode, 1);
-
- num_clusters--;
- }
-}
-
/*
* Delayed allocation stuff
*/
@@ -1781,6 +1749,65 @@
}
/*
+ * ext4_insert_delayed_block - adds a delayed block to the extents status
+ * tree, incrementing the reserved cluster/block
+ * count or making a pending reservation
+ * where needed
+ *
+ * @inode - file containing the newly added block
+ * @lblk - logical block to be added
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int ret;
+ bool allocated = false;
+
+ /*
+ * If the cluster containing lblk is shared with a delayed,
+ * written, or unwritten extent in a bigalloc file system, it's
+ * already been accounted for and does not need to be reserved.
+ * A pending reservation must be made for the cluster if it's
+ * shared with a written or unwritten extent and doesn't already
+ * have one. Written and unwritten extents can be purged from the
+ * extents status tree if the system is under memory pressure, so
+ * it's necessary to examine the extent tree if a search of the
+ * extents status tree doesn't get a match.
+ */
+ if (sbi->s_cluster_ratio == 1) {
+ ret = ext4_da_reserve_space(inode);
+ if (ret != 0) /* ENOSPC */
+ goto errout;
+ } else { /* bigalloc */
+ if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
+ if (!ext4_es_scan_clu(inode,
+ &ext4_es_is_mapped, lblk)) {
+ ret = ext4_clu_mapped(inode,
+ EXT4_B2C(sbi, lblk));
+ if (ret < 0)
+ goto errout;
+ if (ret == 0) {
+ ret = ext4_da_reserve_space(inode);
+ if (ret != 0) /* ENOSPC */
+ goto errout;
+ } else {
+ allocated = true;
+ }
+ } else {
+ allocated = true;
+ }
+ }
+ }
+
+ ret = ext4_es_insert_delayed_block(inode, lblk, allocated);
+
+errout:
+ return ret;
+}
+
+/*
* This function is grabs code from the very beginning of
* ext4_map_blocks, but assumes that the caller is from delayed write
* time. This function looks up the requested blocks and sets the
@@ -1808,7 +1835,7 @@
(unsigned long) map->m_lblk);
/* Lookup extent status tree firstly */
- if (ext4_es_lookup_extent(inode, iblock, &es)) {
+ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
if (ext4_es_is_hole(&es)) {
retval = 0;
down_read(&EXT4_I(inode)->i_data_sem);
@@ -1836,7 +1863,7 @@
else if (ext4_es_is_unwritten(&es))
map->m_flags |= EXT4_MAP_UNWRITTEN;
else
- BUG_ON(1);
+ BUG();
#ifdef ES_AGGRESSIVE_TEST
ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
@@ -1859,28 +1886,14 @@
add_delayed:
if (retval == 0) {
int ret;
+
/*
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
- /*
- * If the block was allocated from previously allocated cluster,
- * then we don't need to reserve it again. However we still need
- * to reserve metadata for every block we're going to write.
- */
- if (EXT4_SB(inode->i_sb)->s_cluster_ratio == 1 ||
- !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
- ret = ext4_da_reserve_space(inode);
- if (ret) {
- /* not enough space to reserve */
- retval = ret;
- goto out_unlock;
- }
- }
- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- ~0, EXTENT_STATUS_DELAYED);
- if (ret) {
+ ret = ext4_insert_delayed_block(inode, map->m_lblk);
+ if (ret != 0) {
retval = ret;
goto out_unlock;
}
@@ -2116,7 +2129,8 @@
trace_ext4_writepage(page);
size = i_size_read(inode);
- if (page->index == size >> PAGE_SHIFT)
+ if (page->index == size >> PAGE_SHIFT &&
+ !ext4_verity_in_progress(inode))
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
@@ -2200,7 +2214,8 @@
* after page tables are updated.
*/
size = i_size_read(mpd->inode);
- if (page->index == size >> PAGE_SHIFT)
+ if (page->index == size >> PAGE_SHIFT &&
+ !ext4_verity_in_progress(mpd->inode))
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
@@ -2299,6 +2314,9 @@
ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
>> inode->i_blkbits;
+ if (ext4_verity_in_progress(inode))
+ blocks = EXT_MAX_BLOCKS;
+
do {
BUG_ON(buffer_locked(bh));
@@ -2460,10 +2478,6 @@
}
BUG_ON(map->m_len == 0);
- if (map->m_flags & EXT4_MAP_NEW) {
- clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
- map->m_len);
- }
return 0;
}
@@ -2613,7 +2627,7 @@
long left = mpd->wbc->nr_to_write;
pgoff_t index = mpd->first_page;
pgoff_t end = mpd->last_page;
- int tag;
+ xa_mark_t tag;
int i, err = 0;
int blkbits = mpd->inode->i_blkbits;
ext4_lblk_t lblk;
@@ -2743,14 +2757,6 @@
goto out_writepages;
}
- if (ext4_should_dioread_nolock(inode)) {
- /*
- * We may need to convert up to one extent per block in
- * the page and we may dirty the inode.
- */
- rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits);
- }
-
/*
* If we have inline data and arrive here, it means that
* we will soon create the block for the 1st page, so
@@ -2769,6 +2775,15 @@
ext4_journal_stop(handle);
}
+ if (ext4_should_dioread_nolock(inode)) {
+ /*
+ * We may need to convert up to one extent per block in
+ * the page and we may dirty the inode.
+ */
+ rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
+ PAGE_SIZE >> inode->i_blkbits);
+ }
+
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
@@ -2805,12 +2820,12 @@
goto unplug;
}
ret = mpage_prepare_extent_to_map(&mpd);
+ /* Unlock pages we didn't use */
+ mpage_release_unused_pages(&mpd, false);
/* Submit prepared bio */
ext4_io_submit(&mpd.io_submit);
ext4_put_io_end_defer(mpd.io_submit.io_end);
mpd.io_submit.io_end = NULL;
- /* Unlock pages we didn't use */
- mpage_release_unused_pages(&mpd, false);
if (ret < 0)
goto unplug;
@@ -2878,10 +2893,11 @@
handle = NULL;
mpd.do_map = 0;
}
- /* Submit prepared bio */
- ext4_io_submit(&mpd.io_submit);
/* Unlock pages we didn't use */
mpage_release_unused_pages(&mpd, give_up_on_write);
+ /* Submit prepared bio */
+ ext4_io_submit(&mpd.io_submit);
+
/*
* Drop our io_end reference we got from init. We have
* to be careful and use deferred io_end finishing if
@@ -3017,8 +3033,8 @@
index = pos >> PAGE_SHIFT;
- if (ext4_nonda_switch(inode->i_sb) ||
- S_ISLNK(inode->i_mode)) {
+ if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) ||
+ ext4_verity_in_progress(inode)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
return ext4_write_begin(file, mapping, pos,
len, flags, pagep, fsdata);
@@ -3074,7 +3090,7 @@
/* In case writeback began while the page was unlocked */
wait_for_stable_page(page);
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
ret = ext4_block_write_begin(page, pos, len,
ext4_da_get_block_prep);
#else
@@ -3183,24 +3199,6 @@
return ret ? ret : copied;
}
-static void ext4_da_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
-{
- /*
- * Drop reserved blocks
- */
- BUG_ON(!PageLocked(page));
- if (!page_has_buffers(page))
- goto out;
-
- ext4_da_page_release_reservation(page, offset, length);
-
-out:
- ext4_invalidatepage(page, offset, length);
-
- return;
-}
-
/*
* Force all delayed allocation blocks to be allocated for a given inode.
*/
@@ -3450,7 +3448,8 @@
ext4_lblk_t end = map.m_lblk + map.m_len - 1;
struct extent_status es;
- ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es);
+ ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
+ map.m_lblk, end, &es);
if (!es.es_len || es.es_lblk > end) {
/* entire range is a hole */
@@ -3848,10 +3847,12 @@
loff_t offset = iocb->ki_pos;
ssize_t ret;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+#ifdef CONFIG_FS_ENCRYPTION
+ if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
return 0;
#endif
+ if (fsverity_active(inode))
+ return 0;
/*
* If we are doing data journalling we don't support O_DIRECT
@@ -3940,7 +3941,7 @@
.write_end = ext4_da_write_end,
.set_page_dirty = ext4_set_page_dirty,
.bmap = ext4_bmap,
- .invalidatepage = ext4_da_invalidatepage,
+ .invalidatepage = ext4_invalidatepage,
.releasepage = ext4_releasepage,
.direct_IO = ext4_direct_IO,
.migratepage = buffer_migrate_page,
@@ -4033,13 +4034,11 @@
/* Uhhuh. Read error. Complain and punt. */
if (!buffer_uptodate(bh))
goto unlock;
- if (S_ISREG(inode->i_mode) &&
- ext4_encrypted_inode(inode)) {
+ if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) {
/* We expect the key to be set. */
BUG_ON(!fscrypt_has_encryption_key(inode));
- BUG_ON(blocksize != PAGE_SIZE);
- WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
- page, PAGE_SIZE, 0, page->index));
+ WARN_ON_ONCE(fscrypt_decrypt_pagecache_blocks(
+ page, blocksize, bh_offset(bh)));
}
}
if (ext4_should_journal_data(inode)) {
@@ -4057,7 +4056,8 @@
err = 0;
mark_buffer_dirty(bh);
if (ext4_should_order_data(inode))
- err = ext4_jbd2_inode_add_write(handle, inode);
+ err = ext4_jbd2_inode_add_write(handle, inode, from,
+ length);
}
unlock:
@@ -4110,7 +4110,7 @@
struct inode *inode = mapping->host;
/* If we are processing an encrypted inode during orphan list handling */
- if (ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode))
+ if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode))
return 0;
blocksize = inode->i_sb->s_blocksize;
@@ -4253,6 +4253,15 @@
trace_ext4_punch_hole(inode, offset, length, 0);
+ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+ if (ext4_has_inline_data(inode)) {
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ ret = ext4_convert_inline_data(inode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ if (ret)
+ return ret;
+ }
+
/*
* Write out all dirty pages to avoid race conditions
* Then release them.
@@ -4542,6 +4551,7 @@
struct buffer_head *bh;
struct super_block *sb = inode->i_sb;
ext4_fsblk_t block;
+ struct blk_plug plug;
int inodes_per_block, inode_offset;
iloc->bh = NULL;
@@ -4630,6 +4640,7 @@
* If we need to do any I/O, try to pre-readahead extra
* blocks from the inode table.
*/
+ blk_start_plug(&plug);
if (EXT4_SB(sb)->s_inode_readahead_blks) {
ext4_fsblk_t b, end, table;
unsigned num;
@@ -4660,6 +4671,7 @@
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
+ blk_finish_plug(&plug);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
EXT4_ERROR_INODE_BLOCK(inode, block,
@@ -4690,7 +4702,9 @@
return false;
if (ext4_has_inline_data(inode))
return false;
- if (ext4_encrypted_inode(inode))
+ if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT))
+ return false;
+ if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
return false;
return true;
}
@@ -4714,9 +4728,13 @@
new_fl |= S_DAX;
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
+ if (flags & EXT4_CASEFOLD_FL)
+ new_fl |= S_CASEFOLD;
+ if (flags & EXT4_VERITY_FL)
+ new_fl |= S_VERITY;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
- S_ENCRYPTED);
+ S_ENCRYPTED|S_CASEFOLD|S_VERITY);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -4786,7 +4804,9 @@
return inode_peek_iversion(inode);
}
-struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ ext4_iget_flags flags, const char *function,
+ unsigned int line)
{
struct ext4_iloc iloc;
struct ext4_inode *raw_inode;
@@ -4800,6 +4820,18 @@
gid_t i_gid;
projid_t i_projid;
+ if ((!(flags & EXT4_IGET_SPECIAL) &&
+ (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) ||
+ (ino < EXT4_ROOT_INO) ||
+ (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
+ if (flags & EXT4_IGET_HANDLE)
+ return ERR_PTR(-ESTALE);
+ __ext4_error(sb, function, line,
+ "inode #%lu: comm %s: iget: illegal inode #",
+ ino, current->comm);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+
inode = iget_locked(sb, ino);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -4815,18 +4847,26 @@
raw_inode = ext4_raw_inode(&iloc);
if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
- EXT4_ERROR_INODE(inode, "root inode unallocated");
+ ext4_error_inode(inode, function, line, 0,
+ "iget: root inode unallocated");
ret = -EFSCORRUPTED;
goto bad_inode;
}
+ if ((flags & EXT4_IGET_HANDLE) &&
+ (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
+ ret = -ESTALE;
+ goto bad_inode;
+ }
+
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
EXT4_INODE_SIZE(inode->i_sb) ||
(ei->i_extra_isize & 3)) {
- EXT4_ERROR_INODE(inode,
- "bad extra_isize %u (inode size %u)",
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bad extra_isize %u "
+ "(inode size %u)",
ei->i_extra_isize,
EXT4_INODE_SIZE(inode->i_sb));
ret = -EFSCORRUPTED;
@@ -4848,7 +4888,8 @@
}
if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
- EXT4_ERROR_INODE(inode, "checksum invalid");
+ ext4_error_inode(inode, function, line, 0,
+ "iget: checksum invalid");
ret = -EFSBADCRC;
goto bad_inode;
}
@@ -4905,7 +4946,8 @@
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
inode->i_size = ext4_isize(sb, raw_inode);
if ((size = i_size_read(inode)) < 0) {
- EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bad i_size value: %lld", size);
ret = -EFSCORRUPTED;
goto bad_inode;
}
@@ -4981,7 +5023,8 @@
ret = 0;
if (ei->i_file_acl &&
!ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
- EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bad extended attribute block %llu",
ei->i_file_acl);
ret = -EFSCORRUPTED;
goto bad_inode;
@@ -5009,12 +5052,13 @@
} else if (S_ISLNK(inode->i_mode)) {
/* VFS does not allow setting these so must be corruption */
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
- EXT4_ERROR_INODE(inode,
- "immutable or append flags not allowed on symlinks");
+ ext4_error_inode(inode, function, line, 0,
+ "iget: immutable or append flags "
+ "not allowed on symlinks");
ret = -EFSCORRUPTED;
goto bad_inode;
}
- if (ext4_encrypted_inode(inode)) {
+ if (IS_ENCRYPTED(inode)) {
inode->i_op = &ext4_encrypted_symlink_inode_operations;
ext4_set_aops(inode);
} else if (ext4_inode_is_fast_symlink(inode)) {
@@ -5040,9 +5084,13 @@
make_bad_inode(inode);
} else {
ret = -EFSCORRUPTED;
- EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
+ if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
+ ext4_error_inode(inode, function, line, 0,
+ "casefold flag without casefold feature");
brelse(iloc.bh);
unlock_new_inode(inode);
@@ -5054,13 +5102,6 @@
return ERR_PTR(ret);
}
-struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
-{
- if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
- return ERR_PTR(-EFSCORRUPTED);
- return ext4_iget(sb, ino);
-}
-
static int ext4_inode_blocks_set(handle_t *handle,
struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
@@ -5299,7 +5340,6 @@
err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
if (err)
goto out_brelse;
- ext4_update_dynamic_rev(sb);
ext4_set_feature_large_file(sb);
ext4_handle_sync(handle);
err = ext4_handle_dirty_super(handle, sb);
@@ -5349,9 +5389,13 @@
{
int err;
- if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
+ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) ||
+ sb_rdonly(inode->i_sb))
return 0;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
if (EXT4_SB(inode->i_sb)->s_journal) {
if (ext4_journal_current_handle()) {
jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
@@ -5367,7 +5411,8 @@
if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
return 0;
- err = ext4_force_commit(inode->i_sb);
+ err = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
+ EXT4_I(inode)->i_sync_tid);
} else {
struct ext4_iloc iloc;
@@ -5466,6 +5511,14 @@
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
+ if (unlikely(IS_APPEND(inode) &&
+ (ia_valid & (ATTR_MODE | ATTR_UID |
+ ATTR_GID | ATTR_TIMES_SET))))
+ return -EPERM;
+
error = setattr_prepare(dentry, attr);
if (error)
return error;
@@ -5474,6 +5527,10 @@
if (error)
return error;
+ error = fsverity_prepare_setattr(dentry, attr);
+ if (error)
+ return error;
+
if (is_quota_modification(inode, attr)) {
error = dquot_initialize(inode);
if (error)
@@ -5517,7 +5574,7 @@
if (attr->ia_valid & ATTR_SIZE) {
handle_t *handle;
loff_t oldsize = inode->i_size;
- int shrink = (attr->ia_size <= inode->i_size);
+ int shrink = (attr->ia_size < inode->i_size);
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -5531,18 +5588,33 @@
if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
inode_inc_iversion(inode);
- if (ext4_should_order_data(inode) &&
- (attr->ia_size < inode->i_size)) {
- error = ext4_begin_ordered_truncate(inode,
+ if (shrink) {
+ if (ext4_should_order_data(inode)) {
+ error = ext4_begin_ordered_truncate(inode,
attr->ia_size);
- if (error)
- goto err_out;
+ if (error)
+ goto err_out;
+ }
+ /*
+ * Blocks are going to be removed from the inode. Wait
+ * for dio in flight.
+ */
+ inode_dio_wait(inode);
}
+
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ rc = ext4_break_layouts(inode);
+ if (rc) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ return rc;
+ }
+
if (attr->ia_size != inode->i_size) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
- goto err_out;
+ goto out_mmap_sem;
}
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
@@ -5570,33 +5642,14 @@
i_size_write(inode, attr->ia_size);
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
- if (error) {
- if (orphan)
- ext4_orphan_del(NULL, inode);
- goto err_out;
- }
- }
- if (!shrink)
- pagecache_isize_extended(inode, oldsize, inode->i_size);
-
- /*
- * Blocks are going to be removed from the inode. Wait
- * for dio in flight. Temporarily disable
- * dioread_nolock to prevent livelock.
- */
- if (orphan) {
- if (!ext4_should_journal_data(inode)) {
- inode_dio_wait(inode);
- } else
+ if (error)
+ goto out_mmap_sem;
+ if (!shrink) {
+ pagecache_isize_extended(inode, oldsize,
+ inode->i_size);
+ } else if (ext4_should_journal_data(inode)) {
ext4_wait_for_tail_page_commit(inode);
- }
- down_write(&EXT4_I(inode)->i_mmap_sem);
-
- rc = ext4_break_layouts(inode);
- if (rc) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
- error = rc;
- goto err_out;
+ }
}
/*
@@ -5604,11 +5657,16 @@
* in data=journal mode to make pages freeable.
*/
truncate_pagecache(inode, inode->i_size);
- if (shrink) {
+ /*
+ * Call ext4_truncate() even if i_size didn't change to
+ * truncate possible preallocated blocks.
+ */
+ if (attr->ia_size <= oldsize) {
rc = ext4_truncate(inode);
if (rc)
error = rc;
}
+out_mmap_sem:
up_write(&EXT4_I(inode)->i_mmap_sem);
}
@@ -5854,8 +5912,23 @@
{
struct ext4_inode *raw_inode;
struct ext4_xattr_ibody_header *header;
+ unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb);
+ struct ext4_inode_info *ei = EXT4_I(inode);
int error;
+ /* this was checked at iget time, but double check for good measure */
+ if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) ||
+ (ei->i_extra_isize & 3)) {
+ EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)",
+ ei->i_extra_isize,
+ EXT4_INODE_SIZE(inode->i_sb));
+ return -EFSCORRUPTED;
+ }
+ if ((new_extra_isize < ei->i_extra_isize) ||
+ (new_extra_isize < 4) ||
+ (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE))
+ return -EINVAL; /* Should never happen */
+
raw_inode = ext4_raw_inode(iloc);
header = IHDR(inode, raw_inode);
@@ -5945,7 +6018,7 @@
ext4_write_lock_xattr(inode, &no_expand);
- BUFFER_TRACE(iloc.bh, "get_write_access");
+ BUFFER_TRACE(iloc->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, iloc->bh);
if (error) {
brelse(iloc->bh);
@@ -6032,36 +6105,6 @@
return;
}
-#if 0
-/*
- * Bind an inode's backing buffer_head into this transaction, to prevent
- * it from being flushed to disk early. Unlike
- * ext4_reserve_inode_write, this leaves behind no bh reference and
- * returns no iloc structure, so the caller needs to repeat the iloc
- * lookup to mark the inode dirty later.
- */
-static int ext4_pin_inode(handle_t *handle, struct inode *inode)
-{
- struct ext4_iloc iloc;
-
- int err = 0;
- if (handle) {
- err = ext4_get_inode_loc(inode, &iloc);
- if (!err) {
- BUFFER_TRACE(iloc.bh, "get_write_access");
- err = jbd2_journal_get_write_access(handle, iloc.bh);
- if (!err)
- err = ext4_handle_dirty_metadata(handle,
- NULL,
- iloc.bh);
- brelse(iloc.bh);
- }
- }
- ext4_std_error(inode->i_sb, err);
- return err;
-}
-#endif
-
int ext4_change_inode_journal_flag(struct inode *inode, int val)
{
journal_t *journal;
@@ -6154,13 +6197,14 @@
return !buffer_mapped(bh);
}
-int ext4_page_mkwrite(struct vm_fault *vmf)
+vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct page *page = vmf->page;
loff_t size;
unsigned long len;
- int ret;
+ int err;
+ vm_fault_t ret;
struct file *file = vma->vm_file;
struct inode *inode = file_inode(file);
struct address_space *mapping = inode->i_mapping;
@@ -6168,13 +6212,16 @@
get_block_t *get_block;
int retries = 0;
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return VM_FAULT_SIGBUS;
+
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem);
- ret = ext4_convert_inline_data(inode);
- if (ret)
+ err = ext4_convert_inline_data(inode);
+ if (err)
goto out_ret;
/* Delalloc case is easy... */
@@ -6182,9 +6229,9 @@
!ext4_should_journal_data(inode) &&
!ext4_nonda_switch(inode->i_sb)) {
do {
- ret = block_page_mkwrite(vma, vmf,
+ err = block_page_mkwrite(vma, vmf,
ext4_da_get_block_prep);
- } while (ret == -ENOSPC &&
+ } while (err == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries));
goto out_ret;
}
@@ -6229,8 +6276,8 @@
ret = VM_FAULT_SIGBUS;
goto out;
}
- ret = block_page_mkwrite(vma, vmf, get_block);
- if (!ret && ext4_should_journal_data(inode)) {
+ err = block_page_mkwrite(vma, vmf, get_block);
+ if (!err && ext4_should_journal_data(inode)) {
if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
PAGE_SIZE, NULL, do_journal_get_write_access)) {
unlock_page(page);
@@ -6241,24 +6288,24 @@
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
}
ext4_journal_stop(handle);
- if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+ if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_alloc;
out_ret:
- ret = block_page_mkwrite_return(ret);
+ ret = block_page_mkwrite_return(err);
out:
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(inode->i_sb);
return ret;
}
-int ext4_filemap_fault(struct vm_fault *vmf)
+vm_fault_t ext4_filemap_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
- int err;
+ vm_fault_t ret;
down_read(&EXT4_I(inode)->i_mmap_sem);
- err = filemap_fault(vmf);
+ ret = filemap_fault(vmf);
up_read(&EXT4_I(inode)->i_mmap_sem);
- return err;
+ return ret;
}