Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 9a20ef4..652fd2e 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -1,8 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
config F2FS_FS
tristate "F2FS filesystem support"
depends on BLOCK
+ select NLS
select CRYPTO
select CRYPTO_CRC32
+ select F2FS_FS_XATTR if FS_ENCRYPTION
help
F2FS is based on Log-structured File System (LFS), which supports
versatile "flash-friendly" features. The design has been focused on
@@ -58,7 +61,9 @@
Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
Linux. This option enables an extended attribute handler for file
security labels in the f2fs filesystem, so that it requires enabling
- the extended attribute support in advance.
+ the extended attribute support in advance. In particular you need this
+ option if you use the setcap command to assign initial process capabi-
+ lities to executables (the security.* extended attributes).
If you are not using a security module, say N.
@@ -70,17 +75,6 @@
If you want to improve the performance, say N.
-config F2FS_FS_ENCRYPTION
- bool "F2FS Encryption"
- depends on F2FS_FS
- depends on F2FS_FS_XATTR
- select FS_ENCRYPTION
- help
- Enable encryption of f2fs files and directories. This
- feature is similar to ecryptfs, but it is more memory
- efficient since it avoids caching the encrypted and
- decrypted pages in the page cache.
-
config F2FS_IO_TRACE
bool "F2FS IO tracer"
depends on F2FS_FS
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 776c4b9..2aaecc6 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -8,3 +8,4 @@
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
+f2fs-$(CONFIG_FS_VERITY) += verity.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 1118241..217b290 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/acl.c
*
@@ -7,10 +8,6 @@
* Portions of this code from linux/fs/ext2/acl.c
*
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/f2fs_fs.h>
#include "f2fs.h"
@@ -53,6 +50,9 @@
struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1);
const char *end = value + size;
+ if (size < sizeof(struct f2fs_acl_header))
+ return ERR_PTR(-EINVAL);
+
if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION))
return ERR_PTR(-EINVAL);
@@ -160,7 +160,7 @@
return (void *)f2fs_acl;
fail:
- kfree(f2fs_acl);
+ kvfree(f2fs_acl);
return ERR_PTR(-EINVAL);
}
@@ -190,7 +190,7 @@
acl = NULL;
else
acl = ERR_PTR(retval);
- kfree(value);
+ kvfree(value);
return acl;
}
@@ -240,7 +240,7 @@
error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
- kfree(value);
+ kvfree(value);
if (!error)
set_cached_acl(inode, type, acl);
@@ -285,7 +285,7 @@
/* assert(atomic_read(acl->a_refcount) == 1); */
FOREACH_ACL_ENTRY(pa, acl, pe) {
- switch(pa->e_tag) {
+ switch (pa->e_tag) {
case ACL_USER_OBJ:
pa->e_perm &= (mode >> 6) | ~S_IRWXO;
mode &= (pa->e_perm << 6) | ~S_IRWXU;
@@ -326,7 +326,7 @@
}
*mode_p = (*mode_p & ~S_IRWXUGO) | mode;
- return not_equiv;
+ return not_equiv;
}
static int f2fs_acl_create(struct inode *dir, umode_t *mode,
@@ -352,12 +352,14 @@
return PTR_ERR(p);
clone = f2fs_acl_clone(p, GFP_NOFS);
- if (!clone)
- goto no_mem;
+ if (!clone) {
+ ret = -ENOMEM;
+ goto release_acl;
+ }
ret = f2fs_acl_create_masq(clone, mode);
if (ret < 0)
- goto no_mem_clone;
+ goto release_clone;
if (ret == 0)
posix_acl_release(clone);
@@ -371,11 +373,11 @@
return 0;
-no_mem_clone:
+release_clone:
posix_acl_release(clone);
-no_mem:
+release_acl:
posix_acl_release(p);
- return -ENOMEM;
+ return ret;
}
int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
@@ -394,12 +396,16 @@
error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
ipage);
posix_acl_release(default_acl);
+ } else {
+ inode->i_default_acl = NULL;
}
if (acl) {
if (!error)
error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
ipage);
posix_acl_release(acl);
+ } else {
+ inode->i_acl = NULL;
}
return error;
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index 2c68518..b96823c 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/acl.h
*
@@ -7,10 +8,6 @@
* Portions of this code from linux/fs/ext2/acl.h
*
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef __F2FS_ACL_H__
#define __F2FS_ACL_H__
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 59d0472..a0eef95 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/checkpoint.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/bio.h>
@@ -47,7 +44,7 @@
cond_resched();
goto repeat;
}
- f2fs_wait_on_page_writeback(page, META, true);
+ f2fs_wait_on_page_writeback(page, META, true, true);
if (!PageUptodate(page))
SetPageUptodate(page);
return page;
@@ -69,7 +66,7 @@
.old_blkaddr = index,
.new_blkaddr = index,
.encrypted_page = NULL,
- .is_meta = is_meta,
+ .is_por = !is_meta,
};
int err;
@@ -122,11 +119,8 @@
if (PTR_ERR(page) == -EIO &&
++count <= DEFAULT_RETRY_IO_COUNT)
goto retry;
-
f2fs_stop_checkpoint(sbi, false);
- f2fs_bug_on(sbi, 1);
}
-
return page;
}
@@ -136,6 +130,30 @@
return __get_meta_page(sbi, index, false);
}
+static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
+ int type)
+{
+ struct seg_entry *se;
+ unsigned int segno, offset;
+ bool exist;
+
+ if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ)
+ return true;
+
+ segno = GET_SEGNO(sbi, blkaddr);
+ offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+ se = get_seg_entry(sbi, segno);
+
+ exist = f2fs_test_bit(offset, se->cur_valid_map);
+ if (!exist && type == DATA_GENERIC_ENHANCE) {
+ f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
+ blkaddr, exist);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ WARN_ON(1);
+ }
+ return exist;
+}
+
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
@@ -157,15 +175,22 @@
return false;
break;
case META_POR:
- case DATA_GENERIC:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
- blkaddr < MAIN_BLKADDR(sbi))) {
- if (type == DATA_GENERIC) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "access invalid blkaddr:%u", blkaddr);
- WARN_ON(1);
- }
+ blkaddr < MAIN_BLKADDR(sbi)))
return false;
+ break;
+ case DATA_GENERIC:
+ case DATA_GENERIC_ENHANCE:
+ case DATA_GENERIC_ENHANCE_READ:
+ if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ f2fs_warn(sbi, "access invalid blkaddr:%u",
+ blkaddr);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ WARN_ON(1);
+ return false;
+ } else {
+ return __is_bitmap_valid(sbi, blkaddr, type);
}
break;
case META_GENERIC:
@@ -195,7 +220,7 @@
.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
.encrypted_page = NULL,
.in_list = false,
- .is_meta = (type != META_POR),
+ .is_por = (type == META_POR),
};
struct blk_plug plug;
@@ -282,8 +307,7 @@
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
- f2fs_submit_merged_write_cond(sbi, page->mapping->host,
- 0, page->index, META);
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META);
unlock_page(page);
@@ -313,8 +337,9 @@
goto skip_write;
/* collect a number of dirty meta pages and write together */
- if (wbc->for_kupdate ||
- get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
+ if (wbc->sync_mode != WB_SYNC_ALL &&
+ get_pages(sbi, F2FS_DIRTY_META) <
+ nr_pages_to_skip(sbi, META))
goto skip_write;
/* if locked failed, cp will flush dirty pages instead */
@@ -377,9 +402,8 @@
goto continue_unlock;
}
- f2fs_wait_on_page_writeback(page, META, true);
+ f2fs_wait_on_page_writeback(page, META, true, true);
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -413,7 +437,7 @@
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
return 1;
}
@@ -633,9 +657,8 @@
err_out:
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: orphan failed (ino=%x), run fsck to fix.",
- __func__, ino);
+ f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.",
+ __func__, ino);
return err;
}
@@ -651,8 +674,13 @@
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
+ if (bdev_read_only(sbi->sb->s_bdev)) {
+ f2fs_info(sbi, "write access unavailable, skipping orphan cleanup");
+ return 0;
+ }
+
if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
+ f2fs_info(sbi, "orphan cleanup on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
}
@@ -765,13 +793,27 @@
}
}
+static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi,
+ struct f2fs_checkpoint *ckpt)
+{
+ unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset);
+ __u32 chksum;
+
+ chksum = f2fs_crc32(sbi, ckpt, chksum_ofs);
+ if (chksum_ofs < CP_CHKSUM_OFFSET) {
+ chksum_ofs += sizeof(chksum);
+ chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs,
+ F2FS_BLKSIZE - chksum_ofs);
+ }
+ return chksum;
+}
+
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
struct f2fs_checkpoint **cp_block, struct page **cp_page,
unsigned long long *version)
{
- unsigned long blk_size = sbi->blocksize;
size_t crc_offset = 0;
- __u32 crc = 0;
+ __u32 crc;
*cp_page = f2fs_get_meta_page(sbi, cp_addr);
if (IS_ERR(*cp_page))
@@ -780,17 +822,17 @@
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
- if (crc_offset > (blk_size - sizeof(__le32))) {
+ if (crc_offset < CP_MIN_CHKSUM_OFFSET ||
+ crc_offset > CP_CHKSUM_OFFSET) {
f2fs_put_page(*cp_page, 1);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "invalid crc_offset: %zu", crc_offset);
+ f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset);
return -EINVAL;
}
- crc = cur_cp_crc(*cp_block);
- if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
+ crc = f2fs_checkpoint_chksum(sbi, *cp_block);
+ if (crc != cur_cp_crc(*cp_block)) {
f2fs_put_page(*cp_page, 1);
- f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
+ f2fs_warn(sbi, "invalid crc value");
return -EINVAL;
}
@@ -813,9 +855,8 @@
if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
sbi->blocks_per_seg) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "invalid cp_pack_total_block_count:%u",
- le32_to_cpu(cp_block->cp_pack_total_block_count));
+ f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
goto invalid_cp;
}
pre_version = *version;
@@ -849,6 +890,7 @@
unsigned int cp_blks = 1 + __cp_payload(sbi);
block_t cp_blk_no;
int i;
+ int err;
sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks),
GFP_KERNEL);
@@ -876,6 +918,7 @@
} else if (cp2) {
cur_page = cp2;
} else {
+ err = -EFSCORRUPTED;
goto fail_no_cp;
}
@@ -888,8 +931,10 @@
sbi->cur_cp_pack = 2;
/* Sanity checking of checkpoint */
- if (f2fs_sanity_check_ckpt(sbi))
+ if (f2fs_sanity_check_ckpt(sbi)) {
+ err = -EFSCORRUPTED;
goto free_fail_no_cp;
+ }
if (cp_blks <= 1)
goto done;
@@ -903,8 +948,10 @@
unsigned char *ckpt = (unsigned char *)sbi->ckpt;
cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
- if (IS_ERR(cur_page))
+ if (IS_ERR(cur_page)) {
+ err = PTR_ERR(cur_page);
goto free_fail_no_cp;
+ }
sit_bitmap_ptr = page_address(cur_page);
memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
f2fs_put_page(cur_page, 1);
@@ -918,8 +965,8 @@
f2fs_put_page(cp1, 1);
f2fs_put_page(cp2, 1);
fail_no_cp:
- kfree(sbi->ckpt);
- return -EINVAL;
+ kvfree(sbi->ckpt);
+ return err;
}
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
@@ -964,7 +1011,7 @@
inode_inc_dirty_pages(inode);
spin_unlock(&sbi->inode_lock[type]);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
}
@@ -1016,13 +1063,11 @@
if (inode) {
unsigned long cur_ino = inode->i_ino;
- if (is_dir)
- F2FS_I(inode)->cp_task = current;
+ F2FS_I(inode)->cp_task = current;
filemap_fdatawrite(inode->i_mapping);
- if (is_dir)
- F2FS_I(inode)->cp_task = NULL;
+ F2FS_I(inode)->cp_task = NULL;
iput(inode);
/* We need to give cpu to another writers. */
@@ -1086,6 +1131,28 @@
ckpt->next_free_nid = cpu_to_le32(last_nid);
}
+static bool __need_flush_quota(struct f2fs_sb_info *sbi)
+{
+ bool ret = false;
+
+ if (!is_journalled_quota(sbi))
+ return false;
+
+ down_write(&sbi->quota_sem);
+ if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
+ ret = false;
+ } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
+ ret = false;
+ } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) {
+ clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+ ret = true;
+ } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
+ ret = true;
+ }
+ up_write(&sbi->quota_sem);
+ return ret;
+}
+
/*
* Freeze all the FS-operations for checkpoint.
*/
@@ -1097,12 +1164,32 @@
.for_reclaim = 0,
};
struct blk_plug plug;
- int err = 0;
+ int err = 0, cnt = 0;
blk_start_plug(&plug);
-retry_flush_dents:
+retry_flush_quotas:
f2fs_lock_all(sbi);
+ if (__need_flush_quota(sbi)) {
+ int locked;
+
+ if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
+ set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+ goto retry_flush_dents;
+ }
+ f2fs_unlock_all(sbi);
+
+ /* only failed during mount/umount/freeze/quotactl */
+ locked = down_read_trylock(&sbi->sb->s_umount);
+ f2fs_quota_sync(sbi->sb, -1);
+ if (locked)
+ up_read(&sbi->sb->s_umount);
+ cond_resched();
+ goto retry_flush_quotas;
+ }
+
+retry_flush_dents:
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
f2fs_unlock_all(sbi);
@@ -1110,7 +1197,7 @@
if (err)
goto out;
cond_resched();
- goto retry_flush_dents;
+ goto retry_flush_quotas;
}
/*
@@ -1126,7 +1213,7 @@
if (err)
goto out;
cond_resched();
- goto retry_flush_dents;
+ goto retry_flush_quotas;
}
retry_flush_nodes:
@@ -1214,9 +1301,28 @@
else
__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
- if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
+ is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+ if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+ __set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+
+ if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK))
+ __set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
+
+ if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
+ __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
+ if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
+ __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
/* set this flag to activate crc|cp_ver for recovery */
__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
@@ -1239,11 +1345,11 @@
struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
int err;
- memcpy(page_address(page), src, PAGE_SIZE);
- set_page_dirty(page);
+ f2fs_wait_on_page_writeback(page, META, true, true);
- f2fs_wait_on_page_writeback(page, META, true);
- f2fs_bug_on(sbi, PageWriteback(page));
+ memcpy(page_address(page), src, PAGE_SIZE);
+
+ set_page_dirty(page);
if (unlikely(!clear_page_dirty_for_io(page)))
f2fs_bug_on(sbi, 1);
@@ -1277,11 +1383,9 @@
int err;
/* Flush all the NAT/SIT pages */
- while (get_pages(sbi, F2FS_DIRTY_META)) {
- f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
- if (unlikely(f2fs_cp_error(sbi)))
- break;
- }
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
+ !f2fs_cp_error(sbi));
/*
* modify checkpoint
@@ -1335,7 +1439,7 @@
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
- crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
+ crc32 = f2fs_checkpoint_chksum(sbi, ckpt);
*((__le32 *)((unsigned char *)ckpt +
le32_to_cpu(ckpt->checksum_offset)))
= cpu_to_le32(crc32);
@@ -1354,14 +1458,6 @@
for (i = 0; i < nm_i->nat_bits_blocks; i++)
f2fs_update_meta_page(sbi, nm_i->nat_bits +
(i << F2FS_BLKSIZE_BITS), blk + i);
-
- /* Flush all the NAT BITS pages */
- while (get_pages(sbi, F2FS_DIRTY_META)) {
- f2fs_sync_meta_pages(sbi, META, LONG_MAX,
- FS_CP_META_IO);
- if (unlikely(f2fs_cp_error(sbi)))
- break;
- }
}
/* write out checkpoint buffer at block 0 */
@@ -1397,6 +1493,8 @@
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
+ !f2fs_cp_error(sbi));
/* wait for previous submitted meta pages writeback */
f2fs_wait_on_all_pages_writeback(sbi);
@@ -1414,7 +1512,7 @@
* invalidate intermediate page cache borrowed from meta inode
* which are used for migration of encrypted inode's blocks.
*/
- if (f2fs_sb_has_encrypt(sbi->sb))
+ if (f2fs_sb_has_encrypt(sbi))
invalidate_mapping_pages(META_MAPPING(sbi),
MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
@@ -1424,6 +1522,12 @@
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
+ clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+
+ spin_lock(&sbi->stat_lock);
+ sbi->unusable_block_count = 0;
+ spin_unlock(&sbi->stat_lock);
+
__set_cp_next_pack(sbi);
/*
@@ -1448,6 +1552,14 @@
unsigned long long ckpt_ver;
int err = 0;
+ if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi))
+ return -EROFS;
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (cpc->reason != CP_PAUSE)
+ return 0;
+ f2fs_warn(sbi, "Start checkpoint disabled!");
+ }
mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
@@ -1458,10 +1570,6 @@
err = -EIO;
goto out;
}
- if (f2fs_readonly(sbi->sb)) {
- err = -EROFS;
- goto out;
- }
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
@@ -1499,7 +1607,10 @@
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
- f2fs_flush_nat_entries(sbi, cpc);
+ err = f2fs_flush_nat_entries(sbi, cpc);
+ if (err)
+ goto stop;
+
f2fs_flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
@@ -1508,13 +1619,12 @@
f2fs_release_discard_addrs(sbi);
else
f2fs_clear_prefree_segments(sbi, cpc);
-
+stop:
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
if (cpc->reason & CP_RECOVERY)
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "checkpoint: version = %llx", ckpt_ver);
+ f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver);
/* do checkpoint periodically */
f2fs_update_time(sbi, CP_TIME);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 11f2834..5755e89 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/data.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -17,6 +14,7 @@
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
+#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
@@ -49,16 +47,34 @@
inode->i_ino == F2FS_NODE_INO(sbi) ||
S_ISDIR(inode->i_mode) ||
(S_ISREG(inode->i_mode) &&
- is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
+ (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
is_cold_data(page))
return true;
return false;
}
+static enum count_type __read_io_type(struct page *page)
+{
+ struct address_space *mapping = page_file_mapping(page);
+
+ if (mapping) {
+ struct inode *inode = mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (inode->i_ino == F2FS_META_INO(sbi))
+ return F2FS_RD_META;
+
+ if (inode->i_ino == F2FS_NODE_INO(sbi))
+ return F2FS_RD_NODE;
+ }
+ return F2FS_RD_DATA;
+}
+
/* postprocessing steps for read bios */
enum bio_post_read_step {
STEP_INITIAL = 0,
STEP_DECRYPT,
+ STEP_VERITY,
};
struct bio_post_read_ctx {
@@ -72,9 +88,9 @@
{
struct page *page;
struct bio_vec *bv;
- int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bv, bio, i) {
+ bio_for_each_segment_all(bv, bio, iter_all) {
page = bv->bv_page;
/* PG_error was set if any post_read step failed */
@@ -85,6 +101,7 @@
} else {
SetPageUptodate(page);
}
+ dec_page_count(F2FS_P_SB(page), __read_io_type(page));
unlock_page(page);
}
if (bio->bi_private)
@@ -104,8 +121,23 @@
bio_post_read_processing(ctx);
}
+static void verity_work(struct work_struct *work)
+{
+ struct bio_post_read_ctx *ctx =
+ container_of(work, struct bio_post_read_ctx, work);
+
+ fsverity_verify_bio(ctx->bio);
+
+ bio_post_read_processing(ctx);
+}
+
static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
+ /*
+ * We use different work queues for decryption and for verity because
+ * verity may require reading metadata pages that need decryption, and
+ * we shouldn't recurse to the same workqueue.
+ */
switch (++ctx->cur_step) {
case STEP_DECRYPT:
if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
@@ -115,6 +147,14 @@
}
ctx->cur_step++;
/* fall-through */
+ case STEP_VERITY:
+ if (ctx->enabled_steps & (1 << STEP_VERITY)) {
+ INIT_WORK(&ctx->work, verity_work);
+ fsverity_enqueue_verify_work(&ctx->work);
+ return;
+ }
+ ctx->cur_step++;
+ /* fall-through */
default:
__read_end_io(ctx->bio);
}
@@ -127,8 +167,9 @@
static void f2fs_read_end_io(struct bio *bio)
{
- if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
- f2fs_show_injection_info(FAULT_IO);
+ if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)),
+ FAULT_READ_IO)) {
+ f2fs_show_injection_info(FAULT_READ_IO);
bio->bi_status = BLK_STS_IOERR;
}
@@ -147,9 +188,14 @@
{
struct f2fs_sb_info *sbi = bio->bi_private;
struct bio_vec *bvec;
- int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ if (time_to_inject(sbi, FAULT_WRITE_IO)) {
+ f2fs_show_injection_info(FAULT_WRITE_IO);
+ bio->bi_status = BLK_STS_IOERR;
+ }
+
+ bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
enum count_type type = WB_DATA_TYPE(page);
@@ -164,7 +210,7 @@
continue;
}
- fscrypt_pullback_bio_page(&page, true);
+ fscrypt_finalize_bounce_page(&page);
if (unlikely(bio->bi_status)) {
mapping_set_error(page->mapping, -EIO);
@@ -197,12 +243,14 @@
struct block_device *bdev = sbi->sb->s_bdev;
int i;
- for (i = 0; i < sbi->s_ndevs; i++) {
- if (FDEV(i).start_blk <= blk_addr &&
- FDEV(i).end_blk >= blk_addr) {
- blk_addr -= FDEV(i).start_blk;
- bdev = FDEV(i).bdev;
- break;
+ if (f2fs_is_multi_device(sbi)) {
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (FDEV(i).start_blk <= blk_addr &&
+ FDEV(i).end_blk >= blk_addr) {
+ blk_addr -= FDEV(i).start_blk;
+ bdev = FDEV(i).bdev;
+ break;
+ }
}
}
if (bio) {
@@ -216,6 +264,9 @@
{
int i;
+ if (!f2fs_is_multi_device(sbi))
+ return 0;
+
for (i = 0; i < sbi->s_ndevs; i++)
if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
return i;
@@ -232,26 +283,25 @@
/*
* Low-level block read/write IO operations.
*/
-static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
- struct writeback_control *wbc,
- int npages, bool is_read,
- enum page_type type, enum temp_type temp)
+static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
+ struct f2fs_sb_info *sbi = fio->sbi;
struct bio *bio;
bio = f2fs_bio_alloc(sbi, npages, true);
- f2fs_target_device(sbi, blk_addr, bio);
- if (is_read) {
+ f2fs_target_device(sbi, fio->new_blkaddr, bio);
+ if (is_read_io(fio->op)) {
bio->bi_end_io = f2fs_read_end_io;
bio->bi_private = NULL;
} else {
bio->bi_end_io = f2fs_write_end_io;
bio->bi_private = sbi;
- bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
+ bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
+ fio->type, fio->temp);
}
- if (wbc)
- wbc_init_bio(wbc, bio);
+ if (fio->io_wbc)
+ wbc_init_bio(fio->io_wbc, bio);
return bio;
}
@@ -268,6 +318,9 @@
if (test_opt(sbi, LFS) && current->plug)
blk_finish_plug(current->plug);
+ if (F2FS_IO_ALIGNED(sbi))
+ goto submit_io;
+
start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
start %= F2FS_IO_SIZE(sbi);
@@ -278,9 +331,10 @@
for (; start < F2FS_IO_SIZE(sbi); start++) {
struct page *page =
mempool_alloc(sbi->write_io_dummy,
- GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
+ GFP_NOIO | __GFP_NOFAIL);
f2fs_bug_on(sbi, !page);
+ zero_user_segment(page, 0, PAGE_SIZE);
SetPagePrivate(page);
set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
lock_page(page);
@@ -320,31 +374,29 @@
io->bio = NULL;
}
-static bool __has_merged_page(struct f2fs_bio_info *io,
- struct inode *inode, nid_t ino, pgoff_t idx)
+static bool __has_merged_page(struct bio *bio, struct inode *inode,
+ struct page *page, nid_t ino)
{
struct bio_vec *bvec;
struct page *target;
- int i;
+ struct bvec_iter_all iter_all;
- if (!io->bio)
+ if (!bio)
return false;
- if (!inode && !ino)
+ if (!inode && !page && !ino)
return true;
- bio_for_each_segment_all(bvec, io->bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
- if (bvec->bv_page->mapping)
- target = bvec->bv_page;
- else
- target = fscrypt_control_page(bvec->bv_page);
-
- if (idx != target->index)
- continue;
+ target = bvec->bv_page;
+ if (fscrypt_is_bounce_page(target))
+ target = fscrypt_pagecache_page(target);
if (inode && inode == target->mapping->host)
return true;
+ if (page && page == target)
+ return true;
if (ino && ino == ino_of_node(target))
return true;
}
@@ -352,28 +404,6 @@
return false;
}
-static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
- nid_t ino, pgoff_t idx, enum page_type type)
-{
- enum page_type btype = PAGE_TYPE_OF_BIO(type);
- enum temp_type temp;
- struct f2fs_bio_info *io;
- bool ret = false;
-
- for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
- io = sbi->write_io[btype] + temp;
-
- down_read(&io->io_rwsem);
- ret = __has_merged_page(io, inode, ino, idx);
- up_read(&io->io_rwsem);
-
- /* TODO: use HOT temp only for meta pages now. */
- if (ret || btype == META)
- break;
- }
- return ret;
-}
-
static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp)
{
@@ -395,17 +425,23 @@
}
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type, bool force)
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type, bool force)
{
enum temp_type temp;
-
- if (!force && !has_merged_page(sbi, inode, ino, idx, type))
- return;
+ bool ret = true;
for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+ if (!force) {
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
- __f2fs_submit_merged_write(sbi, type, temp);
+ down_read(&io->io_rwsem);
+ ret = __has_merged_page(io->bio, inode, page, ino);
+ up_read(&io->io_rwsem);
+ }
+ if (ret)
+ __f2fs_submit_merged_write(sbi, type, temp);
/* TODO: use HOT temp only for meta pages now. */
if (type >= META)
@@ -415,14 +451,14 @@
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
- __submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
+ __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type)
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type)
{
- __submit_merged_write_cond(sbi, inode, ino, idx, type, false);
+ __submit_merged_write_cond(sbi, inode, page, ino, type, false);
}
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
@@ -443,15 +479,15 @@
fio->encrypted_page : fio->page;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
- __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
- return -EFAULT;
+ fio->is_por ? META_POR : (__is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC_ENHANCE)))
+ return -EFSCORRUPTED;
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
/* Allocate a new bio */
- bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
- 1, is_read_io(fio->op), fio->type, fio->temp);
+ bio = __bio_alloc(fio, 1);
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
@@ -459,17 +495,108 @@
}
if (fio->io_wbc && !is_read_io(fio->op))
- wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
bio_set_op_attrs(bio, fio->op, fio->op_flags);
- if (!is_read_io(fio->op))
- inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
+ inc_page_count(fio->sbi, is_read_io(fio->op) ?
+ __read_io_type(page): WB_DATA_TYPE(fio->page));
__submit_bio(fio->sbi, bio, fio->type);
return 0;
}
+static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
+ block_t last_blkaddr, block_t cur_blkaddr)
+{
+ if (last_blkaddr + 1 != cur_blkaddr)
+ return false;
+ return __same_bdev(sbi, cur_blkaddr, bio);
+}
+
+static bool io_type_is_mergeable(struct f2fs_bio_info *io,
+ struct f2fs_io_info *fio)
+{
+ if (io->fio.op != fio->op)
+ return false;
+ return io->fio.op_flags == fio->op_flags;
+}
+
+static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
+ struct f2fs_bio_info *io,
+ struct f2fs_io_info *fio,
+ block_t last_blkaddr,
+ block_t cur_blkaddr)
+{
+ if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
+ unsigned int filled_blocks =
+ F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
+ unsigned int io_size = F2FS_IO_SIZE(sbi);
+ unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
+
+ /* IOs in bio is aligned and left space of vectors is not enough */
+ if (!(filled_blocks % io_size) && left_vecs < io_size)
+ return false;
+ }
+ if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
+ return false;
+ return io_type_is_mergeable(io, fio);
+}
+
+int f2fs_merge_page_bio(struct f2fs_io_info *fio)
+{
+ struct bio *bio = *fio->bio;
+ struct page *page = fio->encrypted_page ?
+ fio->encrypted_page : fio->page;
+
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ return -EFSCORRUPTED;
+
+ trace_f2fs_submit_page_bio(page, fio);
+ f2fs_trace_ios(fio, 0);
+
+ if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
+ fio->new_blkaddr)) {
+ __submit_bio(fio->sbi, bio, fio->type);
+ bio = NULL;
+ }
+alloc_new:
+ if (!bio) {
+ bio = __bio_alloc(fio, BIO_MAX_PAGES);
+ bio_set_op_attrs(bio, fio->op, fio->op_flags);
+ }
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ __submit_bio(fio->sbi, bio, fio->type);
+ bio = NULL;
+ goto alloc_new;
+ }
+
+ if (fio->io_wbc)
+ wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
+
+ inc_page_count(fio->sbi, WB_DATA_TYPE(page));
+
+ *fio->last_block = fio->new_blkaddr;
+ *fio->bio = bio;
+
+ return 0;
+}
+
+static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio,
+ struct page *page)
+{
+ if (!bio)
+ return;
+
+ if (!__has_merged_page(*bio, NULL, page, 0))
+ return;
+
+ __submit_bio(sbi, *bio, DATA);
+ *bio = NULL;
+}
+
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
@@ -493,9 +620,7 @@
spin_unlock(&io->io_lock);
}
- if (__is_valid_data_blkaddr(fio->old_blkaddr))
- verify_block_addr(fio, fio->old_blkaddr);
- verify_block_addr(fio, fio->new_blkaddr);
+ verify_fio_blkaddr(fio);
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
@@ -504,21 +629,19 @@
inc_page_count(sbi, WB_DATA_TYPE(bio_page));
- if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
- (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
- !__same_bdev(sbi, fio->new_blkaddr, io->bio)))
+ if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio,
+ io->last_block_in_bio, fio->new_blkaddr))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
- if ((fio->type == DATA || fio->type == NODE) &&
+ if (F2FS_IO_ALIGNED(sbi) &&
+ (fio->type == DATA || fio->type == NODE) &&
fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
dec_page_count(sbi, WB_DATA_TYPE(bio_page));
fio->retry = true;
goto skip;
}
- io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
- BIO_MAX_PAGES, false,
- fio->type, fio->temp);
+ io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
io->fio = *fio;
}
@@ -528,7 +651,7 @@
}
if (fio->io_wbc)
- wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);
io->last_block_in_bio = fio->new_blkaddr;
f2fs_trace_ios(fio, 0);
@@ -538,20 +661,27 @@
if (fio->in_list)
goto next;
out:
+ if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+ !f2fs_is_checkpoint_ready(sbi))
+ __submit_merged_bio(io);
up_write(&io->io_rwsem);
}
+static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
+{
+ return fsverity_active(inode) &&
+ idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+}
+
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
- unsigned nr_pages, unsigned op_flag)
+ unsigned nr_pages, unsigned op_flag,
+ pgoff_t first_idx)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
struct bio_post_read_ctx *ctx;
unsigned int post_read_steps = 0;
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
- return ERR_PTR(-EFAULT);
-
bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio)
return ERR_PTR(-ENOMEM);
@@ -561,6 +691,10 @@
if (f2fs_encrypted_file(inode))
post_read_steps |= 1 << STEP_DECRYPT;
+
+ if (f2fs_need_verity(inode, first_idx))
+ post_read_steps |= 1 << STEP_VERITY;
+
if (post_read_steps) {
ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
if (!ctx) {
@@ -570,9 +704,6 @@
ctx->bio = bio;
ctx->enabled_steps = post_read_steps;
bio->bi_private = ctx;
-
- /* wait the page to be moved by cleaning */
- f2fs_wait_on_block_writeback(sbi, blkaddr);
}
return bio;
@@ -582,17 +713,23 @@
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr)
{
- struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct bio *bio;
+ bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index);
if (IS_ERR(bio))
return PTR_ERR(bio);
+ /* wait for GCed page writeback via META_MAPPING */
+ f2fs_wait_on_block_writeback(inode, blkaddr);
+
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
return -EFAULT;
}
ClearPageError(page);
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ inc_page_count(sbi, F2FS_RD_DATA);
+ __submit_bio(sbi, bio, DATA);
return 0;
}
@@ -618,7 +755,7 @@
*/
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
- f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
__set_data_blkaddr(dn);
if (set_page_dirty(dn->node_page))
dn->node_changed = true;
@@ -648,7 +785,7 @@
trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
dn->ofs_in_node, count);
- f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
for (; count > 0; dn->ofs_in_node++) {
block_t blkaddr = datablock_addr(dn->inode,
@@ -720,6 +857,11 @@
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ err = -EFSCORRUPTED;
+ goto put_err;
+ }
goto got_it;
}
@@ -733,6 +875,13 @@
err = -ENOENT;
goto put_err;
}
+ if (dn.data_blkaddr != NEW_ADDR &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ err = -EFSCORRUPTED;
+ goto put_err;
+ }
got_it:
if (PageUptodate(page)) {
unlock_page(page);
@@ -882,7 +1031,6 @@
struct f2fs_summary sum;
struct node_info ni;
block_t old_blkaddr;
- pgoff_t fofs;
blkcnt_t count = 1;
int err;
@@ -895,7 +1043,7 @@
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
- if (dn->data_blkaddr == NEW_ADDR)
+ if (dn->data_blkaddr != NULL_ADDR)
goto alloc;
if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
@@ -909,14 +1057,12 @@
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
- f2fs_set_data_blkaddr(dn);
+ f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
- /* update i_size */
- fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
- dn->ofs_in_node;
- if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
- f2fs_i_size_write(dn->inode,
- ((loff_t)(fofs + 1) << PAGE_SHIFT));
+ /*
+ * i_size will be updated by direct_IO. Otherwise, we'll get stale
+ * data from unwritten block via dio_read.
+ */
return 0;
}
@@ -935,6 +1081,9 @@
return err;
}
+ if (direct_io && allow_outplace_dio(inode, iocb, from))
+ return 0;
+
if (is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
@@ -948,10 +1097,11 @@
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = true;
if (direct_io) {
map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
- flag = f2fs_force_buffered_io(inode, WRITE) ?
+ flag = f2fs_force_buffered_io(inode, iocb, from) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO;
goto map_blocks;
@@ -976,7 +1126,7 @@
return err;
}
-static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (lock)
@@ -1006,7 +1156,7 @@
unsigned int maxblocks = map->m_len;
struct dnode_of_data dn;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- int mode = create ? ALLOC_NODE : LOOKUP_NODE;
+ int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
pgoff_t pgofs, end_offset, end;
int err = 0, ofs = 1;
unsigned int ofs_in_node, last_ofs_in_node;
@@ -1026,16 +1176,25 @@
end = pgofs + maxblocks;
if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+ map->m_may_create)
+ goto next_dnode;
+
map->m_pblk = ei.blk + pgofs - ei.fofs;
map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
map->m_flags = F2FS_MAP_MAPPED;
if (map->m_next_extent)
*map->m_next_extent = pgofs + map->m_len;
+
+ /* for hardware encryption, but to avoid potential issue in future */
+ if (flag == F2FS_GET_BLOCK_DIO)
+ f2fs_wait_on_block_writeback_range(inode,
+ map->m_pblk, map->m_len);
goto out;
}
next_dnode:
- if (create)
+ if (map->m_may_create)
__do_map_lock(sbi, flag, true);
/* When reading holes, we need its node page */
@@ -1065,12 +1224,22 @@
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
- err = -EFAULT;
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
+ err = -EFSCORRUPTED;
goto sync_out;
}
- if (!is_valid_data_blkaddr(sbi, blkaddr)) {
+ if (__is_valid_data_blkaddr(blkaddr)) {
+ /* use out-place-update for driect IO under LFS mode */
+ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+ map->m_may_create) {
+ err = __allocate_data_block(&dn, map->m_seg_type);
+ if (err)
+ goto sync_out;
+ blkaddr = dn.data_blkaddr;
+ set_inode_flag(inode, FI_APPEND_WRITE);
+ }
+ } else {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -1082,6 +1251,8 @@
last_ofs_in_node = dn.ofs_in_node;
}
} else {
+ WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
+ flag != F2FS_GET_BLOCK_DIO);
err = __allocate_data_block(&dn,
map->m_seg_type);
if (!err)
@@ -1172,13 +1343,19 @@
f2fs_put_dnode(&dn);
- if (create) {
+ if (map->m_may_create) {
__do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
goto next_dnode;
sync_out:
+
+ /* for hardware encryption, but to avoid potential issue in future */
+ if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
+ f2fs_wait_on_block_writeback_range(inode,
+ map->m_pblk, map->m_len);
+
if (flag == F2FS_GET_BLOCK_PRECACHE) {
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
@@ -1192,7 +1369,7 @@
}
f2fs_put_dnode(&dn);
unlock_out:
- if (create) {
+ if (map->m_may_create) {
__do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
@@ -1214,6 +1391,7 @@
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
last_lblk = F2FS_BLK_ALIGN(pos + len);
while (map.m_lblk < last_lblk) {
@@ -1228,7 +1406,7 @@
static int __get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create, int flag,
- pgoff_t *next_pgofs, int seg_type)
+ pgoff_t *next_pgofs, int seg_type, bool may_write)
{
struct f2fs_map_blocks map;
int err;
@@ -1238,6 +1416,7 @@
map.m_next_pgofs = next_pgofs;
map.m_next_extent = NULL;
map.m_seg_type = seg_type;
+ map.m_may_create = may_write;
err = f2fs_map_blocks(inode, &map, create, flag);
if (!err) {
@@ -1254,16 +1433,25 @@
{
return __get_data_block(inode, iblock, bh_result, create,
flag, next_pgofs,
- NO_CHECK_TYPE);
+ NO_CHECK_TYPE, create);
+}
+
+static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create,
+ F2FS_GET_BLOCK_DIO, NULL,
+ f2fs_rw_hint_to_seg_type(inode->i_write_hint),
+ IS_SWAPFILE(inode) ? false : true);
}
static int get_data_block_dio(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DEFAULT, NULL,
- f2fs_rw_hint_to_seg_type(
- inode->i_write_hint));
+ F2FS_GET_BLOCK_DIO, NULL,
+ f2fs_rw_hint_to_seg_type(inode->i_write_hint),
+ false);
}
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
@@ -1275,7 +1463,7 @@
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP, NULL,
- NO_CHECK_TYPE);
+ NO_CHECK_TYPE, create);
}
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -1385,7 +1573,7 @@
goto out;
}
- if (f2fs_has_inline_data(inode)) {
+ if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
if (ret != -EAGAIN)
goto out;
@@ -1418,7 +1606,7 @@
}
if (size) {
- if (f2fs_encrypted_inode(inode))
+ if (IS_ENCRYPTED(inode))
flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
ret = fiemap_fill_next_extent(fieinfo, logical,
@@ -1451,6 +1639,133 @@
return ret;
}
+static inline loff_t f2fs_readpage_limit(struct inode *inode)
+{
+ if (IS_ENABLED(CONFIG_FS_VERITY) &&
+ (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
+ return inode->i_sb->s_maxbytes;
+
+ return i_size_read(inode);
+}
+
+static int f2fs_read_single_page(struct inode *inode, struct page *page,
+ unsigned nr_pages,
+ struct f2fs_map_blocks *map,
+ struct bio **bio_ret,
+ sector_t *last_block_in_bio,
+ bool is_readahead)
+{
+ struct bio *bio = *bio_ret;
+ const unsigned blkbits = inode->i_blkbits;
+ const unsigned blocksize = 1 << blkbits;
+ sector_t block_in_file;
+ sector_t last_block;
+ sector_t last_block_in_file;
+ sector_t block_nr;
+ int ret = 0;
+
+ block_in_file = (sector_t)page_index(page);
+ last_block = block_in_file + nr_pages;
+ last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >>
+ blkbits;
+ if (last_block > last_block_in_file)
+ last_block = last_block_in_file;
+
+ /* just zeroing out page which is beyond EOF */
+ if (block_in_file >= last_block)
+ goto zero_out;
+ /*
+ * Map blocks using the previous result first.
+ */
+ if ((map->m_flags & F2FS_MAP_MAPPED) &&
+ block_in_file > map->m_lblk &&
+ block_in_file < (map->m_lblk + map->m_len))
+ goto got_it;
+
+ /*
+ * Then do more f2fs_map_blocks() calls until we are
+ * done with this page.
+ */
+ map->m_lblk = block_in_file;
+ map->m_len = last_block - block_in_file;
+
+ ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
+ if (ret)
+ goto out;
+got_it:
+ if ((map->m_flags & F2FS_MAP_MAPPED)) {
+ block_nr = map->m_pblk + block_in_file - map->m_lblk;
+ SetPageMappedToDisk(page);
+
+ if (!PageUptodate(page) && (!PageSwapCache(page) &&
+ !cleancache_get_page(page))) {
+ SetPageUptodate(page);
+ goto confused;
+ }
+
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
+ } else {
+zero_out:
+ zero_user_segment(page, 0, PAGE_SIZE);
+ if (f2fs_need_verity(inode, page->index) &&
+ !fsverity_verify_page(page)) {
+ ret = -EIO;
+ goto out;
+ }
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ goto out;
+ }
+
+ /*
+ * This page will go to BIO. Do we need to send this
+ * BIO off first?
+ */
+ if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio,
+ *last_block_in_bio, block_nr)) {
+submit_and_realloc:
+ __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ bio = NULL;
+ }
+ if (bio == NULL) {
+ bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
+ is_readahead ? REQ_RAHEAD : 0, page->index);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ bio = NULL;
+ goto out;
+ }
+ }
+
+ /*
+ * If the page is under writeback, we need to wait for
+ * its completion to see the correct decrypted data.
+ */
+ f2fs_wait_on_block_writeback(inode, block_nr);
+
+ if (bio_add_page(bio, page, blocksize, 0) < blocksize)
+ goto submit_and_realloc;
+
+ inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
+ ClearPageError(page);
+ *last_block_in_bio = block_nr;
+ goto out;
+confused:
+ if (bio) {
+ __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ bio = NULL;
+ }
+ unlock_page(page);
+out:
+ *bio_ret = bio;
+ return ret;
+}
+
/*
* This function was originally taken from fs/mpage.c, and customized for f2fs.
* Major change was from block_size == page_size in f2fs by default.
@@ -1467,13 +1782,8 @@
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
struct inode *inode = mapping->host;
- const unsigned blkbits = inode->i_blkbits;
- const unsigned blocksize = 1 << blkbits;
- sector_t block_in_file;
- sector_t last_block;
- sector_t last_block_in_file;
- sector_t block_nr;
struct f2fs_map_blocks map;
+ int ret = 0;
map.m_pblk = 0;
map.m_lblk = 0;
@@ -1482,6 +1792,7 @@
map.m_next_pgofs = NULL;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
for (; nr_pages; nr_pages--) {
if (pages) {
@@ -1490,97 +1801,18 @@
prefetchw(&page->flags);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
- page->index,
+ page_index(page),
readahead_gfp_mask(mapping)))
goto next_page;
}
- block_in_file = (sector_t)page->index;
- last_block = block_in_file + nr_pages;
- last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
- blkbits;
- if (last_block > last_block_in_file)
- last_block = last_block_in_file;
-
- /*
- * Map blocks using the previous result first.
- */
- if ((map.m_flags & F2FS_MAP_MAPPED) &&
- block_in_file > map.m_lblk &&
- block_in_file < (map.m_lblk + map.m_len))
- goto got_it;
-
- /*
- * Then do more f2fs_map_blocks() calls until we are
- * done with this page.
- */
- map.m_flags = 0;
-
- if (block_in_file < last_block) {
- map.m_lblk = block_in_file;
- map.m_len = last_block - block_in_file;
-
- if (f2fs_map_blocks(inode, &map, 0,
- F2FS_GET_BLOCK_DEFAULT))
- goto set_error_page;
- }
-got_it:
- if ((map.m_flags & F2FS_MAP_MAPPED)) {
- block_nr = map.m_pblk + block_in_file - map.m_lblk;
- SetPageMappedToDisk(page);
-
- if (!PageUptodate(page) && !cleancache_get_page(page)) {
- SetPageUptodate(page);
- goto confused;
- }
-
- if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
- DATA_GENERIC))
- goto set_error_page;
- } else {
+ ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio,
+ &last_block_in_bio, is_readahead);
+ if (ret) {
+ SetPageError(page);
zero_user_segment(page, 0, PAGE_SIZE);
- if (!PageUptodate(page))
- SetPageUptodate(page);
unlock_page(page);
- goto next_page;
}
-
- /*
- * This page will go to BIO. Do we need to send this
- * BIO off first?
- */
- if (bio && (last_block_in_bio != block_nr - 1 ||
- !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
-submit_and_realloc:
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
- if (bio == NULL) {
- bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
- is_readahead ? REQ_RAHEAD : 0);
- if (IS_ERR(bio)) {
- bio = NULL;
- goto set_error_page;
- }
- }
-
- if (bio_add_page(bio, page, blocksize, 0) < blocksize)
- goto submit_and_realloc;
-
- ClearPageError(page);
- last_block_in_bio = block_nr;
- goto next_page;
-set_error_page:
- SetPageError(page);
- zero_user_segment(page, 0, PAGE_SIZE);
- unlock_page(page);
- goto next_page;
-confused:
- if (bio) {
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
- unlock_page(page);
next_page:
if (pages)
put_page(page);
@@ -1588,12 +1820,12 @@
BUG_ON(pages && !list_empty(pages));
if (bio)
__submit_bio(F2FS_I_SB(inode), bio, DATA);
- return 0;
+ return pages ? 0 : ret;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
int ret = -EAGAIN;
trace_f2fs_readpage(page, DATA);
@@ -1602,7 +1834,8 @@
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
+ ret = f2fs_mpage_readpages(page_file_mapping(page),
+ NULL, page, 1, false);
return ret;
}
@@ -1632,11 +1865,12 @@
return 0;
/* wait for GCed page writeback via META_MAPPING */
- f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr);
+ f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
retry_encrypt:
- fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
- PAGE_SIZE, 0, fio->page->index, gfp_flags);
+ fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page,
+ PAGE_SIZE, 0,
+ gfp_flags);
if (IS_ERR(fio->encrypted_page)) {
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
@@ -1681,7 +1915,7 @@
if (policy & (0x1 << F2FS_IPU_ASYNC) &&
fio && fio->op == REQ_OP_WRITE &&
!(fio->op_flags & REQ_SYNC) &&
- !f2fs_encrypted_inode(inode))
+ !IS_ENCRYPTED(inode))
return true;
/* this is only set during fdatasync */
@@ -1689,6 +1923,10 @@
is_inode_flag_set(inode, FI_NEED_IPU))
return true;
+ if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+ return true;
+
return false;
}
@@ -1712,6 +1950,8 @@
return true;
if (S_ISDIR(inode->i_mode))
return true;
+ if (IS_NOQUOTA(inode))
+ return true;
if (f2fs_is_atomic_file(inode))
return true;
if (fio) {
@@ -1719,6 +1959,9 @@
return true;
if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
return true;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+ return true;
}
return false;
}
@@ -1749,8 +1992,8 @@
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
- DATA_GENERIC))
- return -EFAULT;
+ DATA_GENERIC_ENHANCE))
+ return -EFSCORRUPTED;
ipu_force = true;
fio->need_lock = LOCK_DONE;
@@ -1770,20 +2013,22 @@
/* This page is already truncated */
if (fio->old_blkaddr == NULL_ADDR) {
ClearPageUptodate(page);
+ clear_cold_data(page);
goto out_writepage;
}
got_it:
if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
- DATA_GENERIC)) {
- err = -EFAULT;
+ DATA_GENERIC_ENHANCE)) {
+ err = -EFSCORRUPTED;
goto out_writepage;
}
/*
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
+ if (ipu_force ||
+ (__is_valid_data_blkaddr(fio->old_blkaddr) &&
need_inplace_update(fio))) {
err = encrypt_one_page(fio);
if (err)
@@ -1795,8 +2040,15 @@
if (fio->need_lock == LOCK_REQ)
f2fs_unlock_op(fio->sbi);
err = f2fs_inplace_write_data(fio);
+ if (err) {
+ if (f2fs_encrypted_file(inode))
+ fscrypt_finalize_bounce_page(&fio->encrypted_page);
+ if (PageWriteback(page))
+ end_page_writeback(page);
+ } else {
+ set_inode_flag(inode, FI_UPDATE_WRITE);
+ }
trace_f2fs_do_write_data_page(fio->page, IPU);
- set_inode_flag(inode, FI_UPDATE_WRITE);
return err;
}
@@ -1836,6 +2088,8 @@
}
static int __write_data_page(struct page *page, bool *submitted,
+ struct bio **bio,
+ sector_t *last_block,
struct writeback_control *wbc,
enum iostat_type io_type)
{
@@ -1861,6 +2115,8 @@
.need_lock = LOCK_RETRY,
.io_type = io_type,
.io_wbc = wbc,
+ .bio = bio,
+ .last_block = last_block,
};
trace_f2fs_writepage(page, DATA);
@@ -1880,7 +2136,7 @@
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
- if (page->index < end_index)
+ if (page->index < end_index || f2fs_verity_in_progress(inode))
goto write;
/*
@@ -1945,21 +2201,27 @@
out:
inode_dec_dirty_pages(inode);
- if (err)
+ if (err) {
ClearPageUptodate(page);
+ clear_cold_data(page);
+ }
if (wbc->for_reclaim) {
- f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
clear_inode_flag(inode, FI_HOT_DATA);
f2fs_remove_dirty_inode(inode);
submitted = NULL;
}
unlock_page(page);
- if (!S_ISDIR(inode->i_mode))
+ if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
+ !F2FS_I(inode)->cp_task) {
+ f2fs_submit_ipu_bio(sbi, bio, page);
f2fs_balance_fs(sbi, need_balance_fs);
+ }
if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_submit_ipu_bio(sbi, bio, page);
f2fs_submit_merged_write(sbi, DATA);
submitted = NULL;
}
@@ -1986,7 +2248,7 @@
static int f2fs_write_data_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_data_page(page, NULL, wbc, FS_DATA_IO);
+ return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO);
}
/*
@@ -2002,15 +2264,17 @@
int done = 0;
struct pagevec pvec;
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+ struct bio *bio = NULL;
+ sector_t last_block;
int nr_pages;
pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
- pgoff_t last_idx = ULONG_MAX;
int cycled;
int range_whole = 0;
- int tag;
+ xa_mark_t tag;
+ int nwritten = 0;
pagevec_init(&pvec);
@@ -2078,18 +2342,20 @@
}
if (PageWriteback(page)) {
- if (wbc->sync_mode != WB_SYNC_NONE)
+ if (wbc->sync_mode != WB_SYNC_NONE) {
f2fs_wait_on_page_writeback(page,
- DATA, true);
- else
+ DATA, true, true);
+ f2fs_submit_ipu_bio(sbi, &bio, page);
+ } else {
goto continue_unlock;
+ }
}
- BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- ret = __write_data_page(page, &submitted, wbc, io_type);
+ ret = __write_data_page(page, &submitted, &bio,
+ &last_block, wbc, io_type);
if (unlikely(ret)) {
/*
* keep nr_to_write, since vfs uses this to
@@ -2113,7 +2379,7 @@
done = 1;
break;
} else if (submitted) {
- last_idx = page->index;
+ nwritten++;
}
if (--wbc->nr_to_write <= 0 &&
@@ -2135,9 +2401,12 @@
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
- if (last_idx != ULONG_MAX)
+ if (nwritten)
f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
- 0, last_idx, DATA);
+ NULL, 0, DATA);
+ /* submit cached bio of IPU write */
+ if (bio)
+ __submit_bio(sbi, bio, DATA);
return ret;
}
@@ -2147,6 +2416,11 @@
{
if (!S_ISREG(inode->i_mode))
return false;
+ if (IS_NOQUOTA(inode))
+ return false;
+ /* to avoid deadlock in path of data flush */
+ if (F2FS_I(inode)->cp_task)
+ return false;
if (wbc->sync_mode != WB_SYNC_ALL)
return true;
if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
@@ -2176,7 +2450,8 @@
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto skip_write;
- if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+ if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
+ wbc->sync_mode == WB_SYNC_NONE &&
get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
f2fs_available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
@@ -2236,12 +2511,14 @@
struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);
- if (to > i_size) {
+ /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
+ if (to > i_size && !f2fs_verity_in_progress(inode)) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size);
- f2fs_truncate_blocks(inode, i_size, true);
+ if (!IS_NOQUOTA(inode))
+ f2fs_truncate_blocks(inode, i_size, true);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -2259,18 +2536,26 @@
bool locked = false;
struct extent_info ei = {0,0,0};
int err = 0;
+ int flag;
/*
* we already allocated all the blocks, so we don't need to get
* the block addresses when there is no need to fill the page.
*/
if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
- !is_inode_flag_set(inode, FI_NO_PREALLOC))
+ !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
+ !f2fs_verity_in_progress(inode))
return 0;
+ /* f2fs_lock_op avoids race between write CP and convert_inline_page */
+ if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
+ flag = F2FS_GET_BLOCK_DEFAULT;
+ else
+ flag = F2FS_GET_BLOCK_PRE_AIO;
+
if (f2fs_has_inline_data(inode) ||
(pos & PAGE_MASK) >= i_size_read(inode)) {
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+ __do_map_lock(sbi, flag, true);
locked = true;
}
restart:
@@ -2308,6 +2593,7 @@
f2fs_put_dnode(&dn);
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
true);
+ WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
locked = true;
goto restart;
}
@@ -2321,7 +2607,7 @@
f2fs_put_dnode(&dn);
unlock_out:
if (locked)
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ __do_map_lock(sbi, flag, false);
return err;
}
@@ -2339,6 +2625,11 @@
trace_f2fs_write_begin(inode, pos, len, flags);
+ if (!f2fs_is_checkpoint_ready(sbi)) {
+ err = -ENOSPC;
+ goto fail;
+ }
+
if ((f2fs_is_atomic_file(inode) &&
!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
@@ -2376,7 +2667,8 @@
if (err)
goto fail;
- if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
+ if (need_balance && !IS_NOQUOTA(inode) &&
+ has_not_enough_free_secs(sbi, 0, 0)) {
unlock_page(page);
f2fs_balance_fs(sbi, true);
lock_page(page);
@@ -2387,16 +2679,13 @@
}
}
- f2fs_wait_on_page_writeback(page, DATA, false);
-
- /* wait for GCed page writeback via META_MAPPING */
- if (f2fs_post_read_required(inode))
- f2fs_wait_on_block_writeback(sbi, blkaddr);
+ f2fs_wait_on_page_writeback(page, DATA, false, true);
if (len == PAGE_SIZE || PageUptodate(page))
return 0;
- if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
+ if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
+ !f2fs_verity_in_progress(inode)) {
zero_user_segment(page, len, PAGE_SIZE);
return 0;
}
@@ -2405,6 +2694,11 @@
zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
} else {
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ err = -EFSCORRUPTED;
+ goto fail;
+ }
err = f2fs_submit_page_read(inode, page, blkaddr);
if (err)
goto fail;
@@ -2454,7 +2748,8 @@
set_page_dirty(page);
- if (pos + copied > i_size_read(inode))
+ if (pos + copied > i_size_read(inode) &&
+ !f2fs_verity_in_progress(inode))
f2fs_i_size_write(inode, pos + copied);
unlock_out:
f2fs_put_page(page, 1);
@@ -2482,41 +2777,105 @@
return 0;
}
+static void f2fs_dio_end_io(struct bio *bio)
+{
+ struct f2fs_private_dio *dio = bio->bi_private;
+
+ dec_page_count(F2FS_I_SB(dio->inode),
+ dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
+
+ bio->bi_private = dio->orig_private;
+ bio->bi_end_io = dio->orig_end_io;
+
+ kvfree(dio);
+
+ bio_endio(bio);
+}
+
+static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
+ loff_t file_offset)
+{
+ struct f2fs_private_dio *dio;
+ bool write = (bio_op(bio) == REQ_OP_WRITE);
+
+ dio = f2fs_kzalloc(F2FS_I_SB(inode),
+ sizeof(struct f2fs_private_dio), GFP_NOFS);
+ if (!dio)
+ goto out;
+
+ dio->inode = inode;
+ dio->orig_end_io = bio->bi_end_io;
+ dio->orig_private = bio->bi_private;
+ dio->write = write;
+
+ bio->bi_end_io = f2fs_dio_end_io;
+ bio->bi_private = dio;
+
+ inc_page_count(F2FS_I_SB(inode),
+ write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
+
+ submit_bio(bio);
+ return;
+out:
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+}
+
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
size_t count = iov_iter_count(iter);
loff_t offset = iocb->ki_pos;
int rw = iov_iter_rw(iter);
int err;
enum rw_hint hint = iocb->ki_hint;
int whint_mode = F2FS_OPTION(sbi).whint_mode;
+ bool do_opu;
err = check_direct_IO(inode, iter, offset);
if (err)
return err < 0 ? err : 0;
- if (f2fs_force_buffered_io(inode, rw))
+ if (f2fs_force_buffered_io(inode, iocb, iter))
return 0;
+ do_opu = allow_outplace_dio(inode, iocb, iter);
+
trace_f2fs_direct_IO_enter(inode, offset, count, rw);
if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
iocb->ki_hint = WRITE_LIFE_NOT_SET;
- if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) {
- if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
iocb->ki_hint = hint;
err = -EAGAIN;
goto out;
}
- down_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
+ if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ up_read(&fi->i_gc_rwsem[rw]);
+ iocb->ki_hint = hint;
+ err = -EAGAIN;
+ goto out;
+ }
+ } else {
+ down_read(&fi->i_gc_rwsem[rw]);
+ if (do_opu)
+ down_read(&fi->i_gc_rwsem[READ]);
}
- err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
- up_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
+ err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+ iter, rw == WRITE ? get_data_block_dio_write :
+ get_data_block_dio, NULL, f2fs_dio_submit_bio,
+ DIO_LOCKING | DIO_SKIP_HOLES);
+
+ if (do_opu)
+ up_read(&fi->i_gc_rwsem[READ]);
+
+ up_read(&fi->i_gc_rwsem[rw]);
if (rw == WRITE) {
if (whint_mode == WHINT_MODE_OFF)
@@ -2524,7 +2883,8 @@
if (err > 0) {
f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
err);
- set_inode_flag(inode, FI_UPDATE_WRITE);
+ if (!do_opu)
+ set_inode_flag(inode, FI_UPDATE_WRITE);
} else if (err < 0) {
f2fs_write_failed(mapping, offset + count);
}
@@ -2557,12 +2917,12 @@
}
}
- /* This is atomic written page, keep Private */
+ clear_cold_data(page);
+
if (IS_ATOMIC_WRITTEN_PAGE(page))
return f2fs_drop_inmem_page(inode, page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
}
int f2fs_release_page(struct page *page, gfp_t wait)
@@ -2575,20 +2935,21 @@
if (IS_ATOMIC_WRITTEN_PAGE(page))
return 0;
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ clear_cold_data(page);
+ f2fs_clear_page_private(page);
return 1;
}
static int f2fs_set_data_page_dirty(struct page *page)
{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
trace_f2fs_set_page_dirty(page, DATA);
if (!PageUptodate(page))
SetPageUptodate(page);
+ if (PageSwapCache(page))
+ return __set_page_dirty_nobuffers(page);
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
@@ -2644,14 +3005,10 @@
return -EAGAIN;
}
- /*
- * A reference is expected if PagePrivate set when move mapping,
- * however F2FS breaks this for maintaining dirty page counts when
- * truncating pages. So here adjusting the 'extra_count' make it work.
- */
- extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
+ /* one extra reference was held for atomic_write page */
+ extra_count = atomic_written ? 1 : 0;
rc = migrate_page_move_mapping(mapping, newpage,
- page, NULL, mode, extra_count);
+ page, extra_count);
if (rc != MIGRATEPAGE_SUCCESS) {
if (atomic_written)
mutex_unlock(&fi->inmem_lock);
@@ -2670,9 +3027,10 @@
get_page(newpage);
}
- if (PagePrivate(page))
- SetPagePrivate(newpage);
- set_page_private(newpage, page_private(page));
+ if (PagePrivate(page)) {
+ f2fs_set_page_private(newpage, page_private(page));
+ f2fs_clear_page_private(page);
+ }
if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page);
@@ -2683,6 +3041,126 @@
}
#endif
+#ifdef CONFIG_SWAP
+/* Copied from generic_swapfile_activate() to check any holes */
+static int check_swap_activate(struct file *swap_file, unsigned int max)
+{
+ struct address_space *mapping = swap_file->f_mapping;
+ struct inode *inode = mapping->host;
+ unsigned blocks_per_page;
+ unsigned long page_no;
+ unsigned blkbits;
+ sector_t probe_block;
+ sector_t last_block;
+ sector_t lowest_block = -1;
+ sector_t highest_block = 0;
+
+ blkbits = inode->i_blkbits;
+ blocks_per_page = PAGE_SIZE >> blkbits;
+
+ /*
+ * Map all the blocks into the extent list. This code doesn't try
+ * to be very smart.
+ */
+ probe_block = 0;
+ page_no = 0;
+ last_block = i_size_read(inode) >> blkbits;
+ while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
+ unsigned block_in_page;
+ sector_t first_block;
+
+ cond_resched();
+
+ first_block = bmap(inode, probe_block);
+ if (first_block == 0)
+ goto bad_bmap;
+
+ /*
+ * It must be PAGE_SIZE aligned on-disk
+ */
+ if (first_block & (blocks_per_page - 1)) {
+ probe_block++;
+ goto reprobe;
+ }
+
+ for (block_in_page = 1; block_in_page < blocks_per_page;
+ block_in_page++) {
+ sector_t block;
+
+ block = bmap(inode, probe_block + block_in_page);
+ if (block == 0)
+ goto bad_bmap;
+ if (block != first_block + block_in_page) {
+ /* Discontiguity */
+ probe_block++;
+ goto reprobe;
+ }
+ }
+
+ first_block >>= (PAGE_SHIFT - blkbits);
+ if (page_no) { /* exclude the header page */
+ if (first_block < lowest_block)
+ lowest_block = first_block;
+ if (first_block > highest_block)
+ highest_block = first_block;
+ }
+
+ page_no++;
+ probe_block += blocks_per_page;
+reprobe:
+ continue;
+ }
+ return 0;
+
+bad_bmap:
+ pr_err("swapon: swapfile has holes\n");
+ return -EINVAL;
+}
+
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ struct inode *inode = file_inode(file);
+ int ret;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+ return -EROFS;
+
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+
+ ret = check_swap_activate(file, sis->max);
+ if (ret)
+ return ret;
+
+ set_inode_flag(inode, FI_PIN_FILE);
+ f2fs_precache_extents(inode);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ return 0;
+}
+
+static void f2fs_swap_deactivate(struct file *file)
+{
+ struct inode *inode = file_inode(file);
+
+ clear_inode_flag(inode, FI_PIN_FILE);
+}
+#else
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ return -EOPNOTSUPP;
+}
+
+static void f2fs_swap_deactivate(struct file *file)
+{
+}
+#endif
+
const struct address_space_operations f2fs_dblock_aops = {
.readpage = f2fs_read_data_page,
.readpages = f2fs_read_data_pages,
@@ -2695,25 +3173,29 @@
.releasepage = f2fs_release_page,
.direct_IO = f2fs_direct_IO,
.bmap = f2fs_bmap,
+ .swap_activate = f2fs_swap_activate,
+ .swap_deactivate = f2fs_swap_deactivate,
#ifdef CONFIG_MIGRATION
.migratepage = f2fs_migrate_page,
#endif
};
-void f2fs_clear_radix_tree_dirty_tag(struct page *page)
+void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
struct address_space *mapping = page_mapping(page);
unsigned long flags;
xa_lock_irqsave(&mapping->i_pages, flags);
- radix_tree_tag_clear(&mapping->i_pages, page_index(page),
+ __xa_clear_mark(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_DIRTY);
xa_unlock_irqrestore(&mapping->i_pages, flags);
}
int __init f2fs_init_post_read_processing(void)
{
- bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
+ bio_post_read_ctx_cache =
+ kmem_cache_create("f2fs_bio_post_read_ctx",
+ sizeof(struct bio_post_read_ctx), 0, 0, NULL);
if (!bio_post_read_ctx_cache)
goto fail;
bio_post_read_ctx_pool =
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 214a968..9b0bedd 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs debugging statistics
*
@@ -5,10 +6,6 @@
* http://www.samsung.com/
* Copyright (c) 2012 Linux Foundation
* Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -30,8 +27,15 @@
static void update_general_status(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
int i;
+ /* these will be changed if online resize is done */
+ si->main_area_segs = le32_to_cpu(raw_super->segment_count_main);
+ si->main_area_sections = le32_to_cpu(raw_super->section_count);
+ si->main_area_zones = si->main_area_sections /
+ le32_to_cpu(raw_super->secs_per_zone);
+
/* validation check of the segment numbers */
si->hit_largest = atomic64_read(&sbi->read_hit_largest);
si->hit_cached = atomic64_read(&sbi->read_hit_cached);
@@ -56,21 +60,26 @@
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
+ si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ);
+ si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
- if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
+ si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA);
+ si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE);
+ si->nr_rd_meta = get_pages(sbi, F2FS_RD_META);
+ if (SM_I(sbi)->fcc_info) {
si->nr_flushed =
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
- atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
+ atomic_read(&SM_I(sbi)->fcc_info->queued_flush);
si->flush_list_empty =
llist_empty(&SM_I(sbi)->fcc_info->issue_list);
}
- if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
+ if (SM_I(sbi)->dcc_info) {
si->nr_discarded =
atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
si->nr_discarding =
- atomic_read(&SM_I(sbi)->dcc_info->issing_discard);
+ atomic_read(&SM_I(sbi)->dcc_info->queued_discard);
si->nr_discard_cmd =
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
@@ -94,8 +103,10 @@
si->free_secs = free_sections(sbi);
si->prefree_count = prefree_segments(sbi);
si->dirty_count = dirty_segments(sbi);
- si->node_pages = NODE_MAPPING(sbi)->nrpages;
- si->meta_pages = META_MAPPING(sbi)->nrpages;
+ if (sbi->node_inode)
+ si->node_pages = NODE_MAPPING(sbi)->nrpages;
+ if (sbi->meta_inode)
+ si->meta_pages = META_MAPPING(sbi)->nrpages;
si->nats = NM_I(sbi)->nat_cnt;
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
@@ -104,6 +115,8 @@
si->avail_nids = NM_I(sbi)->available_nids;
si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->bg_gc = sbi->bg_gc;
+ si->io_skip_bggc = sbi->io_skip_bggc;
+ si->other_skip_bggc = sbi->other_skip_bggc;
si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC];
si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC];
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
@@ -121,6 +134,9 @@
si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
}
+ for (i = META_CP; i < META_MAX; i++)
+ si->meta_count[i] = atomic_read(&sbi->meta_count[i]);
+
for (i = 0; i < 2; i++) {
si->segment_count[i] = sbi->segment_count[i];
si->block_count[i] = sbi->block_count[i];
@@ -168,7 +184,6 @@
static void update_mem_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
- unsigned npages;
int i;
if (si->base_mem)
@@ -190,10 +205,9 @@
si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
- if (f2fs_discard_en(sbi))
- si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
+ si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += SIT_VBLOCK_MAP_SIZE;
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
@@ -252,10 +266,14 @@
sizeof(struct extent_node);
si->page_mem = 0;
- npages = NODE_MAPPING(sbi)->nrpages;
- si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
- npages = META_MAPPING(sbi)->nrpages;
- si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ if (sbi->node_inode) {
+ unsigned npages = NODE_MAPPING(sbi)->nrpages;
+ si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ }
+ if (sbi->meta_inode) {
+ unsigned npages = META_MAPPING(sbi)->nrpages;
+ si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ }
}
static int stat_show(struct seq_file *s, void *v)
@@ -271,7 +289,8 @@
seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
si->sbi->sb->s_bdev, i++,
f2fs_readonly(si->sbi->sb) ? "RO": "RW",
- f2fs_cp_error(si->sbi) ? "Error": "Good");
+ is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
+ "Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good"));
seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
si->sit_area_segs, si->nat_area_segs);
seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -333,6 +352,13 @@
si->prefree_count, si->free_segs, si->free_secs);
seq_printf(s, "CP calls: %d (BG: %d)\n",
si->cp_count, si->bg_cp_count);
+ seq_printf(s, " - cp blocks : %u\n", si->meta_count[META_CP]);
+ seq_printf(s, " - sit blocks : %u\n",
+ si->meta_count[META_SIT]);
+ seq_printf(s, " - nat blocks : %u\n",
+ si->meta_count[META_NAT]);
+ seq_printf(s, " - ssa blocks : %u\n",
+ si->meta_count[META_SSA]);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d (%d)\n",
@@ -349,6 +375,8 @@
si->skipped_atomic_files[BG_GC] +
si->skipped_atomic_files[FG_GC],
si->skipped_atomic_files[BG_GC]);
+ seq_printf(s, "BG skip : IO: %u, Other: %u\n",
+ si->io_skip_bggc, si->other_skip_bggc);
seq_puts(s, "\nExtent Cache:\n");
seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
si->hit_largest, si->hit_cached,
@@ -360,7 +388,11 @@
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
+ seq_printf(s, " - DIO (R: %4d, W: %4d)\n",
+ si->nr_dio_read, si->nr_dio_write);
+ seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n",
+ si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta);
+ seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
si->nr_wb_cp_data, si->nr_wb_data,
si->nr_flushing, si->nr_flushed,
@@ -428,23 +460,13 @@
return 0;
}
-static int stat_open(struct inode *inode, struct file *file)
-{
- return single_open(file, stat_show, inode->i_private);
-}
-
-static const struct file_operations stat_fops = {
- .owner = THIS_MODULE,
- .open = stat_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(stat);
int f2fs_build_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_stat_info *si;
+ int i;
si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
if (!si)
@@ -470,6 +492,8 @@
atomic_set(&sbi->inline_inode, 0);
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->inplace_count, 0);
+ for (i = META_CP; i < META_MAX; i++)
+ atomic_set(&sbi->meta_count[i], 0);
atomic_set(&sbi->aw_cnt, 0);
atomic_set(&sbi->vw_cnt, 0);
@@ -491,33 +515,19 @@
list_del(&si->stat_list);
mutex_unlock(&f2fs_stat_mutex);
- kfree(si);
+ kvfree(si);
}
-int __init f2fs_create_root_stats(void)
+void __init f2fs_create_root_stats(void)
{
- struct dentry *file;
-
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
- if (!f2fs_debugfs_root)
- return -ENOMEM;
- file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
- NULL, &stat_fops);
- if (!file) {
- debugfs_remove(f2fs_debugfs_root);
- f2fs_debugfs_root = NULL;
- return -ENOMEM;
- }
-
- return 0;
+ debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL,
+ &stat_fops);
}
void f2fs_destroy_root_stats(void)
{
- if (!f2fs_debugfs_root)
- return;
-
debugfs_remove_recursive(f2fs_debugfs_root);
f2fs_debugfs_root = NULL;
}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index ecc3a4e..4033778 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1,16 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/dir.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/sched/signal.h>
+#include <linux/unicode.h>
#include "f2fs.h"
#include "node.h"
#include "acl.h"
@@ -84,7 +82,8 @@
return bidx;
}
-static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
+static struct f2fs_dir_entry *find_in_block(struct inode *dir,
+ struct page *dentry_page,
struct fscrypt_name *fname,
f2fs_hash_t namehash,
int *max_slots,
@@ -96,7 +95,7 @@
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
- make_dentry_ptr_block(NULL, &d, dentry_blk);
+ make_dentry_ptr_block(dir, &d, dentry_blk);
de = f2fs_find_target_dentry(fname, namehash, max_slots, &d);
if (de)
*res_page = dentry_page;
@@ -104,14 +103,116 @@
return de;
}
+#ifdef CONFIG_UNICODE
+/*
+ * Test whether a case-insensitive directory entry matches the filename
+ * being searched for.
+ *
+ * Returns: 0 if the directory entry matches, more than 0 if it
+ * doesn't match or less than zero on error.
+ */
+int f2fs_ci_compare(const struct inode *parent, const struct qstr *name,
+ const struct qstr *entry, bool quick)
+{
+ const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb);
+ const struct unicode_map *um = sbi->s_encoding;
+ int ret;
+
+ if (quick)
+ ret = utf8_strncasecmp_folded(um, name, entry);
+ else
+ ret = utf8_strncasecmp(um, name, entry);
+
+ if (ret < 0) {
+ /* Handle invalid character sequence as either an error
+ * or as an opaque byte sequence.
+ */
+ if (f2fs_has_strict_mode(sbi))
+ return -EINVAL;
+
+ if (name->len != entry->len)
+ return 1;
+
+ return !!memcmp(name->name, entry->name, name->len);
+ }
+
+ return ret;
+}
+
+static void f2fs_fname_setup_ci_filename(struct inode *dir,
+ const struct qstr *iname,
+ struct fscrypt_str *cf_name)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+
+ if (!IS_CASEFOLDED(dir)) {
+ cf_name->name = NULL;
+ return;
+ }
+
+ cf_name->name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, GFP_NOFS);
+ if (!cf_name->name)
+ return;
+
+ cf_name->len = utf8_casefold(sbi->s_encoding,
+ iname, cf_name->name,
+ F2FS_NAME_LEN);
+ if ((int)cf_name->len <= 0) {
+ kvfree(cf_name->name);
+ cf_name->name = NULL;
+ }
+}
+#endif
+
+static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d,
+ struct f2fs_dir_entry *de,
+ struct fscrypt_name *fname,
+ struct fscrypt_str *cf_str,
+ unsigned long bit_pos,
+ f2fs_hash_t namehash)
+{
+#ifdef CONFIG_UNICODE
+ struct inode *parent = d->inode;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(parent);
+ struct qstr entry;
+#endif
+
+ if (de->hash_code != namehash)
+ return false;
+
+#ifdef CONFIG_UNICODE
+ entry.name = d->filename[bit_pos];
+ entry.len = de->name_len;
+
+ if (sbi->s_encoding && IS_CASEFOLDED(parent)) {
+ if (cf_str->name) {
+ struct qstr cf = {.name = cf_str->name,
+ .len = cf_str->len};
+ return !f2fs_ci_compare(parent, &cf, &entry, true);
+ }
+ return !f2fs_ci_compare(parent, fname->usr_fname, &entry,
+ false);
+ }
+#endif
+ if (fscrypt_match_name(fname, d->filename[bit_pos],
+ le16_to_cpu(de->name_len)))
+ return true;
+ return false;
+}
+
struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
f2fs_hash_t namehash, int *max_slots,
struct f2fs_dentry_ptr *d)
{
struct f2fs_dir_entry *de;
+ struct fscrypt_str cf_str = { .name = NULL, .len = 0 };
unsigned long bit_pos = 0;
int max_len = 0;
+#ifdef CONFIG_UNICODE
+ f2fs_fname_setup_ci_filename(d->inode, fname->usr_fname, &cf_str);
+#endif
+
if (max_slots)
*max_slots = 0;
while (bit_pos < d->max) {
@@ -128,9 +229,7 @@
continue;
}
- if (de->hash_code == namehash &&
- fscrypt_match_name(fname, d->filename[bit_pos],
- le16_to_cpu(de->name_len)))
+ if (f2fs_match_name(d, de, fname, &cf_str, bit_pos, namehash))
goto found;
if (max_slots && max_len > *max_slots)
@@ -144,6 +243,10 @@
found:
if (max_slots && max_len > *max_slots)
*max_slots = max_len;
+
+#ifdef CONFIG_UNICODE
+ kvfree(cf_str.name);
+#endif
return de;
}
@@ -160,7 +263,7 @@
struct f2fs_dir_entry *de = NULL;
bool room = false;
int max_slots;
- f2fs_hash_t namehash = f2fs_dentry_hash(&name, fname);
+ f2fs_hash_t namehash = f2fs_dentry_hash(dir, &name, fname);
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
@@ -182,8 +285,8 @@
}
}
- de = find_in_block(dentry_page, fname, namehash, &max_slots,
- res_page);
+ de = find_in_block(dir, dentry_page, fname, namehash,
+ &max_slots, res_page);
if (de)
break;
@@ -221,9 +324,8 @@
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
- f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
- "Corrupted max_depth of %lu: %u",
- dir->i_ino, max_depth);
+ f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %lu: %u",
+ dir->i_ino, max_depth);
max_depth = MAX_DIR_HASH_DEPTH;
f2fs_i_depth_write(dir, max_depth);
}
@@ -254,6 +356,14 @@
struct fscrypt_name fname;
int err;
+#ifdef CONFIG_UNICODE
+ if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) &&
+ utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) {
+ *res_page = ERR_PTR(-EINVAL);
+ return NULL;
+ }
+#endif
+
err = fscrypt_setup_filename(dir, child, 1, &fname);
if (err) {
if (err == -ENOENT)
@@ -296,7 +406,7 @@
{
enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
lock_page(page);
- f2fs_wait_on_page_writeback(page, type, true);
+ f2fs_wait_on_page_writeback(page, type, true, true);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode->i_mode);
set_page_dirty(page);
@@ -310,7 +420,7 @@
{
struct f2fs_inode *ri;
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
@@ -388,7 +498,7 @@
if (err)
goto put_error;
- if ((f2fs_encrypted_inode(dir) || dummy_encrypt) &&
+ if ((IS_ENCRYPTED(dir) || dummy_encrypt) &&
f2fs_may_encrypt(inode)) {
err = fscrypt_inherit_context(dir, inode, page, false);
if (err)
@@ -402,7 +512,7 @@
if (new_name) {
init_dent_inode(new_name, page);
- if (f2fs_encrypted_inode(dir))
+ if (IS_ENCRYPTED(dir))
file_set_enc_name(inode);
}
@@ -508,7 +618,7 @@
level = 0;
slots = GET_DENTRY_SLOTS(new_name->len);
- dentry_hash = f2fs_dentry_hash(new_name, NULL);
+ dentry_hash = f2fs_dentry_hash(dir, new_name, NULL);
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
@@ -553,7 +663,7 @@
++level;
goto start;
add_dentry:
- f2fs_wait_on_page_writeback(dentry_page, DATA, true);
+ f2fs_wait_on_page_writeback(dentry_page, DATA, true, true);
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
@@ -572,6 +682,11 @@
if (inode) {
f2fs_i_pino_write(inode, dir->i_ino);
+
+ /* synchronize inode page's data from inode cache */
+ if (is_inode_flag_set(inode, FI_NEW_INODE))
+ f2fs_update_inode(inode, page);
+
f2fs_put_page(page, 1);
}
@@ -658,9 +773,9 @@
f2fs_put_page(page, 1);
clear_inode_flag(inode, FI_NEW_INODE);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
fail:
up_write(&F2FS_I(inode)->i_sem);
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return err;
}
@@ -708,7 +823,7 @@
return f2fs_delete_inline_entry(dentry, page, dir, inode);
lock_page(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
@@ -729,10 +844,11 @@
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!f2fs_truncate_hole(dir, page->index, page->index + 1)) {
- f2fs_clear_radix_tree_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(page);
clear_page_dirty_for_io(page);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
ClearPageUptodate(page);
+ clear_cold_data(page);
inode_dec_dirty_pages(dir);
f2fs_remove_dirty_inode(dir);
}
@@ -784,9 +900,15 @@
struct f2fs_dir_entry *de = NULL;
struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode);
+ struct blk_plug plug;
+ bool readdir_ra = sbi->readdir_ra == 1;
+ int err = 0;
bit_pos = ((unsigned long)ctx->pos % d->max);
+ if (readdir_ra)
+ blk_start_plug(&plug);
+
while (bit_pos < d->max) {
bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos);
if (bit_pos >= d->max)
@@ -796,6 +918,10 @@
if (de->name_len == 0) {
bit_pos++;
ctx->pos = start_pos + bit_pos;
+ printk_ratelimited(
+ "%s, invalid namelen(0), ino:%u, run fsck to fix.",
+ KERN_WARNING, le32_to_cpu(de->ino));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
continue;
}
@@ -804,31 +930,45 @@
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
- if (f2fs_encrypted_inode(d->inode)) {
+ /* check memory boundary before moving forward */
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ if (unlikely(bit_pos > d->max ||
+ le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) {
+ f2fs_warn(sbi, "%s: corrupted namelen=%d, run fsck to fix.",
+ __func__, le16_to_cpu(de->name_len));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+
+ if (IS_ENCRYPTED(d->inode)) {
int save_len = fstr->len;
- int err;
err = fscrypt_fname_disk_to_usr(d->inode,
- (u32)de->hash_code, 0,
- &de_name, fstr);
+ (u32)le32_to_cpu(de->hash_code),
+ 0, &de_name, fstr);
if (err)
- return err;
+ goto out;
de_name = *fstr;
fstr->len = save_len;
}
if (!dir_emit(ctx, de_name.name, de_name.len,
- le32_to_cpu(de->ino), d_type))
- return 1;
+ le32_to_cpu(de->ino), d_type)) {
+ err = 1;
+ goto out;
+ }
- if (sbi->readdir_ra == 1)
+ if (readdir_ra)
f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
- bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
ctx->pos = start_pos + bit_pos;
}
- return 0;
+out:
+ if (readdir_ra)
+ blk_finish_plug(&plug);
+ return err;
}
static int f2fs_readdir(struct file *file, struct dir_context *ctx)
@@ -844,7 +984,7 @@
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
int err = 0;
- if (f2fs_encrypted_inode(inode)) {
+ if (IS_ENCRYPTED(inode)) {
err = fscrypt_get_encryption_info(inode);
if (err && err != -ENOKEY)
goto out;
@@ -873,7 +1013,7 @@
page_cache_sync_readahead(inode->i_mapping, ra, file, n,
min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
- dentry_page = f2fs_get_lock_data_page(inode, n, false);
+ dentry_page = f2fs_find_data_page(inode, n);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
if (err == -ENOENT) {
@@ -891,11 +1031,11 @@
err = f2fs_fill_dentries(ctx, &d,
n * NR_DENTRY_IN_BLOCK, &fstr);
if (err) {
- f2fs_put_page(dentry_page, 1);
+ f2fs_put_page(dentry_page, 0);
break;
}
- f2fs_put_page(dentry_page, 1);
+ f2fs_put_page(dentry_page, 0);
}
out_free:
fscrypt_fname_free_buffer(&fstr);
@@ -906,7 +1046,7 @@
static int f2fs_dir_open(struct inode *inode, struct file *filp)
{
- if (f2fs_encrypted_inode(inode))
+ if (IS_ENCRYPTED(inode))
return fscrypt_get_encryption_info(inode) ? -EACCES : 0;
return 0;
}
@@ -922,3 +1062,50 @@
.compat_ioctl = f2fs_compat_ioctl,
#endif
};
+
+#ifdef CONFIG_UNICODE
+static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name)
+{
+ struct qstr qstr = {.name = str, .len = len };
+
+ if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) {
+ if (len != name->len)
+ return -1;
+ return memcmp(str, name, len);
+ }
+
+ return f2fs_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
+}
+
+static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+ const struct unicode_map *um = sbi->s_encoding;
+ unsigned char *norm;
+ int len, ret = 0;
+
+ if (!IS_CASEFOLDED(dentry->d_inode))
+ return 0;
+
+ norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC);
+ if (!norm)
+ return -ENOMEM;
+
+ len = utf8_casefold(um, str, norm, PATH_MAX);
+ if (len < 0) {
+ if (f2fs_has_strict_mode(sbi))
+ ret = -EINVAL;
+ goto out;
+ }
+ str->hash = full_name_hash(dentry, norm, len);
+out:
+ kvfree(norm);
+ return ret;
+}
+
+const struct dentry_operations f2fs_dentry_ops = {
+ .d_hash = f2fs_d_hash,
+ .d_compare = f2fs_d_compare,
+};
+#endif
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index a70cd25..e600784 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs extent cache support
*
@@ -5,10 +6,6 @@
* Copyright (c) 2015 Samsung Electronics
* Authors: Jaegeuk Kim <jaegeuk@kernel.org>
* Chao Yu <chao2.yu@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -30,10 +27,10 @@
return NULL;
}
-static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
+static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root,
unsigned int ofs)
{
- struct rb_node *node = root->rb_node;
+ struct rb_node *node = root->rb_root.rb_node;
struct rb_entry *re;
while (node) {
@@ -49,7 +46,7 @@
return NULL;
}
-struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
struct rb_entry *cached_re, unsigned int ofs)
{
struct rb_entry *re;
@@ -62,22 +59,25 @@
}
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
- struct rb_root *root, struct rb_node **parent,
- unsigned int ofs)
+ struct rb_root_cached *root,
+ struct rb_node **parent,
+ unsigned int ofs, bool *leftmost)
{
- struct rb_node **p = &root->rb_node;
+ struct rb_node **p = &root->rb_root.rb_node;
struct rb_entry *re;
while (*p) {
*parent = *p;
re = rb_entry(*parent, struct rb_entry, rb_node);
- if (ofs < re->ofs)
+ if (ofs < re->ofs) {
p = &(*p)->rb_left;
- else if (ofs >= re->ofs + re->len)
+ } else if (ofs >= re->ofs + re->len) {
p = &(*p)->rb_right;
- else
+ *leftmost = false;
+ } else {
f2fs_bug_on(sbi, 1);
+ }
}
return p;
@@ -92,16 +92,16 @@
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
-struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
struct rb_entry *cached_re,
unsigned int ofs,
struct rb_entry **prev_entry,
struct rb_entry **next_entry,
struct rb_node ***insert_p,
struct rb_node **insert_parent,
- bool force)
+ bool force, bool *leftmost)
{
- struct rb_node **pnode = &root->rb_node;
+ struct rb_node **pnode = &root->rb_root.rb_node;
struct rb_node *parent = NULL, *tmp_node;
struct rb_entry *re = cached_re;
@@ -110,7 +110,7 @@
*prev_entry = NULL;
*next_entry = NULL;
- if (RB_EMPTY_ROOT(root))
+ if (RB_EMPTY_ROOT(&root->rb_root))
return NULL;
if (re) {
@@ -118,16 +118,22 @@
goto lookup_neighbors;
}
+ if (leftmost)
+ *leftmost = true;
+
while (*pnode) {
parent = *pnode;
re = rb_entry(*pnode, struct rb_entry, rb_node);
- if (ofs < re->ofs)
+ if (ofs < re->ofs) {
pnode = &(*pnode)->rb_left;
- else if (ofs >= re->ofs + re->len)
+ } else if (ofs >= re->ofs + re->len) {
pnode = &(*pnode)->rb_right;
- else
+ if (leftmost)
+ *leftmost = false;
+ } else {
goto lookup_neighbors;
+ }
}
*insert_p = pnode;
@@ -160,10 +166,10 @@
}
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root *root)
+ struct rb_root_cached *root)
{
#ifdef CONFIG_F2FS_CHECK_FS
- struct rb_node *cur = rb_first(root), *next;
+ struct rb_node *cur = rb_first_cached(root), *next;
struct rb_entry *cur_re, *next_re;
if (!cur)
@@ -178,10 +184,9 @@
next_re = rb_entry(next, struct rb_entry, rb_node);
if (cur_re->ofs + cur_re->len > next_re->ofs) {
- f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, "
- "cur(%u, %u) next(%u, %u)",
- cur_re->ofs, cur_re->len,
- next_re->ofs, next_re->len);
+ f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
+ cur_re->ofs, cur_re->len,
+ next_re->ofs, next_re->len);
return false;
}
@@ -196,7 +201,8 @@
static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
- struct rb_node *parent, struct rb_node **p)
+ struct rb_node *parent, struct rb_node **p,
+ bool leftmost)
{
struct extent_node *en;
@@ -209,7 +215,7 @@
en->et = et;
rb_link_node(&en->rb_node, parent, p);
- rb_insert_color(&en->rb_node, &et->root);
+ rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
atomic_inc(&et->node_cnt);
atomic_inc(&sbi->total_ext_node);
return en;
@@ -218,7 +224,7 @@
static void __detach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
- rb_erase(&en->rb_node, &et->root);
+ rb_erase_cached(&en->rb_node, &et->root);
atomic_dec(&et->node_cnt);
atomic_dec(&sbi->total_ext_node);
@@ -257,7 +263,7 @@
f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
memset(et, 0, sizeof(struct extent_tree));
et->ino = ino;
- et->root = RB_ROOT;
+ et->root = RB_ROOT_CACHED;
et->cached_en = NULL;
rwlock_init(&et->lock);
INIT_LIST_HEAD(&et->list);
@@ -278,10 +284,10 @@
static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei)
{
- struct rb_node **p = &et->root.rb_node;
+ struct rb_node **p = &et->root.rb_root.rb_node;
struct extent_node *en;
- en = __attach_extent_node(sbi, et, ei, NULL, p);
+ en = __attach_extent_node(sbi, et, ei, NULL, p, true);
if (!en)
return NULL;
@@ -297,7 +303,7 @@
struct extent_node *en;
unsigned int count = atomic_read(&et->node_cnt);
- node = rb_first(&et->root);
+ node = rb_first_cached(&et->root);
while (node) {
next = rb_next(node);
en = rb_entry(node, struct extent_node, rb_node);
@@ -455,7 +461,8 @@
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct rb_node **insert_p,
- struct rb_node *insert_parent)
+ struct rb_node *insert_parent,
+ bool leftmost)
{
struct rb_node **p;
struct rb_node *parent = NULL;
@@ -467,9 +474,12 @@
goto do_insert;
}
- p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
+ leftmost = true;
+
+ p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent,
+ ei->fofs, &leftmost);
do_insert:
- en = __attach_extent_node(sbi, et, ei, parent, p);
+ en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
if (!en)
return NULL;
@@ -495,6 +505,7 @@
unsigned int end = fofs + len;
unsigned int pos = (unsigned int)fofs;
bool updated = false;
+ bool leftmost = false;
if (!et)
return;
@@ -522,7 +533,8 @@
(struct rb_entry *)et->cached_en, fofs,
(struct rb_entry **)&prev_en,
(struct rb_entry **)&next_en,
- &insert_p, &insert_parent, false);
+ &insert_p, &insert_parent, false,
+ &leftmost);
if (!en)
en = next_en;
@@ -549,7 +561,7 @@
end - dei.fofs + dei.blk,
org_end - end);
en1 = __insert_extent_tree(sbi, et, &ei,
- NULL, NULL);
+ NULL, NULL, true);
next_en = en1;
} else {
en->ei.fofs = end;
@@ -590,7 +602,7 @@
set_extent_info(&ei, fofs, blkaddr, len);
if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
__insert_extent_tree(sbi, et, &ei,
- insert_p, insert_parent);
+ insert_p, insert_parent, leftmost);
/* give up extent_cache, if split and small updates happen */
if (dei.len >= 1 &&
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ecb7351..4024790 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1,16 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/f2fs.h
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef _LINUX_F2FS_H
#define _LINUX_F2FS_H
+#include <linux/uio.h>
#include <linux/types.h>
#include <linux/page-flags.h>
#include <linux/buffer_head.h>
@@ -26,8 +24,8 @@
#include <linux/quotaops.h>
#include <crypto/hash.h>
-#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION)
#include <linux/fscrypt.h>
+#include <linux/fsverity.h>
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
@@ -53,9 +51,10 @@
FAULT_DIR_DEPTH,
FAULT_EVICT_INODE,
FAULT_TRUNCATE,
- FAULT_IO,
+ FAULT_READ_IO,
FAULT_CHECKPOINT,
FAULT_DISCARD,
+ FAULT_WRITE_IO,
FAULT_MAX,
};
@@ -68,7 +67,7 @@
unsigned int inject_type;
};
-extern char *f2fs_fault_name[FAULT_MAX];
+extern const char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
@@ -100,6 +99,7 @@
#define F2FS_MOUNT_QUOTA 0x00400000
#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -137,6 +137,9 @@
int alloc_mode; /* segment allocation policy */
int fsync_mode; /* fsync policy */
bool test_dummy_encryption; /* test dummy encryption */
+ block_t unusable_cap; /* Amount of space allowed to be
+ * unusable when disabling checkpoint
+ */
};
#define F2FS_FEATURE_ENCRYPT 0x0001
@@ -149,14 +152,17 @@
#define F2FS_FEATURE_QUOTA_INO 0x0080
#define F2FS_FEATURE_INODE_CRTIME 0x0100
#define F2FS_FEATURE_LOST_FOUND 0x0200
-#define F2FS_FEATURE_VERITY 0x0400 /* reserved */
+#define F2FS_FEATURE_VERITY 0x0400
+#define F2FS_FEATURE_SB_CHKSUM 0x0800
+#define F2FS_FEATURE_CASEFOLD 0x1000
-#define F2FS_HAS_FEATURE(sb, mask) \
- ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
-#define F2FS_SET_FEATURE(sb, mask) \
- (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask))
-#define F2FS_CLEAR_FEATURE(sb, mask) \
- (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask))
+#define __F2FS_HAS_FEATURE(raw_super, mask) \
+ ((raw_super->feature & cpu_to_le32(mask)) != 0)
+#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask)
+#define F2FS_SET_FEATURE(sbi, mask) \
+ (sbi->raw_super->feature |= cpu_to_le32(mask))
+#define F2FS_CLEAR_FEATURE(sbi, mask) \
+ (sbi->raw_super->feature &= ~cpu_to_le32(mask))
/*
* Default values for user and/or group using reserved blocks
@@ -178,6 +184,7 @@
#define CP_RECOVERY 0x00000008
#define CP_DISCARD 0x00000010
#define CP_TRIMMED 0x00000020
+#define CP_PAUSE 0x00000040
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
@@ -187,6 +194,9 @@
#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
+#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
+#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */
+#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
struct cp_control {
int reason;
@@ -203,8 +213,16 @@
META_NAT,
META_SIT,
META_SSA,
+ META_MAX,
META_POR,
- DATA_GENERIC,
+ DATA_GENERIC, /* check range only */
+ DATA_GENERIC_ENHANCE, /* strong check on range and segment bitmap */
+ DATA_GENERIC_ENHANCE_READ, /*
+ * strong check on range and segment
+ * bitmap but no warning due to race
+ * condition of read on truncated area
+ * by extent_cache
+ */
META_GENERIC,
};
@@ -249,7 +267,7 @@
/* max discard pend list number */
#define MAX_PLIST_NUM 512
#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
- (MAX_PLIST_NUM - 1) : (blk_num - 1))
+ (MAX_PLIST_NUM - 1) : ((blk_num) - 1))
enum {
D_PREP, /* initial */
@@ -280,7 +298,7 @@
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
- unsigned char issuing; /* issuing discard */
+ unsigned char queued; /* queued discard */
int error; /* bio error */
spinlock_t lock; /* for state/bio_ref updating */
unsigned short bio_ref; /* bio reference count */
@@ -305,6 +323,7 @@
bool sync; /* submit discard with REQ_SYNC flag */
bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
+ int timeout; /* discard timeout for put_super */
};
struct discard_cmd_control {
@@ -322,9 +341,9 @@
unsigned int undiscard_blks; /* # of undiscard blocks */
unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
- atomic_t issing_discard; /* # of issing discard */
+ atomic_t queued_discard; /* # of queued discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
- struct rb_root root; /* root of discard rb-tree */
+ struct rb_root_cached root; /* root of discard rb-tree */
bool rbtree_check; /* config for consistence check */
};
@@ -398,6 +417,10 @@
#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32)
#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32)
#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15)
+#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64)
+
+#define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL
+#define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL
#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
@@ -412,6 +435,7 @@
#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */
#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */
#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */
+#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
@@ -450,7 +474,6 @@
/* for inline stuff */
#define DEF_INLINE_RESERVED_SIZE 1
-#define DEF_MIN_INLINE_SIZE 1
static inline int get_extra_isize(struct inode *inode);
static inline int get_inline_xattr_addrs(struct inode *inode);
#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
@@ -462,8 +485,8 @@
#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
BITS_PER_BYTE + 1))
-#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \
- BITS_PER_BYTE - 1) / BITS_PER_BYTE)
+#define INLINE_DENTRY_BITMAP_SIZE(inode) \
+ DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE)
#define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
NR_INLINE_DENTRY(inode) + \
@@ -527,6 +550,9 @@
#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
+/* maximum retry quota flush count */
+#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8
+
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
@@ -550,23 +576,15 @@
};
struct extent_node {
- struct rb_node rb_node;
- union {
- struct {
- unsigned int fofs;
- unsigned int len;
- u32 blk;
- };
- struct extent_info ei; /* extent info */
-
- };
+ struct rb_node rb_node; /* rb node located in rb-tree */
+ struct extent_info ei; /* extent info */
struct list_head list; /* node in global extent list of sbi */
struct extent_tree *et; /* extent tree pointer */
};
struct extent_tree {
nid_t ino; /* inode number */
- struct rb_root root; /* root of extent info rb-tree */
+ struct rb_root_cached root; /* root of extent info rb-tree */
struct extent_node *cached_en; /* recently accessed extent node */
struct extent_info largest; /* largested extent info */
struct list_head list; /* to be used by sbi->zombie_list */
@@ -594,6 +612,7 @@
pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */
pgoff_t *m_next_extent; /* point to next possible extent */
int m_seg_type;
+ bool m_may_create; /* indicate it is from write path */
};
/* for flag in get_data_block */
@@ -601,6 +620,7 @@
F2FS_GET_BLOCK_DEFAULT,
F2FS_GET_BLOCK_FIEMAP,
F2FS_GET_BLOCK_BMAP,
+ F2FS_GET_BLOCK_DIO,
F2FS_GET_BLOCK_PRE_DIO,
F2FS_GET_BLOCK_PRE_AIO,
F2FS_GET_BLOCK_PRECACHE,
@@ -615,7 +635,7 @@
#define FADVISE_ENC_NAME_BIT 0x08
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
-#define FADVISE_VERITY_BIT 0x40 /* reserved */
+#define FADVISE_VERITY_BIT 0x40
#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
@@ -635,6 +655,8 @@
#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT)
#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT)
#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT)
+#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
+#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
#define DEF_DIR_LEVEL 0
@@ -881,7 +903,7 @@
struct task_struct *f2fs_issue_flush; /* flush thread */
wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
atomic_t issued_flush; /* # of issued flushes */
- atomic_t issing_flush; /* # of issing flushes */
+ atomic_t queued_flush; /* # of queued flushes */
struct llist_head issue_list; /* list for command issue */
struct llist_node *dispatch_list; /* list for command dispatch */
};
@@ -945,6 +967,11 @@
F2FS_DIRTY_IMETA,
F2FS_WB_CP_DATA,
F2FS_WB_DATA,
+ F2FS_RD_DATA,
+ F2FS_RD_NODE,
+ F2FS_RD_META,
+ F2FS_DIO_WRITE,
+ F2FS_DIO_READ,
NR_COUNT_TYPE,
};
@@ -1032,10 +1059,12 @@
bool submitted; /* indicate IO submission */
int need_lock; /* indicate we need to lock cp_rwsem */
bool in_list; /* indicate fio is in io_list */
- bool is_meta; /* indicate borrow meta inode mapping or not */
+ bool is_por; /* indicate IO is from recovery or not */
bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
+ struct bio **bio; /* bio for ipu */
+ sector_t *last_block; /* last block number in bio */
unsigned char version; /* version of the node */
};
@@ -1059,8 +1088,8 @@
block_t start_blk;
block_t end_blk;
#ifdef CONFIG_BLK_DEV_ZONED
- unsigned int nr_blkz; /* Total number of zones */
- u8 *blkz_type; /* Array of zones type */
+ unsigned int nr_blkz; /* Total number of zones */
+ unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
#endif
};
@@ -1090,11 +1119,21 @@
SBI_NEED_CP, /* need to checkpoint */
SBI_IS_SHUTDOWN, /* shutdown by ioctl */
SBI_IS_RECOVERED, /* recovered orphan/data */
+ SBI_CP_DISABLED, /* CP was disabled last mount */
+ SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */
+ SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
+ SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
+ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
+ SBI_IS_RESIZEFS, /* resizefs is in process */
};
enum {
CP_TIME,
REQ_TIME,
+ DISCARD_TIME,
+ GC_TIME,
+ DISABLE_TIME,
+ UMOUNT_DISCARD_TIMEOUT,
MAX_TIME,
};
@@ -1122,7 +1161,7 @@
FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */
};
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
#define DUMMY_ENCRYPTION_ENABLED(sbi) \
(unlikely(F2FS_OPTION(sbi).test_dummy_encryption))
#else
@@ -1137,6 +1176,10 @@
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
struct mutex writepages; /* mutex for writepages() */
+#ifdef CONFIG_UNICODE
+ struct unicode_map *s_encoding;
+ __u16 s_encoding_flags;
+#endif
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
@@ -1152,8 +1195,6 @@
/* for bio operations */
struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
- struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE];
- /* bio ordering for NODE/DATA */
/* keep migration IO order for LFS mode */
struct rw_semaphore io_order_lock;
mempool_t *write_io_dummy; /* Dummy pages */
@@ -1184,6 +1225,7 @@
/* for inode management */
struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */
spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */
+ struct mutex flush_lock; /* for flush exclusion */
/* for extent tree cache */
struct radix_tree_root extent_tree_root;/* cache extent cache entries */
@@ -1207,11 +1249,11 @@
unsigned int segs_per_sec; /* segments per section */
unsigned int secs_per_zone; /* sections per zone */
unsigned int total_sections; /* total section count */
+ struct mutex resize_mutex; /* for resize exclusion */
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
loff_t max_file_blocks; /* max block index of file */
int dir_level; /* directory level */
- unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */
int readdir_ra; /* readahead inode in readdir */
block_t user_block_count; /* # of user blocks */
@@ -1221,9 +1263,11 @@
block_t reserved_blocks; /* configurable reserved blocks */
block_t current_reserved_blocks; /* current reserved blocks */
- unsigned int nquota_files; /* # of quota sysfile */
+ /* Additional tracking for no checkpoint mode */
+ block_t unusable_block_count; /* # of blocks saved by last cp */
- u32 s_next_generation; /* for NFS support */
+ unsigned int nquota_files; /* # of quota sysfile */
+ struct rw_semaphore quota_sem; /* blocking cp for flags */
/* # of pages, see count_type */
atomic_t nr_pages[NR_COUNT_TYPE];
@@ -1243,6 +1287,7 @@
struct f2fs_gc_kthread *gc_thread; /* GC thread */
unsigned int cur_victim_sec; /* current victim section num */
unsigned int gc_mode; /* current GC state */
+ unsigned int next_victim_seg[2]; /* next segment in victim section */
/* for skip statistic */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
@@ -1252,6 +1297,8 @@
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
+ /* migration granularity of garbage collection, unit: segment */
+ unsigned int migration_granularity;
/*
* for stat information.
@@ -1259,6 +1306,7 @@
*/
#ifdef CONFIG_F2FS_STAT_FS
struct f2fs_stat_info *stat_info; /* FS status information */
+ atomic_t meta_count[META_MAX]; /* # of meta blocks */
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
atomic_t inplace_count; /* # of inplace update */
@@ -1274,6 +1322,8 @@
atomic_t max_aw_cnt; /* max # of atomic writes */
atomic_t max_vw_cnt; /* max # of volatile writes */
int bg_gc; /* background gc calls */
+ unsigned int io_skip_bggc; /* skip background gc for in-flight IO */
+ unsigned int other_skip_bggc; /* skip background gc for other reasons */
unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
#endif
spinlock_t stat_lock; /* lock for stat operations */
@@ -1307,10 +1357,17 @@
__u32 s_chksum_seed;
};
+struct f2fs_private_dio {
+ struct inode *inode;
+ void *orig_private;
+ bio_end_io_t *orig_end_io;
+ bool write;
+};
+
#ifdef CONFIG_F2FS_FAULT_INJECTION
-#define f2fs_show_injection_info(type) \
- printk("%sF2FS-fs : inject %s in %s of %pF\n", \
- KERN_INFO, f2fs_fault_name[type], \
+#define f2fs_show_injection_info(type) \
+ printk_ratelimited("%sF2FS-fs : inject %s in %s of %pS\n", \
+ KERN_INFO, f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
@@ -1337,6 +1394,17 @@
}
#endif
+/*
+ * Test if the mounted volume is a multi-device volume.
+ * - For a single regular disk volume, sbi->s_ndevs is 0.
+ * - For a single zoned disk volume, sbi->s_ndevs is 1.
+ * - For a multi-device volume, sbi->s_ndevs is always 2 or more.
+ */
+static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi)
+{
+ return sbi->s_ndevs > 1;
+}
+
/* For write statistics. Suppose sector size is 512 bytes,
* and the return value is in kbytes. s is of struct f2fs_sb_info.
*/
@@ -1346,7 +1414,15 @@
static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
{
- sbi->last_time[type] = jiffies;
+ unsigned long now = jiffies;
+
+ sbi->last_time[type] = now;
+
+ /* DISCARD_TIME and GC_TIME are based on REQ_TIME */
+ if (type == REQ_TIME) {
+ sbi->last_time[DISCARD_TIME] = now;
+ sbi->last_time[GC_TIME] = now;
+ }
}
static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
@@ -1356,16 +1432,18 @@
return time_after(jiffies, sbi->last_time[type] + interval);
}
-static inline bool is_idle(struct f2fs_sb_info *sbi)
+static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
+ int type)
{
- struct block_device *bdev = sbi->sb->s_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
- struct request_list *rl = &q->root_rl;
+ unsigned long interval = sbi->interval_time[type] * HZ;
+ unsigned int wait_ms = 0;
+ long delta;
- if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
- return false;
+ delta = (sbi->last_time[type] + interval) - jiffies;
+ if (delta > 0)
+ wait_ms = jiffies_to_msecs(delta);
- return f2fs_time_over(sbi, REQ_TIME);
+ return wait_ms;
}
/*
@@ -1383,7 +1461,6 @@
BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx));
desc.shash.tfm = sbi->s_chksum_driver;
- desc.shash.flags = 0;
*(u32 *)desc.ctx = crc;
err = crypto_shash_update(&desc.shash, address, length);
@@ -1432,7 +1509,7 @@
static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
{
- return F2FS_M_SB(page->mapping);
+ return F2FS_M_SB(page_file_mapping(page));
}
static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
@@ -1574,16 +1651,23 @@
static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
{
unsigned long flags;
+ unsigned char *nat_bits;
- set_sbi_flag(sbi, SBI_NEED_FSCK);
+ /*
+ * In order to re-enable nat_bits we need to call fsck.f2fs by
+ * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost,
+ * so let's rely on regular fsck or unclean shutdown.
+ */
if (lock)
spin_lock_irqsave(&sbi->cp_lock, flags);
__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
- kfree(NM_I(sbi)->nat_bits);
+ nat_bits = NM_I(sbi)->nat_bits;
NM_I(sbi)->nat_bits = NULL;
if (lock)
spin_unlock_irqrestore(&sbi->cp_lock, flags);
+
+ kvfree(nat_bits);
}
static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
@@ -1690,7 +1774,7 @@
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
release = *count;
- goto enospc;
+ goto release_quota;
}
/*
@@ -1706,7 +1790,12 @@
if (!__allow_reserved_blocks(sbi, inode, true))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (avail_user_block_count > sbi->unusable_block_count)
+ avail_user_block_count -= sbi->unusable_block_count;
+ else
+ avail_user_block_count = 0;
+ }
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
if (diff > *count)
@@ -1730,10 +1819,25 @@
enospc:
percpu_counter_sub(&sbi->alloc_valid_block_count, release);
+release_quota:
dquot_release_reservation_block(inode, release);
return -ENOSPC;
}
+__printf(2, 3)
+void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...);
+
+#define f2fs_err(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__)
+#define f2fs_warn(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__)
+#define f2fs_notice(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__)
+#define f2fs_info(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__)
+#define f2fs_debug(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__)
+
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
block_t count)
@@ -1742,13 +1846,20 @@
spin_lock(&sbi->stat_lock);
f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
- f2fs_bug_on(sbi, inode->i_blocks < sectors);
sbi->total_valid_block_count -= (block_t)count;
if (sbi->reserved_blocks &&
sbi->current_reserved_blocks < sbi->reserved_blocks)
sbi->current_reserved_blocks = min(sbi->reserved_blocks,
sbi->current_reserved_blocks + count);
spin_unlock(&sbi->stat_lock);
+ if (unlikely(inode->i_blocks < sectors)) {
+ f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks,
+ (unsigned long long)sectors);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return;
+ }
f2fs_i_blocks_write(inode, count, false, true);
}
@@ -1756,11 +1867,12 @@
{
atomic_inc(&sbi->nr_pages[count_type]);
- if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
- count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA)
- return;
-
- set_sbi_flag(sbi, SBI_IS_DIRTY);
+ if (count_type == F2FS_DIRTY_DENTS ||
+ count_type == F2FS_DIRTY_NODES ||
+ count_type == F2FS_DIRTY_META ||
+ count_type == F2FS_DIRTY_QDATA ||
+ count_type == F2FS_DIRTY_IMETA)
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
}
static inline void inode_inc_dirty_pages(struct inode *inode)
@@ -1845,7 +1957,11 @@
if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) {
offset = (flag == SIT_BITMAP) ?
le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0;
- return &ckpt->sit_nat_version_bitmap + offset;
+ /*
+ * if large_nat_bitmap feature is enabled, leave checksum
+ * protection for all nat/sit bitmaps.
+ */
+ return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32);
}
if (__cp_payload(sbi) > 0) {
@@ -1892,13 +2008,19 @@
struct inode *inode, bool is_inode)
{
block_t valid_block_count;
- unsigned int valid_node_count;
- bool quota = inode && !is_inode;
+ unsigned int valid_node_count, user_block_count;
+ int err;
- if (quota) {
- int ret = dquot_reserve_block(inode, 1);
- if (ret)
- return ret;
+ if (is_inode) {
+ if (inode) {
+ err = dquot_alloc_inode(inode);
+ if (err)
+ return err;
+ }
+ } else {
+ err = dquot_reserve_block(inode, 1);
+ if (err)
+ return err;
}
if (time_to_inject(sbi, FAULT_BLOCK)) {
@@ -1913,8 +2035,11 @@
if (!__allow_reserved_blocks(sbi, inode, false))
valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+ user_block_count = sbi->user_block_count;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ user_block_count -= sbi->unusable_block_count;
- if (unlikely(valid_block_count > sbi->user_block_count)) {
+ if (unlikely(valid_block_count > user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
@@ -1940,8 +2065,12 @@
return 0;
enospc:
- if (quota)
+ if (is_inode) {
+ if (inode)
+ dquot_free_inode(inode);
+ } else {
dquot_release_reservation_block(inode, 1);
+ }
return -ENOSPC;
}
@@ -1952,7 +2081,6 @@
f2fs_bug_on(sbi, !sbi->total_valid_block_count);
f2fs_bug_on(sbi, !sbi->total_valid_node_count);
- f2fs_bug_on(sbi, !is_inode && !inode->i_blocks);
sbi->total_valid_node_count--;
sbi->total_valid_block_count--;
@@ -1962,8 +2090,18 @@
spin_unlock(&sbi->stat_lock);
- if (!is_inode)
+ if (is_inode) {
+ dquot_free_inode(inode);
+ } else {
+ if (unlikely(inode->i_blocks == 0)) {
+ f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return;
+ }
f2fs_i_blocks_write(inode, 1, false, true);
+ }
}
static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
@@ -2092,6 +2230,29 @@
return bio_alloc(GFP_KERNEL, npages);
}
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
+{
+ if (sbi->gc_mode == GC_URGENT)
+ return true;
+
+ if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
+ get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
+ get_pages(sbi, F2FS_WB_CP_DATA) ||
+ get_pages(sbi, F2FS_DIO_READ) ||
+ get_pages(sbi, F2FS_DIO_WRITE))
+ return false;
+
+ if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info &&
+ atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
+ return false;
+
+ if (SM_I(sbi) && SM_I(sbi)->fcc_info &&
+ atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
+ return false;
+
+ return f2fs_time_over(sbi, type);
+}
+
static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *item)
{
@@ -2203,56 +2364,26 @@
}
/*
- * Inode flags
+ * On-disk inode flags (f2fs_inode::i_flags)
*/
-#define F2FS_SECRM_FL 0x00000001 /* Secure deletion */
-#define F2FS_UNRM_FL 0x00000002 /* Undelete */
-#define F2FS_COMPR_FL 0x00000004 /* Compress file */
#define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */
#define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
#define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */
#define F2FS_NODUMP_FL 0x00000040 /* do not dump file */
#define F2FS_NOATIME_FL 0x00000080 /* do not update atime */
-/* Reserved for compression usage... */
-#define F2FS_DIRTY_FL 0x00000100
-#define F2FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
-#define F2FS_NOCOMPR_FL 0x00000400 /* Don't compress */
-#define F2FS_ENCRYPT_FL 0x00000800 /* encrypted file */
-/* End compression flags --- maybe not all used */
#define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */
-#define F2FS_IMAGIC_FL 0x00002000 /* AFS directory */
-#define F2FS_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
-#define F2FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
-#define F2FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
-#define F2FS_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
-#define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */
-#define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
-#define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
-#define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
-#define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-
-#define F2FS_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
-#define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */
-
-/* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */
-#define F2FS_FL_XFLAG_VISIBLE (F2FS_SYNC_FL | \
- F2FS_IMMUTABLE_FL | \
- F2FS_APPEND_FL | \
- F2FS_NODUMP_FL | \
- F2FS_NOATIME_FL | \
- F2FS_PROJINHERIT_FL)
+#define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */
/* Flags that should be inherited by new inodes from their parent. */
-#define F2FS_FL_INHERITED (F2FS_SECRM_FL | F2FS_UNRM_FL | F2FS_COMPR_FL |\
- F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL |\
- F2FS_NOCOMPR_FL | F2FS_JOURNAL_DATA_FL |\
- F2FS_NOTAIL_FL | F2FS_DIRSYNC_FL |\
- F2FS_PROJINHERIT_FL)
+#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \
+ F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
+ F2FS_CASEFOLD_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_TOPDIR_FL))
+#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
+ F2FS_CASEFOLD_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL)
@@ -2299,6 +2430,7 @@
FI_PROJ_INHERIT, /* indicate file inherits projectid */
FI_PIN_FILE, /* indicate file should not be gced */
FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
+ FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
};
static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -2311,6 +2443,7 @@
case FI_NEW_INODE:
if (set)
return;
+ /* fall through */
case FI_DATA_EXIST:
case FI_INLINE_DOTS:
case FI_PIN_FILE:
@@ -2337,6 +2470,12 @@
__mark_inode_dirty_flag(inode, flag, false);
}
+static inline bool f2fs_verity_in_progress(struct inode *inode)
+{
+ return IS_ENABLED(CONFIG_FS_VERITY) &&
+ is_inode_flag_set(inode, FI_VERITY_IN_PROGRESS);
+}
+
static inline void set_acl_inode(struct inode *inode, umode_t mode)
{
F2FS_I(inode)->i_acl_mode = mode;
@@ -2465,7 +2604,14 @@
static inline unsigned int addrs_per_inode(struct inode *inode)
{
- return CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode);
+ unsigned int addrs = CUR_ADDRS_PER_INODE(inode) -
+ get_inline_xattr_addrs(inode);
+ return ALIGN_DOWN(addrs, 1);
+}
+
+static inline unsigned int addrs_per_block(struct inode *inode)
+{
+ return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1);
}
static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
@@ -2478,7 +2624,9 @@
static inline int inline_xattr_size(struct inode *inode)
{
- return get_inline_xattr_addrs(inode) * sizeof(__le32);
+ if (f2fs_has_inline_xattr(inode))
+ return get_inline_xattr_addrs(inode) * sizeof(__le32);
+ return 0;
}
static inline int f2fs_has_inline_data(struct inode *inode)
@@ -2613,22 +2761,37 @@
static inline bool f2fs_may_extent_tree(struct inode *inode)
{
- if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) ||
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (!test_opt(sbi, EXTENT_CACHE) ||
is_inode_flag_set(inode, FI_NO_EXTENT))
return false;
+ /*
+ * for recovered files during mount do not create extents
+ * if shrinker is not registered.
+ */
+ if (list_empty(&sbi->s_list))
+ return false;
+
return S_ISREG(inode->i_mode);
}
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
+ void *ret;
+
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
}
- return kmalloc(size, flags);
+ ret = kmalloc(size, flags);
+ if (ret)
+ return ret;
+
+ return kvmalloc(size, flags);
}
static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
@@ -2674,9 +2837,9 @@
#define F2FS_OLD_ATTRIBUTE_SIZE (offsetof(struct f2fs_inode, i_addr))
#define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field) \
- ((offsetof(typeof(*f2fs_inode), field) + \
+ ((offsetof(typeof(*(f2fs_inode)), field) + \
sizeof((f2fs_inode)->field)) \
- <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \
+ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \
static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
{
@@ -2703,19 +2866,18 @@
spin_unlock(&sbi->iostat_lock);
}
-#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
- (!is_read_io(fio->op) || fio->is_meta))
+#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
+
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "invalid blkaddr: %u, type: %d, run fsck to fix.",
- blkaddr, type);
+ f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
+ blkaddr, type);
f2fs_bug_on(sbi, 1);
}
}
@@ -2727,13 +2889,25 @@
return true;
}
-static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
- block_t blkaddr)
+static inline void f2fs_set_page_private(struct page *page,
+ unsigned long data)
{
- if (!__is_valid_data_blkaddr(blkaddr))
- return false;
- verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
- return true;
+ if (PagePrivate(page))
+ return;
+
+ get_page(page);
+ SetPagePrivate(page);
+ set_page_private(page, data);
+}
+
+static inline void f2fs_clear_page_private(struct page *page)
+{
+ if (!PagePrivate(page))
+ return;
+
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
+ f2fs_put_page(page, 0);
}
/*
@@ -2751,6 +2925,7 @@
int f2fs_precache_extents(struct inode *inode);
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid);
int f2fs_pin_file_control(struct inode *inode, bool inc);
/*
@@ -2775,6 +2950,11 @@
bool hot, bool set);
struct dentry *f2fs_get_parent(struct dentry *child);
+extern int f2fs_ci_compare(const struct inode *parent,
+ const struct qstr *name,
+ const struct qstr *entry,
+ bool quick);
+
/*
* dir.c
*/
@@ -2829,18 +3009,17 @@
int f2fs_inode_dirtied(struct inode *inode, bool sync);
void f2fs_inode_synced(struct inode *inode);
int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
+int f2fs_quota_sync(struct super_block *sb, int type);
void f2fs_quota_off_umount(struct super_block *sb);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
-extern __printf(3, 4)
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
* hash.c
*/
-f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
- struct fscrypt_name *fname);
+f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
+ const struct qstr *name_info, struct fscrypt_name *fname);
/*
* node.c
@@ -2871,7 +3050,7 @@
void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
struct page *f2fs_get_node_page_ra(struct page *parent, int start);
-void f2fs_move_node_page(struct page *node_page, int gc_type);
+int f2fs_move_node_page(struct page *node_page, int gc_type);
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic,
unsigned int *seq_id);
@@ -2888,7 +3067,7 @@
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
-void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi);
int __init f2fs_create_node_manager_caches(void);
@@ -2913,11 +3092,16 @@
bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
+block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+ unsigned int start, unsigned int end);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -2943,8 +3127,10 @@
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio, bool add_list);
void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type, bool ordered);
-void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr);
+ enum page_type type, bool ordered, bool locked);
+void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
+void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
+ block_t len);
void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
@@ -3004,10 +3190,11 @@
void f2fs_destroy_post_read_processing(void);
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type);
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type);
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
+int f2fs_merge_page_bio(struct f2fs_io_info *fio);
void f2fs_submit_page_write(struct f2fs_io_info *fio);
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio);
@@ -3027,6 +3214,7 @@
struct page *f2fs_get_new_data_page(struct inode *inode,
struct page *ipage, pgoff_t index, bool new_i_size);
int f2fs_do_write_data_page(struct f2fs_io_info *fio);
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -3041,7 +3229,7 @@
struct page *page, enum migrate_mode mode);
#endif
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
-void f2fs_clear_radix_tree_dirty_tag(struct page *page);
+void f2fs_clear_page_cache_dirty_tag(struct page *page);
/*
* gc.c
@@ -3052,6 +3240,7 @@
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
unsigned int segno);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
+int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
/*
* recovery.c
@@ -3079,6 +3268,9 @@
int free_nids, avail_nids, alloc_nids;
int total_count, utilization;
int bg_gc, nr_wb_cp_data, nr_wb_data;
+ int nr_rd_data, nr_rd_node, nr_rd_meta;
+ int nr_dio_read, nr_dio_write;
+ unsigned int io_skip_bggc, other_skip_bggc;
int nr_flushing, nr_flushed, flush_list_empty;
int nr_discarding, nr_discarded;
int nr_discard_cmd;
@@ -3100,6 +3292,7 @@
int cursec[NR_CURSEG_TYPE];
int curzone[NR_CURSEG_TYPE];
+ unsigned int meta_count[META_MAX];
unsigned int segment_count[2];
unsigned int block_count[2];
unsigned int inplace_count;
@@ -3115,6 +3308,8 @@
#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++)
#define stat_inc_call_count(si) ((si)->call_count++)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
+#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++)
+#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++)
#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++)
#define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--)
#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext))
@@ -3151,6 +3346,17 @@
if (f2fs_has_inline_dentry(inode)) \
(atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \
} while (0)
+#define stat_inc_meta_count(sbi, blkaddr) \
+ do { \
+ if (blkaddr < SIT_I(sbi)->sit_base_addr) \
+ atomic_inc(&(sbi)->meta_count[META_CP]); \
+ else if (blkaddr < NM_I(sbi)->nat_blkaddr) \
+ atomic_inc(&(sbi)->meta_count[META_SIT]); \
+ else if (blkaddr < SM_I(sbi)->ssa_blkaddr) \
+ atomic_inc(&(sbi)->meta_count[META_NAT]); \
+ else if (blkaddr < SM_I(sbi)->main_blkaddr) \
+ atomic_inc(&(sbi)->meta_count[META_SSA]); \
+ } while (0)
#define stat_inc_seg_type(sbi, curseg) \
((sbi)->segment_count[(curseg)->alloc_type]++)
#define stat_inc_block_count(sbi, curseg) \
@@ -3213,13 +3419,15 @@
int f2fs_build_stats(struct f2fs_sb_info *sbi);
void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
-int __init f2fs_create_root_stats(void);
+void __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
#define stat_inc_cp_count(si) do { } while (0)
#define stat_inc_bg_cp_count(si) do { } while (0)
#define stat_inc_call_count(si) do { } while (0)
#define stat_inc_bggc_count(si) do { } while (0)
+#define stat_io_skip_bggc_count(sbi) do { } while (0)
+#define stat_other_skip_bggc_count(sbi) do { } while (0)
#define stat_inc_dirty_inode(sbi, type) do { } while (0)
#define stat_dec_dirty_inode(sbi, type) do { } while (0)
#define stat_inc_total_hit(sb) do { } while (0)
@@ -3238,6 +3446,7 @@
#define stat_inc_volatile_write(inode) do { } while (0)
#define stat_dec_volatile_write(inode) do { } while (0)
#define stat_update_max_volatile_write(inode) do { } while (0)
+#define stat_inc_meta_count(sbi, blkaddr) do { } while (0)
#define stat_inc_seg_type(sbi, curseg) do { } while (0)
#define stat_inc_block_count(sbi, curseg) do { } while (0)
#define stat_inc_inplace_blocks(sbi) do { } while (0)
@@ -3248,11 +3457,14 @@
static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
-static inline int __init f2fs_create_root_stats(void) { return 0; }
+static inline void __init f2fs_create_root_stats(void) { }
static inline void f2fs_destroy_root_stats(void) { }
#endif
extern const struct file_operations f2fs_dir_operations;
+#ifdef CONFIG_UNICODE
+extern const struct dentry_operations f2fs_dentry_ops;
+#endif
extern const struct file_operations f2fs_file_operations;
extern const struct inode_operations f2fs_file_inode_operations;
extern const struct address_space_operations f2fs_dblock_aops;
@@ -3307,18 +3519,19 @@
/*
* extent_cache.c
*/
-struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
struct rb_entry *cached_re, unsigned int ofs);
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
- struct rb_root *root, struct rb_node **parent,
- unsigned int ofs);
-struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
+ struct rb_root_cached *root,
+ struct rb_node **parent,
+ unsigned int ofs, bool *leftmost);
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
struct rb_entry *cached_re, unsigned int ofs,
struct rb_entry **prev_entry, struct rb_entry **next_entry,
struct rb_node ***insert_p, struct rb_node **insert_parent,
- bool force);
+ bool force, bool *leftmost);
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root *root);
+ struct rb_root_cached *root);
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
void f2fs_drop_extent_tree(struct inode *inode);
@@ -3341,24 +3554,22 @@
int f2fs_register_sysfs(struct f2fs_sb_info *sbi);
void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi);
+/* verity.c */
+extern const struct fsverity_operations f2fs_verityops;
+
/*
* crypto support
*/
-static inline bool f2fs_encrypted_inode(struct inode *inode)
-{
- return file_is_encrypt(inode);
-}
-
static inline bool f2fs_encrypted_file(struct inode *inode)
{
- return f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode);
+ return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode);
}
static inline void f2fs_set_encrypted_inode(struct inode *inode)
{
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
file_set_encrypt(inode);
- inode->i_flags |= S_ENCRYPTED;
+ f2fs_set_inode_flags(inode);
#endif
}
@@ -3368,13 +3579,13 @@
*/
static inline bool f2fs_post_read_required(struct inode *inode)
{
- return f2fs_encrypted_file(inode);
+ return f2fs_encrypted_file(inode) || fsverity_active(inode);
}
#define F2FS_FEATURE_FUNCS(name, flagname) \
-static inline int f2fs_sb_has_##name(struct super_block *sb) \
+static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \
{ \
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \
+ return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \
}
F2FS_FEATURE_FUNCS(encrypt, ENCRYPT);
@@ -3386,28 +3597,64 @@
F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
+F2FS_FEATURE_FUNCS(verity, VERITY);
+F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
+F2FS_FEATURE_FUNCS(casefold, CASEFOLD);
#ifdef CONFIG_BLK_DEV_ZONED
-static inline int get_blkz_type(struct f2fs_sb_info *sbi,
- struct block_device *bdev, block_t blkaddr)
+static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
+ block_t blkaddr)
{
unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
- int i;
- for (i = 0; i < sbi->s_ndevs; i++)
- if (FDEV(i).bdev == bdev)
- return FDEV(i).blkz_type[zno];
- return -EINVAL;
+ return test_bit(zno, FDEV(devi).blkz_seq);
}
#endif
-static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
+static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
{
- struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
-
- return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb);
+ return f2fs_sb_has_blkzoned(sbi);
}
+static inline bool f2fs_bdev_support_discard(struct block_device *bdev)
+{
+ return blk_queue_discard(bdev_get_queue(bdev)) ||
+ bdev_is_zoned(bdev);
+}
+
+static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ if (!f2fs_is_multi_device(sbi))
+ return f2fs_bdev_support_discard(sbi->sb->s_bdev);
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ if (f2fs_bdev_support_discard(FDEV(i).bdev))
+ return true;
+ return false;
+}
+
+static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi)
+{
+ return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) ||
+ f2fs_hw_should_discard(sbi);
+}
+
+static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ if (!f2fs_is_multi_device(sbi))
+ return bdev_read_only(sbi->sb->s_bdev);
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ if (bdev_read_only(FDEV(i).bdev))
+ return true;
+ return false;
+}
+
+
static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
{
clear_opt(sbi, ADAPTIVE);
@@ -3425,7 +3672,7 @@
static inline bool f2fs_may_encrypt(struct inode *inode)
{
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
umode_t mode = inode->i_mode;
return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
@@ -3434,11 +3681,54 @@
#endif
}
-static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
+static inline int block_unaligned_IO(struct inode *inode,
+ struct kiocb *iocb, struct iov_iter *iter)
{
- return (f2fs_post_read_required(inode) ||
- (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
- F2FS_I_SB(inode)->s_ndevs);
+ unsigned int i_blkbits = READ_ONCE(inode->i_blkbits);
+ unsigned int blocksize_mask = (1 << i_blkbits) - 1;
+ loff_t offset = iocb->ki_pos;
+ unsigned long align = offset | iov_iter_alignment(iter);
+
+ return align & blocksize_mask;
+}
+
+static inline int allow_outplace_dio(struct inode *inode,
+ struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ int rw = iov_iter_rw(iter);
+
+ return (test_opt(sbi, LFS) && (rw == WRITE) &&
+ !block_unaligned_IO(inode, iocb, iter));
+}
+
+static inline bool f2fs_force_buffered_io(struct inode *inode,
+ struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ int rw = iov_iter_rw(iter);
+
+ if (f2fs_post_read_required(inode))
+ return true;
+ if (f2fs_is_multi_device(sbi))
+ return true;
+ /*
+ * for blkzoned device, fallback direct IO to buffered IO, so
+ * all IOs can be serialized by log-structured write.
+ */
+ if (f2fs_sb_has_blkzoned(sbi))
+ return true;
+ if (test_opt(sbi, LFS) && (rw == WRITE)) {
+ if (block_unaligned_IO(inode, iocb, iter))
+ return true;
+ if (F2FS_IO_ALIGNED(sbi))
+ return true;
+ }
+ if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) &&
+ !IS_SWAPFILE(inode))
+ return true;
+
+ return false;
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -3448,4 +3738,20 @@
#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0)
#endif
+static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_QUOTA
+ if (f2fs_sb_has_quota_ino(sbi))
+ return true;
+ if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
+ F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
+ F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
+ return true;
#endif
+ return false;
+}
+
+#define EFSBADCRC EBADMSG /* Bad CRC detected */
+#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
+
+#endif /* _LINUX_F2FS_H */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5474aaa..29bc0a5 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/file.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -23,6 +20,7 @@
#include <linux/uio.h>
#include <linux/uuid.h>
#include <linux/file.h>
+#include <linux/nls.h>
#include "f2fs.h"
#include "node.h"
@@ -42,6 +40,8 @@
ret = filemap_fault(vmf);
up_read(&F2FS_I(inode)->i_mmap_sem);
+ trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret);
+
return ret;
}
@@ -50,7 +50,7 @@
struct page *page = vmf->page;
struct inode *inode = file_inode(vmf->vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct dnode_of_data dn;
+ struct dnode_of_data dn = { .node_changed = false };
int err;
if (unlikely(f2fs_cp_error(sbi))) {
@@ -58,23 +58,15 @@
goto err;
}
+ if (!f2fs_is_checkpoint_ready(sbi)) {
+ err = -ENOSPC;
+ goto err;
+ }
+
sb_start_pagefault(inode->i_sb);
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
- /* block allocation */
- f2fs_lock_op(sbi);
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_reserve_block(&dn, page->index);
- if (err) {
- f2fs_unlock_op(sbi);
- goto out;
- }
- f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
-
- f2fs_balance_fs(sbi, dn.node_changed);
-
file_update_time(vmf->vma->vm_file);
down_read(&F2FS_I(inode)->i_mmap_sem);
lock_page(page);
@@ -86,11 +78,28 @@
goto out_sem;
}
+ /* block allocation */
+ __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_get_block(&dn, page->index);
+ f2fs_put_dnode(&dn);
+ __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ if (err) {
+ unlock_page(page);
+ goto out_sem;
+ }
+
+ /* fill the page */
+ f2fs_wait_on_page_writeback(page, DATA, false, true);
+
+ /* wait for GCed page writeback via META_MAPPING */
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
/*
* check to see if the page is mapped already (no holes)
*/
if (PageMappedToDisk(page))
- goto mapped;
+ goto out_sem;
/* page is wholly or partially inside EOF */
if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
@@ -105,21 +114,15 @@
SetPageUptodate(page);
f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE);
+ f2fs_update_time(sbi, REQ_TIME);
trace_f2fs_vm_page_mkwrite(page, DATA);
-mapped:
- /* fill the page */
- f2fs_wait_on_page_writeback(page, DATA, false);
-
- /* wait for GCed page writeback via META_MAPPING */
- if (f2fs_post_read_required(inode))
- f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr);
-
out_sem:
up_read(&F2FS_I(inode)->i_mmap_sem);
-out:
+
+ f2fs_balance_fs(sbi, dn.node_changed);
+
sb_end_pagefault(inode->i_sb);
- f2fs_update_time(sbi, REQ_TIME);
err:
return block_page_mkwrite_return(err);
}
@@ -215,11 +218,15 @@
};
unsigned int seq_id = 0;
- if (unlikely(f2fs_readonly(inode->i_sb)))
+ if (unlikely(f2fs_readonly(inode->i_sb) ||
+ is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return 0;
trace_f2fs_sync_file_enter(inode);
+ if (S_ISDIR(inode->i_mode))
+ goto go_write;
+
/* if fdatasync is triggered, let's do in-place-update */
if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
set_inode_flag(inode, FI_NEED_IPU);
@@ -357,7 +364,7 @@
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- is_valid_data_blkaddr(sbi, blkaddr))
+ __is_valid_data_blkaddr(blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -423,7 +430,7 @@
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
- blkaddr, DATA_GENERIC)) {
+ blkaddr, DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
goto fail;
}
@@ -495,6 +502,10 @@
if (err)
return err;
+ err = fsverity_file_open(inode, filp);
+ if (err)
+ return err;
+
filp->f_mode |= FMODE_NOWAIT;
return dquot_file_open(inode, filp);
@@ -524,7 +535,8 @@
f2fs_set_data_blkaddr(dn);
if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ !f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE))
continue;
f2fs_invalidate_blocks(sbi, blkaddr);
@@ -553,7 +565,7 @@
void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
{
- f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
+ f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
}
static int truncate_partial_data_page(struct inode *inode, u64 from,
@@ -579,11 +591,11 @@
if (IS_ERR(page))
return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
truncate_out:
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
zero_user(page, offset, PAGE_SIZE - offset);
/* An encrypted inode should have a key and truncate the last page. */
- f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode));
+ f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
if (!cache_only)
set_page_dirty(page);
f2fs_put_page(page, 1);
@@ -698,19 +710,17 @@
unsigned int flags;
if (f2fs_has_extra_attr(inode) &&
- f2fs_sb_has_inode_crtime(inode->i_sb) &&
+ f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
stat->result_mask |= STATX_BTIME;
stat->btime.tv_sec = fi->i_crtime.tv_sec;
stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
}
- flags = fi->i_flags & F2FS_FL_USER_VISIBLE;
+ flags = fi->i_flags;
if (flags & F2FS_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
- if (flags & F2FS_COMPR_FL)
- stat->attributes |= STATX_ATTR_COMPRESSED;
- if (f2fs_encrypted_inode(inode))
+ if (IS_ENCRYPTED(inode))
stat->attributes |= STATX_ATTR_ENCRYPTED;
if (flags & F2FS_IMMUTABLE_FL)
stat->attributes |= STATX_ATTR_IMMUTABLE;
@@ -718,7 +728,6 @@
stat->attributes |= STATX_ATTR_NODUMP;
stat->attributes_mask |= (STATX_ATTR_APPEND |
- STATX_ATTR_COMPRESSED |
STATX_ATTR_ENCRYPTED |
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
@@ -742,15 +751,18 @@
inode->i_uid = attr->ia_uid;
if (ia_valid & ATTR_GID)
inode->i_gid = attr->ia_gid;
- if (ia_valid & ATTR_ATIME)
- inode->i_atime = timespec64_trunc(attr->ia_atime,
- inode->i_sb->s_time_gran);
- if (ia_valid & ATTR_MTIME)
- inode->i_mtime = timespec64_trunc(attr->ia_mtime,
- inode->i_sb->s_time_gran);
- if (ia_valid & ATTR_CTIME)
- inode->i_ctime = timespec64_trunc(attr->ia_ctime,
- inode->i_sb->s_time_gran);
+ if (ia_valid & ATTR_ATIME) {
+ inode->i_atime = timestamp_truncate(attr->ia_atime,
+ inode);
+ }
+ if (ia_valid & ATTR_MTIME) {
+ inode->i_mtime = timestamp_truncate(attr->ia_mtime,
+ inode);
+ }
+ if (ia_valid & ATTR_CTIME) {
+ inode->i_ctime = timestamp_truncate(attr->ia_ctime,
+ inode);
+ }
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
@@ -767,7 +779,6 @@
{
struct inode *inode = d_inode(dentry);
int err;
- bool size_changed = false;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
@@ -780,6 +791,10 @@
if (err)
return err;
+ err = fsverity_prepare_setattr(dentry, attr);
+ if (err)
+ return err;
+
if (is_quota_modification(inode, attr)) {
err = dquot_initialize(inode);
if (err)
@@ -789,20 +804,45 @@
!uid_eq(attr->ia_uid, inode->i_uid)) ||
(attr->ia_valid & ATTR_GID &&
!gid_eq(attr->ia_gid, inode->i_gid))) {
+ f2fs_lock_op(F2FS_I_SB(inode));
err = dquot_transfer(inode, attr);
- if (err)
+ if (err) {
+ set_sbi_flag(F2FS_I_SB(inode),
+ SBI_QUOTA_NEED_REPAIR);
+ f2fs_unlock_op(F2FS_I_SB(inode));
return err;
+ }
+ /*
+ * update uid/gid under lock_op(), so that dquot and inode can
+ * be updated atomically.
+ */
+ if (attr->ia_valid & ATTR_UID)
+ inode->i_uid = attr->ia_uid;
+ if (attr->ia_valid & ATTR_GID)
+ inode->i_gid = attr->ia_gid;
+ f2fs_mark_inode_dirty_sync(inode, true);
+ f2fs_unlock_op(F2FS_I_SB(inode));
}
if (attr->ia_valid & ATTR_SIZE) {
- bool to_smaller = (attr->ia_size <= i_size_read(inode));
+ loff_t old_size = i_size_read(inode);
+
+ if (attr->ia_size > MAX_INLINE_DATA(inode)) {
+ /*
+ * should convert inline inode before i_size_write to
+ * keep smaller than inline_data size with inline flag.
+ */
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size);
- if (to_smaller)
+ if (attr->ia_size <= old_size)
err = f2fs_truncate(inode);
/*
* do not trim all blocks after i_size if target size is
@@ -810,25 +850,13 @@
*/
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
if (err)
return err;
- if (!to_smaller) {
- /* should convert inline inode here */
- if (!f2fs_may_inline_data(inode)) {
- err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
- }
- inode->i_mtime = inode->i_ctime = current_time(inode);
- }
-
down_write(&F2FS_I(inode)->i_sem);
+ inode->i_mtime = inode->i_ctime = current_time(inode);
F2FS_I(inode)->last_disk_size = i_size_read(inode);
up_write(&F2FS_I(inode)->i_sem);
-
- size_changed = true;
}
__setattr_copy(inode, attr);
@@ -842,7 +870,7 @@
}
/* file size may changed here */
- f2fs_mark_inode_dirty_sync(inode, size_changed);
+ f2fs_mark_inode_dirty_sync(inode, true);
/* inode change will produce dirty node pages flushed by checkpoint */
f2fs_balance_fs(F2FS_I_SB(inode), true);
@@ -879,7 +907,7 @@
if (IS_ERR(page))
return PTR_ERR(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
zero_user(page, start, len);
set_page_dirty(page);
f2fs_put_page(page, 1);
@@ -995,7 +1023,8 @@
} else if (ret == -ENOENT) {
if (dn.max_level == 0)
return -ENOENT;
- done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len);
+ done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - dn.ofs_in_node,
+ len);
blkaddr += done;
do_replace += done;
goto next;
@@ -1006,11 +1035,19 @@
for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
*blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
+
+ if (__is_valid_data_blkaddr(*blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, *blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ f2fs_put_dnode(&dn);
+ return -EFSCORRUPTED;
+ }
+
if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
if (test_opt(sbi, LFS)) {
f2fs_put_dnode(&dn);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
/* do not invalidate this block address */
@@ -1146,7 +1183,7 @@
int ret;
while (len) {
- olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
+ olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
array_size(olen, sizeof(block_t)),
@@ -1191,7 +1228,7 @@
static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
pgoff_t start = offset >> PAGE_SHIFT;
pgoff_t end = (offset + len) >> PAGE_SHIFT;
int ret;
@@ -1444,7 +1481,7 @@
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
- idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -1483,7 +1520,8 @@
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
- .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE };
+ .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
+ .m_may_create = true };
pgoff_t pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
@@ -1507,7 +1545,12 @@
if (off_end)
map.m_len++;
- err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+ if (f2fs_is_pinned_file(inode))
+ map.m_seg_type = CURSEG_COLD_DATA;
+
+ err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ?
+ F2FS_GET_BLOCK_PRE_DIO :
+ F2FS_GET_BLOCK_PRE_AIO));
if (err) {
pgoff_t last_off;
@@ -1541,12 +1584,14 @@
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
+ if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
+ return -ENOSPC;
/* f2fs only support ->fallocate for regular file */
if (!S_ISREG(inode->i_mode))
return -EINVAL;
- if (f2fs_encrypted_inode(inode) &&
+ if (IS_ENCRYPTED(inode) &&
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
return -EOPNOTSUPP;
@@ -1624,42 +1669,22 @@
return 0;
}
-static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
-{
- struct inode *inode = file_inode(filp);
- struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int flags = fi->i_flags;
-
- if (f2fs_encrypted_inode(inode))
- flags |= F2FS_ENCRYPT_FL;
- if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
- flags |= F2FS_INLINE_DATA_FL;
-
- flags &= F2FS_FL_USER_VISIBLE;
-
- return put_user(flags, (int __user *)arg);
-}
-
-static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags)
+static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int oldflags;
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode))
return -EPERM;
- flags = f2fs_mask_flags(inode->i_mode, flags);
+ if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) {
+ if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
+ return -EOPNOTSUPP;
+ if (!f2fs_empty_dir(inode))
+ return -ENOTEMPTY;
+ }
- oldflags = fi->i_flags;
-
- if ((flags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL))
- if (!capable(CAP_LINUX_IMMUTABLE))
- return -EPERM;
-
- flags = flags & F2FS_FL_USER_MODIFIABLE;
- flags |= oldflags & ~F2FS_FL_USER_MODIFIABLE;
- fi->i_flags = flags;
+ fi->i_flags = iflags | (fi->i_flags & ~mask);
if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
@@ -1668,30 +1693,141 @@
inode->i_ctime = current_time(inode);
f2fs_set_inode_flags(inode);
- f2fs_mark_inode_dirty_sync(inode, false);
+ f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
+/* FS_IOC_GETFLAGS and FS_IOC_SETFLAGS support */
+
+/*
+ * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
+ * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
+ * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add
+ * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
+ */
+
+static const struct {
+ u32 iflag;
+ u32 fsflag;
+} f2fs_fsflags_map[] = {
+ { F2FS_SYNC_FL, FS_SYNC_FL },
+ { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL },
+ { F2FS_APPEND_FL, FS_APPEND_FL },
+ { F2FS_NODUMP_FL, FS_NODUMP_FL },
+ { F2FS_NOATIME_FL, FS_NOATIME_FL },
+ { F2FS_INDEX_FL, FS_INDEX_FL },
+ { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL },
+ { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL },
+ { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL },
+};
+
+#define F2FS_GETTABLE_FS_FL ( \
+ FS_SYNC_FL | \
+ FS_IMMUTABLE_FL | \
+ FS_APPEND_FL | \
+ FS_NODUMP_FL | \
+ FS_NOATIME_FL | \
+ FS_INDEX_FL | \
+ FS_DIRSYNC_FL | \
+ FS_PROJINHERIT_FL | \
+ FS_ENCRYPT_FL | \
+ FS_INLINE_DATA_FL | \
+ FS_NOCOW_FL | \
+ FS_VERITY_FL | \
+ FS_CASEFOLD_FL)
+
+#define F2FS_SETTABLE_FS_FL ( \
+ FS_SYNC_FL | \
+ FS_IMMUTABLE_FL | \
+ FS_APPEND_FL | \
+ FS_NODUMP_FL | \
+ FS_NOATIME_FL | \
+ FS_DIRSYNC_FL | \
+ FS_PROJINHERIT_FL | \
+ FS_CASEFOLD_FL)
+
+/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
+static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
+{
+ u32 fsflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
+ if (iflags & f2fs_fsflags_map[i].iflag)
+ fsflags |= f2fs_fsflags_map[i].fsflag;
+
+ return fsflags;
+}
+
+/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
+static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
+{
+ u32 iflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
+ if (fsflags & f2fs_fsflags_map[i].fsflag)
+ iflags |= f2fs_fsflags_map[i].iflag;
+
+ return iflags;
+}
+
+static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
+
+ if (IS_ENCRYPTED(inode))
+ fsflags |= FS_ENCRYPT_FL;
+ if (IS_VERITY(inode))
+ fsflags |= FS_VERITY_FL;
+ if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
+ fsflags |= FS_INLINE_DATA_FL;
+ if (is_inode_flag_set(inode, FI_PIN_FILE))
+ fsflags |= FS_NOCOW_FL;
+
+ fsflags &= F2FS_GETTABLE_FS_FL;
+
+ return put_user(fsflags, (int __user *)arg);
+}
+
static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- unsigned int flags;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ u32 fsflags, old_fsflags;
+ u32 iflags;
int ret;
if (!inode_owner_or_capable(inode))
return -EACCES;
- if (get_user(flags, (int __user *)arg))
+ if (get_user(fsflags, (int __user *)arg))
return -EFAULT;
+ if (fsflags & ~F2FS_GETTABLE_FS_FL)
+ return -EOPNOTSUPP;
+ fsflags &= F2FS_SETTABLE_FS_FL;
+
+ iflags = f2fs_fsflags_to_iflags(fsflags);
+ if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
+ return -EOPNOTSUPP;
+
ret = mnt_want_write_file(filp);
if (ret)
return ret;
inode_lock(inode);
- ret = __f2fs_ioc_setflags(inode, flags);
+ old_fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
+ ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
+ if (ret)
+ goto out;
+ ret = f2fs_setflags_common(inode, iflags,
+ f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL));
+out:
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1707,6 +1843,8 @@
static int f2fs_ioc_start_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
if (!inode_owner_or_capable(inode))
@@ -1715,6 +1853,9 @@
if (!S_ISREG(inode->i_mode))
return -EINVAL;
+ if (filp->f_flags & O_DIRECT)
+ return -EINVAL;
+
ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -1733,18 +1874,25 @@
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- if (!get_dirty_pages(inode))
- goto skip_flush;
-
- f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
- "Unexpected flush for atomic writes: ino=%lu, npages=%u",
- inode->i_ino, get_dirty_pages(inode));
+ /*
+ * Should wait end_io to count F2FS_WB_CP_DATA correctly by
+ * f2fs_is_atomic_file.
+ */
+ if (get_dirty_pages(inode))
+ f2fs_warn(F2FS_I_SB(inode), "Unexpected flush for atomic writes: ino=%lu, npages=%u",
+ inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret) {
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
-skip_flush:
+
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (list_empty(&fi->inmem_ilist))
+ list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+
+ /* add inode in inmem_list first and set atomic_file */
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -1786,11 +1934,8 @@
goto err_out;
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
- if (!ret) {
- clear_inode_flag(inode, FI_ATOMIC_FILE);
- F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
- stat_dec_atomic_write(inode);
- }
+ if (!ret)
+ f2fs_drop_inmem_pages(inode);
} else {
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
}
@@ -1949,6 +2094,13 @@
f2fs_stop_checkpoint(sbi, false);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
+ case F2FS_GOING_DOWN_NEED_FSCK:
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
+ /* do checkpoint only */
+ ret = f2fs_sync_fs(sb, 1);
+ goto out;
default:
ret = -EINVAL;
goto out;
@@ -1964,6 +2116,9 @@
out:
if (in != F2FS_GOING_DOWN_FULLSYNC)
mnt_drop_write_file(filp);
+
+ trace_f2fs_shutdown(sbi, in, ret);
+
return ret;
}
@@ -1978,7 +2133,7 @@
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q))
+ if (!f2fs_hw_support_discard(F2FS_SB(sb)))
return -EOPNOTSUPP;
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
@@ -2017,7 +2172,7 @@
{
struct inode *inode = file_inode(filp);
- if (!f2fs_sb_has_encrypt(inode->i_sb))
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
return -EOPNOTSUPP;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
@@ -2027,7 +2182,7 @@
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
- if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb))
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
}
@@ -2038,7 +2193,7 @@
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
- if (!f2fs_sb_has_encrypt(inode->i_sb))
+ if (!f2fs_sb_has_encrypt(sbi))
return -EOPNOTSUPP;
err = mnt_want_write_file(filp);
@@ -2069,6 +2224,49 @@
return err;
}
+static int f2fs_ioc_get_encryption_policy_ex(struct file *filp,
+ unsigned long arg)
+{
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
+ return -EOPNOTSUPP;
+
+ return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);
+}
+
+static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg)
+{
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
+ return -EOPNOTSUPP;
+
+ return fscrypt_ioctl_add_key(filp, (void __user *)arg);
+}
+
+static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg)
+{
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
+ return -EOPNOTSUPP;
+
+ return fscrypt_ioctl_remove_key(filp, (void __user *)arg);
+}
+
+static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp,
+ unsigned long arg)
+{
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
+ return -EOPNOTSUPP;
+
+ return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg);
+}
+
+static int f2fs_ioc_get_encryption_key_status(struct file *filp,
+ unsigned long arg)
+{
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
+ return -EOPNOTSUPP;
+
+ return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
+}
+
static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -2123,9 +2321,9 @@
return -EROFS;
end = range.start + range.len;
- if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) {
+ if (end < range.start || range.start < MAIN_BLKADDR(sbi) ||
+ end >= MAX_BLKADDR(sbi))
return -EINVAL;
- }
ret = mnt_want_write_file(filp);
if (ret)
@@ -2142,7 +2340,7 @@
}
ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
- range.start += sbi->blocks_per_seg;
+ range.start += BLKS_PER_SEC(sbi);
if (range.start <= end)
goto do_more;
out:
@@ -2162,6 +2360,11 @@
if (f2fs_readonly(sbi->sb))
return -EROFS;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
+ return -EINVAL;
+ }
+
ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -2178,7 +2381,8 @@
{
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map = { .m_next_extent = NULL,
- .m_seg_type = NO_CHECK_TYPE };
+ .m_seg_type = NO_CHECK_TYPE ,
+ .m_may_create = false };
struct extent_info ei = {0, 0, 0};
pgoff_t pg_start, pg_end, next_pgofs;
unsigned int blk_per_seg = sbi->blocks_per_seg;
@@ -2243,10 +2447,12 @@
map.m_lblk += map.m_len;
}
- if (!fragmented)
+ if (!fragmented) {
+ total = 0;
goto out;
+ }
- sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
+ sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi));
/*
* make sure there are enough free section for LFS allocation, this can
@@ -2274,7 +2480,7 @@
if (!(map.m_flags & F2FS_MAP_FLAGS)) {
map.m_lblk = next_pgofs;
- continue;
+ goto check;
}
set_inode_flag(inode, FI_DO_DEFRAG);
@@ -2298,8 +2504,8 @@
}
map.m_lblk = idx;
-
- if (idx < pg_end && cnt < blk_per_seg)
+check:
+ if (map.m_lblk < pg_end && cnt < blk_per_seg)
goto do_map;
clear_inode_flag(inode, FI_DO_DEFRAG);
@@ -2383,7 +2589,7 @@
if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
return -EINVAL;
- if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
+ if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
return -EOPNOTSUPP;
if (src == dst) {
@@ -2533,16 +2739,17 @@
if (f2fs_readonly(sbi->sb))
return -EROFS;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return -EINVAL;
+
if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
sizeof(range)))
return -EFAULT;
- if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
- sbi->segs_per_sec != 1) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Can't flush %u in %d for segs_per_sec %u != 1\n",
- range.dev_num, sbi->s_ndevs,
- sbi->segs_per_sec);
+ if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
+ __is_large_section(sbi)) {
+ f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1",
+ range.dev_num, sbi->s_ndevs, sbi->segs_per_sec);
return -EINVAL;
}
@@ -2591,18 +2798,33 @@
}
#ifdef CONFIG_QUOTA
+int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
+{
+ struct dquot *transfer_to[MAXQUOTAS] = {};
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct super_block *sb = sbi->sb;
+ int err = 0;
+
+ transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+ if (!IS_ERR(transfer_to[PRJQUOTA])) {
+ err = __dquot_transfer(inode, transfer_to);
+ if (err)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ dqput(transfer_to[PRJQUOTA]);
+ }
+ return err;
+}
+
static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
{
struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct super_block *sb = sbi->sb;
- struct dquot *transfer_to[MAXQUOTAS] = {};
struct page *ipage;
kprojid_t kprojid;
int err;
- if (!f2fs_sb_has_project_quota(sb)) {
+ if (!f2fs_sb_has_project_quota(sbi)) {
if (projid != F2FS_DEF_PROJID)
return -EOPNOTSUPP;
else
@@ -2617,53 +2839,45 @@
if (projid_eq(kprojid, F2FS_I(inode)->i_projid))
return 0;
- err = mnt_want_write_file(filp);
- if (err)
- return err;
-
err = -EPERM;
- inode_lock(inode);
-
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode))
- goto out_unlock;
+ return err;
ipage = f2fs_get_node_page(sbi, inode->i_ino);
- if (IS_ERR(ipage)) {
- err = PTR_ERR(ipage);
- goto out_unlock;
- }
+ if (IS_ERR(ipage))
+ return PTR_ERR(ipage);
if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize,
i_projid)) {
err = -EOVERFLOW;
f2fs_put_page(ipage, 1);
- goto out_unlock;
+ return err;
}
f2fs_put_page(ipage, 1);
err = dquot_initialize(inode);
if (err)
- goto out_unlock;
+ return err;
- transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
- if (!IS_ERR(transfer_to[PRJQUOTA])) {
- err = __dquot_transfer(inode, transfer_to);
- dqput(transfer_to[PRJQUOTA]);
- if (err)
- goto out_dirty;
- }
+ f2fs_lock_op(sbi);
+ err = f2fs_transfer_project_quota(inode, kprojid);
+ if (err)
+ goto out_unlock;
F2FS_I(inode)->i_projid = kprojid;
inode->i_ctime = current_time(inode);
-out_dirty:
f2fs_mark_inode_dirty_sync(inode, true);
out_unlock:
- inode_unlock(inode);
- mnt_drop_write_file(filp);
+ f2fs_unlock_op(sbi);
return err;
}
#else
+int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
+{
+ return 0;
+}
+
static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
{
if (projid != F2FS_DEF_PROJID)
@@ -2672,64 +2886,76 @@
}
#endif
-/* Transfer internal flags to xflags */
-static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags)
-{
- __u32 xflags = 0;
+/* FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR support */
- if (iflags & F2FS_SYNC_FL)
- xflags |= FS_XFLAG_SYNC;
- if (iflags & F2FS_IMMUTABLE_FL)
- xflags |= FS_XFLAG_IMMUTABLE;
- if (iflags & F2FS_APPEND_FL)
- xflags |= FS_XFLAG_APPEND;
- if (iflags & F2FS_NODUMP_FL)
- xflags |= FS_XFLAG_NODUMP;
- if (iflags & F2FS_NOATIME_FL)
- xflags |= FS_XFLAG_NOATIME;
- if (iflags & F2FS_PROJINHERIT_FL)
- xflags |= FS_XFLAG_PROJINHERIT;
+/*
+ * To make a new on-disk f2fs i_flag gettable via FS_IOC_FSGETXATTR and settable
+ * via FS_IOC_FSSETXATTR, add an entry for it to f2fs_xflags_map[], and add its
+ * FS_XFLAG_* equivalent to F2FS_SUPPORTED_XFLAGS.
+ */
+
+static const struct {
+ u32 iflag;
+ u32 xflag;
+} f2fs_xflags_map[] = {
+ { F2FS_SYNC_FL, FS_XFLAG_SYNC },
+ { F2FS_IMMUTABLE_FL, FS_XFLAG_IMMUTABLE },
+ { F2FS_APPEND_FL, FS_XFLAG_APPEND },
+ { F2FS_NODUMP_FL, FS_XFLAG_NODUMP },
+ { F2FS_NOATIME_FL, FS_XFLAG_NOATIME },
+ { F2FS_PROJINHERIT_FL, FS_XFLAG_PROJINHERIT },
+};
+
+#define F2FS_SUPPORTED_XFLAGS ( \
+ FS_XFLAG_SYNC | \
+ FS_XFLAG_IMMUTABLE | \
+ FS_XFLAG_APPEND | \
+ FS_XFLAG_NODUMP | \
+ FS_XFLAG_NOATIME | \
+ FS_XFLAG_PROJINHERIT)
+
+/* Convert f2fs on-disk i_flags to FS_IOC_FS{GET,SET}XATTR flags */
+static inline u32 f2fs_iflags_to_xflags(u32 iflags)
+{
+ u32 xflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++)
+ if (iflags & f2fs_xflags_map[i].iflag)
+ xflags |= f2fs_xflags_map[i].xflag;
+
return xflags;
}
-#define F2FS_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
- FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
- FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
-
-/* Transfer xflags flags to internal */
-static inline unsigned long f2fs_xflags_to_iflags(__u32 xflags)
+/* Convert FS_IOC_FS{GET,SET}XATTR flags to f2fs on-disk i_flags */
+static inline u32 f2fs_xflags_to_iflags(u32 xflags)
{
- unsigned long iflags = 0;
+ u32 iflags = 0;
+ int i;
- if (xflags & FS_XFLAG_SYNC)
- iflags |= F2FS_SYNC_FL;
- if (xflags & FS_XFLAG_IMMUTABLE)
- iflags |= F2FS_IMMUTABLE_FL;
- if (xflags & FS_XFLAG_APPEND)
- iflags |= F2FS_APPEND_FL;
- if (xflags & FS_XFLAG_NODUMP)
- iflags |= F2FS_NODUMP_FL;
- if (xflags & FS_XFLAG_NOATIME)
- iflags |= F2FS_NOATIME_FL;
- if (xflags & FS_XFLAG_PROJINHERIT)
- iflags |= F2FS_PROJINHERIT_FL;
+ for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++)
+ if (xflags & f2fs_xflags_map[i].xflag)
+ iflags |= f2fs_xflags_map[i].iflag;
return iflags;
}
+static void f2fs_fill_fsxattr(struct inode *inode, struct fsxattr *fa)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+
+ simple_fill_fsxattr(fa, f2fs_iflags_to_xflags(fi->i_flags));
+
+ if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
+ fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid);
+}
+
static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- struct f2fs_inode_info *fi = F2FS_I(inode);
struct fsxattr fa;
- memset(&fa, 0, sizeof(struct fsxattr));
- fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags &
- F2FS_FL_USER_VISIBLE);
-
- if (f2fs_sb_has_project_quota(inode->i_sb))
- fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
- fi->i_projid);
+ f2fs_fill_fsxattr(inode, &fa);
if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa)))
return -EFAULT;
@@ -2739,9 +2965,8 @@
static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- struct f2fs_inode_info *fi = F2FS_I(inode);
- struct fsxattr fa;
- unsigned int flags;
+ struct fsxattr fa, old_fa;
+ u32 iflags;
int err;
if (copy_from_user(&fa, (struct fsxattr __user *)arg, sizeof(fa)))
@@ -2751,11 +2976,11 @@
if (!inode_owner_or_capable(inode))
return -EACCES;
- if (fa.fsx_xflags & ~F2FS_SUPPORTED_FS_XFLAGS)
+ if (fa.fsx_xflags & ~F2FS_SUPPORTED_XFLAGS)
return -EOPNOTSUPP;
- flags = f2fs_xflags_to_iflags(fa.fsx_xflags);
- if (f2fs_mask_flags(inode->i_mode, flags) != flags)
+ iflags = f2fs_xflags_to_iflags(fa.fsx_xflags);
+ if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
return -EOPNOTSUPP;
err = mnt_want_write_file(filp);
@@ -2763,19 +2988,22 @@
return err;
inode_lock(inode);
- flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) |
- (flags & F2FS_FL_XFLAG_VISIBLE);
- err = __f2fs_ioc_setflags(inode, flags);
- inode_unlock(inode);
- mnt_drop_write_file(filp);
+
+ f2fs_fill_fsxattr(inode, &old_fa);
+ err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
if (err)
- return err;
+ goto out;
+
+ err = f2fs_setflags_common(inode, iflags,
+ f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS));
+ if (err)
+ goto out;
err = f2fs_ioc_setproject(filp, fa.fsx_projid);
- if (err)
- return err;
-
- return 0;
+out:
+ inode_unlock(inode);
+ mnt_drop_write_file(filp);
+ return err;
}
int f2fs_pin_file_control(struct inode *inode, bool inc)
@@ -2789,10 +3017,9 @@
fi->i_gc_failures[GC_FAILURE_PIN] + 1);
if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: Enable GC = ino %lx after %x GC trials\n",
- __func__, inode->i_ino,
- fi->i_gc_failures[GC_FAILURE_PIN]);
+ f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
+ __func__, inode->i_ino,
+ fi->i_gc_failures[GC_FAILURE_PIN]);
clear_inode_flag(inode, FI_PIN_FILE);
return -EAGAIN;
}
@@ -2805,9 +3032,6 @@
__u32 pin;
int ret = 0;
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
if (get_user(pin, (__u32 __user *)arg))
return -EFAULT;
@@ -2877,6 +3101,7 @@
map.m_next_pgofs = NULL;
map.m_next_extent = &m_next_extent;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
end = F2FS_I_SB(inode)->max_file_blocks;
while (map.m_lblk < end) {
@@ -2899,10 +3124,119 @@
return f2fs_precache_extents(file_inode(filp));
}
+static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+ __u64 block_count;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (f2fs_readonly(sbi->sb))
+ return -EROFS;
+
+ if (copy_from_user(&block_count, (void __user *)arg,
+ sizeof(block_count)))
+ return -EFAULT;
+
+ ret = f2fs_resize_fs(sbi, block_count);
+
+ return ret;
+}
+
+static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+
+ if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
+ f2fs_warn(F2FS_I_SB(inode),
+ "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem.\n",
+ inode->i_ino);
+ return -EOPNOTSUPP;
+ }
+
+ return fsverity_ioctl_enable(filp, (const void __user *)arg);
+}
+
+static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
+{
+ if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
+ return -EOPNOTSUPP;
+
+ return fsverity_ioctl_measure(filp, (void __user *)arg);
+}
+
+static int f2fs_get_volume_name(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ char *vbuf;
+ int count;
+ int err = 0;
+
+ vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL);
+ if (!vbuf)
+ return -ENOMEM;
+
+ down_read(&sbi->sb_lock);
+ count = utf16s_to_utf8s(sbi->raw_super->volume_name,
+ ARRAY_SIZE(sbi->raw_super->volume_name),
+ UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME);
+ up_read(&sbi->sb_lock);
+
+ if (copy_to_user((char __user *)arg, vbuf,
+ min(FSLABEL_MAX, count)))
+ err = -EFAULT;
+
+ kvfree(vbuf);
+ return err;
+}
+
+static int f2fs_set_volume_name(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ char *vbuf;
+ int err = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX);
+ if (IS_ERR(vbuf))
+ return PTR_ERR(vbuf);
+
+ err = mnt_want_write_file(filp);
+ if (err)
+ goto out;
+
+ down_write(&sbi->sb_lock);
+
+ memset(sbi->raw_super->volume_name, 0,
+ sizeof(sbi->raw_super->volume_name));
+ utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN,
+ sbi->raw_super->volume_name,
+ ARRAY_SIZE(sbi->raw_super->volume_name));
+
+ err = f2fs_commit_super(sbi, false);
+
+ up_write(&sbi->sb_lock);
+
+ mnt_drop_write_file(filp);
+out:
+ kfree(vbuf);
+ return err;
+}
+
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
return -EIO;
+ if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
+ return -ENOSPC;
switch (cmd) {
case F2FS_IOC_GETFLAGS:
@@ -2931,6 +3265,16 @@
return f2fs_ioc_get_encryption_policy(filp, arg);
case F2FS_IOC_GET_ENCRYPTION_PWSALT:
return f2fs_ioc_get_encryption_pwsalt(filp, arg);
+ case FS_IOC_GET_ENCRYPTION_POLICY_EX:
+ return f2fs_ioc_get_encryption_policy_ex(filp, arg);
+ case FS_IOC_ADD_ENCRYPTION_KEY:
+ return f2fs_ioc_add_encryption_key(filp, arg);
+ case FS_IOC_REMOVE_ENCRYPTION_KEY:
+ return f2fs_ioc_remove_encryption_key(filp, arg);
+ case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
+ return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
+ case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
+ return f2fs_ioc_get_encryption_key_status(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT:
return f2fs_ioc_gc(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
@@ -2955,6 +3299,16 @@
return f2fs_ioc_set_pin_file(filp, arg);
case F2FS_IOC_PRECACHE_EXTENTS:
return f2fs_ioc_precache_extents(filp, arg);
+ case F2FS_IOC_RESIZE_FS:
+ return f2fs_ioc_resize_fs(filp, arg);
+ case FS_IOC_ENABLE_VERITY:
+ return f2fs_ioc_enable_verity(filp, arg);
+ case FS_IOC_MEASURE_VERITY:
+ return f2fs_ioc_measure_verity(filp, arg);
+ case F2FS_IOC_GET_VOLUME_NAME:
+ return f2fs_get_volume_name(filp, arg);
+ case F2FS_IOC_SET_VOLUME_NAME:
+ return f2fs_set_volume_name(filp, arg);
default:
return -ENOTTY;
}
@@ -2966,15 +3320,17 @@
struct inode *inode = file_inode(file);
ssize_t ret;
- if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
- return -EIO;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
+ ret = -EIO;
+ goto out;
+ }
- if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
- return -EINVAL;
-
- if (!inode_trylock(inode)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
- return -EAGAIN;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock(inode)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
inode_lock(inode);
}
@@ -2987,18 +3343,16 @@
if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
set_inode_flag(inode, FI_NO_PREALLOC);
- if ((iocb->ki_flags & IOCB_NOWAIT) &&
- (iocb->ki_flags & IOCB_DIRECT)) {
- if (!f2fs_overwrite_io(inode, iocb->ki_pos,
+ if ((iocb->ki_flags & IOCB_NOWAIT)) {
+ if (!f2fs_overwrite_io(inode, iocb->ki_pos,
iov_iter_count(from)) ||
- f2fs_has_inline_data(inode) ||
- f2fs_force_buffered_io(inode, WRITE)) {
- clear_inode_flag(inode,
- FI_NO_PREALLOC);
- inode_unlock(inode);
- return -EAGAIN;
- }
-
+ f2fs_has_inline_data(inode) ||
+ f2fs_force_buffered_io(inode, iocb, from)) {
+ clear_inode_flag(inode, FI_NO_PREALLOC);
+ inode_unlock(inode);
+ ret = -EAGAIN;
+ goto out;
+ }
} else {
preallocated = true;
target_size = iocb->ki_pos + iov_iter_count(from);
@@ -3007,7 +3361,8 @@
if (err) {
clear_inode_flag(inode, FI_NO_PREALLOC);
inode_unlock(inode);
- return err;
+ ret = err;
+ goto out;
}
}
ret = __generic_file_write_iter(iocb, from);
@@ -3021,7 +3376,9 @@
f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
}
inode_unlock(inode);
-
+out:
+ trace_f2fs_file_write_iter(inode, iocb->ki_pos,
+ iov_iter_count(from), ret);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
@@ -3049,6 +3406,11 @@
case F2FS_IOC_SET_ENCRYPTION_POLICY:
case F2FS_IOC_GET_ENCRYPTION_PWSALT:
case F2FS_IOC_GET_ENCRYPTION_POLICY:
+ case FS_IOC_GET_ENCRYPTION_POLICY_EX:
+ case FS_IOC_ADD_ENCRYPTION_KEY:
+ case FS_IOC_REMOVE_ENCRYPTION_KEY:
+ case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
+ case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
case F2FS_IOC_GARBAGE_COLLECT:
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
case F2FS_IOC_WRITE_CHECKPOINT:
@@ -3061,6 +3423,11 @@
case F2FS_IOC_GET_PIN_FILE:
case F2FS_IOC_SET_PIN_FILE:
case F2FS_IOC_PRECACHE_EXTENTS:
+ case F2FS_IOC_RESIZE_FS:
+ case FS_IOC_ENABLE_VERITY:
+ case FS_IOC_MEASURE_VERITY:
+ case F2FS_IOC_GET_VOLUME_NAME:
+ case F2FS_IOC_SET_VOLUME_NAME:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 5c8d004..5877bd7 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/gc.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/module.h>
@@ -43,13 +40,16 @@
if (gc_th->gc_wake)
gc_th->gc_wake = 0;
- if (try_to_freeze())
+ if (try_to_freeze()) {
+ stat_other_skip_bggc_count(sbi);
continue;
+ }
if (kthread_should_stop())
break;
if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
increase_sleep_time(gc_th, &wait_ms);
+ stat_other_skip_bggc_count(sbi);
continue;
}
@@ -58,8 +58,10 @@
f2fs_stop_checkpoint(sbi, false);
}
- if (!sb_start_write_trylock(sbi->sb))
+ if (!sb_start_write_trylock(sbi->sb)) {
+ stat_other_skip_bggc_count(sbi);
continue;
+ }
/*
* [GC triggering condition]
@@ -80,12 +82,15 @@
goto do_gc;
}
- if (!mutex_trylock(&sbi->gc_mutex))
+ if (!mutex_trylock(&sbi->gc_mutex)) {
+ stat_other_skip_bggc_count(sbi);
goto next;
+ }
- if (!is_idle(sbi)) {
+ if (!is_idle(sbi, GC_TIME)) {
increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
+ stat_io_skip_bggc_count(sbi);
goto next;
}
@@ -137,7 +142,7 @@
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
err = PTR_ERR(gc_th->f2fs_gc_task);
- kfree(gc_th);
+ kvfree(gc_th);
sbi->gc_thread = NULL;
}
out:
@@ -150,7 +155,7 @@
if (!gc_th)
return;
kthread_stop(gc_th->f2fs_gc_task);
- kfree(gc_th);
+ kvfree(gc_th);
sbi->gc_thread = NULL;
}
@@ -306,10 +311,11 @@
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
- unsigned int last_segment = MAIN_SEGS(sbi);
+ unsigned int last_segment;
unsigned int nsearched = 0;
mutex_lock(&dirty_i->seglist_lock);
+ last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
p.alloc_mode = alloc_mode;
select_policy(sbi, gc_type, type, &p);
@@ -318,8 +324,7 @@
p.min_cost = get_max_cost(sbi, &p);
if (*result != NULL_SEGNO) {
- if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
- get_valid_blocks(sbi, *result, false) &&
+ if (get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
@@ -328,6 +333,22 @@
if (p.max_search == 0)
goto out;
+ if (__is_large_section(sbi) && p.alloc_mode == LFS) {
+ if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) {
+ p.min_segno = sbi->next_victim_seg[BG_GC];
+ *result = p.min_segno;
+ sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+ goto got_result;
+ }
+ if (gc_type == FG_GC &&
+ sbi->next_victim_seg[FG_GC] != NULL_SEGNO) {
+ p.min_segno = sbi->next_victim_seg[FG_GC];
+ *result = p.min_segno;
+ sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
+ goto got_result;
+ }
+ }
+
last_victim = sm->last_victim[p.gc_mode];
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
@@ -361,10 +382,25 @@
nsearched++;
}
+#ifdef CONFIG_F2FS_CHECK_FS
+ /*
+ * skip selecting the invalid segno (that is failed due to block
+ * validity check failure during GC) to avoid endless GC loop in
+ * such cases.
+ */
+ if (test_bit(segno, sm->invalid_segmap))
+ goto next;
+#endif
+
secno = GET_SEC_FROM_SEG(sbi, segno);
if (sec_usage_check(sbi, secno))
goto next;
+ /* Don't touch checkpointed data */
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ get_ckpt_valid_blocks(sbi, segno) &&
+ p.alloc_mode != SSR))
+ goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
@@ -380,12 +416,15 @@
sm->last_victim[p.gc_mode] = last_victim + 1;
else
sm->last_victim[p.gc_mode] = segno + 1;
- sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
+ sm->last_victim[p.gc_mode] %=
+ (MAIN_SECS(sbi) * sbi->segs_per_sec);
break;
}
}
if (p.min_segno != NULL_SEGNO) {
got_it:
+ *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+got_result:
if (p.alloc_mode == LFS) {
secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
if (gc_type == FG_GC)
@@ -393,13 +432,13 @@
else
set_bit(secno, dirty_i->victim_secmap);
}
- *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+ }
+out:
+ if (p.min_segno != NULL_SEGNO)
trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
sbi->cur_victim_sec,
prefree_segments(sbi), free_segments(sbi));
- }
-out:
mutex_unlock(&dirty_i->seglist_lock);
return (p.min_segno == NULL_SEGNO) ? 0 : 1;
@@ -464,7 +503,7 @@
* On validity, copy that node with cold status, otherwise (invalid node)
* ignore that.
*/
-static void gc_node_segment(struct f2fs_sb_info *sbi,
+static int gc_node_segment(struct f2fs_sb_info *sbi,
struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
struct f2fs_summary *entry;
@@ -472,6 +511,7 @@
int off;
int phase = 0;
bool fggc = (gc_type == FG_GC);
+ int submitted = 0;
start_addr = START_BLOCK(sbi, segno);
@@ -485,10 +525,11 @@
nid_t nid = le32_to_cpu(entry->nid);
struct page *node_page;
struct node_info ni;
+ int err;
/* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
- return;
+ return submitted;
if (check_valid_map(sbi, segno, off) == 0)
continue;
@@ -525,7 +566,9 @@
continue;
}
- f2fs_move_node_page(node_page, gc_type);
+ err = f2fs_move_node_page(node_page, gc_type);
+ if (!err && gc_type == FG_GC)
+ submitted++;
stat_inc_node_blk_count(sbi, 1, gc_type);
}
@@ -534,6 +577,7 @@
if (fggc)
atomic_dec(&sbi->wb_sync_req[NODE]);
+ return submitted;
}
/*
@@ -560,7 +604,7 @@
int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
bidx = node_ofs - 5 - dec;
}
- return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
+ return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode);
}
static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -584,9 +628,8 @@
}
if (sum->version != dni->version) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: valid data with mismatched node version.",
- __func__);
+ f2fs_warn(sbi, "%s: valid data with mismatched node version.",
+ __func__);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
@@ -594,8 +637,21 @@
source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
- if (source_blkaddr != blkaddr)
+ if (source_blkaddr != blkaddr) {
+#ifdef CONFIG_F2FS_CHECK_FS
+ unsigned int segno = GET_SEGNO(sbi, blkaddr);
+ unsigned long offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+
+ if (unlikely(check_valid_map(sbi, segno, offset))) {
+ if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
+ f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
+ blkaddr, source_blkaddr, segno);
+ f2fs_bug_on(sbi, 1);
+ }
+ }
+#endif
return false;
+ }
return true;
}
@@ -625,6 +681,11 @@
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE_READ))) {
+ err = -EFSCORRUPTED;
+ goto put_page;
+ }
goto got_it;
}
@@ -634,9 +695,13 @@
goto put_page;
f2fs_put_dnode(&dn);
+ if (!__is_valid_data_blkaddr(dn.data_blkaddr)) {
+ err = -ENOENT;
+ goto put_page;
+ }
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
- DATA_GENERIC))) {
- err = -EFAULT;
+ DATA_GENERIC_ENHANCE))) {
+ err = -EFSCORRUPTED;
goto put_page;
}
got_it:
@@ -644,6 +709,14 @@
fio.page = page;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
+ /*
+ * don't cache encrypted data into meta inode until previous dirty
+ * data were writebacked to avoid racing between GC and flush.
+ */
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
dn.data_blkaddr,
FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -669,7 +742,7 @@
* Move data block via META_MAPPING while keeping locked data page.
* This can be used to move blocks, aka LBAs, directly on disk.
*/
-static void move_data_block(struct inode *inode, block_t bidx,
+static int move_data_block(struct inode *inode, block_t bidx,
int gc_type, unsigned int segno, int off)
{
struct f2fs_io_info fio = {
@@ -688,25 +761,29 @@
struct node_info ni;
struct page *page, *mpage;
block_t newaddr;
- int err;
+ int err = 0;
bool lfs_mode = test_opt(fio.sbi, LFS);
/* do not read out */
page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
if (!page)
- return;
+ return -ENOMEM;
- if (!check_valid_map(F2FS_I_SB(inode), segno, off))
+ if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
+ err = -ENOENT;
goto out;
+ }
if (f2fs_is_atomic_file(inode)) {
F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
+ err = -EAGAIN;
goto out;
}
if (f2fs_is_pinned_file(inode)) {
f2fs_pin_file_control(inode, true);
+ err = -EAGAIN;
goto out;
}
@@ -717,6 +794,7 @@
if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
ClearPageUptodate(page);
+ err = -ENOENT;
goto put_out;
}
@@ -724,7 +802,9 @@
* don't cache encrypted data into meta inode until previous dirty
* data were writebacked to avoid racing between GC and flush.
*/
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
if (err)
@@ -739,6 +819,29 @@
if (lfs_mode)
down_write(&fio.sbi->io_order_lock);
+ mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, false);
+ if (!mpage)
+ goto up_out;
+
+ fio.encrypted_page = mpage;
+
+ /* read source block in mpage */
+ if (!PageUptodate(mpage)) {
+ err = f2fs_submit_page_bio(&fio);
+ if (err) {
+ f2fs_put_page(mpage, 1);
+ goto up_out;
+ }
+ lock_page(mpage);
+ if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) ||
+ !PageUptodate(mpage))) {
+ err = -EIO;
+ f2fs_put_page(mpage, 1);
+ goto up_out;
+ }
+ }
+
f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
&sum, CURSEG_COLD_DATA, NULL, false);
@@ -746,45 +849,19 @@
newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
if (!fio.encrypted_page) {
err = -ENOMEM;
+ f2fs_put_page(mpage, 1);
goto recover_block;
}
- mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
- fio.old_blkaddr, FGP_LOCK, GFP_NOFS);
- if (mpage) {
- bool updated = false;
+ /* write target block */
+ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true);
+ memcpy(page_address(fio.encrypted_page),
+ page_address(mpage), PAGE_SIZE);
+ f2fs_put_page(mpage, 1);
+ invalidate_mapping_pages(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, fio.old_blkaddr);
- if (PageUptodate(mpage)) {
- memcpy(page_address(fio.encrypted_page),
- page_address(mpage), PAGE_SIZE);
- updated = true;
- }
- f2fs_put_page(mpage, 1);
- invalidate_mapping_pages(META_MAPPING(fio.sbi),
- fio.old_blkaddr, fio.old_blkaddr);
- if (updated)
- goto write_page;
- }
-
- err = f2fs_submit_page_bio(&fio);
- if (err)
- goto put_page_out;
-
- /* write page */
- lock_page(fio.encrypted_page);
-
- if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) {
- err = -EIO;
- goto put_page_out;
- }
- if (unlikely(!PageUptodate(fio.encrypted_page))) {
- err = -EIO;
- goto put_page_out;
- }
-
-write_page:
set_page_dirty(fio.encrypted_page);
- f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
dec_page_count(fio.sbi, F2FS_DIRTY_META);
@@ -792,13 +869,14 @@
ClearPageError(page);
/* allocate block address */
- f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
fio.op = REQ_OP_WRITE;
fio.op_flags = REQ_SYNC;
fio.new_blkaddr = newaddr;
f2fs_submit_page_write(&fio);
if (fio.retry) {
+ err = -EAGAIN;
if (PageWriteback(fio.encrypted_page))
end_page_writeback(fio.encrypted_page);
goto put_page_out;
@@ -813,43 +891,52 @@
put_page_out:
f2fs_put_page(fio.encrypted_page, 1);
recover_block:
- if (lfs_mode)
- up_write(&fio.sbi->io_order_lock);
if (err)
f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
true, true);
+up_out:
+ if (lfs_mode)
+ up_write(&fio.sbi->io_order_lock);
put_out:
f2fs_put_dnode(&dn);
out:
f2fs_put_page(page, 1);
+ return err;
}
-static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
+static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
unsigned int segno, int off)
{
struct page *page;
+ int err = 0;
page = f2fs_get_lock_data_page(inode, bidx, true);
if (IS_ERR(page))
- return;
+ return PTR_ERR(page);
- if (!check_valid_map(F2FS_I_SB(inode), segno, off))
+ if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
+ err = -ENOENT;
goto out;
+ }
if (f2fs_is_atomic_file(inode)) {
F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
+ err = -EAGAIN;
goto out;
}
if (f2fs_is_pinned_file(inode)) {
if (gc_type == FG_GC)
f2fs_pin_file_control(inode, true);
+ err = -EAGAIN;
goto out;
}
if (gc_type == BG_GC) {
- if (PageWriteback(page))
+ if (PageWriteback(page)) {
+ err = -EAGAIN;
goto out;
+ }
set_page_dirty(page);
set_cold_data(page);
} else {
@@ -867,11 +954,11 @@
.io_type = FS_GC_DATA_IO,
};
bool is_dirty = PageDirty(page);
- int err;
retry:
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
f2fs_remove_dirty_inode(inode);
@@ -892,6 +979,7 @@
}
out:
f2fs_put_page(page, 1);
+ return err;
}
/*
@@ -901,7 +989,7 @@
* If the parent node is not valid or the data block address is different,
* the victim data block is ignored.
*/
-static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
struct super_block *sb = sbi->sb;
@@ -909,6 +997,7 @@
block_t start_addr;
int off;
int phase = 0;
+ int submitted = 0;
start_addr = START_BLOCK(sbi, segno);
@@ -925,7 +1014,7 @@
/* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
- return;
+ return submitted;
if (check_valid_map(sbi, segno, off) == 0)
continue;
@@ -997,6 +1086,7 @@
if (inode) {
struct f2fs_inode_info *fi = F2FS_I(inode);
bool locked = false;
+ int err;
if (S_ISREG(inode->i_mode)) {
if (!down_write_trylock(&fi->i_gc_rwsem[READ]))
@@ -1016,12 +1106,16 @@
start_bidx = f2fs_start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_post_read_required(inode))
- move_data_block(inode, start_bidx, gc_type,
- segno, off);
+ err = move_data_block(inode, start_bidx,
+ gc_type, segno, off);
else
- move_data_page(inode, start_bidx, gc_type,
+ err = move_data_page(inode, start_bidx, gc_type,
segno, off);
+ if (!err && (gc_type == FG_GC ||
+ f2fs_post_read_required(inode)))
+ submitted++;
+
if (locked) {
up_write(&fi->i_gc_rwsem[WRITE]);
up_write(&fi->i_gc_rwsem[READ]);
@@ -1033,6 +1127,8 @@
if (++phase < 5)
goto next_step;
+
+ return submitted;
}
static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -1057,18 +1153,34 @@
struct blk_plug plug;
unsigned int segno = start_segno;
unsigned int end_segno = start_segno + sbi->segs_per_sec;
- int seg_freed = 0;
+ int seg_freed = 0, migrated = 0;
unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
SUM_TYPE_DATA : SUM_TYPE_NODE;
+ int submitted = 0;
+
+ if (__is_large_section(sbi))
+ end_segno = rounddown(end_segno, sbi->segs_per_sec);
/* readahead multi ssa blocks those have contiguous address */
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
- sbi->segs_per_sec, META_SSA, true);
+ end_segno - segno, META_SSA, true);
/* reference all summary page */
while (segno < end_segno) {
sum_page = f2fs_get_sum_page(sbi, segno++);
+ if (IS_ERR(sum_page)) {
+ int err = PTR_ERR(sum_page);
+
+ end_segno = segno - 1;
+ for (segno = start_segno; segno < end_segno; segno++) {
+ sum_page = find_get_page(META_MAPPING(sbi),
+ GET_SUM_BLOCK(sbi, segno));
+ f2fs_put_page(sum_page, 0);
+ f2fs_put_page(sum_page, 0);
+ }
+ return err;
+ }
unlock_page(sum_page);
}
@@ -1081,18 +1193,21 @@
GET_SUM_BLOCK(sbi, segno));
f2fs_put_page(sum_page, 0);
- if (get_valid_blocks(sbi, segno, false) == 0 ||
- !PageUptodate(sum_page) ||
- unlikely(f2fs_cp_error(sbi)))
- goto next;
+ if (get_valid_blocks(sbi, segno, false) == 0)
+ goto freed;
+ if (__is_large_section(sbi) &&
+ migrated >= sbi->migration_granularity)
+ goto skip;
+ if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi)))
+ goto skip;
sum = page_address(sum_page);
if (type != GET_SUM_TYPE((&sum->footer))) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) "
- "type [%d, %d] in SSA and SIT",
- segno, type, GET_SUM_TYPE((&sum->footer)));
+ f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
+ segno, type, GET_SUM_TYPE((&sum->footer)));
set_sbi_flag(sbi, SBI_NEED_FSCK);
- goto next;
+ f2fs_stop_checkpoint(sbi, false);
+ goto skip;
}
/*
@@ -1103,21 +1218,27 @@
* - lock_page(sum_page)
*/
if (type == SUM_TYPE_NODE)
- gc_node_segment(sbi, sum->entries, segno, gc_type);
- else
- gc_data_segment(sbi, sum->entries, gc_list, segno,
+ submitted += gc_node_segment(sbi, sum->entries, segno,
gc_type);
+ else
+ submitted += gc_data_segment(sbi, sum->entries, gc_list,
+ segno, gc_type);
stat_inc_seg_count(sbi, type, gc_type);
+freed:
if (gc_type == FG_GC &&
get_valid_blocks(sbi, segno, false) == 0)
seg_freed++;
-next:
+ migrated++;
+
+ if (__is_large_section(sbi) && segno + 1 < end_segno)
+ sbi->next_victim_seg[gc_type] = segno + 1;
+skip:
f2fs_put_page(sum_page, 0);
}
- if (gc_type == FG_GC)
+ if (submitted)
f2fs_submit_merged_write(sbi,
(type == SUM_TYPE_NODE) ? NODE : DATA);
@@ -1172,7 +1293,8 @@
* threshold, we can make them free by checkpoint. Then, we
* secure free segments which doesn't need fggc any more.
*/
- if (prefree_segments(sbi)) {
+ if (prefree_segments(sbi) &&
+ !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
@@ -1204,7 +1326,7 @@
round++;
}
- if (gc_type == FG_GC)
+ if (gc_type == FG_GC && seg_freed)
sbi->cur_victim_sec = NULL_SEGNO;
if (sync)
@@ -1224,7 +1346,7 @@
segno = NULL_SEGNO;
goto gc_more;
}
- if (gc_type == FG_GC)
+ if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
ret = f2fs_write_checkpoint(sbi, &cpc);
}
stop:
@@ -1244,7 +1366,7 @@
put_gc_inode(&gc_list);
- if (sync)
+ if (sync && !ret)
ret = sec_freed ? 0 : -EAGAIN;
return ret;
}
@@ -1256,7 +1378,180 @@
sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
/* give warm/cold data area from slower device */
- if (sbi->s_ndevs && sbi->segs_per_sec == 1)
+ if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
+
+static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
+ unsigned int end)
+{
+ int type;
+ unsigned int segno, next_inuse;
+ int err = 0;
+
+ /* Move out cursegs from the target range */
+ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
+ allocate_segment_for_resize(sbi, type, start, end);
+
+ /* do GC to move out valid blocks in the range */
+ for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
+ struct gc_inode_list gc_list = {
+ .ilist = LIST_HEAD_INIT(gc_list.ilist),
+ .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
+ };
+
+ mutex_lock(&sbi->gc_mutex);
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC);
+ mutex_unlock(&sbi->gc_mutex);
+ put_gc_inode(&gc_list);
+
+ if (get_valid_blocks(sbi, segno, true))
+ return -EAGAIN;
+ }
+
+ err = f2fs_sync_fs(sbi->sb, 1);
+ if (err)
+ return err;
+
+ next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start);
+ if (next_inuse <= end) {
+ f2fs_err(sbi, "segno %u should be free but still inuse!",
+ next_inuse);
+ f2fs_bug_on(sbi, 1);
+ }
+ return err;
+}
+
+static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
+{
+ struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi);
+ int section_count = le32_to_cpu(raw_sb->section_count);
+ int segment_count = le32_to_cpu(raw_sb->segment_count);
+ int segment_count_main = le32_to_cpu(raw_sb->segment_count_main);
+ long long block_count = le64_to_cpu(raw_sb->block_count);
+ int segs = secs * sbi->segs_per_sec;
+
+ raw_sb->section_count = cpu_to_le32(section_count + secs);
+ raw_sb->segment_count = cpu_to_le32(segment_count + segs);
+ raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
+ raw_sb->block_count = cpu_to_le64(block_count +
+ (long long)segs * sbi->blocks_per_seg);
+}
+
+static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
+{
+ int segs = secs * sbi->segs_per_sec;
+ long long user_block_count =
+ le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
+
+ SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
+ MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
+ FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
+ FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
+ F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count +
+ (long long)segs * sbi->blocks_per_seg);
+}
+
+int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
+{
+ __u64 old_block_count, shrunk_blocks;
+ unsigned int secs;
+ int gc_mode, gc_type;
+ int err = 0;
+ __u32 rem;
+
+ old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count);
+ if (block_count > old_block_count)
+ return -EINVAL;
+
+ /* new fs size should align to section size */
+ div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem);
+ if (rem)
+ return -EINVAL;
+
+ if (block_count == old_block_count)
+ return 0;
+
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
+ f2fs_err(sbi, "Should run fsck to repair first.");
+ return -EFSCORRUPTED;
+ }
+
+ if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+ f2fs_err(sbi, "Checkpoint should be enabled.");
+ return -EINVAL;
+ }
+
+ freeze_bdev(sbi->sb->s_bdev);
+
+ shrunk_blocks = old_block_count - block_count;
+ secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
+ spin_lock(&sbi->stat_lock);
+ if (shrunk_blocks + valid_user_blocks(sbi) +
+ sbi->current_reserved_blocks + sbi->unusable_block_count +
+ F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
+ err = -ENOSPC;
+ else
+ sbi->user_block_count -= shrunk_blocks;
+ spin_unlock(&sbi->stat_lock);
+ if (err) {
+ thaw_bdev(sbi->sb->s_bdev, sbi->sb);
+ return err;
+ }
+
+ mutex_lock(&sbi->resize_mutex);
+ set_sbi_flag(sbi, SBI_IS_RESIZEFS);
+
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+
+ MAIN_SECS(sbi) -= secs;
+
+ for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
+ if (SIT_I(sbi)->last_victim[gc_mode] >=
+ MAIN_SECS(sbi) * sbi->segs_per_sec)
+ SIT_I(sbi)->last_victim[gc_mode] = 0;
+
+ for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
+ if (sbi->next_victim_seg[gc_type] >=
+ MAIN_SECS(sbi) * sbi->segs_per_sec)
+ sbi->next_victim_seg[gc_type] = NULL_SEGNO;
+
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+
+ err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec,
+ MAIN_SEGS(sbi) - 1);
+ if (err)
+ goto out;
+
+ update_sb_metadata(sbi, -secs);
+
+ err = f2fs_commit_super(sbi, false);
+ if (err) {
+ update_sb_metadata(sbi, secs);
+ goto out;
+ }
+
+ update_fs_metadata(sbi, -secs);
+ clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ err = f2fs_sync_fs(sbi->sb, 1);
+ if (err) {
+ update_fs_metadata(sbi, secs);
+ update_sb_metadata(sbi, secs);
+ f2fs_commit_super(sbi, false);
+ }
+out:
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");
+
+ MAIN_SECS(sbi) += secs;
+ spin_lock(&sbi->stat_lock);
+ sbi->user_block_count += shrunk_blocks;
+ spin_unlock(&sbi->stat_lock);
+ }
+ clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ mutex_unlock(&sbi->resize_mutex);
+ thaw_bdev(sbi->sb->s_bdev, sbi->sb);
+ return err;
+}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index c8619e4..bbac9d3 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/gc.h
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define GC_THREAD_MIN_WB_PAGES 1 /*
* a threshold to determine
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index eb2e031..5bc4dcd 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/hash.c
*
@@ -7,16 +8,13 @@
* Portions of this code from linux/fs/ext3/hash.c
*
* Copyright (C) 2002 by Theodore Ts'o
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/cryptohash.h>
#include <linux/pagemap.h>
+#include <linux/unicode.h>
#include "f2fs.h"
@@ -70,7 +68,7 @@
*buf++ = pad;
}
-f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
+static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info,
struct fscrypt_name *fname)
{
__u32 hash;
@@ -106,3 +104,37 @@
f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
return f2fs_hash;
}
+
+f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
+ const struct qstr *name_info, struct fscrypt_name *fname)
+{
+#ifdef CONFIG_UNICODE
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+ const struct unicode_map *um = sbi->s_encoding;
+ int r, dlen;
+ unsigned char *buff;
+ struct qstr folded;
+
+ if (!name_info->len || !IS_CASEFOLDED(dir))
+ goto opaque_seq;
+
+ buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL);
+ if (!buff)
+ return -ENOMEM;
+
+ dlen = utf8_casefold(um, name_info, buff, PATH_MAX);
+ if (dlen < 0) {
+ kvfree(buff);
+ goto opaque_seq;
+ }
+ folded.name = buff;
+ folded.len = dlen;
+ r = __f2fs_dentry_hash(&folded, fname);
+
+ kvfree(buff);
+ return r;
+
+opaque_seq:
+#endif
+ return __f2fs_dentry_hash(name_info, fname);
+}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 115dc21..896db04 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/inline.c
* Copyright (c) 2013, Intel Corporation
* Authors: Huajun Li <huajun.li@intel.com>
* Haicheng Li <haicheng.li@intel.com>
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
@@ -74,7 +72,7 @@
addr = inline_data_addr(inode, ipage);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memset(addr + from, 0, MAX_INLINE_DATA(inode) - from);
set_page_dirty(ipage);
@@ -133,6 +131,7 @@
err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
if (err) {
+ f2fs_truncate_data_blocks_range(dn, 1);
f2fs_put_dnode(dn);
return err;
}
@@ -142,11 +141,9 @@
if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(dn);
set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
- f2fs_msg(fio.sbi->sb, KERN_WARNING,
- "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
- "run fsck to fix.",
- __func__, dn->inode->i_ino, dn->data_blkaddr);
- return -EINVAL;
+ f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
+ __func__, dn->inode->i_ino, dn->data_blkaddr);
+ return -EFSCORRUPTED;
}
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
@@ -163,7 +160,7 @@
fio.old_blkaddr = dn->data_blkaddr;
set_inode_flag(dn->inode, FI_HOT_DATA);
f2fs_outplace_write_data(dn, &fio);
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
if (dirty) {
inode_dec_dirty_pages(dn->inode);
f2fs_remove_dirty_inode(dn->inode);
@@ -238,14 +235,14 @@
f2fs_bug_on(F2FS_I_SB(inode), page->index);
- f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true);
src_addr = kmap_atomic(page);
dst_addr = inline_data_addr(inode, dn.inode_page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
kunmap_atomic(src_addr);
set_page_dirty(dn.inode_page);
- f2fs_clear_radix_tree_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(page);
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
@@ -279,7 +276,7 @@
ipage = f2fs_get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
src_addr = inline_data_addr(inode, npage);
dst_addr = inline_data_addr(inode, ipage);
@@ -324,7 +321,7 @@
return NULL;
}
- namehash = f2fs_dentry_hash(&name, fname);
+ namehash = f2fs_dentry_hash(dir, &name, fname);
inline_dentry = inline_data_addr(dir, ipage);
@@ -385,15 +382,13 @@
if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(&dn);
set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
- f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING,
- "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
- "run fsck to fix.",
- __func__, dir->i_ino, dn.data_blkaddr);
- err = -EINVAL;
+ f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
+ __func__, dir->i_ino, dn.data_blkaddr);
+ err = -EFSCORRUPTED;
goto out;
}
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
dentry_blk = page_address(page);
@@ -422,6 +417,14 @@
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
+ /*
+ * should retrieve reserved space which was used to keep
+ * inline_dentry's structure for backward compatibility.
+ */
+ if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) &&
+ !f2fs_has_inline_xattr(dir))
+ F2FS_I(dir)->i_inline_xattr_size = 0;
+
f2fs_i_depth_write(dir, 1);
if (i_size_read(dir) < PAGE_SIZE)
f2fs_i_size_write(dir, PAGE_SIZE);
@@ -503,18 +506,27 @@
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
- kfree(backup_dentry);
+
+ /*
+ * should retrieve reserved space which was used to keep
+ * inline_dentry's structure for backward compatibility.
+ */
+ if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) &&
+ !f2fs_has_inline_xattr(dir))
+ F2FS_I(dir)->i_inline_xattr_size = 0;
+
+ kvfree(backup_dentry);
return 0;
recover:
lock_page(ipage);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
- kfree(backup_dentry);
+ kvfree(backup_dentry);
return err;
}
@@ -567,9 +579,9 @@
}
}
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
- name_hash = f2fs_dentry_hash(new_name, NULL);
+ name_hash = f2fs_dentry_hash(dir, new_name, NULL);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
set_page_dirty(ipage);
@@ -577,6 +589,11 @@
/* we don't need to mark_inode_dirty now */
if (inode) {
f2fs_i_pino_write(inode, dir->i_ino);
+
+ /* synchronize inode page's data from inode cache */
+ if (is_inode_flag_set(inode, FI_NEW_INODE))
+ f2fs_update_inode(inode, page);
+
f2fs_put_page(page, 1);
}
@@ -599,7 +616,7 @@
int i;
lock_page(page);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
inline_dentry = inline_data_addr(dir, page);
make_dentry_ptr_inline(dir, &d, inline_dentry);
@@ -661,6 +678,12 @@
if (IS_ERR(ipage))
return PTR_ERR(ipage);
+ /*
+ * f2fs_readdir was protected by inode.i_rwsem, it is safe to access
+ * ipage without page's lock held.
+ */
+ unlock_page(ipage);
+
inline_dentry = inline_data_addr(inode, ipage);
make_dentry_ptr_inline(inode, &d, inline_dentry);
@@ -669,7 +692,7 @@
if (!err)
ctx->pos = d.max;
- f2fs_put_page(ipage, 1);
+ f2fs_put_page(ipage, 0);
return err < 0 ? err : 0;
}
@@ -687,7 +710,13 @@
if (IS_ERR(ipage))
return PTR_ERR(ipage);
- if (!f2fs_has_inline_data(inode)) {
+ if ((S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
+ !f2fs_has_inline_data(inode)) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ if (S_ISDIR(inode->i_mode) && !f2fs_has_inline_dentry(inode)) {
err = -EAGAIN;
goto out;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index dd608b8..db4fec3 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/inode.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -17,6 +14,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
+#include "xattr.h"
#include <trace/events/f2fs.h>
@@ -46,11 +44,15 @@
new_fl |= S_NOATIME;
if (flags & F2FS_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (f2fs_encrypted_inode(inode))
+ if (file_is_encrypt(inode))
new_fl |= S_ENCRYPTED;
+ if (file_is_verity(inode))
+ new_fl |= S_VERITY;
+ if (flags & F2FS_CASEFOLD_FL)
+ new_fl |= S_CASEFOLD;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
- S_ENCRYPTED);
+ S_ENCRYPTED|S_VERITY|S_CASEFOLD);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -75,8 +77,8 @@
if (!__is_valid_data_blkaddr(addr))
return 1;
- if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
- return -EFAULT;
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE))
+ return -EFSCORRUPTED;
return 0;
}
@@ -106,7 +108,7 @@
while (start < end) {
if (*start++) {
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
set_inode_flag(inode, FI_DATA_EXIST);
set_raw_inline(inode, F2FS_INODE(ipage));
@@ -121,7 +123,7 @@
{
struct f2fs_inode *ri = &F2FS_NODE(page)->i;
- if (!f2fs_sb_has_inode_chksum(sbi->sb))
+ if (!f2fs_sb_has_inode_chksum(sbi))
return false;
if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
@@ -178,9 +180,8 @@
calculated = f2fs_inode_chksum(sbi, page);
if (provided != calculated)
- f2fs_msg(sbi->sb, KERN_WARNING,
- "checksum invalid, ino = %x, %x vs. %x",
- ino_of_node(page), provided, calculated);
+ f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
+ page->index, ino_of_node(page), provided, calculated);
return provided == calculated;
}
@@ -204,50 +205,53 @@
iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
if (!iblocks) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
- "run fsck to fix.",
- __func__, inode->i_ino, iblocks);
+ f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.",
+ __func__, inode->i_ino, iblocks);
return false;
}
if (ino_of_node(node_page) != nid_of_node(node_page)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted inode footer i_ino=%lx, ino,nid: "
- "[%u, %u] run fsck to fix.",
- __func__, inode->i_ino,
- ino_of_node(node_page), nid_of_node(node_page));
+ f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.",
+ __func__, inode->i_ino,
+ ino_of_node(node_page), nid_of_node(node_page));
return false;
}
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)
&& !f2fs_has_extra_attr(inode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted inode ino=%lx, run fsck to fix.",
- __func__, inode->i_ino);
+ f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.",
+ __func__, inode->i_ino);
return false;
}
if (f2fs_has_extra_attr(inode) &&
- !f2fs_sb_has_extra_attr(sbi->sb)) {
+ !f2fs_sb_has_extra_attr(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) is with extra_attr, "
- "but extra_attr feature is off",
- __func__, inode->i_ino);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off",
+ __func__, inode->i_ino);
return false;
}
if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
fi->i_extra_isize % sizeof(__le32)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
- "max: %zu",
- __func__, inode->i_ino, fi->i_extra_isize,
- F2FS_TOTAL_EXTRA_ATTR_SIZE);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu",
+ __func__, inode->i_ino, fi->i_extra_isize,
+ F2FS_TOTAL_EXTRA_ATTR_SIZE);
+ return false;
+ }
+
+ if (f2fs_has_extra_attr(inode) &&
+ f2fs_sb_has_flexible_inline_xattr(sbi) &&
+ f2fs_has_inline_xattr(inode) &&
+ (!fi->i_inline_xattr_size ||
+ fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu",
+ __func__, inode->i_ino, fi->i_inline_xattr_size,
+ MAX_INLINE_XATTR_SIZE);
return false;
}
@@ -255,15 +259,14 @@
struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
if (ei->len &&
- (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk,
+ DATA_GENERIC_ENHANCE) ||
!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
- DATA_GENERIC))) {
+ DATA_GENERIC_ENHANCE))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) extent info [%u, %u, %u] "
- "is incorrect, run fsck to fix",
- __func__, inode->i_ino,
- ei->blk, ei->fofs, ei->len);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
return false;
}
}
@@ -271,19 +274,15 @@
if (f2fs_has_inline_data(inode) &&
(!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx, mode=%u) should not have "
- "inline_data, run fsck to fix",
- __func__, inode->i_ino, inode->i_mode);
+ f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
return false;
}
if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx, mode=%u) should not have "
- "inline_dentry, run fsck to fix",
- __func__, inode->i_ino, inode->i_mode);
+ f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
return false;
}
@@ -330,6 +329,8 @@
le16_to_cpu(ri->i_gc_failures);
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
fi->i_flags = le32_to_cpu(ri->i_flags);
+ if (S_ISREG(inode->i_mode))
+ fi->i_flags &= ~F2FS_PROJINHERIT_FL;
fi->flags = 0;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
@@ -343,7 +344,7 @@
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
le16_to_cpu(ri->i_extra_isize) : 0;
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
} else if (f2fs_has_inline_xattr(inode) ||
f2fs_has_inline_dentry(inode)) {
@@ -361,7 +362,7 @@
if (!sanity_check_inode(inode, node_page)) {
f2fs_put_page(node_page, 1);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
/* check data exist */
@@ -393,14 +394,14 @@
if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
- if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
i_projid = (projid_t)le32_to_cpu(ri->i_projid);
else
i_projid = F2FS_DEF_PROJID;
fi->i_projid = make_kprojid(&init_user_ns, i_projid);
- if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) &&
+ if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime);
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
@@ -456,7 +457,7 @@
inode->i_mapping->a_ops = &f2fs_dblock_aops;
inode_nohighmem(inode);
} else if (S_ISLNK(inode->i_mode)) {
- if (f2fs_encrypted_inode(inode))
+ if (file_is_encrypt(inode))
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
else
inode->i_op = &f2fs_symlink_inode_operations;
@@ -476,6 +477,7 @@
return inode;
bad_inode:
+ f2fs_inode_synced(inode);
iget_failed(inode);
trace_f2fs_iget_exit(inode, ret);
return ERR_PTR(ret);
@@ -500,7 +502,7 @@
struct f2fs_inode *ri;
struct extent_tree *et = F2FS_I(inode)->extent_tree;
- f2fs_wait_on_page_writeback(node_page, NODE, true);
+ f2fs_wait_on_page_writeback(node_page, NODE, true, true);
set_page_dirty(node_page);
f2fs_inode_synced(inode);
@@ -545,11 +547,11 @@
if (f2fs_has_extra_attr(inode)) {
ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
- if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb))
+ if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)))
ri->i_inline_xattr_size =
cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
- if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
+ if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_projid)) {
projid_t i_projid;
@@ -559,7 +561,7 @@
ri->i_projid = cpu_to_le32(i_projid);
}
- if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) &&
+ if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_crtime)) {
ri->i_crtime =
@@ -616,6 +618,9 @@
if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
return 0;
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
+
/*
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
@@ -654,7 +659,11 @@
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
- dquot_initialize(inode);
+ err = dquot_initialize(inode);
+ if (err) {
+ err = 0;
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ }
f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
@@ -686,9 +695,11 @@
goto retry;
}
- if (err)
+ if (err) {
f2fs_update_inode_page(inode);
- dquot_free_inode(inode);
+ if (dquot_initialize_needed(inode))
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ }
sb_end_intwrite(inode->i_sb);
no_delete:
dquot_drop(inode);
@@ -697,7 +708,8 @@
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
- if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
+ if (likely(!f2fs_cp_error(sbi) &&
+ !is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
else
f2fs_inode_synced(inode);
@@ -726,6 +738,7 @@
}
out_clear:
fscrypt_put_encryption_info(inode);
+ fsverity_cleanup_inode(inode);
clear_inode(inode);
}
@@ -760,8 +773,7 @@
err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "May loss orphan inode, run fsck to fix.");
+ f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
goto out;
}
@@ -769,8 +781,7 @@
err = f2fs_acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Too many orphan inodes, run fsck to fix.");
+ f2fs_warn(sbi, "Too many orphan inodes, run fsck to fix.");
} else {
f2fs_add_orphan_inode(inode);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 1f67e38..4faf06e 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1,24 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/namei.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/ctype.h>
+#include <linux/random.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include "f2fs.h"
#include "node.h"
+#include "segment.h"
#include "xattr.h"
#include "acl.h"
#include <trace/events/f2fs.h>
@@ -52,7 +51,7 @@
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
F2FS_I(inode)->i_crtime = inode->i_mtime;
- inode->i_generation = sbi->s_next_generation++;
+ inode->i_generation = prandom_u32();
if (S_ISDIR(inode->i_mode))
F2FS_I(inode)->i_current_depth = 1;
@@ -63,7 +62,7 @@
goto fail;
}
- if (f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_sb_has_project_quota(sbi) &&
(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
@@ -74,18 +73,14 @@
if (err)
goto fail_drop;
- err = dquot_alloc_inode(inode);
- if (err)
- goto fail_drop;
-
set_inode_flag(inode, FI_NEW_INODE);
/* If the directory encrypted, then we should encrypt the inode. */
- if ((f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
+ if ((IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
- if (f2fs_sb_has_extra_attr(sbi->sb)) {
+ if (f2fs_sb_has_extra_attr(sbi)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
@@ -98,7 +93,7 @@
if (f2fs_may_inline_dentry(inode))
set_inode_flag(inode, FI_INLINE_DENTRY);
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
if (f2fs_has_inline_xattr(inode))
xattr_size = F2FS_OPTION(sbi).inline_xattr_size;
@@ -124,6 +119,8 @@
if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
+ f2fs_set_inode_flags(inode);
+
trace_f2fs_new_inode(inode, 0);
return inode;
@@ -146,7 +143,7 @@
return ERR_PTR(err);
}
-static int is_extension_exist(const unsigned char *s, const char *sub)
+static inline int is_extension_exist(const unsigned char *s, const char *sub)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
@@ -184,16 +181,19 @@
hot_count = sbi->raw_super->hot_ext_count;
for (i = 0; i < cold_count + hot_count; i++) {
- if (!is_extension_exist(name, extlist[i]))
- continue;
- if (i < cold_count)
- file_set_cold(inode);
- else
- file_set_hot(inode);
- break;
+ if (is_extension_exist(name, extlist[i]))
+ break;
}
up_read(&sbi->sb_lock);
+
+ if (i == cold_count + hot_count)
+ return;
+
+ if (i < cold_count)
+ file_set_cold(inode);
+ else
+ file_set_hot(inode);
}
int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
@@ -272,6 +272,8 @@
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
err = dquot_initialize(dir);
if (err)
@@ -318,6 +320,8 @@
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
err = fscrypt_prepare_link(old_dentry, dir, dentry);
if (err)
@@ -379,9 +383,8 @@
int err = 0;
if (f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "skip recovering inline_dots inode (ino:%lu, pino:%u) "
- "in readonly mountpoint", dir->i_ino, pino);
+ f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint",
+ dir->i_ino, pino);
return 0;
}
@@ -430,19 +433,23 @@
nid_t ino = -1;
int err = 0;
unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir));
+ struct fscrypt_name fname;
trace_f2fs_lookup_start(dir, dentry, flags);
- err = fscrypt_prepare_lookup(dir, dentry, flags);
- if (err)
- goto out;
-
if (dentry->d_name.len > F2FS_NAME_LEN) {
err = -ENAMETOOLONG;
goto out;
}
- de = f2fs_find_entry(dir, &dentry->d_name, &page);
+ err = fscrypt_prepare_lookup(dir, dentry, &fname);
+ if (err == -ENOENT)
+ goto out_splice;
+ if (err)
+ goto out;
+ de = __f2fs_find_entry(dir, &fname, &page);
+ fscrypt_free_filename(&fname);
+
if (!de) {
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -471,19 +478,28 @@
if (err)
goto out_iput;
}
- if (f2fs_encrypted_inode(dir) &&
+ if (IS_ENCRYPTED(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
- f2fs_msg(inode->i_sb, KERN_WARNING,
- "Inconsistent encryption contexts: %lu/%lu",
- dir->i_ino, inode->i_ino);
+ f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %lu/%lu",
+ dir->i_ino, inode->i_ino);
err = -EPERM;
goto out_iput;
}
out_splice:
+#ifdef CONFIG_UNICODE
+ if (!inode && IS_CASEFOLDED(dir)) {
+ /* Eventually we want to call d_add_ci(dentry, NULL)
+ * for negative dentries in the encoding case as
+ * well. For now, prevent the negative dentry
+ * from being cached.
+ */
+ trace_f2fs_lookup_end(dir, dentry, ino, err);
+ return NULL;
+ }
+#endif
new = d_splice_alias(inode, dentry);
- if (IS_ERR(new))
- err = PTR_ERR(new);
+ err = PTR_ERR_OR_ZERO(new);
trace_f2fs_lookup_end(dir, dentry, ino, err);
return new;
out_iput:
@@ -530,6 +546,16 @@
goto fail;
}
f2fs_delete_entry(de, page, dir, inode);
+#ifdef CONFIG_UNICODE
+ /* VFS negative dentries are incompatible with Encoding and
+ * Case-insensitiveness. Eventually we'll want avoid
+ * invalidating the dentries here, alongside with returning the
+ * negative dentries at f2fs_lookup(), when it is better
+ * supported by the VFS for the CI case.
+ */
+ if (IS_CASEFOLDED(dir))
+ d_invalidate(dentry);
+#endif
f2fs_unlock_op(sbi);
if (IS_DIRSYNC(dir))
@@ -564,6 +590,8 @@
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
&disk_link);
@@ -627,7 +655,7 @@
f2fs_handle_failed_inode(inode);
out_free_encrypted_link:
if (disk_link.name != (unsigned char *)symname)
- kfree(disk_link.name);
+ kvfree(disk_link.name);
return err;
}
@@ -693,6 +721,8 @@
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
err = dquot_initialize(dir);
if (err)
@@ -791,8 +821,10 @@
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
- if (f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) {
+ if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) {
int err = fscrypt_get_encryption_info(dir);
if (err)
return err;
@@ -823,10 +855,12 @@
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
bool is_old_inline = f2fs_has_inline_dentry(old_dir);
- int err = -ENOENT;
+ int err;
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -847,6 +881,7 @@
goto out;
}
+ err = -ENOENT;
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
@@ -983,6 +1018,8 @@
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
+
+ f2fs_update_time(sbi, REQ_TIME);
return 0;
put_out_dir:
@@ -1012,10 +1049,12 @@
struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
struct f2fs_dir_entry *old_entry, *new_entry;
int old_nlink = 0, new_nlink = 0;
- int err = -ENOENT;
+ int err;
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1033,6 +1072,7 @@
if (err)
goto out;
+ err = -ENOENT;
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
@@ -1136,6 +1176,8 @@
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
+
+ f2fs_update_time(sbi, REQ_TIME);
return 0;
out_new_dir:
if (new_dir_entry) {
@@ -1225,6 +1267,7 @@
#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
#endif
+ .fiemap = f2fs_fiemap,
};
const struct inode_operations f2fs_symlink_inode_operations = {
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 42ea42a..8b66bc4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/node.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -37,10 +34,9 @@
{
if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: out-of-range nid=%x, run fsck to fix.",
- __func__, nid);
- return -EINVAL;
+ f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.",
+ __func__, nid);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -104,7 +100,7 @@
static void clear_node_page_dirty(struct page *page)
{
if (PageDirty(page)) {
- f2fs_clear_radix_tree_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(page);
clear_page_dirty_for_io(page);
dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
}
@@ -129,6 +125,8 @@
/* get current nat block page with lock */
src_page = get_current_nat_page(sbi, nid);
+ if (IS_ERR(src_page))
+ return src_page;
dst_page = f2fs_grab_meta_page(sbi, dst_off);
f2fs_bug_on(sbi, PageDirty(src_page));
@@ -455,7 +453,7 @@
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
+ f2fs_bug_on(sbi, __is_valid_data_blkaddr(nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -466,7 +464,7 @@
/* change address */
nat_set_blkaddr(e, new_blkaddr);
- if (!is_valid_data_blkaddr(sbi, new_blkaddr))
+ if (!__is_valid_data_blkaddr(new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -527,6 +525,7 @@
struct f2fs_nat_entry ne;
struct nat_entry *e;
pgoff_t index;
+ block_t blkaddr;
int i;
ni->nid = nid;
@@ -570,6 +569,11 @@
node_info_from_raw_nat(ni, &ne);
f2fs_put_page(page, 1);
cache:
+ blkaddr = le32_to_cpu(ne.block_addr);
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
+ return -EFAULT;
+
/* cache nat entry */
cache_nat_entry(sbi, nid, &ne);
return 0;
@@ -601,9 +605,9 @@
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
{
const long direct_index = ADDRS_PER_INODE(dn->inode);
- const long direct_blks = ADDRS_PER_BLOCK;
- const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
- unsigned int skipped_unit = ADDRS_PER_BLOCK;
+ const long direct_blks = ADDRS_PER_BLOCK(dn->inode);
+ const long indirect_blks = ADDRS_PER_BLOCK(dn->inode) * NIDS_PER_BLOCK;
+ unsigned int skipped_unit = ADDRS_PER_BLOCK(dn->inode);
int cur_level = dn->cur_level;
int max_level = dn->max_level;
pgoff_t base = 0;
@@ -617,8 +621,10 @@
switch (dn->max_level) {
case 3:
base += 2 * indirect_blks;
+ /* fall through */
case 2:
base += 2 * direct_blks;
+ /* fall through */
case 1:
base += direct_index;
break;
@@ -637,9 +643,9 @@
int offset[4], unsigned int noffset[4])
{
const long direct_index = ADDRS_PER_INODE(inode);
- const long direct_blks = ADDRS_PER_BLOCK;
+ const long direct_blks = ADDRS_PER_BLOCK(inode);
const long dptrs_per_blk = NIDS_PER_BLOCK;
- const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
+ const long indirect_blks = ADDRS_PER_BLOCK(inode) * NIDS_PER_BLOCK;
const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
int n = 0;
int level = 0;
@@ -827,6 +833,7 @@
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info ni;
int err;
+ pgoff_t index;
err = f2fs_get_node_info(sbi, dn->nid, &ni);
if (err)
@@ -846,10 +853,11 @@
clear_node_page_dirty(dn->node_page);
set_sbi_flag(sbi, SBI_IS_DIRTY);
+ index = dn->node_page->index;
f2fs_put_page(dn->node_page, 1);
invalidate_mapping_pages(NODE_MAPPING(sbi),
- dn->node_page->index, dn->node_page->index);
+ index, index);
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
@@ -1105,7 +1113,7 @@
ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
lock_page(page);
BUG_ON(page->mapping != NODE_MAPPING(sbi));
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
@@ -1178,8 +1186,12 @@
f2fs_put_dnode(&dn);
return -EIO;
}
- f2fs_bug_on(F2FS_I_SB(inode),
- inode->i_blocks != 0 && inode->i_blocks != 8);
+
+ if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
+ f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino, (unsigned long long)inode->i_blocks);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ }
/* will put inode & node pages */
err = truncate_node(&dn);
@@ -1233,7 +1245,7 @@
new_ni.version = 0;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(page, S_ISDIR(dn->inode->i_mode));
if (!PageUptodate(page))
@@ -1274,9 +1286,10 @@
int err;
if (PageUptodate(page)) {
-#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
-#endif
+ if (!f2fs_inode_chksum_verify(sbi, page)) {
+ ClearPageUptodate(page);
+ return -EFSBADCRC;
+ }
return LOCKED_PAGE;
}
@@ -1307,9 +1320,7 @@
if (f2fs_check_nid_range(sbi, nid))
return;
- rcu_read_lock();
- apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
- rcu_read_unlock();
+ apage = xa_load(&NODE_MAPPING(sbi)->i_pages, nid);
if (apage)
return;
@@ -1361,16 +1372,15 @@
}
if (!f2fs_inode_chksum_verify(sbi, page)) {
- err = -EBADMSG;
+ err = -EFSBADCRC;
goto out_err;
}
page_hit:
if(unlikely(nid != nid_of_node(page))) {
- f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
- "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
- nid, nid_of_node(page), ino_of_node(page),
- ofs_of_node(page), cpver_of_node(page),
- next_blkaddr_of_node(page));
+ f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
+ nid, nid_of_node(page), ino_of_node(page),
+ ofs_of_node(page), cpver_of_node(page),
+ next_blkaddr_of_node(page));
err = -EINVAL;
out_err:
ClearPageUptodate(page);
@@ -1514,7 +1524,8 @@
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
- if (wbc->sync_mode == WB_SYNC_NONE &&
+ if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ wbc->sync_mode == WB_SYNC_NONE &&
IS_DNODE(page) && is_cold_node(page))
goto redirty_out;
@@ -1542,7 +1553,8 @@
}
if (__is_valid_data_blkaddr(ni.blk_addr) &&
- !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr,
+ DATA_GENERIC_ENHANCE)) {
up_read(&sbi->node_write);
goto redirty_out;
}
@@ -1566,8 +1578,7 @@
up_read(&sbi->node_write);
if (wbc->for_reclaim) {
- f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
- page->index, NODE);
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE);
submitted = NULL;
}
@@ -1589,8 +1600,10 @@
return AOP_WRITEPAGE_ACTIVATE;
}
-void f2fs_move_node_page(struct page *node_page, int gc_type)
+int f2fs_move_node_page(struct page *node_page, int gc_type)
{
+ int err = 0;
+
if (gc_type == FG_GC) {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
@@ -1598,16 +1611,20 @@
.for_reclaim = 0,
};
- set_page_dirty(node_page);
- f2fs_wait_on_page_writeback(node_page, NODE, true);
+ f2fs_wait_on_page_writeback(node_page, NODE, true, true);
- f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
- if (!clear_page_dirty_for_io(node_page))
+ set_page_dirty(node_page);
+
+ if (!clear_page_dirty_for_io(node_page)) {
+ err = -EAGAIN;
goto out_page;
+ }
if (__write_node_page(node_page, false, NULL,
- &wbc, false, FS_GC_NODE_IO, NULL))
+ &wbc, false, FS_GC_NODE_IO, NULL)) {
+ err = -EAGAIN;
unlock_page(node_page);
+ }
goto release_page;
} else {
/* set page dirty and write it */
@@ -1618,6 +1635,7 @@
unlock_page(node_page);
release_page:
f2fs_put_page(node_page, 0);
+ return err;
}
static int f2fs_write_node_page(struct page *page,
@@ -1632,13 +1650,13 @@
unsigned int *seq_id)
{
pgoff_t index;
- pgoff_t last_idx = ULONG_MAX;
struct pagevec pvec;
int ret = 0;
struct page *last_page = NULL;
bool marked = false;
nid_t ino = inode->i_ino;
int nr_pages;
+ int nwritten = 0;
if (atomic) {
last_page = last_fsync_dnode(sbi, ino);
@@ -1684,8 +1702,7 @@
goto continue_unlock;
}
- f2fs_wait_on_page_writeback(page, NODE, true);
- BUG_ON(PageWriteback(page));
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
@@ -1716,7 +1733,7 @@
f2fs_put_page(last_page, 0);
break;
} else if (submitted) {
- last_idx = page->index;
+ nwritten++;
}
if (page == last_page) {
@@ -1732,21 +1749,61 @@
break;
}
if (!ret && atomic && !marked) {
- f2fs_msg(sbi->sb, KERN_DEBUG,
- "Retry to write fsync mark: ino=%u, idx=%lx",
- ino, last_page->index);
+ f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx",
+ ino, last_page->index);
lock_page(last_page);
- f2fs_wait_on_page_writeback(last_page, NODE, true);
+ f2fs_wait_on_page_writeback(last_page, NODE, true, true);
set_page_dirty(last_page);
unlock_page(last_page);
goto retry;
}
out:
- if (last_idx != ULONG_MAX)
- f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
+ if (nwritten)
+ f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE);
return ret ? -EIO: 0;
}
+static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ bool clean;
+
+ if (inode->i_ino != ino)
+ return 0;
+
+ if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
+ return 0;
+
+ spin_lock(&sbi->inode_lock[DIRTY_META]);
+ clean = list_empty(&F2FS_I(inode)->gdirty_list);
+ spin_unlock(&sbi->inode_lock[DIRTY_META]);
+
+ if (clean)
+ return 0;
+
+ inode = igrab(inode);
+ if (!inode)
+ return 0;
+ return 1;
+}
+
+static bool flush_dirty_inode(struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+ struct inode *inode;
+ nid_t ino = ino_of_node(page);
+
+ inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL);
+ if (!inode)
+ return false;
+
+ f2fs_update_inode(inode, page);
+ unlock_page(page);
+
+ iput(inode);
+ return true;
+}
+
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type)
@@ -1770,6 +1827,7 @@
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
bool submitted = false;
+ bool may_dirty = true;
/* give a priority to WB_SYNC threads */
if (atomic_read(&sbi->wb_sync_req[NODE]) &&
@@ -1817,9 +1875,15 @@
goto lock_node;
}
- f2fs_wait_on_page_writeback(page, NODE, true);
+ /* flush dirty inode */
+ if (IS_INODE(page) && may_dirty) {
+ may_dirty = false;
+ if (flush_dirty_inode(page))
+ goto lock_node;
+ }
- BUG_ON(PageWriteback(page));
+ f2fs_wait_on_page_writeback(page, NODE, true, true);
+
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -1846,7 +1910,8 @@
}
if (step < 2) {
- if (wbc->sync_mode == WB_SYNC_NONE && step == 1)
+ if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ wbc->sync_mode == WB_SYNC_NONE && step == 1)
goto out;
step++;
goto next_step;
@@ -1886,7 +1951,7 @@
get_page(page);
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ f2fs_wait_on_page_writeback(page, NODE, true, false);
if (TestClearPageError(page))
ret = -EIO;
@@ -1917,7 +1982,9 @@
f2fs_balance_fs_bg(sbi);
/* collect a number of dirty node pages and write together */
- if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
+ if (wbc->sync_mode != WB_SYNC_ALL &&
+ get_pages(sbi, F2FS_DIRTY_NODES) <
+ nr_pages_to_skip(sbi, NODE))
goto skip_write;
if (wbc->sync_mode == WB_SYNC_ALL)
@@ -1956,7 +2023,7 @@
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
return 1;
}
@@ -2071,6 +2138,9 @@
if (unlikely(nid == 0))
return false;
+ if (unlikely(f2fs_check_nid_range(sbi, nid)))
+ return false;
+
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = FREE_NID;
@@ -2270,15 +2340,18 @@
nm_i->nat_block_bitmap)) {
struct page *page = get_current_nat_page(sbi, nid);
- ret = scan_nat_page(sbi, page, nid);
- f2fs_put_page(page, 1);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ } else {
+ ret = scan_nat_page(sbi, page, nid);
+ f2fs_put_page(page, 1);
+ }
if (ret) {
up_read(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, !mount);
- f2fs_msg(sbi->sb, KERN_ERR,
- "NAT is corrupt, run fsck to fix it");
- return -EINVAL;
+ f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
+ return ret;
}
}
@@ -2355,8 +2428,9 @@
spin_unlock(&nm_i->nid_list_lock);
/* Let's scan nat pages and its caches to get free nids */
- f2fs_build_free_nids(sbi, true, false);
- goto retry;
+ if (!f2fs_build_free_nids(sbi, true, false))
+ goto retry;
+ return false;
}
/*
@@ -2459,7 +2533,7 @@
src_addr = inline_xattr_addr(inode, page);
inline_size = inline_xattr_size(inode);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
memcpy(dst_addr, src_addr, inline_size);
update_inode:
f2fs_update_inode(inode, ipage);
@@ -2553,17 +2627,17 @@
if (dst->i_inline & F2FS_EXTRA_ATTR) {
dst->i_extra_isize = src->i_extra_isize;
- if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) &&
+ if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_inline_xattr_size))
dst->i_inline_xattr_size = src->i_inline_xattr_size;
- if (f2fs_sb_has_project_quota(sbi->sb) &&
+ if (f2fs_sb_has_project_quota(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_projid))
dst->i_projid = src->i_projid;
- if (f2fs_sb_has_inode_crtime(sbi->sb) &&
+ if (f2fs_sb_has_inode_crtime(sbi) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_crtime_nsec)) {
dst->i_crtime = src->i_crtime;
@@ -2696,7 +2770,7 @@
i = 1;
}
for (; i < NAT_ENTRY_PER_BLOCK; i++) {
- if (nat_blk->entries[i].block_addr != NULL_ADDR)
+ if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR)
valid++;
}
if (valid == 0) {
@@ -2712,7 +2786,7 @@
__clear_bit_le(nat_index, nm_i->full_nat_bits);
}
-static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
+static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct nat_entry_set *set, struct cp_control *cpc)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2736,6 +2810,9 @@
down_write(&curseg->journal_rwsem);
} else {
page = get_next_nat_page(sbi, start_nid);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
nat_blk = page_address(page);
f2fs_bug_on(sbi, !nat_blk);
}
@@ -2781,12 +2858,13 @@
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
kmem_cache_free(nat_entry_set_slab, set);
}
+ return 0;
}
/*
* This function is called during the checkpointing process.
*/
-void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2796,6 +2874,7 @@
unsigned int found;
nid_t set_idx = 0;
LIST_HEAD(sets);
+ int err = 0;
/* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
if (enabled_nat_bits(sbi, cpc)) {
@@ -2805,7 +2884,7 @@
}
if (!nm_i->dirty_nat_cnt)
- return;
+ return 0;
down_write(&nm_i->nat_tree_lock);
@@ -2828,11 +2907,16 @@
}
/* flush dirty nats in nat entry set */
- list_for_each_entry_safe(set, tmp, &sets, set_list)
- __flush_nat_entry_set(sbi, set, cpc);
+ list_for_each_entry_safe(set, tmp, &sets, set_list) {
+ err = __flush_nat_entry_set(sbi, set, cpc);
+ if (err)
+ break;
+ }
up_write(&nm_i->nat_tree_lock);
/* Allow dirty nats by node block allocation in write_begin */
+
+ return err;
}
static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
@@ -2859,10 +2943,8 @@
struct page *page;
page = f2fs_get_meta_page(sbi, nat_bits_addr++);
- if (IS_ERR(page)) {
- disable_nat_bits(sbi, true);
+ if (IS_ERR(page))
return PTR_ERR(page);
- }
memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
page_address(page), F2FS_BLKSIZE);
@@ -2878,7 +2960,7 @@
nm_i->full_nat_bits = nm_i->nat_bits + 8;
nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
- f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
+ f2fs_notice(sbi, "Found nat_bits in checkpoint");
return 0;
}
@@ -2933,7 +3015,7 @@
/* not used nids: 0, node, meta, (and root counted as valid node) */
nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
- sbi->nquota_files - F2FS_RESERVED_NODE_NUM;
+ F2FS_RESERVED_NODE_NUM;
nm_i->nid_cnt[FREE_NID] = 0;
nm_i->nid_cnt[PREALLOC_NID] = 0;
nm_i->nat_cnt = 0;
@@ -3097,17 +3179,17 @@
for (i = 0; i < nm_i->nat_blocks; i++)
kvfree(nm_i->free_nid_bitmap[i]);
- kfree(nm_i->free_nid_bitmap);
+ kvfree(nm_i->free_nid_bitmap);
}
kvfree(nm_i->free_nid_count);
- kfree(nm_i->nat_bitmap);
- kfree(nm_i->nat_bits);
+ kvfree(nm_i->nat_bitmap);
+ kvfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(nm_i->nat_bitmap_mir);
+ kvfree(nm_i->nat_bitmap_mir);
#endif
sbi->nm_info = NULL;
- kfree(nm_i);
+ kvfree(nm_i);
}
int __init f2fs_create_node_manager_caches(void)
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 0f4db7a..e05af5d 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/node.h
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
/* start node id of a node block dedicated to the given node id */
#define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
@@ -364,7 +361,7 @@
{
struct f2fs_node *rn = F2FS_NODE(p);
- f2fs_wait_on_page_writeback(p, NODE, true);
+ f2fs_wait_on_page_writeback(p, NODE, true, true);
if (i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 9a8579f..783773e 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/recovery.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -99,8 +96,12 @@
return ERR_PTR(err);
}
-static void del_fsync_inode(struct fsync_inode_entry *entry)
+static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
{
+ if (drop) {
+ /* inode should not be recovered, drop it */
+ f2fs_inode_synced(entry->inode);
+ }
iput(entry->inode);
list_del(&entry->list);
kmem_cache_free(fsync_entry_slab, entry);
@@ -187,10 +188,36 @@
name = "<encrypted>";
else
name = raw_inode->i_name;
- f2fs_msg(inode->i_sb, KERN_NOTICE,
- "%s: ino = %x, name = %s, dir = %lx, err = %d",
- __func__, ino_of_node(ipage), name,
- IS_ERR(dir) ? 0 : dir->i_ino, err);
+ f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
+ __func__, ino_of_node(ipage), name,
+ IS_ERR(dir) ? 0 : dir->i_ino, err);
+ return err;
+}
+
+static int recover_quota_data(struct inode *inode, struct page *page)
+{
+ struct f2fs_inode *raw = F2FS_INODE(page);
+ struct iattr attr;
+ uid_t i_uid = le32_to_cpu(raw->i_uid);
+ gid_t i_gid = le32_to_cpu(raw->i_gid);
+ int err;
+
+ memset(&attr, 0, sizeof(attr));
+
+ attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
+ attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
+
+ if (!uid_eq(attr.ia_uid, inode->i_uid))
+ attr.ia_valid |= ATTR_UID;
+ if (!gid_eq(attr.ia_gid, inode->i_gid))
+ attr.ia_valid |= ATTR_GID;
+
+ if (!attr.ia_valid)
+ return 0;
+
+ err = dquot_transfer(inode, &attr);
+ if (err)
+ set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
return err;
}
@@ -206,12 +233,41 @@
clear_inode_flag(inode, FI_DATA_EXIST);
}
-static void recover_inode(struct inode *inode, struct page *page)
+static int recover_inode(struct inode *inode, struct page *page)
{
struct f2fs_inode *raw = F2FS_INODE(page);
char *name;
+ int err;
inode->i_mode = le16_to_cpu(raw->i_mode);
+
+ err = recover_quota_data(inode, page);
+ if (err)
+ return err;
+
+ i_uid_write(inode, le32_to_cpu(raw->i_uid));
+ i_gid_write(inode, le32_to_cpu(raw->i_gid));
+
+ if (raw->i_inline & F2FS_EXTRA_ATTR) {
+ if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
+ F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize),
+ i_projid)) {
+ projid_t i_projid;
+ kprojid_t kprojid;
+
+ i_projid = (projid_t)le32_to_cpu(raw->i_projid);
+ kprojid = make_kprojid(&init_user_ns, i_projid);
+
+ if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) {
+ err = f2fs_transfer_project_quota(inode,
+ kprojid);
+ if (err)
+ return err;
+ F2FS_I(inode)->i_projid = kprojid;
+ }
+ }
+ }
+
f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
@@ -222,17 +278,22 @@
F2FS_I(inode)->i_advise = raw->i_advise;
F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags);
+ f2fs_set_inode_flags(inode);
+ F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] =
+ le16_to_cpu(raw->i_gc_failures);
recover_inline_flags(inode, raw);
+ f2fs_mark_inode_dirty_sync(inode, true);
+
if (file_enc_name(inode))
name = "<encrypted>";
else
name = F2FS_INODE(page)->i_name;
- f2fs_msg(inode->i_sb, KERN_NOTICE,
- "recover_inode: ino = %x, name = %s, inline = %x",
- ino_of_node(page), name, raw->i_inline);
+ f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x",
+ ino_of_node(page), name, raw->i_inline);
+ return 0;
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
@@ -262,8 +323,10 @@
break;
}
- if (!is_recoverable_dnode(page))
+ if (!is_recoverable_dnode(page)) {
+ f2fs_put_page(page, 1);
break;
+ }
if (!is_fsync_dnode(page))
goto next;
@@ -275,8 +338,10 @@
if (!check_only &&
IS_INODE(page) && is_dent_dnode(page)) {
err = f2fs_recover_inode_page(sbi, page);
- if (err)
+ if (err) {
+ f2fs_put_page(page, 1);
break;
+ }
quota_inode = true;
}
@@ -292,6 +357,7 @@
err = 0;
goto next;
}
+ f2fs_put_page(page, 1);
break;
}
}
@@ -303,10 +369,10 @@
/* sanity check in order to detect looped node chain */
if (++loop_cnt >= free_blocks ||
blkaddr == next_blkaddr_of_node(page)) {
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "%s: detect looped node chain, "
- "blkaddr:%u, next:%u",
- __func__, blkaddr, next_blkaddr_of_node(page));
+ f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u",
+ __func__, blkaddr,
+ next_blkaddr_of_node(page));
+ f2fs_put_page(page, 1);
err = -EINVAL;
break;
}
@@ -317,16 +383,15 @@
f2fs_ra_meta_pages_cond(sbi, blkaddr);
}
- f2fs_put_page(page, 1);
return err;
}
-static void destroy_fsync_dnodes(struct list_head *head)
+static void destroy_fsync_dnodes(struct list_head *head, int drop)
{
struct fsync_inode_entry *entry, *tmp;
list_for_each_entry_safe(entry, tmp, head, list)
- del_fsync_inode(entry);
+ del_fsync_inode(entry, drop);
}
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
@@ -359,6 +424,8 @@
}
sum_page = f2fs_get_sum_page(sbi, segno);
+ if (IS_ERR(sum_page))
+ return PTR_ERR(sum_page);
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
sum = sum_node->entries[blkoff];
f2fs_put_page(sum_page, 1);
@@ -474,14 +541,21 @@
goto out;
}
- f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
err = f2fs_get_node_info(sbi, dn.nid, &ni);
if (err)
goto err;
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
- f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
+
+ if (ofs_of_node(dn.node_page) != ofs_of_node(page)) {
+ f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
+ inode->i_ino, ofs_of_node(dn.node_page),
+ ofs_of_node(page));
+ err = -EFSCORRUPTED;
+ goto err;
+ }
for (; start < end; start++, dn.ofs_in_node++) {
block_t src, dest;
@@ -489,6 +563,18 @@
src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
+ if (__is_valid_data_blkaddr(src) &&
+ !f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
+ err = -EFSCORRUPTED;
+ goto err;
+ }
+
+ if (__is_valid_data_blkaddr(dest) &&
+ !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
+ err = -EFSCORRUPTED;
+ goto err;
+ }
+
/* skip recovering if dest is the same as src */
if (src == dest)
continue;
@@ -552,16 +638,14 @@
err:
f2fs_put_dnode(&dn);
out:
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
- inode->i_ino,
- file_keep_isize(inode) ? "keep" : "recover",
- recovered, err);
+ f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
+ inode->i_ino, file_keep_isize(inode) ? "keep" : "recover",
+ recovered, err);
return err;
}
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
- struct list_head *dir_list)
+ struct list_head *tmp_inode_list, struct list_head *dir_list)
{
struct curseg_info *curseg;
struct page *page = NULL;
@@ -599,8 +683,13 @@
* In this case, we can lose the latest inode(x).
* So, call recover_inode for the inode update.
*/
- if (IS_INODE(page))
- recover_inode(entry->inode, page);
+ if (IS_INODE(page)) {
+ err = recover_inode(entry->inode, page);
+ if (err) {
+ f2fs_put_page(page, 1);
+ break;
+ }
+ }
if (entry->last_dentry == blkaddr) {
err = recover_dentry(entry->inode, page, dir_list);
if (err) {
@@ -615,7 +704,7 @@
}
if (entry->blkaddr == blkaddr)
- del_fsync_inode(entry);
+ list_move_tail(&entry->list, tmp_inode_list);
next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
@@ -628,7 +717,7 @@
int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
- struct list_head inode_list;
+ struct list_head inode_list, tmp_inode_list;
struct list_head dir_list;
int err;
int ret = 0;
@@ -639,8 +728,7 @@
#endif
if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "recover fsync data on readonly fs");
+ f2fs_info(sbi, "recover fsync data on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
}
@@ -659,6 +747,7 @@
}
INIT_LIST_HEAD(&inode_list);
+ INIT_LIST_HEAD(&tmp_inode_list);
INIT_LIST_HEAD(&dir_list);
/* prevent checkpoint */
@@ -677,11 +766,16 @@
need_writecp = true;
/* step #2: recover data */
- err = recover_data(sbi, &inode_list, &dir_list);
+ err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
+ else {
+ /* restore s_flags to let iput() trash data */
+ sbi->sb->s_flags = s_flags;
+ }
skip:
- destroy_fsync_dnodes(&inode_list);
+ destroy_fsync_dnodes(&inode_list, err);
+ destroy_fsync_dnodes(&tmp_inode_list, err);
/* truncate meta pages to be used by the recovery */
truncate_inode_pages_range(META_MAPPING(sbi),
@@ -690,13 +784,13 @@
if (err) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
truncate_inode_pages_final(META_MAPPING(sbi));
+ } else {
+ clear_sbi_flag(sbi, SBI_POR_DOING);
}
-
- clear_sbi_flag(sbi, SBI_POR_DOING);
mutex_unlock(&sbi->cp_mutex);
/* let's drop all the directory inodes for clean checkpoint */
- destroy_fsync_dnodes(&dir_list);
+ destroy_fsync_dnodes(&dir_list, err);
if (need_writecp) {
set_sbi_flag(sbi, SBI_IS_RECOVERED);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 30779aa..8087095 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/segment.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -179,6 +176,8 @@
return false;
if (sbi->gc_mode == GC_URGENT)
return true;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return true;
return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -186,14 +185,11 @@
void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *new;
f2fs_trace_pid(page);
- set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
@@ -202,21 +198,18 @@
INIT_LIST_HEAD(&new->list);
/* increase reference count with clean state */
- mutex_lock(&fi->inmem_lock);
get_page(page);
- list_add_tail(&new->list, &fi->inmem_pages);
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (list_empty(&fi->inmem_ilist))
- list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ mutex_lock(&F2FS_I(inode)->inmem_lock);
+ list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
- mutex_unlock(&fi->inmem_lock);
+ mutex_unlock(&F2FS_I(inode)->inmem_lock);
trace_f2fs_register_inmem_page(page, INMEM);
}
static int __revoke_inmem_pages(struct inode *inode,
- struct list_head *head, bool drop, bool recover)
+ struct list_head *head, bool drop, bool recover,
+ bool trylock)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inmem_pages *cur, *tmp;
@@ -228,9 +221,18 @@
if (drop)
trace_f2fs_commit_inmem_page(page, INMEM_DROP);
- lock_page(page);
+ if (trylock) {
+ /*
+ * to avoid deadlock in between page lock and
+ * inmem_lock.
+ */
+ if (!trylock_page(page))
+ continue;
+ } else {
+ lock_page(page);
+ }
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
if (recover) {
struct dnode_of_data dn;
@@ -267,10 +269,11 @@
}
next:
/* we don't need to invalidate this in the sccessful status */
- if (drop || recover)
+ if (drop || recover) {
ClearPageUptodate(page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ clear_cold_data(page);
+ }
+ f2fs_clear_page_private(page);
f2fs_put_page(page, 1);
list_del(&cur->list);
@@ -317,17 +320,21 @@
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- mutex_lock(&fi->inmem_lock);
- __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (!list_empty(&fi->inmem_ilist))
- list_del_init(&fi->inmem_ilist);
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
- mutex_unlock(&fi->inmem_lock);
+ while (!list_empty(&fi->inmem_pages)) {
+ mutex_lock(&fi->inmem_lock);
+ __revoke_inmem_pages(inode, &fi->inmem_pages,
+ true, false, true);
+ mutex_unlock(&fi->inmem_lock);
+ }
clear_inode_flag(inode, FI_ATOMIC_FILE);
fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
stat_dec_atomic_write(inode);
+
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (!list_empty(&fi->inmem_ilist))
+ list_del_init(&fi->inmem_ilist);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
}
void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
@@ -353,8 +360,7 @@
kmem_cache_free(inmem_entry_slab, cur);
ClearPageUptodate(page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
f2fs_put_page(page, 0);
trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
@@ -374,7 +380,7 @@
.io_type = FS_DATA_IO,
};
struct list_head revoke_list;
- pgoff_t last_idx = ULONG_MAX;
+ bool submit_bio = false;
int err = 0;
INIT_LIST_HEAD(&revoke_list);
@@ -386,8 +392,9 @@
if (page->mapping == inode->i_mapping) {
trace_f2fs_commit_inmem_page(page, INMEM);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
f2fs_remove_dirty_inode(inode);
@@ -409,14 +416,14 @@
}
/* record old blkaddr for revoking */
cur->old_addr = fio.old_blkaddr;
- last_idx = page->index;
+ submit_bio = true;
}
unlock_page(page);
list_move_tail(&cur->list, &revoke_list);
}
- if (last_idx != ULONG_MAX)
- f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
+ if (submit_bio)
+ f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
if (err) {
/*
@@ -427,12 +434,15 @@
* recovery or rewrite & commit last transaction. For other
* error number, revoking was done by filesystem itself.
*/
- err = __revoke_inmem_pages(inode, &revoke_list, false, true);
+ err = __revoke_inmem_pages(inode, &revoke_list,
+ false, true, false);
/* drop all uncommitted pages */
- __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+ __revoke_inmem_pages(inode, &fi->inmem_pages,
+ true, false, false);
} else {
- __revoke_inmem_pages(inode, &revoke_list, false, false);
+ __revoke_inmem_pages(inode, &revoke_list,
+ false, false, false);
}
return err;
@@ -453,11 +463,6 @@
mutex_lock(&fi->inmem_lock);
err = __f2fs_commit_inmem_pages(inode);
-
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (!list_empty(&fi->inmem_ilist))
- list_del_init(&fi->inmem_ilist);
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
mutex_unlock(&fi->inmem_lock);
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -483,6 +488,9 @@
if (need && excess_cached_nats(sbi))
f2fs_balance_fs_bg(sbi);
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return;
+
/*
* We should do GC or end up with checkpoint, if there are so many dirty
* dir/node pages without enough free segments.
@@ -511,7 +519,7 @@
else
f2fs_build_free_nids(sbi, false, false);
- if (!is_idle(sbi) &&
+ if (!is_idle(sbi, REQ_TIME) &&
(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
@@ -525,9 +533,13 @@
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
+ mutex_lock(&sbi->flush_lock);
+
blk_start_plug(&plug);
f2fs_sync_dirty_inodes(sbi, FILE_INODE);
blk_finish_plug(&plug);
+
+ mutex_unlock(&sbi->flush_lock);
}
f2fs_sync_fs(sbi->sb, true);
stat_inc_bg_cp_count(sbi->stat_info);
@@ -537,9 +549,13 @@
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
struct block_device *bdev)
{
- struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
+ struct bio *bio;
int ret;
+ bio = f2fs_bio_alloc(sbi, 0, false);
+ if (!bio)
+ return -ENOMEM;
+
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
bio_set_dev(bio, bdev);
ret = submit_bio_wait(bio);
@@ -555,7 +571,7 @@
int ret = 0;
int i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return __submit_flush_wait(sbi, sbi->sb->s_bdev);
for (i = 0; i < sbi->s_ndevs; i++) {
@@ -616,14 +632,17 @@
return 0;
if (!test_opt(sbi, FLUSH_MERGE)) {
+ atomic_inc(&fcc->queued_flush);
ret = submit_flush_wait(sbi, ino);
+ atomic_dec(&fcc->queued_flush);
atomic_inc(&fcc->issued_flush);
return ret;
}
- if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
+ if (atomic_inc_return(&fcc->queued_flush) == 1 ||
+ f2fs_is_multi_device(sbi)) {
ret = submit_flush_wait(sbi, ino);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
atomic_inc(&fcc->issued_flush);
return ret;
@@ -642,14 +661,14 @@
if (fcc->f2fs_issue_flush) {
wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
} else {
struct llist_node *list;
list = llist_del_all(&fcc->issue_list);
if (!list) {
wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
} else {
struct flush_cmd *tmp, *next;
@@ -658,7 +677,7 @@
llist_for_each_entry_safe(tmp, next, list, llnode) {
if (tmp == &cmd) {
cmd.ret = ret;
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
continue;
}
tmp->ret = ret;
@@ -687,7 +706,7 @@
if (!fcc)
return -ENOMEM;
atomic_set(&fcc->issued_flush, 0);
- atomic_set(&fcc->issing_flush, 0);
+ atomic_set(&fcc->queued_flush, 0);
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->fcc_info = fcc;
@@ -699,7 +718,7 @@
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
- kfree(fcc);
+ kvfree(fcc);
SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -718,7 +737,7 @@
kthread_stop(flush_thread);
}
if (free) {
- kfree(fcc);
+ kvfree(fcc);
SM_I(sbi)->fcc_info = NULL;
}
}
@@ -727,7 +746,7 @@
{
int ret = 0, i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return 0;
for (i = 1; i < sbi->s_ndevs; i++) {
@@ -785,9 +804,13 @@
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
- if (get_valid_blocks(sbi, segno, true) == 0)
+ if (get_valid_blocks(sbi, segno, true) == 0) {
clear_bit(GET_SEC_FROM_SEG(sbi, segno),
dirty_i->victim_secmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+ clear_bit(segno, SIT_I(sbi)->invalid_segmap);
+#endif
+ }
}
}
@@ -799,7 +822,7 @@
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned short valid_blocks;
+ unsigned short valid_blocks, ckpt_valid_blocks;
if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
return;
@@ -807,8 +830,10 @@
mutex_lock(&dirty_i->seglist_lock);
valid_blocks = get_valid_blocks(sbi, segno, false);
+ ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
- if (valid_blocks == 0) {
+ if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
+ ckpt_valid_blocks == sbi->blocks_per_seg)) {
__locate_dirty_segment(sbi, segno, PRE);
__remove_dirty_segment(sbi, segno, DIRTY);
} else if (valid_blocks < sbi->blocks_per_seg) {
@@ -821,6 +846,82 @@
mutex_unlock(&dirty_i->seglist_lock);
}
+/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned int segno;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+ if (get_valid_blocks(sbi, segno, false))
+ continue;
+ if (IS_CURSEG(sbi, segno))
+ continue;
+ __locate_dirty_segment(sbi, segno, PRE);
+ __remove_dirty_segment(sbi, segno, DIRTY);
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+}
+
+block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
+{
+ int ovp_hole_segs =
+ (overprovision_segments(sbi) - reserved_segments(sbi));
+ block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ block_t holes[2] = {0, 0}; /* DATA and NODE */
+ block_t unusable;
+ struct seg_entry *se;
+ unsigned int segno;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+ se = get_seg_entry(sbi, segno);
+ if (IS_NODESEG(se->type))
+ holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+ else
+ holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+
+ unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
+ if (unusable > ovp_holes)
+ return unusable - ovp_holes;
+ return 0;
+}
+
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
+{
+ int ovp_hole_segs =
+ (overprovision_segments(sbi) - reserved_segments(sbi));
+ if (unusable > F2FS_OPTION(sbi).unusable_cap)
+ return -EAGAIN;
+ if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
+ dirty_segments(sbi) > ovp_hole_segs)
+ return -EAGAIN;
+ return 0;
+}
+
+/* This is only used by SBI_CP_DISABLED */
+static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned int segno = 0;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+ if (get_valid_blocks(sbi, segno, false))
+ continue;
+ if (get_ckpt_valid_blocks(sbi, segno))
+ continue;
+ mutex_unlock(&dirty_i->seglist_lock);
+ return segno;
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+ return NULL_SEGNO;
+}
+
static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len)
@@ -841,7 +942,7 @@
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
- dc->issuing = 0;
+ dc->queued = 0;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
@@ -856,7 +957,8 @@
static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len,
- struct rb_node *parent, struct rb_node **p)
+ struct rb_node *parent, struct rb_node **p,
+ bool leftmost)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_cmd *dc;
@@ -864,7 +966,7 @@
dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
rb_link_node(&dc->rb_node, parent, p);
- rb_insert_color(&dc->rb_node, &dcc->root);
+ rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
return dc;
}
@@ -873,10 +975,10 @@
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
- atomic_sub(dc->issuing, &dcc->issing_discard);
+ atomic_sub(dc->queued, &dcc->queued_discard);
list_del(&dc->list);
- rb_erase(&dc->rb_node, &dcc->root);
+ rb_erase_cached(&dc->rb_node, &dcc->root);
dcc->undiscard_blks -= dc->len;
kmem_cache_free(discard_cmd_slab, dc);
@@ -905,9 +1007,9 @@
dc->error = 0;
if (dc->error)
- f2fs_msg(sbi->sb, KERN_INFO,
- "Issue discard(%u, %u, %u) failed, ret: %d",
- dc->lstart, dc->start, dc->len, dc->error);
+ printk_ratelimited(
+ "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
+ KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
__detach_discard_cmd(dcc, dc);
}
@@ -967,6 +1069,7 @@
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->timeout = 0;
if (discard_type == DPOLICY_BG) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -989,6 +1092,8 @@
} else if (discard_type == DPOLICY_UMOUNT) {
dpolicy->max_requests = UINT_MAX;
dpolicy->io_aware = false;
+ /* we need to issue all to keep CP_TRIMMED_FLAG */
+ dpolicy->granularity = 1;
}
}
@@ -1076,12 +1181,12 @@
dc->bio_ref++;
spin_unlock_irqrestore(&dc->lock, flags);
- atomic_inc(&dcc->issing_discard);
- dc->issuing++;
+ atomic_inc(&dcc->queued_discard);
+ dc->queued++;
list_move_tail(&dc->list, wait_list);
/* sanity check on discard range */
- __check_sit_bitmap(sbi, start, start + len);
+ __check_sit_bitmap(sbi, lstart, lstart + len);
bio->bi_private = dc;
bio->bi_end_io = f2fs_submit_discard_endio;
@@ -1113,6 +1218,7 @@
struct rb_node **p;
struct rb_node *parent = NULL;
struct discard_cmd *dc = NULL;
+ bool leftmost = true;
if (insert_p && insert_parent) {
parent = insert_parent;
@@ -1120,9 +1226,11 @@
goto do_insert;
}
- p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
+ p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
+ lstart, &leftmost);
do_insert:
- dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
+ dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
+ p, leftmost);
if (!dc)
return NULL;
@@ -1190,7 +1298,7 @@
NULL, lstart,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
- &insert_p, &insert_parent, true);
+ &insert_p, &insert_parent, true, NULL);
if (dc)
prev_dc = dc;
@@ -1268,9 +1376,12 @@
{
block_t lblkstart = blkstart;
+ if (!f2fs_bdev_support_discard(bdev))
+ return 0;
+
trace_f2fs_queue_discard(bdev, blkstart, blklen);
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
int devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
@@ -1298,7 +1409,7 @@
NULL, pos,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
- &insert_p, &insert_parent, true);
+ &insert_p, &insert_parent, true, NULL);
if (!dc)
dc = next_dc;
@@ -1311,7 +1422,7 @@
if (dc->state != D_PREP)
goto next;
- if (dpolicy->io_aware && !is_idle(sbi)) {
+ if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
io_interrupted = true;
break;
}
@@ -1351,7 +1462,14 @@
int i, issued = 0;
bool io_interrupted = false;
+ if (dpolicy->timeout != 0)
+ f2fs_update_time(sbi, dpolicy->timeout);
+
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+ if (dpolicy->timeout != 0 &&
+ f2fs_time_over(sbi, dpolicy->timeout))
+ break;
+
if (i + 1 < dpolicy->granularity)
break;
@@ -1370,8 +1488,12 @@
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
+ if (dpolicy->timeout != 0 &&
+ f2fs_time_over(sbi, dpolicy->timeout))
+ break;
+
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
- !is_idle(sbi)) {
+ !is_idle(sbi, DISCARD_TIME)) {
io_interrupted = true;
break;
}
@@ -1538,7 +1660,7 @@
}
/* This comes from f2fs_put_super */
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_policy dpolicy;
@@ -1546,6 +1668,7 @@
__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
dcc->discard_granularity);
+ dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
__issue_discard_cmd(sbi, &dpolicy);
dropped = __drop_discard_cmd(sbi);
@@ -1579,6 +1702,10 @@
if (dcc->discard_wake)
dcc->discard_wake = 0;
+ /* clean up pending candidates before going to sleep */
+ if (atomic_read(&dcc->queued_discard))
+ __wait_all_discard_cmd(sbi, NULL);
+
if (try_to_freeze())
continue;
if (f2fs_readonly(sbi->sb))
@@ -1600,7 +1727,9 @@
__wait_all_discard_cmd(sbi, &dpolicy);
wait_ms = dpolicy.min_interval;
} else if (issued == -1){
- wait_ms = dpolicy.mid_interval;
+ wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
+ if (!wait_ms)
+ wait_ms = dpolicy.mid_interval;
} else {
wait_ms = dpolicy.max_interval;
}
@@ -1619,42 +1748,34 @@
block_t lblkstart = blkstart;
int devi = 0;
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
devi = f2fs_target_device_index(sbi, blkstart);
+ if (blkstart < FDEV(devi).start_blk ||
+ blkstart > FDEV(devi).end_blk) {
+ f2fs_err(sbi, "Invalid block %x", blkstart);
+ return -EIO;
+ }
blkstart -= FDEV(devi).start_blk;
}
- /*
- * We need to know the type of the zone: for conventional zones,
- * use regular discard if the drive supports it. For sequential
- * zones, reset the zone write pointer.
- */
- switch (get_blkz_type(sbi, bdev, blkstart)) {
-
- case BLK_ZONE_TYPE_CONVENTIONAL:
- if (!blk_queue_discard(bdev_get_queue(bdev)))
- return 0;
- return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
- case BLK_ZONE_TYPE_SEQWRITE_REQ:
- case BLK_ZONE_TYPE_SEQWRITE_PREF:
+ /* For sequential zones, reset the zone write pointer */
+ if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
sector = SECTOR_FROM_BLOCK(blkstart);
nr_sects = SECTOR_FROM_BLOCK(blklen);
if (sector & (bdev_zone_sectors(bdev) - 1) ||
nr_sects != bdev_zone_sectors(bdev)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "(%d) %s: Unaligned discard attempted (block %x + %x)",
- devi, sbi->s_ndevs ? FDEV(devi).path: "",
- blkstart, blklen);
+ f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
+ devi, sbi->s_ndevs ? FDEV(devi).path : "",
+ blkstart, blklen);
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_reset_zones(bdev, sector,
- nr_sects, GFP_NOFS);
- default:
- /* Unknown zone type: broken device ? */
- return -EIO;
+ return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
}
+
+ /* For conventional zones, use regular discard if supported */
+ return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
}
#endif
@@ -1662,8 +1783,7 @@
struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi->sb) &&
- bdev_zoned_model(bdev) != BLK_ZONED_NONE)
+ if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
@@ -1725,11 +1845,11 @@
struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
int i;
- if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
+ if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
return false;
if (!force) {
- if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
+ if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
SM_I(sbi)->dcc_info->nr_discards >=
SM_I(sbi)->dcc_info->max_discards)
return false;
@@ -1810,7 +1930,7 @@
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
- bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
+ bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
mutex_lock(&dirty_i->seglist_lock);
@@ -1835,14 +1955,14 @@
dirty_i->nr_dirty[PRE]--;
}
- if (!test_opt(sbi, DISCARD))
+ if (!f2fs_realtime_discard_enable(sbi))
continue;
if (force && start >= cpc->trim_start &&
(end - 1) <= cpc->trim_end)
continue;
- if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
+ if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
continue;
@@ -1874,7 +1994,7 @@
sbi->blocks_per_seg, cur_pos);
len = next_pos - cur_pos;
- if (f2fs_sb_has_blkzoned(sbi->sb) ||
+ if (f2fs_sb_has_blkzoned(sbi) ||
(force && len < cpc->trim_minlen))
goto skip;
@@ -1922,13 +2042,13 @@
INIT_LIST_HEAD(&dcc->fstrim_list);
mutex_init(&dcc->cmd_lock);
atomic_set(&dcc->issued_discard, 0);
- atomic_set(&dcc->issing_discard, 0);
+ atomic_set(&dcc->queued_discard, 0);
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
dcc->undiscard_blks = 0;
dcc->next_pos = 0;
- dcc->root = RB_ROOT;
+ dcc->root = RB_ROOT_CACHED;
dcc->rbtree_check = false;
init_waitqueue_head(&dcc->discard_wait_queue);
@@ -1938,7 +2058,7 @@
"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(dcc->f2fs_issue_discard)) {
err = PTR_ERR(dcc->f2fs_issue_discard);
- kfree(dcc);
+ kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
return err;
}
@@ -1955,7 +2075,14 @@
f2fs_stop_discard_thread(sbi);
- kfree(dcc);
+ /*
+ * Recovery can cache discard commands, so in error path of
+ * fill_super(), it needs to give a chance to handle them.
+ */
+ if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
+ f2fs_issue_discard_timeout(sbi);
+
+ kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
@@ -2011,26 +2138,27 @@
mir_exist = f2fs_test_and_set_bit(offset,
se->cur_valid_map_mir);
if (unlikely(exist != mir_exist)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "when setting bitmap, blk:%u, old bit:%d",
- blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
+ blkaddr, exist);
f2fs_bug_on(sbi, 1);
}
#endif
if (unlikely(exist)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Bitmap was wrongly set, blk:%u", blkaddr);
+ f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
+ blkaddr);
f2fs_bug_on(sbi, 1);
se->valid_blocks--;
del = 0;
}
- if (f2fs_discard_en(sbi) &&
- !f2fs_test_and_set_bit(offset, se->discard_map))
+ if (!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
- /* don't overwrite by SSR to keep node chain */
- if (IS_NODESEG(se->type)) {
+ /*
+ * SSR should never reuse block which is checkpointed
+ * or newly invalidated.
+ */
+ if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
se->ckpt_valid_blocks++;
}
@@ -2040,22 +2168,32 @@
mir_exist = f2fs_test_and_clear_bit(offset,
se->cur_valid_map_mir);
if (unlikely(exist != mir_exist)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "when clearing bitmap, blk:%u, old bit:%d",
- blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
+ blkaddr, exist);
f2fs_bug_on(sbi, 1);
}
#endif
if (unlikely(!exist)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Bitmap was wrongly cleared, blk:%u", blkaddr);
+ f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
+ blkaddr);
f2fs_bug_on(sbi, 1);
se->valid_blocks++;
del = 0;
+ } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ /*
+ * If checkpoints are off, we must not reuse data that
+ * was used in the previous checkpoint. If it was used
+ * before, we must track that to know how much space we
+ * really have.
+ */
+ if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
+ spin_lock(&sbi->stat_lock);
+ sbi->unusable_block_count++;
+ spin_unlock(&sbi->stat_lock);
+ }
}
- if (f2fs_discard_en(sbi) &&
- f2fs_test_and_clear_bit(offset, se->discard_map))
+ if (f2fs_test_and_clear_bit(offset, se->discard_map))
sbi->discard_blks++;
}
if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -2066,7 +2204,7 @@
/* update total number of valid blocks to be written in ckpt area */
SIT_I(sbi)->written_valid_blocks += del;
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
get_sec_entry(sbi, segno)->valid_blocks += del;
}
@@ -2099,7 +2237,7 @@
struct seg_entry *se;
bool is_cp = false;
- if (!is_valid_data_blkaddr(sbi, blkaddr))
+ if (!__is_valid_data_blkaddr(blkaddr))
return true;
down_read(&sit_i->sentry_lock);
@@ -2332,9 +2470,12 @@
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
/* if segs_per_sec is large than 1, we need to keep original policy. */
- if (sbi->segs_per_sec != 1)
+ if (__is_large_section(sbi))
return CURSEG_I(sbi, type)->segno;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return 0;
+
if (test_opt(sbi, NOHEAP) &&
(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
return 0;
@@ -2432,6 +2573,7 @@
__next_free_blkoff(sbi, curseg, 0);
sum_page = f2fs_get_sum_page(sbi, new_segno);
+ f2fs_bug_on(sbi, IS_ERR(sum_page));
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
f2fs_put_page(sum_page, 1);
@@ -2478,6 +2620,15 @@
return 1;
}
}
+
+ /* find valid_blocks=0 in dirty list */
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ segno = get_free_segment(sbi);
+ if (segno != NULL_SEGNO) {
+ curseg->next_segno = segno;
+ return 1;
+ }
+ }
return 0;
}
@@ -2495,7 +2646,8 @@
else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
- else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+ else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+ likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
new_curseg(sbi, type, false);
else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
change_curseg(sbi, type);
@@ -2505,6 +2657,39 @@
stat_inc_seg_type(sbi, curseg);
}
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+ unsigned int start, unsigned int end)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int segno;
+
+ down_read(&SM_I(sbi)->curseg_lock);
+ mutex_lock(&curseg->curseg_mutex);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ segno = CURSEG_I(sbi, type)->segno;
+ if (segno < start || segno > end)
+ goto unlock;
+
+ if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
+ change_curseg(sbi, type);
+ else
+ new_curseg(sbi, type, true);
+
+ stat_inc_seg_type(sbi, curseg);
+
+ locate_dirty_segment(sbi, segno);
+unlock:
+ up_write(&SIT_I(sbi)->sentry_lock);
+
+ if (segno != curseg->segno)
+ f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
+ type, segno, curseg->segno);
+
+ mutex_unlock(&curseg->curseg_mutex);
+ up_read(&SM_I(sbi)->curseg_lock);
+}
+
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg;
@@ -2570,7 +2755,7 @@
NULL, start,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
- &insert_p, &insert_parent, true);
+ &insert_p, &insert_parent, true, NULL);
if (!dc)
dc = next_dc;
@@ -2628,7 +2813,7 @@
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
- bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
+ bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
@@ -2637,9 +2822,8 @@
goto out;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Found FS corruption, run fsck to fix.");
- return -EIO;
+ f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
+ return -EFSCORRUPTED;
}
/* start/end segment number in main_area */
@@ -2671,7 +2855,7 @@
* discard option. User configuration looks like using runtime discard
* or periodic fstrim instead of it.
*/
- if (test_opt(sbi, DISCARD))
+ if (f2fs_realtime_discard_enable(sbi))
goto out;
start_block = START_BLOCK(sbi, start_segno);
@@ -2932,12 +3116,14 @@
f2fs_inode_chksum_set(sbi, page);
}
+ if (F2FS_IO_ALIGNED(sbi))
+ fio->retry = false;
+
if (add_list) {
struct f2fs_bio_info *io;
INIT_LIST_HEAD(&fio->list);
fio->in_list = true;
- fio->retry = false;
io = sbi->write_io[fio->type] + fio->temp;
spin_lock(&io->io_lock);
list_add_tail(&fio->list, &io->io_list);
@@ -2954,7 +3140,7 @@
struct f2fs_sb_info *sbi = fio->sbi;
unsigned int devidx;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return;
devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
@@ -3020,6 +3206,7 @@
ClearPageError(page);
f2fs_submit_page_write(&fio);
+ stat_inc_meta_count(sbi, page->index);
f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
}
@@ -3051,21 +3238,31 @@
{
int err;
struct f2fs_sb_info *sbi = fio->sbi;
+ unsigned int segno;
fio->new_blkaddr = fio->old_blkaddr;
/* i/o temperature is needed for passing down write hints */
__get_segment_type(fio);
- f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
- GET_SEGNO(sbi, fio->new_blkaddr))->type));
+ segno = GET_SEGNO(sbi, fio->new_blkaddr);
+
+ if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
+ __func__, segno);
+ return -EFSCORRUPTED;
+ }
stat_inc_inplace_blocks(fio->sbi);
- err = f2fs_submit_page_bio(fio);
- if (!err)
+ if (fio->bio)
+ err = f2fs_merge_page_bio(fio);
+ else
+ err = f2fs_submit_page_bio(fio);
+ if (!err) {
update_device_state(fio);
-
- f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+ f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+ }
return err;
}
@@ -3177,34 +3374,48 @@
}
void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type, bool ordered)
+ enum page_type type, bool ordered, bool locked)
{
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
- f2fs_submit_merged_write_cond(sbi, page->mapping->host,
- 0, page->index, type);
- if (ordered)
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
+ if (ordered) {
wait_on_page_writeback(page);
- else
+ f2fs_bug_on(sbi, locked && PageWriteback(page));
+ } else {
wait_for_stable_page(page);
+ }
}
}
-void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
+void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *cpage;
- if (!is_valid_data_blkaddr(sbi, blkaddr))
+ if (!f2fs_post_read_required(inode))
+ return;
+
+ if (!__is_valid_data_blkaddr(blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
if (cpage) {
- f2fs_wait_on_page_writeback(cpage, DATA, true);
+ f2fs_wait_on_page_writeback(cpage, DATA, true, true);
f2fs_put_page(cpage, 1);
}
}
+void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
+ block_t len)
+{
+ block_t i;
+
+ for (i = 0; i < len; i++)
+ f2fs_wait_on_block_writeback(inode, blkaddr + i);
+}
+
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -3375,8 +3586,11 @@
/* sanity check for summary blocks */
if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
- sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
+ sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
+ f2fs_err(sbi, "invalid journal entries nats %u sits %u\n",
+ nats_in_cursum(nat_j), sits_in_cursum(sit_j));
return -EINVAL;
+ }
return 0;
}
@@ -3607,7 +3821,7 @@
struct f2fs_journal *journal = curseg->journal;
struct sit_entry_set *ses, *tmp;
struct list_head *head = &SM_I(sbi)->sit_entry_set;
- bool to_journal = true;
+ bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
struct seg_entry *se;
down_write(&sit_i->sentry_lock);
@@ -3626,7 +3840,8 @@
* entries, remove all entries from journal and add and account
* them in sit entry set.
*/
- if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
+ if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
+ !to_journal)
remove_sits_in_journal(sbi);
/*
@@ -3723,8 +3938,8 @@
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct sit_info *sit_i;
unsigned int sit_segs, start;
- char *src_bitmap;
- unsigned int bitmap_size;
+ char *src_bitmap, *bitmap;
+ unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
/* allocate memory for SIT information */
sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
@@ -3740,42 +3955,44 @@
if (!sit_i->sentries)
return -ENOMEM;
- bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
- sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
+ main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
+ sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
GFP_KERNEL);
if (!sit_i->dirty_sentries_bitmap)
return -ENOMEM;
+#ifdef CONFIG_F2FS_CHECK_FS
+ bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
+#else
+ bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
+#endif
+ sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
+ if (!sit_i->bitmap)
+ return -ENOMEM;
+
+ bitmap = sit_i->bitmap;
+
for (start = 0; start < MAIN_SEGS(sbi); start++) {
- sit_i->sentries[start].cur_valid_map
- = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
- sit_i->sentries[start].ckpt_valid_map
- = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
- if (!sit_i->sentries[start].cur_valid_map ||
- !sit_i->sentries[start].ckpt_valid_map)
- return -ENOMEM;
+ sit_i->sentries[start].cur_valid_map = bitmap;
+ bitmap += SIT_VBLOCK_MAP_SIZE;
+
+ sit_i->sentries[start].ckpt_valid_map = bitmap;
+ bitmap += SIT_VBLOCK_MAP_SIZE;
#ifdef CONFIG_F2FS_CHECK_FS
- sit_i->sentries[start].cur_valid_map_mir
- = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
- if (!sit_i->sentries[start].cur_valid_map_mir)
- return -ENOMEM;
+ sit_i->sentries[start].cur_valid_map_mir = bitmap;
+ bitmap += SIT_VBLOCK_MAP_SIZE;
#endif
- if (f2fs_discard_en(sbi)) {
- sit_i->sentries[start].discard_map
- = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
- GFP_KERNEL);
- if (!sit_i->sentries[start].discard_map)
- return -ENOMEM;
- }
+ sit_i->sentries[start].discard_map = bitmap;
+ bitmap += SIT_VBLOCK_MAP_SIZE;
}
sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->tmp_map)
return -ENOMEM;
- if (sbi->segs_per_sec > 1) {
+ if (__is_large_section(sbi)) {
sit_i->sec_entries =
f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
MAIN_SECS(sbi)),
@@ -3788,17 +4005,23 @@
sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
/* setup SIT bitmap from ckeckpoint pack */
- bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
+ sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
- sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+ sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
if (!sit_i->sit_bitmap)
return -ENOMEM;
#ifdef CONFIG_F2FS_CHECK_FS
- sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+ sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
+ sit_bitmap_size, GFP_KERNEL);
if (!sit_i->sit_bitmap_mir)
return -ENOMEM;
+
+ sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
+ main_bitmap_size, GFP_KERNEL);
+ if (!sit_i->invalid_segmap)
+ return -ENOMEM;
#endif
/* init SIT information */
@@ -3807,7 +4030,7 @@
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
sit_i->written_valid_blocks = 0;
- sit_i->bitmap_size = bitmap_size;
+ sit_i->bitmap_size = sit_bitmap_size;
sit_i->dirty_sentries = 0;
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
@@ -3904,6 +4127,8 @@
se = &sit_i->sentries[start];
page = get_current_sit_page(sbi, start);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
sit_blk = (struct f2fs_sit_block *)page_address(page);
sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
f2fs_put_page(page, 1);
@@ -3916,21 +4141,19 @@
total_node_blocks += se->valid_blocks;
/* build discard map only one time */
- if (f2fs_discard_en(sbi)) {
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff,
- SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map,
- se->cur_valid_map,
- SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks +=
- sbi->blocks_per_seg -
- se->valid_blocks;
- }
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff,
+ SIT_VBLOCK_MAP_SIZE);
+ } else {
+ memcpy(se->discard_map,
+ se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks +=
+ sbi->blocks_per_seg -
+ se->valid_blocks;
}
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
get_sec_entry(sbi, start)->valid_blocks +=
se->valid_blocks;
}
@@ -3943,11 +4166,9 @@
start = le32_to_cpu(segno_in_journal(journal, i));
if (start >= MAIN_SEGS(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong journal entry on segno %u",
- start);
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ f2fs_err(sbi, "Wrong journal entry on segno %u",
+ start);
+ err = -EFSCORRUPTED;
break;
}
@@ -3965,19 +4186,16 @@
if (IS_NODESEG(se->type))
total_node_blocks += se->valid_blocks;
- if (f2fs_discard_en(sbi)) {
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff,
- SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map, se->cur_valid_map,
- SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks += old_valid_blocks;
- sbi->discard_blks -= se->valid_blocks;
- }
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
+ } else {
+ memcpy(se->discard_map, se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks += old_valid_blocks;
+ sbi->discard_blks -= se->valid_blocks;
}
- if (sbi->segs_per_sec > 1) {
+ if (__is_large_section(sbi)) {
get_sec_entry(sbi, start)->valid_blocks +=
se->valid_blocks;
get_sec_entry(sbi, start)->valid_blocks -=
@@ -3987,11 +4205,9 @@
up_read(&curseg->journal_rwsem);
if (!err && total_node_blocks != valid_node_count(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "SIT is corrupted node# %u vs %u",
- total_node_blocks, valid_node_count(sbi));
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
+ total_node_blocks, valid_node_count(sbi));
+ err = -EFSCORRUPTED;
}
return err;
@@ -4082,6 +4298,39 @@
return init_victim_secmap(sbi);
}
+static int sanity_check_curseg(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ /*
+ * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
+ * In LFS curseg, all blkaddr after .next_blkoff should be unused.
+ */
+ for (i = 0; i < NO_CHECK_TYPE; i++) {
+ struct curseg_info *curseg = CURSEG_I(sbi, i);
+ struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
+ unsigned int blkofs = curseg->next_blkoff;
+
+ if (f2fs_test_bit(blkofs, se->cur_valid_map))
+ goto out;
+
+ if (curseg->alloc_type == SSR)
+ continue;
+
+ for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
+ if (!f2fs_test_bit(blkofs, se->cur_valid_map))
+ continue;
+out:
+ f2fs_err(sbi,
+ "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
+ i, curseg->segno, curseg->alloc_type,
+ curseg->next_blkoff, blkofs);
+ return -EFSCORRUPTED;
+ }
+ }
+ return 0;
+}
+
/*
* Update min, max modified time for cost-benefit GC algorithm
*/
@@ -4177,6 +4426,10 @@
if (err)
return err;
+ err = sanity_check_curseg(sbi);
+ if (err)
+ return err;
+
init_min_max_mtime(sbi);
return 0;
}
@@ -4212,7 +4465,7 @@
destroy_victim_secmap(sbi);
SM_I(sbi)->dirty_info = NULL;
- kfree(dirty_i);
+ kvfree(dirty_i);
}
static void destroy_curseg(struct f2fs_sb_info *sbi)
@@ -4224,10 +4477,10 @@
return;
SM_I(sbi)->curseg_array = NULL;
for (i = 0; i < NR_CURSEG_TYPE; i++) {
- kfree(array[i].sum_blk);
- kfree(array[i].journal);
+ kvfree(array[i].sum_blk);
+ kvfree(array[i].journal);
}
- kfree(array);
+ kvfree(array);
}
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
@@ -4238,39 +4491,31 @@
SM_I(sbi)->free_info = NULL;
kvfree(free_i->free_segmap);
kvfree(free_i->free_secmap);
- kfree(free_i);
+ kvfree(free_i);
}
static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
- unsigned int start;
if (!sit_i)
return;
- if (sit_i->sentries) {
- for (start = 0; start < MAIN_SEGS(sbi); start++) {
- kfree(sit_i->sentries[start].cur_valid_map);
-#ifdef CONFIG_F2FS_CHECK_FS
- kfree(sit_i->sentries[start].cur_valid_map_mir);
-#endif
- kfree(sit_i->sentries[start].ckpt_valid_map);
- kfree(sit_i->sentries[start].discard_map);
- }
- }
- kfree(sit_i->tmp_map);
+ if (sit_i->sentries)
+ kvfree(sit_i->bitmap);
+ kvfree(sit_i->tmp_map);
kvfree(sit_i->sentries);
kvfree(sit_i->sec_entries);
kvfree(sit_i->dirty_sentries_bitmap);
SM_I(sbi)->sit_info = NULL;
- kfree(sit_i->sit_bitmap);
+ kvfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(sit_i->sit_bitmap_mir);
+ kvfree(sit_i->sit_bitmap_mir);
+ kvfree(sit_i->invalid_segmap);
#endif
- kfree(sit_i);
+ kvfree(sit_i);
}
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
@@ -4286,7 +4531,7 @@
destroy_free_segmap(sbi);
destroy_sit_info(sbi);
sbi->sm_info = NULL;
- kfree(sm_info);
+ kvfree(sm_info);
}
int __init f2fs_create_segment_manager_caches(void)
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index b3d9e31..325781a 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/segment.h
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
@@ -85,7 +82,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
- ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
+ ((!__is_valid_data_blkaddr(blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
@@ -112,7 +109,7 @@
#define START_SEGNO(segno) \
(SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
#define SIT_BLK_CNT(sbi) \
- ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
+ DIV_ROUND_UP(MAIN_SEGS(sbi), SIT_ENTRY_PER_BLOCK)
#define f2fs_bitmap_size(nr) \
(BITS_TO_LONGS(nr) * sizeof(unsigned long))
@@ -229,9 +226,13 @@
block_t sit_base_addr; /* start block address of SIT area */
block_t sit_blocks; /* # of blocks used by SIT area */
block_t written_valid_blocks; /* # of valid blocks in main area */
+ char *bitmap; /* all bitmaps pointer */
char *sit_bitmap; /* SIT bitmap pointer */
#ifdef CONFIG_F2FS_CHECK_FS
char *sit_bitmap_mir; /* SIT bitmap mirror */
+
+ /* bitmap of segments to be ignored by GC in case of errors */
+ unsigned long *invalid_segmap;
#endif
unsigned int bitmap_size; /* SIT bitmap size */
@@ -336,12 +337,18 @@
* In order to get # of valid blocks in a section instantly from many
* segments, f2fs manages two counting structures separately.
*/
- if (use_section && sbi->segs_per_sec > 1)
+ if (use_section && __is_large_section(sbi))
return get_sec_entry(sbi, segno)->valid_blocks;
else
return get_seg_entry(sbi, segno)->valid_blocks;
}
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+}
+
static inline void seg_info_from_raw_sit(struct seg_entry *se,
struct f2fs_sit_entry *rs)
{
@@ -579,6 +586,15 @@
reserved_sections(sbi) + needed);
}
+static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+{
+ if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return true;
+ if (likely(!has_not_enough_free_secs(sbi, 0, 0)))
+ return true;
+ return false;
+}
+
static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
{
return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
@@ -644,14 +660,15 @@
f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
}
-static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
+static inline void verify_fio_blkaddr(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
- if (__is_meta_io(fio))
- verify_blkaddr(sbi, blk_addr, META_GENERIC);
- else
- verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
+ if (__is_valid_data_blkaddr(fio->old_blkaddr))
+ verify_blkaddr(sbi, fio->old_blkaddr, __is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC);
+ verify_blkaddr(sbi, fio->new_blkaddr, __is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC_ENHANCE);
}
/*
@@ -660,7 +677,6 @@
static inline int check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit)
{
-#ifdef CONFIG_F2FS_CHECK_FS
bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
int cur_pos = 0, next_pos;
@@ -681,21 +697,19 @@
} while (cur_pos < sbi->blocks_per_seg);
if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Mismatch valid blocks %d vs. %d",
- GET_SIT_VBLOCKS(raw_sit), valid_blocks);
+ f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
+ GET_SIT_VBLOCKS(raw_sit), valid_blocks);
set_sbi_flag(sbi, SBI_NEED_FSCK);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
-#endif
+
/* check segment usage, and check boundary of a given segment number */
if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong valid blocks %d or segno %u",
- GET_SIT_VBLOCKS(raw_sit), segno);
+ f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
+ GET_SIT_VBLOCKS(raw_sit), segno);
set_sbi_flag(sbi, SBI_NEED_FSCK);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -853,7 +867,7 @@
}
}
mutex_unlock(&dcc->cmd_lock);
- if (!wakeup)
+ if (!wakeup || !is_idle(sbi, DISCARD_TIME))
return;
wake_up:
dcc->discard_wake = 1;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 36cfd81..a467aca 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs shrinker support
* the basic infra was copied from fs/ubifs/shrinker.c
*
* Copyright (c) 2015 Motorola Mobility
* Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -138,6 +135,6 @@
f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
spin_lock(&f2fs_list_lock);
- list_del(&sbi->s_list);
+ list_del_init(&sbi->s_list);
spin_unlock(&f2fs_list_lock);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 287c9fe..1443cee 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/super.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/init.h>
@@ -26,6 +23,7 @@
#include <linux/f2fs_fs.h>
#include <linux/sysfs.h>
#include <linux/quota.h>
+#include <linux/unicode.h>
#include "f2fs.h"
#include "node.h"
@@ -41,7 +39,7 @@
#ifdef CONFIG_F2FS_FAULT_INJECTION
-char *f2fs_fault_name[FAULT_MAX] = {
+const char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
@@ -53,9 +51,10 @@
[FAULT_DIR_DEPTH] = "too big dir depth",
[FAULT_EVICT_INODE] = "evict_inode fail",
[FAULT_TRUNCATE] = "truncate fail",
- [FAULT_IO] = "IO error",
+ [FAULT_READ_IO] = "read IO error",
[FAULT_CHECKPOINT] = "checkpoint error",
[FAULT_DISCARD] = "discard error",
+ [FAULT_WRITE_IO] = "write IO error",
};
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -138,6 +137,10 @@
Opt_alloc,
Opt_fsync,
Opt_test_dummy_encryption,
+ Opt_checkpoint_disable,
+ Opt_checkpoint_disable_cap,
+ Opt_checkpoint_disable_cap_perc,
+ Opt_checkpoint_enable,
Opt_err,
};
@@ -196,44 +199,82 @@
{Opt_alloc, "alloc_mode=%s"},
{Opt_fsync, "fsync_mode=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
+ {Opt_checkpoint_disable, "checkpoint=disable"},
+ {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
+ {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
+ {Opt_checkpoint_enable, "checkpoint=enable"},
{Opt_err, NULL},
};
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
+void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
+ int level;
va_start(args, fmt);
- vaf.fmt = fmt;
+
+ level = printk_get_level(fmt);
+ vaf.fmt = printk_skip_level(fmt);
vaf.va = &args;
- printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
+ printk("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+
va_end(args);
}
+#ifdef CONFIG_UNICODE
+static const struct f2fs_sb_encodings {
+ __u16 magic;
+ char *name;
+ char *version;
+} f2fs_sb_encoding_map[] = {
+ {F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"},
+};
+
+static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb,
+ const struct f2fs_sb_encodings **encoding,
+ __u16 *flags)
+{
+ __u16 magic = le16_to_cpu(sb->s_encoding);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++)
+ if (magic == f2fs_sb_encoding_map[i].magic)
+ break;
+
+ if (i >= ARRAY_SIZE(f2fs_sb_encoding_map))
+ return -EINVAL;
+
+ *encoding = &f2fs_sb_encoding_map[i];
+ *flags = le16_to_cpu(sb->s_encoding_flags);
+
+ return 0;
+}
+#endif
+
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
{
- block_t limit = (sbi->user_block_count << 1) / 1000;
+ block_t limit = min((sbi->user_block_count << 1) / 1000,
+ sbi->user_block_count - sbi->reserved_blocks);
/* limit is 0.2% */
if (test_opt(sbi, RESERVE_ROOT) &&
F2FS_OPTION(sbi).root_reserved_blocks > limit) {
F2FS_OPTION(sbi).root_reserved_blocks = limit;
- f2fs_msg(sbi->sb, KERN_INFO,
- "Reduce reserved blocks for root = %u",
- F2FS_OPTION(sbi).root_reserved_blocks);
+ f2fs_info(sbi, "Reduce reserved blocks for root = %u",
+ F2FS_OPTION(sbi).root_reserved_blocks);
}
if (!test_opt(sbi, RESERVE_ROOT) &&
(!uid_eq(F2FS_OPTION(sbi).s_resuid,
make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
!gid_eq(F2FS_OPTION(sbi).s_resgid,
make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
- f2fs_msg(sbi->sb, KERN_INFO,
- "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
- from_kuid_munged(&init_user_ns,
- F2FS_OPTION(sbi).s_resuid),
- from_kgid_munged(&init_user_ns,
- F2FS_OPTION(sbi).s_resgid));
+ f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
+ from_kuid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resuid),
+ from_kgid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resgid));
}
static void init_once(void *foo)
@@ -254,42 +295,36 @@
int ret = -EINVAL;
if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_msg(sb, KERN_ERR,
- "Cannot change journaled "
- "quota options when quota turned on");
+ f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
return -EINVAL;
}
- if (f2fs_sb_has_quota_ino(sb)) {
- f2fs_msg(sb, KERN_INFO,
- "QUOTA feature is enabled, so ignore qf_name");
+ if (f2fs_sb_has_quota_ino(sbi)) {
+ f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name");
return 0;
}
qname = match_strdup(args);
if (!qname) {
- f2fs_msg(sb, KERN_ERR,
- "Not enough memory for storing quotafile name");
- return -EINVAL;
+ f2fs_err(sbi, "Not enough memory for storing quotafile name");
+ return -ENOMEM;
}
if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
ret = 0;
else
- f2fs_msg(sb, KERN_ERR,
- "%s quota file already specified",
+ f2fs_err(sbi, "%s quota file already specified",
QTYPE2NAME(qtype));
goto errout;
}
if (strchr(qname, '/')) {
- f2fs_msg(sb, KERN_ERR,
- "quotafile must be on filesystem root");
+ f2fs_err(sbi, "quotafile must be on filesystem root");
goto errout;
}
F2FS_OPTION(sbi).s_qf_names[qtype] = qname;
set_opt(sbi, QUOTA);
return 0;
errout:
- kfree(qname);
+ kvfree(qname);
return ret;
}
@@ -298,11 +333,10 @@
struct f2fs_sb_info *sbi = F2FS_SB(sb);
if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options"
- " when quota turned on");
+ f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
return -EINVAL;
}
- kfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
return 0;
}
@@ -314,9 +348,8 @@
* 'grpquota' mount options are allowed even without quota feature
* to support legacy quotas in quota files.
*/
- if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. "
- "Cannot enable project quota enforcement.");
+ if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) {
+ f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement.");
return -1;
}
if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
@@ -336,21 +369,18 @@
if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) ||
test_opt(sbi, PRJQUOTA)) {
- f2fs_msg(sbi->sb, KERN_ERR, "old and new quota "
- "format mixing");
+ f2fs_err(sbi, "old and new quota format mixing");
return -1;
}
if (!F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format "
- "not specified");
+ f2fs_err(sbi, "journaled quota format not specified");
return -1;
}
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "QUOTA feature is enabled, so ignore jquota_fmt");
+ if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) {
+ f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
}
return 0;
@@ -360,7 +390,6 @@
static int parse_options(struct super_block *sb, char *options)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
- struct request_queue *q;
substring_t args[MAX_OPT_ARGS];
char *p, *name;
int arg = 0;
@@ -400,10 +429,10 @@
set_opt(sbi, BG_GC);
set_opt(sbi, FORCE_FG_GC);
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_disable_roll_forward:
set_opt(sbi, DISABLE_ROLL_FORWARD);
@@ -415,19 +444,11 @@
return -EINVAL;
break;
case Opt_discard:
- q = bdev_get_queue(sb->s_bdev);
- if (blk_queue_discard(q)) {
- set_opt(sbi, DISCARD);
- } else if (!f2fs_sb_has_blkzoned(sb)) {
- f2fs_msg(sb, KERN_WARNING,
- "mounting with \"discard\" option, but "
- "the device does not support discard");
- }
+ set_opt(sbi, DISCARD);
break;
case Opt_nodiscard:
- if (f2fs_sb_has_blkzoned(sb)) {
- f2fs_msg(sb, KERN_WARNING,
- "discard is required for zoned block devices");
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_warn(sbi, "discard is required for zoned block devices");
return -EINVAL;
}
clear_opt(sbi, DISCARD);
@@ -459,20 +480,16 @@
break;
#else
case Opt_user_xattr:
- f2fs_msg(sb, KERN_INFO,
- "user_xattr options not supported");
+ f2fs_info(sbi, "user_xattr options not supported");
break;
case Opt_nouser_xattr:
- f2fs_msg(sb, KERN_INFO,
- "nouser_xattr options not supported");
+ f2fs_info(sbi, "nouser_xattr options not supported");
break;
case Opt_inline_xattr:
- f2fs_msg(sb, KERN_INFO,
- "inline_xattr options not supported");
+ f2fs_info(sbi, "inline_xattr options not supported");
break;
case Opt_noinline_xattr:
- f2fs_msg(sb, KERN_INFO,
- "noinline_xattr options not supported");
+ f2fs_info(sbi, "noinline_xattr options not supported");
break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
@@ -484,10 +501,10 @@
break;
#else
case Opt_acl:
- f2fs_msg(sb, KERN_INFO, "acl options not supported");
+ f2fs_info(sbi, "acl options not supported");
break;
case Opt_noacl:
- f2fs_msg(sb, KERN_INFO, "noacl options not supported");
+ f2fs_info(sbi, "noacl options not supported");
break;
#endif
case Opt_active_logs:
@@ -537,9 +554,8 @@
if (args->from && match_int(args, &arg))
return -EINVAL;
if (test_opt(sbi, RESERVE_ROOT)) {
- f2fs_msg(sb, KERN_INFO,
- "Preserve previous reserve_root=%u",
- F2FS_OPTION(sbi).root_reserved_blocks);
+ f2fs_info(sbi, "Preserve previous reserve_root=%u",
+ F2FS_OPTION(sbi).root_reserved_blocks);
} else {
F2FS_OPTION(sbi).root_reserved_blocks = arg;
set_opt(sbi, RESERVE_ROOT);
@@ -550,8 +566,7 @@
return -EINVAL;
uid = make_kuid(current_user_ns(), arg);
if (!uid_valid(uid)) {
- f2fs_msg(sb, KERN_ERR,
- "Invalid uid value %d", arg);
+ f2fs_err(sbi, "Invalid uid value %d", arg);
return -EINVAL;
}
F2FS_OPTION(sbi).s_resuid = uid;
@@ -561,8 +576,7 @@
return -EINVAL;
gid = make_kgid(current_user_ns(), arg);
if (!gid_valid(gid)) {
- f2fs_msg(sb, KERN_ERR,
- "Invalid gid value %d", arg);
+ f2fs_err(sbi, "Invalid gid value %d", arg);
return -EINVAL;
}
F2FS_OPTION(sbi).s_resgid = gid;
@@ -574,11 +588,9 @@
return -ENOMEM;
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
- if (f2fs_sb_has_blkzoned(sb)) {
- f2fs_msg(sb, KERN_WARNING,
- "adaptive mode is not allowed with "
- "zoned block device feature");
- kfree(name);
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature");
+ kvfree(name);
return -EINVAL;
}
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
@@ -586,44 +598,44 @@
!strncmp(name, "lfs", 3)) {
set_opt_mode(sbi, F2FS_MOUNT_LFS);
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_io_size_bits:
if (args->from && match_int(args, &arg))
return -EINVAL;
- if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
- f2fs_msg(sb, KERN_WARNING,
- "Not support %d, larger than %d",
- 1 << arg, BIO_MAX_PAGES);
+ if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) {
+ f2fs_warn(sbi, "Not support %d, larger than %d",
+ 1 << arg, BIO_MAX_PAGES);
return -EINVAL;
}
F2FS_OPTION(sbi).write_io_size_bits = arg;
break;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
case Opt_fault_injection:
if (args->from && match_int(args, &arg))
return -EINVAL;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
set_opt(sbi, FAULT_INJECTION);
-#else
- f2fs_msg(sb, KERN_INFO,
- "FAULT_INJECTION was not selected");
-#endif
break;
+
case Opt_fault_type:
if (args->from && match_int(args, &arg))
return -EINVAL;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
f2fs_build_fault_attr(sbi, 0, arg);
set_opt(sbi, FAULT_INJECTION);
-#else
- f2fs_msg(sb, KERN_INFO,
- "FAULT_INJECTION was not selected");
-#endif
break;
+#else
+ case Opt_fault_injection:
+ f2fs_info(sbi, "fault_injection options not supported");
+ break;
+
+ case Opt_fault_type:
+ f2fs_info(sbi, "fault_type options not supported");
+ break;
+#endif
case Opt_lazytime:
sb->s_flags |= SB_LAZYTIME;
break;
@@ -701,8 +713,7 @@
case Opt_jqfmt_vfsv0:
case Opt_jqfmt_vfsv1:
case Opt_noquota:
- f2fs_msg(sb, KERN_INFO,
- "quota operations not supported");
+ f2fs_info(sbi, "quota operations not supported");
break;
#endif
case Opt_whint:
@@ -719,10 +730,10 @@
!strncmp(name, "fs-based", 8)) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_alloc:
name = match_strdup(&args[0]);
@@ -736,10 +747,10 @@
!strncmp(name, "reuse", 5)) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_fsync:
name = match_strdup(&args[0]);
@@ -756,30 +767,52 @@
F2FS_OPTION(sbi).fsync_mode =
FSYNC_MODE_NOBARRIER;
} else {
- kfree(name);
+ kvfree(name);
return -EINVAL;
}
- kfree(name);
+ kvfree(name);
break;
case Opt_test_dummy_encryption:
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- if (!f2fs_sb_has_encrypt(sb)) {
- f2fs_msg(sb, KERN_ERR, "Encrypt feature is off");
+#ifdef CONFIG_FS_ENCRYPTION
+ if (!f2fs_sb_has_encrypt(sbi)) {
+ f2fs_err(sbi, "Encrypt feature is off");
return -EINVAL;
}
F2FS_OPTION(sbi).test_dummy_encryption = true;
- f2fs_msg(sb, KERN_INFO,
- "Test dummy encryption mode enabled");
+ f2fs_info(sbi, "Test dummy encryption mode enabled");
#else
- f2fs_msg(sb, KERN_INFO,
- "Test dummy encryption mount option ignored");
+ f2fs_info(sbi, "Test dummy encryption mount option ignored");
#endif
break;
+ case Opt_checkpoint_disable_cap_perc:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ if (arg < 0 || arg > 100)
+ return -EINVAL;
+ if (arg == 100)
+ F2FS_OPTION(sbi).unusable_cap =
+ sbi->user_block_count;
+ else
+ F2FS_OPTION(sbi).unusable_cap =
+ (sbi->user_block_count / 100) * arg;
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_disable_cap:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ F2FS_OPTION(sbi).unusable_cap = arg;
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_disable:
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_enable:
+ clear_opt(sbi, DISABLE_CHECKPOINT);
+ break;
default:
- f2fs_msg(sb, KERN_ERR,
- "Unrecognized mount option \"%s\" or missing value",
- p);
+ f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
+ p);
return -EINVAL;
}
}
@@ -787,53 +820,58 @@
if (f2fs_check_quota_options(sbi))
return -EINVAL;
#else
- if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) {
+ f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA");
return -EINVAL;
}
- if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) {
- f2fs_msg(sb, KERN_ERR,
- "Filesystem with project quota feature cannot be "
- "mounted RDWR without CONFIG_QUOTA");
+ if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) {
+ f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+ return -EINVAL;
+ }
+#endif
+#ifndef CONFIG_UNICODE
+ if (f2fs_sb_has_casefold(sbi)) {
+ f2fs_err(sbi,
+ "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
return -EINVAL;
}
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
- f2fs_msg(sb, KERN_ERR,
- "Should set mode=lfs with %uKB-sized IO",
- F2FS_IO_SIZE_KB(sbi));
+ f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
+ F2FS_IO_SIZE_KB(sbi));
return -EINVAL;
}
if (test_opt(sbi, INLINE_XATTR_SIZE)) {
- if (!f2fs_sb_has_extra_attr(sb) ||
- !f2fs_sb_has_flexible_inline_xattr(sb)) {
- f2fs_msg(sb, KERN_ERR,
- "extra_attr or flexible_inline_xattr "
- "feature is off");
+ int min_size, max_size;
+
+ if (!f2fs_sb_has_extra_attr(sbi) ||
+ !f2fs_sb_has_flexible_inline_xattr(sbi)) {
+ f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off");
return -EINVAL;
}
if (!test_opt(sbi, INLINE_XATTR)) {
- f2fs_msg(sb, KERN_ERR,
- "inline_xattr_size option should be "
- "set with inline_xattr option");
+ f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option");
return -EINVAL;
}
- if (!F2FS_OPTION(sbi).inline_xattr_size ||
- F2FS_OPTION(sbi).inline_xattr_size >=
- DEF_ADDRS_PER_INODE -
- F2FS_TOTAL_EXTRA_ATTR_SIZE -
- DEF_INLINE_RESERVED_SIZE -
- DEF_MIN_INLINE_SIZE) {
- f2fs_msg(sb, KERN_ERR,
- "inline xattr size is out of range");
+
+ min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32);
+ max_size = MAX_INLINE_XATTR_SIZE;
+
+ if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
+ F2FS_OPTION(sbi).inline_xattr_size > max_size) {
+ f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d",
+ min_size, max_size);
return -EINVAL;
}
}
+ if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+ f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n");
+ return -EINVAL;
+ }
+
/* Not pass down write hints if the number of active logs is lesser
* than NR_CURSEG_TYPE.
*/
@@ -873,7 +911,21 @@
static int f2fs_drop_inode(struct inode *inode)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
+
+ /*
+ * during filesystem shutdown, if checkpoint is disabled,
+ * drop useless meta/node dirty pages.
+ */
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (inode->i_ino == F2FS_NODE_INO(sbi) ||
+ inode->i_ino == F2FS_META_INO(sbi)) {
+ trace_f2fs_drop_inode(inode, 1);
+ return 1;
+ }
+ }
+
/*
* This is to avoid a deadlock condition like below.
* writeback_single_inode(inode)
@@ -897,6 +949,10 @@
sb_start_intwrite(inode->i_sb);
f2fs_i_size_write(inode, 0);
+ f2fs_submit_merged_write_cond(F2FS_I_SB(inode),
+ inode, NULL, 0, DATA);
+ truncate_inode_pages_final(inode->i_mapping);
+
if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode);
@@ -909,6 +965,8 @@
return 0;
}
ret = generic_drop_inode(inode);
+ if (!ret)
+ ret = fscrypt_drop_inode(inode);
trace_f2fs_drop_inode(inode, ret);
return ret;
}
@@ -975,17 +1033,12 @@
f2fs_inode_dirtied(inode, false);
}
-static void f2fs_i_callback(struct rcu_head *head)
+static void f2fs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
+ fscrypt_free_inode(inode);
kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
}
-static void f2fs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, f2fs_i_callback);
-}
-
static void destroy_percpu_info(struct f2fs_sb_info *sbi)
{
percpu_counter_destroy(&sbi->alloc_valid_block_count);
@@ -999,10 +1052,10 @@
for (i = 0; i < sbi->s_ndevs; i++) {
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
- kfree(FDEV(i).blkz_type);
+ kvfree(FDEV(i).blkz_seq);
#endif
}
- kfree(sbi->devs);
+ kvfree(sbi->devs);
}
static void f2fs_put_super(struct super_block *sb)
@@ -1021,8 +1074,8 @@
* But, the previous checkpoint was not done by umount, it needs to do
* clean checkpoint again.
*/
- if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
- !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+ !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) {
struct cp_control cpc = {
.reason = CP_UMOUNT,
};
@@ -1030,18 +1083,16 @@
}
/* be sure to wait for any on-going discard commands */
- dropped = f2fs_wait_discard_bios(sbi);
+ dropped = f2fs_issue_discard_timeout(sbi);
- if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
+ if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
+ !sbi->discard_blks && !dropped) {
struct cp_control cpc = {
.reason = CP_UMOUNT | CP_TRIMMED,
};
f2fs_write_checkpoint(sbi, &cpc);
}
- /* f2fs_write_checkpoint can update stat informaion */
- f2fs_destroy_stats(sbi);
-
/*
* normally superblock is clean, so we need to release this.
* In addition, EIO will skip do checkpoint, we need this as well.
@@ -1059,31 +1110,43 @@
f2fs_bug_on(sbi, sbi->fsync_node_num);
iput(sbi->node_inode);
+ sbi->node_inode = NULL;
+
iput(sbi->meta_inode);
+ sbi->meta_inode = NULL;
+
+ /*
+ * iput() can update stat information, if f2fs_write_checkpoint()
+ * above failed with error.
+ */
+ f2fs_destroy_stats(sbi);
/* destroy f2fs internal modules */
f2fs_destroy_node_manager(sbi);
f2fs_destroy_segment_manager(sbi);
- kfree(sbi->ckpt);
+ kvfree(sbi->ckpt);
f2fs_unregister_sysfs(sbi);
sb->s_fs_info = NULL;
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
- kfree(sbi->raw_super);
+ kvfree(sbi->raw_super);
destroy_device_list(sbi);
mempool_destroy(sbi->write_io_dummy);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
destroy_percpu_info(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
- kfree(sbi->write_io[i]);
- kfree(sbi);
+ kvfree(sbi->write_io[i]);
+#ifdef CONFIG_UNICODE
+ utf8_unload(sbi->s_encoding);
+#endif
+ kvfree(sbi);
}
int f2fs_sync_fs(struct super_block *sb, int sync)
@@ -1093,6 +1156,8 @@
if (unlikely(f2fs_cp_error(sbi)))
return 0;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return 0;
trace_f2fs_sync_fs(sb, sync);
@@ -1192,14 +1257,21 @@
buf->f_blocks = total_count - start_count;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
sbi->current_reserved_blocks;
+
+ spin_lock(&sbi->stat_lock);
+ if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
+ buf->f_bfree = 0;
+ else
+ buf->f_bfree -= sbi->unusable_block_count;
+ spin_unlock(&sbi->stat_lock);
+
if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
buf->f_bavail = buf->f_bfree -
F2FS_OPTION(sbi).root_reserved_blocks;
else
buf->f_bavail = 0;
- avail_node_count = sbi->total_node_count - sbi->nquota_files -
- F2FS_RESERVED_NODE_NUM;
+ avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
if (avail_node_count > user_block_count) {
buf->f_files = user_block_count;
@@ -1276,6 +1348,8 @@
seq_puts(seq, ",disable_roll_forward");
if (test_opt(sbi, DISCARD))
seq_puts(seq, ",discard");
+ else
+ seq_puts(seq, ",nodiscard");
if (test_opt(sbi, NOHEAP))
seq_puts(seq, ",no_heap");
else
@@ -1336,7 +1410,8 @@
from_kgid_munged(&init_user_ns,
F2FS_OPTION(sbi).s_resgid));
if (F2FS_IO_SIZE_BITS(sbi))
- seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
+ seq_printf(seq, ",io_bits=%u",
+ F2FS_OPTION(sbi).write_io_size_bits);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (test_opt(sbi, FAULT_INJECTION)) {
seq_printf(seq, ",fault_injection=%u",
@@ -1360,7 +1435,7 @@
seq_printf(seq, ",whint_mode=%s", "user-based");
else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS)
seq_printf(seq, ",whint_mode=%s", "fs-based");
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
if (F2FS_OPTION(sbi).test_dummy_encryption)
seq_puts(seq, ",test_dummy_encryption");
#endif
@@ -1370,6 +1445,9 @@
else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
seq_printf(seq, ",alloc_mode=%s", "reuse");
+ if (test_opt(sbi, DISABLE_CHECKPOINT))
+ seq_printf(seq, ",checkpoint=disable:%u",
+ F2FS_OPTION(sbi).unusable_cap);
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1397,11 +1475,12 @@
set_opt(sbi, INLINE_DENTRY);
set_opt(sbi, EXTENT_CACHE);
set_opt(sbi, NOHEAP);
+ clear_opt(sbi, DISABLE_CHECKPOINT);
+ F2FS_OPTION(sbi).unusable_cap = 0;
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
- if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
- set_opt(sbi, DISCARD);
- if (f2fs_sb_has_blkzoned(sbi->sb))
+ set_opt(sbi, DISCARD);
+ if (f2fs_sb_has_blkzoned(sbi))
set_opt_mode(sbi, F2FS_MOUNT_LFS);
else
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
@@ -1419,6 +1498,76 @@
#ifdef CONFIG_QUOTA
static int f2fs_enable_quotas(struct super_block *sb);
#endif
+
+static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+ unsigned int s_flags = sbi->sb->s_flags;
+ struct cp_control cpc;
+ int err = 0;
+ int ret;
+ block_t unusable;
+
+ if (s_flags & SB_RDONLY) {
+ f2fs_err(sbi, "checkpoint=disable on readonly fs");
+ return -EINVAL;
+ }
+ sbi->sb->s_flags |= SB_ACTIVE;
+
+ f2fs_update_time(sbi, DISABLE_TIME);
+
+ while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+ mutex_lock(&sbi->gc_mutex);
+ err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+ if (err == -ENODATA) {
+ err = 0;
+ break;
+ }
+ if (err && err != -EAGAIN)
+ break;
+ }
+
+ ret = sync_filesystem(sbi->sb);
+ if (ret || err) {
+ err = ret ? ret: err;
+ goto restore_flag;
+ }
+
+ unusable = f2fs_get_unusable_blocks(sbi);
+ if (f2fs_disable_cp_again(sbi, unusable)) {
+ err = -EAGAIN;
+ goto restore_flag;
+ }
+
+ mutex_lock(&sbi->gc_mutex);
+ cpc.reason = CP_PAUSE;
+ set_sbi_flag(sbi, SBI_CP_DISABLED);
+ err = f2fs_write_checkpoint(sbi, &cpc);
+ if (err)
+ goto out_unlock;
+
+ spin_lock(&sbi->stat_lock);
+ sbi->unusable_block_count = unusable;
+ spin_unlock(&sbi->stat_lock);
+
+out_unlock:
+ mutex_unlock(&sbi->gc_mutex);
+restore_flag:
+ sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+ return err;
+}
+
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+ mutex_lock(&sbi->gc_mutex);
+ f2fs_dirty_to_prefree(sbi);
+
+ clear_sbi_flag(sbi, SBI_CP_DISABLED);
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
+ mutex_unlock(&sbi->gc_mutex);
+
+ f2fs_sync_fs(sbi->sb, 1);
+}
+
static int f2fs_remount(struct super_block *sb, int *flags, char *data)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1428,6 +1577,9 @@
bool need_restart_gc = false;
bool need_stop_gc = false;
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+ bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+ bool no_io_align = !F2FS_IO_ALIGNED(sbi);
+ bool checkpoint_changed;
#ifdef CONFIG_QUOTA
int i, j;
#endif
@@ -1448,7 +1600,7 @@
GFP_KERNEL);
if (!org_mount_opt.s_qf_names[i]) {
for (j = 0; j < i; j++)
- kfree(org_mount_opt.s_qf_names[j]);
+ kvfree(org_mount_opt.s_qf_names[j]);
return -ENOMEM;
}
} else {
@@ -1460,8 +1612,8 @@
/* recover superblocks we couldn't write due to previous RO mount */
if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
err = f2fs_commit_super(sbi, false);
- f2fs_msg(sb, KERN_INFO,
- "Try to recover all the superblocks, ret: %d", err);
+ f2fs_info(sbi, "Try to recover all the superblocks, ret: %d",
+ err);
if (!err)
clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
}
@@ -1472,6 +1624,8 @@
err = parse_options(sb, data);
if (err)
goto restore_opts;
+ checkpoint_changed =
+ disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
/*
* Previous and new state of filesystem is RO,
@@ -1485,12 +1639,12 @@
err = dquot_suspend(sb, -1);
if (err < 0)
goto restore_opts;
- } else if (f2fs_readonly(sb) && !(*flags & MS_RDONLY)) {
+ } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) {
/* dquot_resume needs RW */
sb->s_flags &= ~SB_RDONLY;
if (sb_any_quota_suspended(sb)) {
dquot_resume(sb, -1);
- } else if (f2fs_sb_has_quota_ino(sb)) {
+ } else if (f2fs_sb_has_quota_ino(sbi)) {
err = f2fs_enable_quotas(sb);
if (err)
goto restore_opts;
@@ -1500,8 +1654,19 @@
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
- f2fs_msg(sbi->sb, KERN_WARNING,
- "switch extent_cache option is not allowed");
+ f2fs_warn(sbi, "switch extent_cache option is not allowed");
+ goto restore_opts;
+ }
+
+ if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) {
+ err = -EINVAL;
+ f2fs_warn(sbi, "switch io_bits option is not allowed");
+ goto restore_opts;
+ }
+
+ if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+ err = -EINVAL;
+ f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
goto restore_opts;
}
@@ -1533,6 +1698,16 @@
clear_sbi_flag(sbi, SBI_IS_CLOSE);
}
+ if (checkpoint_changed) {
+ if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+ err = f2fs_disable_checkpoint(sbi);
+ if (err)
+ goto restore_gc;
+ } else {
+ f2fs_enable_checkpoint(sbi);
+ }
+ }
+
/*
* We stop issue flush thread if FS is mounted as RO
* or if flush_merge is not passed in mount option.
@@ -1549,19 +1724,19 @@
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
- kfree(org_mount_opt.s_qf_names[i]);
+ kvfree(org_mount_opt.s_qf_names[i]);
#endif
/* Update the POSIXACL Flag */
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
limit_reserve_root(sbi);
+ *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
return 0;
restore_gc:
if (need_restart_gc) {
if (f2fs_start_gc_thread(sbi))
- f2fs_msg(sbi->sb, KERN_WARNING,
- "background gc thread has stopped");
+ f2fs_warn(sbi, "background gc thread has stopped");
} else if (need_stop_gc) {
f2fs_stop_gc_thread(sbi);
}
@@ -1569,7 +1744,7 @@
#ifdef CONFIG_QUOTA
F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++) {
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i];
}
#endif
@@ -1608,6 +1783,7 @@
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto repeat;
}
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
return PTR_ERR(page);
}
@@ -1619,6 +1795,7 @@
}
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
return -EIO;
}
@@ -1660,6 +1837,7 @@
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
break;
}
@@ -1696,6 +1874,11 @@
static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
{
+ if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
+ f2fs_err(sbi, "quota sysfile may be corrupted, skip loading it");
+ return 0;
+ }
+
return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
F2FS_OPTION(sbi).s_jquota_fmt, type);
}
@@ -1705,11 +1888,10 @@
int enabled = 0;
int i, err;
- if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) {
+ if (f2fs_sb_has_quota_ino(sbi) && rdonly) {
err = f2fs_enable_quotas(sbi->sb);
if (err) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Cannot turn on quota_ino: %d", err);
+ f2fs_err(sbi, "Cannot turn on quota_ino: %d", err);
return 0;
}
return 1;
@@ -1722,8 +1904,8 @@
enabled = 1;
continue;
}
- f2fs_msg(sbi->sb, KERN_ERR,
- "Cannot turn on quotas: %d on %d", err, i);
+ f2fs_err(sbi, "Cannot turn on quotas: %d on %d",
+ err, i);
}
}
return enabled;
@@ -1736,7 +1918,7 @@
unsigned long qf_inum;
int err;
- BUG_ON(!f2fs_sb_has_quota_ino(sb));
+ BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb)));
qf_inum = f2fs_qf_ino(sb, type);
if (!qf_inum)
@@ -1744,8 +1926,7 @@
qf_inode = f2fs_iget(sb, qf_inum);
if (IS_ERR(qf_inode)) {
- f2fs_msg(sb, KERN_ERR,
- "Bad quota inode %u:%lu", type, qf_inum);
+ f2fs_err(F2FS_SB(sb), "Bad quota inode %u:%lu", type, qf_inum);
return PTR_ERR(qf_inode);
}
@@ -1758,15 +1939,22 @@
static int f2fs_enable_quotas(struct super_block *sb)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
int type, err = 0;
unsigned long qf_inum;
bool quota_mopt[MAXQUOTAS] = {
- test_opt(F2FS_SB(sb), USRQUOTA),
- test_opt(F2FS_SB(sb), GRPQUOTA),
- test_opt(F2FS_SB(sb), PRJQUOTA),
+ test_opt(sbi, USRQUOTA),
+ test_opt(sbi, GRPQUOTA),
+ test_opt(sbi, PRJQUOTA),
};
- sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+ if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
+ f2fs_err(sbi, "quota file may be corrupted, skip loading it");
+ return 0;
+ }
+
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+
for (type = 0; type < MAXQUOTAS; type++) {
qf_inum = f2fs_qf_ino(sb, type);
if (qf_inum) {
@@ -1774,12 +1962,12 @@
DQUOT_USAGE_ENABLED |
(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to enable quota tracking "
- "(type=%d, err=%d). Please run "
- "fsck to fix.", type, err);
+ f2fs_err(sbi, "Failed to enable quota tracking (type=%d, err=%d). Please run fsck to fix.",
+ type, err);
for (type--; type >= 0; type--)
dquot_quota_off(sb, type);
+ set_sbi_flag(F2FS_SB(sb),
+ SBI_QUOTA_NEED_REPAIR);
return err;
}
}
@@ -1787,35 +1975,65 @@
return 0;
}
-static int f2fs_quota_sync(struct super_block *sb, int type)
+int f2fs_quota_sync(struct super_block *sb, int type)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct quota_info *dqopt = sb_dqopt(sb);
int cnt;
int ret;
+ /*
+ * do_quotactl
+ * f2fs_quota_sync
+ * down_read(quota_sem)
+ * dquot_writeback_dquots()
+ * f2fs_dquot_commit
+ * block_operation
+ * down_read(quota_sem)
+ */
+ f2fs_lock_op(sbi);
+
+ down_read(&sbi->quota_sem);
ret = dquot_writeback_dquots(sb, type);
if (ret)
- return ret;
+ goto out;
/*
* Now when everything is written we can discard the pagecache so
* that userspace sees the changes.
*/
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ struct address_space *mapping;
+
if (type != -1 && cnt != type)
continue;
if (!sb_has_quota_active(sb, cnt))
continue;
- ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
+ mapping = dqopt->files[cnt]->i_mapping;
+
+ ret = filemap_fdatawrite(mapping);
if (ret)
- return ret;
+ goto out;
+
+ /* if we are using journalled quota */
+ if (is_journalled_quota(sbi))
+ continue;
+
+ ret = filemap_fdatawait(mapping);
+ if (ret)
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
inode_lock(dqopt->files[cnt]);
truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
inode_unlock(dqopt->files[cnt]);
}
- return 0;
+out:
+ if (ret)
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ f2fs_unlock_op(sbi);
+ return ret;
}
static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
@@ -1824,6 +2042,12 @@
struct inode *inode;
int err;
+ /* if quota sysfile exists, deny enabling quota with specific file */
+ if (f2fs_sb_has_quota_ino(F2FS_SB(sb))) {
+ f2fs_err(F2FS_SB(sb), "quota sysfile already exists");
+ return -EBUSY;
+ }
+
err = f2fs_quota_sync(sb, type);
if (err)
return err;
@@ -1836,15 +2060,14 @@
inode_lock(inode);
F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL;
- inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
- S_NOATIME | S_IMMUTABLE);
+ f2fs_set_inode_flags(inode);
inode_unlock(inode);
f2fs_mark_inode_dirty_sync(inode, false);
return 0;
}
-static int f2fs_quota_off(struct super_block *sb, int type)
+static int __f2fs_quota_off(struct super_block *sb, int type)
{
struct inode *inode = sb_dqopt(sb)->files[type];
int err;
@@ -1857,12 +2080,12 @@
goto out_put;
err = dquot_quota_off(sb, type);
- if (err || f2fs_sb_has_quota_ino(sb))
+ if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb)))
goto out_put;
inode_lock(inode);
F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL);
- inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
+ f2fs_set_inode_flags(inode);
inode_unlock(inode);
f2fs_mark_inode_dirty_sync(inode, false);
out_put:
@@ -1870,23 +2093,125 @@
return err;
}
+static int f2fs_quota_off(struct super_block *sb, int type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int err;
+
+ err = __f2fs_quota_off(sb, type);
+
+ /*
+ * quotactl can shutdown journalled quota, result in inconsistence
+ * between quota record and fs data by following updates, tag the
+ * flag to let fsck be aware of it.
+ */
+ if (is_journalled_quota(sbi))
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ return err;
+}
+
void f2fs_quota_off_umount(struct super_block *sb)
{
int type;
int err;
for (type = 0; type < MAXQUOTAS; type++) {
- err = f2fs_quota_off(sb, type);
+ err = __f2fs_quota_off(sb, type);
if (err) {
int ret = dquot_quota_off(sb, type);
- f2fs_msg(sb, KERN_ERR,
- "Fail to turn off disk quota "
- "(type: %d, err: %d, ret:%d), Please "
- "run fsck to fix it.", type, err, ret);
- set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
+ f2fs_err(F2FS_SB(sb), "Fail to turn off disk quota (type: %d, err: %d, ret:%d), Please run fsck to fix it.",
+ type, err, ret);
+ set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
}
}
+ /*
+ * In case of checkpoint=disable, we must flush quota blocks.
+ * This can cause NULL exception for node_inode in end_io, since
+ * put_super already dropped it.
+ */
+ sync_filesystem(sb);
+}
+
+static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
+{
+ struct quota_info *dqopt = sb_dqopt(sb);
+ int type;
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!dqopt->files[type])
+ continue;
+ f2fs_inode_synced(dqopt->files[type]);
+ }
+}
+
+static int f2fs_dquot_commit(struct dquot *dquot)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
+ int ret;
+
+ down_read(&sbi->quota_sem);
+ ret = dquot_commit(dquot);
+ if (ret < 0)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ return ret;
+}
+
+static int f2fs_dquot_acquire(struct dquot *dquot)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
+ int ret;
+
+ down_read(&sbi->quota_sem);
+ ret = dquot_acquire(dquot);
+ if (ret < 0)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ return ret;
+}
+
+static int f2fs_dquot_release(struct dquot *dquot)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
+ int ret;
+
+ down_read(&sbi->quota_sem);
+ ret = dquot_release(dquot);
+ if (ret < 0)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ return ret;
+}
+
+static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
+{
+ struct super_block *sb = dquot->dq_sb;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int ret;
+
+ down_read(&sbi->quota_sem);
+ ret = dquot_mark_dquot_dirty(dquot);
+
+ /* if we are using journalled quota */
+ if (is_journalled_quota(sbi))
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+
+ up_read(&sbi->quota_sem);
+ return ret;
+}
+
+static int f2fs_dquot_commit_info(struct super_block *sb, int type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int ret;
+
+ down_read(&sbi->quota_sem);
+ ret = dquot_commit_info(sb, type);
+ if (ret < 0)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ return ret;
}
static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
@@ -1897,11 +2222,11 @@
static const struct dquot_operations f2fs_quota_operations = {
.get_reserved_space = f2fs_get_reserved_space,
- .write_dquot = dquot_commit,
- .acquire_dquot = dquot_acquire,
- .release_dquot = dquot_release,
- .mark_dirty = dquot_mark_dquot_dirty,
- .write_info = dquot_commit_info,
+ .write_dquot = f2fs_dquot_commit,
+ .acquire_dquot = f2fs_dquot_acquire,
+ .release_dquot = f2fs_dquot_release,
+ .mark_dirty = f2fs_dquot_mark_dquot_dirty,
+ .write_info = f2fs_dquot_commit_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
.get_projid = f2fs_get_projid,
@@ -1919,6 +2244,11 @@
.get_nextdqblk = dquot_get_next_dqblk,
};
#else
+int f2fs_quota_sync(struct super_block *sb, int type)
+{
+ return 0;
+}
+
void f2fs_quota_off_umount(struct super_block *sb)
{
}
@@ -1926,8 +2256,8 @@
static const struct super_operations f2fs_sops = {
.alloc_inode = f2fs_alloc_inode,
+ .free_inode = f2fs_free_inode,
.drop_inode = f2fs_drop_inode,
- .destroy_inode = f2fs_destroy_inode,
.write_inode = f2fs_write_inode,
.dirty_inode = f2fs_dirty_inode,
.show_options = f2fs_show_options,
@@ -1945,7 +2275,7 @@
.remount_fs = f2fs_remount,
};
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
{
return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
@@ -1964,7 +2294,7 @@
* if LOST_FOUND feature is enabled.
*
*/
- if (f2fs_sb_has_lost_found(sbi->sb) &&
+ if (f2fs_sb_has_lost_found(sbi) &&
inode->i_ino == F2FS_ROOT_INO(sbi))
return -EPERM;
@@ -2036,7 +2366,7 @@
static loff_t max_file_blocks(void)
{
loff_t result = 0;
- loff_t leaf_count = ADDRS_PER_BLOCK;
+ loff_t leaf_count = DEF_ADDRS_PER_BLOCK;
/*
* note: previously, result is equal to (DEF_ADDRS_PER_INODE -
@@ -2097,55 +2427,49 @@
(segment_count << log_blocks_per_seg);
if (segment0_blkaddr != cp_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
- segment0_blkaddr, cp_blkaddr);
+ f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
+ segment0_blkaddr, cp_blkaddr);
return true;
}
if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
sit_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
- cp_blkaddr, sit_blkaddr,
- segment_count_ckpt << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
+ cp_blkaddr, sit_blkaddr,
+ segment_count_ckpt << log_blocks_per_seg);
return true;
}
if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
nat_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
- sit_blkaddr, nat_blkaddr,
- segment_count_sit << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
+ sit_blkaddr, nat_blkaddr,
+ segment_count_sit << log_blocks_per_seg);
return true;
}
if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
ssa_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
- nat_blkaddr, ssa_blkaddr,
- segment_count_nat << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
+ nat_blkaddr, ssa_blkaddr,
+ segment_count_nat << log_blocks_per_seg);
return true;
}
if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
main_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
- ssa_blkaddr, main_blkaddr,
- segment_count_ssa << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
+ ssa_blkaddr, main_blkaddr,
+ segment_count_ssa << log_blocks_per_seg);
return true;
}
if (main_end_blkaddr > seg_end_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
- main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
- segment_count_main << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
+ main_blkaddr,
+ segment0_blkaddr +
+ (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
return true;
} else if (main_end_blkaddr < seg_end_blkaddr) {
int err = 0;
@@ -2162,12 +2486,11 @@
err = __f2fs_commit_super(bh, NULL);
res = err ? "failed" : "done";
}
- f2fs_msg(sb, KERN_INFO,
- "Fix alignment : %s, start(%u) end(%u) block(%u)",
- res, main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
- segment_count_main << log_blocks_per_seg);
+ f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)",
+ res, main_blkaddr,
+ segment0_blkaddr +
+ (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
if (err)
return true;
}
@@ -2181,39 +2504,52 @@
block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
- struct super_block *sb = sbi->sb;
unsigned int blocksize;
+ size_t crc_offset = 0;
+ __u32 crc = 0;
- if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
- f2fs_msg(sb, KERN_INFO,
- "Magic Mismatch, valid(0x%x) - read(0x%x)",
- F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
- return 1;
+ if (le32_to_cpu(raw_super->magic) != F2FS_SUPER_MAGIC) {
+ f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)",
+ F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
+ return -EINVAL;
+ }
+
+ /* Check checksum_offset and crc in superblock */
+ if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) {
+ crc_offset = le32_to_cpu(raw_super->checksum_offset);
+ if (crc_offset !=
+ offsetof(struct f2fs_super_block, crc)) {
+ f2fs_info(sbi, "Invalid SB checksum offset: %zu",
+ crc_offset);
+ return -EFSCORRUPTED;
+ }
+ crc = le32_to_cpu(raw_super->crc);
+ if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) {
+ f2fs_info(sbi, "Invalid SB checksum value: %u", crc);
+ return -EFSCORRUPTED;
+ }
}
/* Currently, support only 4KB page cache size */
if (F2FS_BLKSIZE != PAGE_SIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid page_cache_size (%lu), supports only 4KB\n",
- PAGE_SIZE);
- return 1;
+ f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB",
+ PAGE_SIZE);
+ return -EFSCORRUPTED;
}
/* Currently, support only 4KB block size */
blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
if (blocksize != F2FS_BLKSIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid blocksize (%u), supports only 4KB\n",
- blocksize);
- return 1;
+ f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB",
+ blocksize);
+ return -EFSCORRUPTED;
}
/* check log blocks per segment */
if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid log blocks per segment (%u)\n",
- le32_to_cpu(raw_super->log_blocks_per_seg));
- return 1;
+ f2fs_info(sbi, "Invalid log blocks per segment (%u)",
+ le32_to_cpu(raw_super->log_blocks_per_seg));
+ return -EFSCORRUPTED;
}
/* Currently, support 512/1024/2048/4096 bytes sector size */
@@ -2221,18 +2557,17 @@
F2FS_MAX_LOG_SECTOR_SIZE ||
le32_to_cpu(raw_super->log_sectorsize) <
F2FS_MIN_LOG_SECTOR_SIZE) {
- f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
- le32_to_cpu(raw_super->log_sectorsize));
- return 1;
+ f2fs_info(sbi, "Invalid log sectorsize (%u)",
+ le32_to_cpu(raw_super->log_sectorsize));
+ return -EFSCORRUPTED;
}
if (le32_to_cpu(raw_super->log_sectors_per_block) +
le32_to_cpu(raw_super->log_sectorsize) !=
F2FS_MAX_LOG_SECTOR_SIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid log sectors per block(%u) log sectorsize(%u)",
- le32_to_cpu(raw_super->log_sectors_per_block),
- le32_to_cpu(raw_super->log_sectorsize));
- return 1;
+ f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)",
+ le32_to_cpu(raw_super->log_sectors_per_block),
+ le32_to_cpu(raw_super->log_sectorsize));
+ return -EFSCORRUPTED;
}
segment_count = le32_to_cpu(raw_super->segment_count);
@@ -2245,77 +2580,68 @@
if (segment_count > F2FS_MAX_SEGMENT ||
segment_count < F2FS_MIN_SEGMENTS) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid segment count (%u)",
- segment_count);
- return 1;
+ f2fs_info(sbi, "Invalid segment count (%u)", segment_count);
+ return -EFSCORRUPTED;
}
if (total_sections > segment_count ||
total_sections < F2FS_MIN_SEGMENTS ||
segs_per_sec > segment_count || !segs_per_sec) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid segment/section count (%u, %u x %u)",
- segment_count, total_sections, segs_per_sec);
- return 1;
+ f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)",
+ segment_count, total_sections, segs_per_sec);
+ return -EFSCORRUPTED;
}
if ((segment_count / segs_per_sec) < total_sections) {
- f2fs_msg(sb, KERN_INFO,
- "Small segment_count (%u < %u * %u)",
- segment_count, segs_per_sec, total_sections);
- return 1;
+ f2fs_info(sbi, "Small segment_count (%u < %u * %u)",
+ segment_count, segs_per_sec, total_sections);
+ return -EFSCORRUPTED;
}
- if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong segment_count / block_count (%u > %u)",
- segment_count, le32_to_cpu(raw_super->block_count));
- return 1;
+ if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) {
+ f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)",
+ segment_count, le64_to_cpu(raw_super->block_count));
+ return -EFSCORRUPTED;
}
if (secs_per_zone > total_sections || !secs_per_zone) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong secs_per_zone / total_sections (%u, %u)",
- secs_per_zone, total_sections);
- return 1;
+ f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)",
+ secs_per_zone, total_sections);
+ return -EFSCORRUPTED;
}
if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION ||
raw_super->hot_ext_count > F2FS_MAX_EXTENSION ||
(le32_to_cpu(raw_super->extension_count) +
raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) {
- f2fs_msg(sb, KERN_INFO,
- "Corrupted extension count (%u + %u > %u)",
- le32_to_cpu(raw_super->extension_count),
- raw_super->hot_ext_count,
- F2FS_MAX_EXTENSION);
- return 1;
+ f2fs_info(sbi, "Corrupted extension count (%u + %u > %u)",
+ le32_to_cpu(raw_super->extension_count),
+ raw_super->hot_ext_count,
+ F2FS_MAX_EXTENSION);
+ return -EFSCORRUPTED;
}
if (le32_to_cpu(raw_super->cp_payload) >
(blocks_per_seg - F2FS_CP_PACKS)) {
- f2fs_msg(sb, KERN_INFO,
- "Insane cp_payload (%u > %u)",
- le32_to_cpu(raw_super->cp_payload),
- blocks_per_seg - F2FS_CP_PACKS);
- return 1;
+ f2fs_info(sbi, "Insane cp_payload (%u > %u)",
+ le32_to_cpu(raw_super->cp_payload),
+ blocks_per_seg - F2FS_CP_PACKS);
+ return -EFSCORRUPTED;
}
/* check reserved ino info */
if (le32_to_cpu(raw_super->node_ino) != 1 ||
le32_to_cpu(raw_super->meta_ino) != 2 ||
le32_to_cpu(raw_super->root_ino) != 3) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
- le32_to_cpu(raw_super->node_ino),
- le32_to_cpu(raw_super->meta_ino),
- le32_to_cpu(raw_super->root_ino));
- return 1;
+ f2fs_info(sbi, "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
+ le32_to_cpu(raw_super->node_ino),
+ le32_to_cpu(raw_super->meta_ino),
+ le32_to_cpu(raw_super->root_ino));
+ return -EFSCORRUPTED;
}
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
if (sanity_check_area_boundary(sbi, bh))
- return 1;
+ return -EFSCORRUPTED;
return 0;
}
@@ -2332,8 +2658,9 @@
unsigned int log_blocks_per_seg;
unsigned int segment_count_main;
unsigned int cp_pack_start_sum, cp_payload;
- block_t user_block_count;
- int i;
+ block_t user_block_count, valid_user_blocks;
+ block_t avail_node_count, valid_node_count;
+ int i, j;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -2352,8 +2679,7 @@
if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
ovp_segments == 0 || reserved_segments == 0)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong layout: check mkfs.f2fs version");
+ f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version");
return 1;
}
@@ -2362,8 +2688,23 @@
log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
if (!user_block_count || user_block_count >=
segment_count_main << log_blocks_per_seg) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong user_block_count: %u", user_block_count);
+ f2fs_err(sbi, "Wrong user_block_count: %u",
+ user_block_count);
+ return 1;
+ }
+
+ valid_user_blocks = le64_to_cpu(ckpt->valid_block_count);
+ if (valid_user_blocks > user_block_count) {
+ f2fs_err(sbi, "Wrong valid_user_blocks: %u, user_block_count: %u",
+ valid_user_blocks, user_block_count);
+ return 1;
+ }
+
+ valid_node_count = le32_to_cpu(ckpt->valid_node_count);
+ avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
+ if (valid_node_count > avail_node_count) {
+ f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u",
+ valid_node_count, avail_node_count);
return 1;
}
@@ -2374,11 +2715,40 @@
if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
return 1;
+ for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) {
+ if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
+ le32_to_cpu(ckpt->cur_node_segno[j])) {
+ f2fs_err(sbi, "Node segment (%u, %u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_node_segno[i]));
+ return 1;
+ }
+ }
}
for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
return 1;
+ for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) {
+ if (le32_to_cpu(ckpt->cur_data_segno[i]) ==
+ le32_to_cpu(ckpt->cur_data_segno[j])) {
+ f2fs_err(sbi, "Data segment (%u, %u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_data_segno[i]));
+ return 1;
+ }
+ }
+ }
+ for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
+ for (j = 0; j < NR_CURSEG_DATA_TYPE; j++) {
+ if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
+ le32_to_cpu(ckpt->cur_data_segno[j])) {
+ f2fs_err(sbi, "Node segment (%u) and Data segment (%u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_node_segno[i]));
+ return 1;
+ }
+ }
}
sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
@@ -2386,9 +2756,8 @@
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong bitmap size: sit: %u, nat:%u",
- sit_bitmap_size, nat_bitmap_size);
+ f2fs_err(sbi, "Wrong bitmap size: sit: %u, nat:%u",
+ sit_bitmap_size, nat_bitmap_size);
return 1;
}
@@ -2397,14 +2766,22 @@
if (cp_pack_start_sum < cp_payload + 1 ||
cp_pack_start_sum > blocks_per_seg - 1 -
NR_CURSEG_TYPE) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong cp_pack_start_sum: %u",
- cp_pack_start_sum);
+ f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
+ cp_pack_start_sum);
+ return 1;
+ }
+
+ if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) &&
+ le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) {
+ f2fs_warn(sbi, "using deprecated layout of large_nat_bitmap, "
+ "please run fsck v1.13.0 or higher to repair, chksum_offset: %u, "
+ "fixed with patch: \"f2fs-tools: relocate chksum_offset for large_nat_bitmap feature\"",
+ le32_to_cpu(ckpt->checksum_offset));
return 1;
}
if (unlikely(f2fs_cp_error(sbi))) {
- f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
+ f2fs_err(sbi, "A bug case: need to run fsck");
return 1;
}
return 0;
@@ -2413,7 +2790,7 @@
static void init_sb_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = sbi->raw_super;
- int i, j;
+ int i;
sbi->log_sectors_per_block =
le32_to_cpu(raw_super->log_sectors_per_block);
@@ -2431,11 +2808,19 @@
sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
sbi->cur_victim_sec = NULL_SECNO;
+ sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+ sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
+ sbi->migration_granularity = sbi->segs_per_sec;
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
+ sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
+ sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
+ sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
+ sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] =
+ DEF_UMOUNT_DISCARD_TIMEOUT;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
for (i = 0; i < NR_COUNT_TYPE; i++)
@@ -2446,9 +2831,6 @@
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
- for (i = 0; i < NR_PAGE_TYPE - 1; i++)
- for (j = HOT; j < NR_TEMP_TYPE; j++)
- mutex_init(&sbi->wio_mutex[i][j]);
init_rwsem(&sbi->io_order_lock);
spin_lock_init(&sbi->cp_lock);
@@ -2466,8 +2848,12 @@
if (err)
return err;
- return percpu_counter_init(&sbi->total_valid_inode_count, 0,
+ err = percpu_counter_init(&sbi->total_valid_inode_count, 0,
GFP_KERNEL);
+ if (err)
+ percpu_counter_destroy(&sbi->alloc_valid_block_count);
+
+ return err;
}
#ifdef CONFIG_BLK_DEV_ZONED
@@ -2481,7 +2867,7 @@
unsigned int n = 0;
int err = -EIO;
- if (!f2fs_sb_has_blkzoned(sbi->sb))
+ if (!f2fs_sb_has_blkzoned(sbi))
return 0;
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
@@ -2497,9 +2883,11 @@
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
- FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz,
- GFP_KERNEL);
- if (!FDEV(devi).blkz_type)
+ FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
+ BITS_TO_LONGS(FDEV(devi).nr_blkz)
+ * sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!FDEV(devi).blkz_seq)
return -ENOMEM;
#define F2FS_REPORT_NR_ZONES 4096
@@ -2515,9 +2903,7 @@
while (zones && sector < nr_sectors) {
nr_zones = F2FS_REPORT_NR_ZONES;
- err = blkdev_report_zones(bdev, sector,
- zones, &nr_zones,
- GFP_KERNEL);
+ err = blkdev_report_zones(bdev, sector, zones, &nr_zones);
if (err)
break;
if (!nr_zones) {
@@ -2526,13 +2912,14 @@
}
for (i = 0; i < nr_zones; i++) {
- FDEV(devi).blkz_type[n] = zones[i].type;
+ if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
+ set_bit(n, FDEV(devi).blkz_seq);
sector += zones[i].len;
n++;
}
}
- kfree(zones);
+ kvfree(zones);
return err;
}
@@ -2561,18 +2948,17 @@
for (block = 0; block < 2; block++) {
bh = sb_bread(sb, block);
if (!bh) {
- f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
- block + 1);
+ f2fs_err(sbi, "Unable to read %dth superblock",
+ block + 1);
err = -EIO;
continue;
}
/* sanity checking of raw super */
- if (sanity_check_raw_super(sbi, bh)) {
- f2fs_msg(sb, KERN_ERR,
- "Can't find valid F2FS filesystem in %dth superblock",
- block + 1);
- err = -EINVAL;
+ err = sanity_check_raw_super(sbi, bh);
+ if (err) {
+ f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
+ block + 1);
brelse(bh);
continue;
}
@@ -2592,7 +2978,7 @@
/* No valid superblock */
if (!*raw_super)
- kfree(super);
+ kvfree(super);
else
err = 0;
@@ -2602,6 +2988,7 @@
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
{
struct buffer_head *bh;
+ __u32 crc = 0;
int err;
if ((recover && f2fs_readonly(sbi->sb)) ||
@@ -2610,6 +2997,13 @@
return -EROFS;
}
+ /* we should update superblock crc here */
+ if (!recover && f2fs_sb_has_sb_chksum(sbi)) {
+ crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi),
+ offsetof(struct f2fs_super_block, crc));
+ F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc);
+ }
+
/* write back-up superblock first */
bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1);
if (!bh)
@@ -2692,37 +3086,80 @@
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
- !f2fs_sb_has_blkzoned(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Zoned block device feature not enabled\n");
+ !f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Zoned block device feature not enabled\n");
return -EINVAL;
}
if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
if (init_blkz_info(sbi, i)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Failed to initialize F2FS blkzone information");
+ f2fs_err(sbi, "Failed to initialize F2FS blkzone information");
return -EINVAL;
}
if (max_devices == 1)
break;
- f2fs_msg(sbi->sb, KERN_INFO,
- "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
- i, FDEV(i).path,
- FDEV(i).total_segments,
- FDEV(i).start_blk, FDEV(i).end_blk,
- bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
- "Host-aware" : "Host-managed");
+ f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
+ i, FDEV(i).path,
+ FDEV(i).total_segments,
+ FDEV(i).start_blk, FDEV(i).end_blk,
+ bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
+ "Host-aware" : "Host-managed");
continue;
}
#endif
- f2fs_msg(sbi->sb, KERN_INFO,
- "Mount Device [%2d]: %20s, %8u, %8x - %8x",
- i, FDEV(i).path,
- FDEV(i).total_segments,
- FDEV(i).start_blk, FDEV(i).end_blk);
+ f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x",
+ i, FDEV(i).path,
+ FDEV(i).total_segments,
+ FDEV(i).start_blk, FDEV(i).end_blk);
}
- f2fs_msg(sbi->sb, KERN_INFO,
- "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
+ f2fs_info(sbi,
+ "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
+ return 0;
+}
+
+static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_UNICODE
+ if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) {
+ const struct f2fs_sb_encodings *encoding_info;
+ struct unicode_map *encoding;
+ __u16 encoding_flags;
+
+ if (f2fs_sb_has_encrypt(sbi)) {
+ f2fs_err(sbi,
+ "Can't mount with encoding and encryption");
+ return -EINVAL;
+ }
+
+ if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
+ &encoding_flags)) {
+ f2fs_err(sbi,
+ "Encoding requested by superblock is unknown");
+ return -EINVAL;
+ }
+
+ encoding = utf8_load(encoding_info->version);
+ if (IS_ERR(encoding)) {
+ f2fs_err(sbi,
+ "can't mount with superblock charset: %s-%s "
+ "not supported by the kernel. flags: 0x%x.",
+ encoding_info->name, encoding_info->version,
+ encoding_flags);
+ return PTR_ERR(encoding);
+ }
+ f2fs_info(sbi, "Using encoding defined by superblock: "
+ "%s-%s with flags 0x%hx", encoding_info->name,
+ encoding_info->version?:"\b", encoding_flags);
+
+ sbi->s_encoding = encoding;
+ sbi->s_encoding_flags = encoding_flags;
+ sbi->sb->s_d_op = &f2fs_dentry_ops;
+ }
+#else
+ if (f2fs_sb_has_casefold(sbi)) {
+ f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
+ return -EINVAL;
+ }
+#endif
return 0;
}
@@ -2746,10 +3183,11 @@
struct f2fs_super_block *raw_super;
struct inode *root;
int err;
- bool retry = true, need_fsck = false;
+ bool skip_recovery = false, need_fsck = false;
char *options = NULL;
int recovery, i, valid_super_block;
struct curseg_info *seg_i;
+ int retry_cnt = 1;
try_onemore:
err = -EINVAL;
@@ -2767,7 +3205,7 @@
/* Load the checksum driver */
sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(sbi->s_chksum_driver)) {
- f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver.");
+ f2fs_err(sbi, "Cannot load crc32 driver.");
err = PTR_ERR(sbi->s_chksum_driver);
sbi->s_chksum_driver = NULL;
goto free_sbi;
@@ -2775,7 +3213,7 @@
/* set a block size */
if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
- f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
+ f2fs_err(sbi, "unable to set blocksize");
goto free_sbi;
}
@@ -2788,7 +3226,7 @@
sbi->raw_super = raw_super;
/* precompute checksum seed for metadata */
- if (f2fs_sb_has_inode_chksum(sb))
+ if (f2fs_sb_has_inode_chksum(sbi))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
sizeof(raw_super->uuid));
@@ -2798,9 +3236,8 @@
* devices, but mandatory for host-managed zoned block devices.
*/
#ifndef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sb)) {
- f2fs_msg(sb, KERN_ERR,
- "Zoned block device support is not enabled\n");
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Zoned block device support is not enabled");
err = -EOPNOTSUPP;
goto free_sb_buf;
}
@@ -2821,17 +3258,17 @@
sb->s_maxbytes = sbi->max_file_blocks <<
le32_to_cpu(raw_super->log_blocksize);
sb->s_max_links = F2FS_LINK_MAX;
- get_random_bytes(&sbi->s_next_generation, sizeof(u32));
+
+ err = f2fs_setup_casefold(sbi);
+ if (err)
+ goto free_options;
#ifdef CONFIG_QUOTA
sb->dq_op = &f2fs_quota_operations;
- if (f2fs_sb_has_quota_ino(sb))
- sb->s_qcop = &dquot_quotactl_sysfile_ops;
- else
- sb->s_qcop = &f2fs_quotactl_ops;
+ sb->s_qcop = &f2fs_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
- if (f2fs_sb_has_quota_ino(sbi->sb)) {
+ if (f2fs_sb_has_quota_ino(sbi)) {
for (i = 0; i < MAXQUOTAS; i++) {
if (f2fs_qf_ino(sbi->sb, i))
sbi->nquota_files++;
@@ -2840,9 +3277,12 @@
#endif
sb->s_op = &f2fs_sops;
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
sb->s_cop = &f2fs_cryptops;
#endif
+#ifdef CONFIG_FS_VERITY
+ sb->s_vop = &f2fs_verityops;
+#endif
sb->s_xattr = f2fs_xattr_handlers;
sb->s_export_op = &f2fs_export_ops;
sb->s_magic = F2FS_SUPER_MAGIC;
@@ -2857,6 +3297,7 @@
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
+ mutex_init(&sbi->resize_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
@@ -2879,7 +3320,7 @@
GFP_KERNEL);
if (!sbi->write_io[i]) {
err = -ENOMEM;
- goto free_options;
+ goto free_bio_info;
}
for (j = HOT; j < n; j++) {
@@ -2892,6 +3333,7 @@
}
init_rwsem(&sbi->cp_rwsem);
+ init_rwsem(&sbi->quota_sem);
init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
@@ -2899,7 +3341,7 @@
if (err)
goto free_bio_info;
- if (F2FS_IO_SIZE(sbi) > 1) {
+ if (F2FS_IO_ALIGNED(sbi)) {
sbi->write_io_dummy =
mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
if (!sbi->write_io_dummy) {
@@ -2911,21 +3353,31 @@
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
+ f2fs_err(sbi, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
goto free_io_dummy;
}
err = f2fs_get_valid_checkpoint(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
+ f2fs_err(sbi, "Failed to get valid F2FS checkpoint");
goto free_meta_inode;
}
+ if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_DISABLED_QUICK_FLAG)) {
+ set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
+ sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL;
+ }
+
+ if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FSCK_FLAG))
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+
/* Initialize device list */
err = f2fs_scan_devices(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR, "Failed to find devices");
+ f2fs_err(sbi, "Failed to find devices");
goto free_devices;
}
@@ -2945,6 +3397,7 @@
INIT_LIST_HEAD(&sbi->inode_list[i]);
spin_lock_init(&sbi->inode_lock[i]);
}
+ mutex_init(&sbi->flush_lock);
f2fs_init_extent_cache_info(sbi);
@@ -2955,14 +3408,14 @@
/* setup f2fs internal modules */
err = f2fs_build_segment_manager(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to initialize F2FS segment manager");
+ f2fs_err(sbi, "Failed to initialize F2FS segment manager (%d)",
+ err);
goto free_sm;
}
err = f2fs_build_node_manager(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to initialize F2FS node manager");
+ f2fs_err(sbi, "Failed to initialize F2FS node manager (%d)",
+ err);
goto free_nm;
}
@@ -2980,36 +3433,36 @@
f2fs_build_gc_manager(sbi);
+ err = f2fs_build_stats(sbi);
+ if (err)
+ goto free_nm;
+
/* get an inode for node space */
sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
if (IS_ERR(sbi->node_inode)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
+ f2fs_err(sbi, "Failed to read node inode");
err = PTR_ERR(sbi->node_inode);
- goto free_nm;
+ goto free_stats;
}
- err = f2fs_build_stats(sbi);
- if (err)
- goto free_node_inode;
-
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
if (IS_ERR(root)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
+ f2fs_err(sbi, "Failed to read root inode");
err = PTR_ERR(root);
- goto free_stats;
+ goto free_node_inode;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks ||
!root->i_size || !root->i_nlink) {
iput(root);
err = -EINVAL;
- goto free_stats;
+ goto free_node_inode;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
if (!sb->s_root) {
err = -ENOMEM;
- goto free_root_inode;
+ goto free_node_inode;
}
err = f2fs_register_sysfs(sbi);
@@ -3018,13 +3471,10 @@
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount */
- if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
- if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Cannot turn on quotas: error %d", err);
- goto free_sysfs;
- }
+ if (err)
+ f2fs_err(sbi, "Cannot turn on quotas: error %d", err);
}
#endif
/* if there are nt orphan nodes free them */
@@ -3032,29 +3482,38 @@
if (err)
goto free_meta;
+ if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
+ goto reset_checkpoint;
+
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
/*
* mount should be failed, when device has readonly mode, and
* previous checkpoint was not done by clean system shutdown.
*/
- if (bdev_read_only(sb->s_bdev) &&
- !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
- err = -EROFS;
- goto free_meta;
+ if (f2fs_hw_is_readonly(sbi)) {
+ if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ err = -EROFS;
+ f2fs_err(sbi, "Need to recover fsync data, but write access unavailable");
+ goto free_meta;
+ }
+ f2fs_info(sbi, "write access unavailable, skipping recovery");
+ goto reset_checkpoint;
}
if (need_fsck)
set_sbi_flag(sbi, SBI_NEED_FSCK);
- if (!retry)
- goto skip_recovery;
+ if (skip_recovery)
+ goto reset_checkpoint;
err = f2fs_recover_fsync_data(sbi, false);
if (err < 0) {
+ if (err != -ENOMEM)
+ skip_recovery = true;
need_fsck = true;
- f2fs_msg(sb, KERN_ERR,
- "Cannot recover all fsync data errno=%d", err);
+ f2fs_err(sbi, "Cannot recover all fsync data errno=%d",
+ err);
goto free_meta;
}
} else {
@@ -3062,15 +3521,22 @@
if (!f2fs_readonly(sb) && err > 0) {
err = -EINVAL;
- f2fs_msg(sb, KERN_ERR,
- "Need to recover fsync data");
+ f2fs_err(sbi, "Need to recover fsync data");
goto free_meta;
}
}
-skip_recovery:
+reset_checkpoint:
/* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
+ if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+ err = f2fs_disable_checkpoint(sbi);
+ if (err)
+ goto sync_free_meta;
+ } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
+ f2fs_enable_checkpoint(sbi);
+ }
+
/*
* If filesystem is not mounted as read-only then
* do start the gc_thread.
@@ -3079,34 +3545,39 @@
/* After POR, we can run background GC thread.*/
err = f2fs_start_gc_thread(sbi);
if (err)
- goto free_meta;
+ goto sync_free_meta;
}
- kfree(options);
+ kvfree(options);
/* recover broken superblock */
if (recovery) {
err = f2fs_commit_super(sbi, true);
- f2fs_msg(sb, KERN_INFO,
- "Try to recover %dth superblock, ret: %d",
- sbi->valid_super_block ? 1 : 2, err);
+ f2fs_info(sbi, "Try to recover %dth superblock, ret: %d",
+ sbi->valid_super_block ? 1 : 2, err);
}
f2fs_join_shrinker(sbi);
f2fs_tuning_parameters(sbi);
- f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
- cur_cp_version(F2FS_CKPT(sbi)));
+ f2fs_notice(sbi, "Mounted with checkpoint version = %llx",
+ cur_cp_version(F2FS_CKPT(sbi)));
f2fs_update_time(sbi, CP_TIME);
f2fs_update_time(sbi, REQ_TIME);
+ clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
return 0;
+sync_free_meta:
+ /* safe to flush all the data */
+ sync_filesystem(sbi->sb);
+ retry_cnt = 0;
+
free_meta:
#ifdef CONFIG_QUOTA
- if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb))
+ f2fs_truncate_quota_inode_pages(sb);
+ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb))
f2fs_quota_off_umount(sbi->sb);
#endif
- f2fs_sync_inode_meta(sbi);
/*
* Some dirty meta pages can be produced by f2fs_recover_orphan_inodes()
* failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
@@ -3114,52 +3585,57 @@
* falls into an infinite loop in f2fs_sync_meta_pages().
*/
truncate_inode_pages_final(META_MAPPING(sbi));
-#ifdef CONFIG_QUOTA
-free_sysfs:
-#endif
+ /* evict some inodes being cached by GC */
+ evict_inodes(sb);
f2fs_unregister_sysfs(sbi);
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
-free_stats:
- f2fs_destroy_stats(sbi);
free_node_inode:
f2fs_release_ino_entry(sbi, true);
truncate_inode_pages_final(NODE_MAPPING(sbi));
iput(sbi->node_inode);
+ sbi->node_inode = NULL;
+free_stats:
+ f2fs_destroy_stats(sbi);
free_nm:
f2fs_destroy_node_manager(sbi);
free_sm:
f2fs_destroy_segment_manager(sbi);
free_devices:
destroy_device_list(sbi);
- kfree(sbi->ckpt);
+ kvfree(sbi->ckpt);
free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
+ sbi->meta_inode = NULL;
free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
free_percpu:
destroy_percpu_info(sbi);
free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
- kfree(sbi->write_io[i]);
+ kvfree(sbi->write_io[i]);
+
+#ifdef CONFIG_UNICODE
+ utf8_unload(sbi->s_encoding);
+#endif
free_options:
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
- kfree(options);
+ kvfree(options);
free_sb_buf:
- kfree(raw_super);
+ kvfree(raw_super);
free_sbi:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
- kfree(sbi);
+ kvfree(sbi);
/* give only one another chance */
- if (retry) {
- retry = false;
+ if (retry_cnt > 0 && skip_recovery) {
+ retry_cnt--;
shrink_dcache_sb(sb);
goto try_onemore;
}
@@ -3260,9 +3736,7 @@
err = register_filesystem(&f2fs_fs_type);
if (err)
goto free_shrinker;
- err = f2fs_create_root_stats();
- if (err)
- goto free_filesystem;
+ f2fs_create_root_stats();
err = f2fs_init_post_read_processing();
if (err)
goto free_root_stats;
@@ -3270,7 +3744,6 @@
free_root_stats:
f2fs_destroy_root_stats();
-free_filesystem:
unregister_filesystem(&f2fs_fs_type);
free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 81c0e53..b558b64 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -1,18 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs sysfs interface
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
* Copyright (c) 2017 Chao Yu <chao@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/compiler.h>
#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
+#include <linux/unicode.h>
#include "f2fs.h"
#include "segment.h"
@@ -71,6 +69,33 @@
(unsigned long long)(dirty_segments(sbi)));
}
+static ssize_t unusable_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ block_t unusable;
+
+ if (test_opt(sbi, DISABLE_CHECKPOINT))
+ unusable = sbi->unusable_block_count;
+ else
+ unusable = f2fs_get_unusable_blocks(sbi);
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)unusable);
+}
+
+static ssize_t encoding_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+#ifdef CONFIG_UNICODE
+ if (f2fs_sb_has_casefold(sbi))
+ return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
+ sbi->s_encoding->charset,
+ (sbi->s_encoding->version >> 16) & 0xff,
+ (sbi->s_encoding->version >> 8) & 0xff,
+ sbi->s_encoding->version & 0xff);
+#endif
+ return snprintf(buf, PAGE_SIZE, "(none)");
+}
+
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -93,33 +118,42 @@
if (!sb->s_bdev->bd_part)
return snprintf(buf, PAGE_SIZE, "0\n");
- if (f2fs_sb_has_encrypt(sb))
+ if (f2fs_sb_has_encrypt(sbi))
len += snprintf(buf, PAGE_SIZE - len, "%s",
"encryption");
- if (f2fs_sb_has_blkzoned(sb))
+ if (f2fs_sb_has_blkzoned(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "blkzoned");
- if (f2fs_sb_has_extra_attr(sb))
+ if (f2fs_sb_has_extra_attr(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "extra_attr");
- if (f2fs_sb_has_project_quota(sb))
+ if (f2fs_sb_has_project_quota(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "projquota");
- if (f2fs_sb_has_inode_chksum(sb))
+ if (f2fs_sb_has_inode_chksum(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_checksum");
- if (f2fs_sb_has_flexible_inline_xattr(sb))
+ if (f2fs_sb_has_flexible_inline_xattr(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "flexible_inline_xattr");
- if (f2fs_sb_has_quota_ino(sb))
+ if (f2fs_sb_has_quota_ino(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "quota_ino");
- if (f2fs_sb_has_inode_crtime(sb))
+ if (f2fs_sb_has_inode_crtime(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_crtime");
- if (f2fs_sb_has_lost_found(sb))
+ if (f2fs_sb_has_lost_found(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "lost_found");
+ if (f2fs_sb_has_verity(sbi))
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "verity");
+ if (f2fs_sb_has_sb_chksum(sbi))
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "sb_checksum");
+ if (f2fs_sb_has_casefold(sbi))
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "casefold");
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
return len;
}
@@ -222,6 +256,8 @@
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
return -EINVAL;
+ if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX)
+ return -EINVAL;
#endif
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
@@ -246,6 +282,11 @@
return count;
}
+ if (!strcmp(a->attr.name, "migration_granularity")) {
+ if (t == 0 || t > sbi->segs_per_sec)
+ return -EINVAL;
+ }
+
if (!strcmp(a->attr.name, "trim_sections"))
return -EINVAL;
@@ -273,10 +314,16 @@
return count;
}
- *ui = t;
- if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
- f2fs_reset_iostat(sbi);
+ if (!strcmp(a->attr.name, "iostat_enable")) {
+ sbi->iostat_enable = !!t;
+ if (!sbi->iostat_enable)
+ f2fs_reset_iostat(sbi);
+ return count;
+ }
+
+ *ui = (unsigned int)t;
+
return count;
}
@@ -337,6 +384,9 @@
FEAT_QUOTA_INO,
FEAT_INODE_CRTIME,
FEAT_LOST_FOUND,
+ FEAT_VERITY,
+ FEAT_SB_CHECKSUM,
+ FEAT_CASEFOLD,
};
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -353,6 +403,9 @@
case FEAT_QUOTA_INO:
case FEAT_INODE_CRTIME:
case FEAT_LOST_FOUND:
+ case FEAT_VERITY:
+ case FEAT_SB_CHECKSUM:
+ case FEAT_CASEFOLD:
return snprintf(buf, PAGE_SIZE, "supported\n");
}
return 0;
@@ -404,9 +457,15 @@
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
+ interval_time[DISCARD_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
+ umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
@@ -419,8 +478,10 @@
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
+F2FS_GENERAL_RO_ATTR(unusable);
+F2FS_GENERAL_RO_ATTR(encoding);
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
#endif
#ifdef CONFIG_BLK_DEV_ZONED
@@ -434,6 +495,11 @@
F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
+#ifdef CONFIG_FS_VERITY
+F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY);
+#endif
+F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
+F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -454,12 +520,16 @@
ATTR_LIST(min_hot_blocks),
ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
+ ATTR_LIST(migration_granularity),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
ATTR_LIST(dirty_nats_ratio),
ATTR_LIST(cp_interval),
ATTR_LIST(idle_interval),
+ ATTR_LIST(discard_idle_interval),
+ ATTR_LIST(gc_idle_interval),
+ ATTR_LIST(umount_discard_timeout),
ATTR_LIST(iostat_enable),
ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh),
@@ -469,15 +539,18 @@
ATTR_LIST(inject_type),
#endif
ATTR_LIST(dirty_segments),
+ ATTR_LIST(unusable),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(features),
ATTR_LIST(reserved_blocks),
ATTR_LIST(current_reserved_blocks),
+ ATTR_LIST(encoding),
NULL,
};
+ATTRIBUTE_GROUPS(f2fs);
static struct attribute *f2fs_feat_attrs[] = {
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
ATTR_LIST(encryption),
#endif
#ifdef CONFIG_BLK_DEV_ZONED
@@ -491,8 +564,14 @@
ATTR_LIST(quota_ino),
ATTR_LIST(inode_crtime),
ATTR_LIST(lost_found),
+#ifdef CONFIG_FS_VERITY
+ ATTR_LIST(verity),
+#endif
+ ATTR_LIST(sb_checksum),
+ ATTR_LIST(casefold),
NULL,
};
+ATTRIBUTE_GROUPS(f2fs_feat);
static const struct sysfs_ops f2fs_attr_ops = {
.show = f2fs_attr_show,
@@ -500,7 +579,7 @@
};
static struct kobj_type f2fs_sb_ktype = {
- .default_attrs = f2fs_attrs,
+ .default_groups = f2fs_groups,
.sysfs_ops = &f2fs_attr_ops,
.release = f2fs_sb_release,
};
@@ -514,7 +593,7 @@
};
static struct kobj_type f2fs_feat_ktype = {
- .default_attrs = f2fs_feat_attrs,
+ .default_groups = f2fs_feat_groups,
.sysfs_ops = &f2fs_attr_ops,
};
@@ -539,8 +618,7 @@
if ((i % 10) == 0)
seq_printf(seq, "%-10d", i);
- seq_printf(seq, "%d|%-3u", se->type,
- get_valid_blocks(sbi, i, false));
+ seq_printf(seq, "%d|%-3u", se->type, se->valid_blocks);
if ((i % 10) == 9 || i == (total_segs - 1))
seq_putc(seq, '\n');
else
@@ -566,8 +644,7 @@
struct seg_entry *se = get_seg_entry(sbi, i);
seq_printf(seq, "%-10d", i);
- seq_printf(seq, "%d|%-3u|", se->type,
- get_valid_blocks(sbi, i, false));
+ seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks);
for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
seq_printf(seq, " %.2x", se->cur_valid_map[j]);
seq_putc(seq, '\n');
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index a1fcd00..d0ab533 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs IO tracer
*
* Copyright (c) 2014 Motorola Mobility
* Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -17,7 +14,7 @@
#include "trace.h"
static RADIX_TREE(pids, GFP_ATOMIC);
-static struct mutex pids_lock;
+static spinlock_t pids_lock;
static struct last_io_info last_io;
static inline void __print_last_io(void)
@@ -61,23 +58,29 @@
set_page_private(page, (unsigned long)pid);
+retry:
if (radix_tree_preload(GFP_NOFS))
return;
- mutex_lock(&pids_lock);
+ spin_lock(&pids_lock);
p = radix_tree_lookup(&pids, pid);
if (p == current)
goto out;
if (p)
radix_tree_delete(&pids, pid);
- f2fs_radix_tree_insert(&pids, pid, current);
+ if (radix_tree_insert(&pids, pid, current)) {
+ spin_unlock(&pids_lock);
+ radix_tree_preload_end();
+ cond_resched();
+ goto retry;
+ }
trace_printk("%3x:%3x %4x %-16s\n",
MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
pid, current->comm);
out:
- mutex_unlock(&pids_lock);
+ spin_unlock(&pids_lock);
radix_tree_preload_end();
}
@@ -122,7 +125,7 @@
void f2fs_build_trace_ios(void)
{
- mutex_init(&pids_lock);
+ spin_lock_init(&pids_lock);
}
#define PIDVEC_SIZE 128
@@ -150,7 +153,7 @@
pid_t next_pid = 0;
unsigned int found;
- mutex_lock(&pids_lock);
+ spin_lock(&pids_lock);
while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
unsigned idx;
@@ -158,5 +161,5 @@
for (idx = 0; idx < found; idx++)
radix_tree_delete(&pids, pid[idx]);
}
- mutex_unlock(&pids_lock);
+ spin_unlock(&pids_lock);
}
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
index 67db24a..e8075fc 100644
--- a/fs/f2fs/trace.h
+++ b/fs/f2fs/trace.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* f2fs IO tracer
*
* Copyright (c) 2014 Motorola Mobility
* Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef __F2FS_TRACE_H__
#define __F2FS_TRACE_H__
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
new file mode 100644
index 0000000..a401ef7
--- /dev/null
+++ b/fs/f2fs/verity.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/f2fs/verity.c: fs-verity support for f2fs
+ *
+ * Copyright 2019 Google LLC
+ */
+
+/*
+ * Implementation of fsverity_operations for f2fs.
+ *
+ * Like ext4, f2fs stores the verity metadata (Merkle tree and
+ * fsverity_descriptor) past the end of the file, starting at the first 64K
+ * boundary beyond i_size. This approach works because (a) verity files are
+ * readonly, and (b) pages fully beyond i_size aren't visible to userspace but
+ * can be read/written internally by f2fs with only some relatively small
+ * changes to f2fs. Extended attributes cannot be used because (a) f2fs limits
+ * the total size of an inode's xattr entries to 4096 bytes, which wouldn't be
+ * enough for even a single Merkle tree block, and (b) f2fs encryption doesn't
+ * encrypt xattrs, yet the verity metadata *must* be encrypted when the file is
+ * because it contains hashes of the plaintext data.
+ *
+ * Using a 64K boundary rather than a 4K one keeps things ready for
+ * architectures with 64K pages, and it doesn't necessarily waste space on-disk
+ * since there can be a hole between i_size and the start of the Merkle tree.
+ */
+
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+#include "xattr.h"
+
+static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode)
+{
+ return round_up(inode->i_size, 65536);
+}
+
+/*
+ * Read some verity metadata from the inode. __vfs_read() can't be used because
+ * we need to read beyond i_size.
+ */
+static int pagecache_read(struct inode *inode, void *buf, size_t count,
+ loff_t pos)
+{
+ while (count) {
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+ void *addr;
+
+ page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
+ NULL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ addr = kmap_atomic(page);
+ memcpy(buf, addr + offset_in_page(pos), n);
+ kunmap_atomic(addr);
+
+ put_page(page);
+
+ buf += n;
+ pos += n;
+ count -= n;
+ }
+ return 0;
+}
+
+/*
+ * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
+ * kernel_write() can't be used because the file descriptor is readonly.
+ */
+static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+ loff_t pos)
+{
+ if (pos + count > inode->i_sb->s_maxbytes)
+ return -EFBIG;
+
+ while (count) {
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+ void *fsdata;
+ void *addr;
+ int res;
+
+ res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
+ &page, &fsdata);
+ if (res)
+ return res;
+
+ addr = kmap_atomic(page);
+ memcpy(addr + offset_in_page(pos), buf, n);
+ kunmap_atomic(addr);
+
+ res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
+ page, fsdata);
+ if (res < 0)
+ return res;
+ if (res != n)
+ return -EIO;
+
+ buf += n;
+ pos += n;
+ count -= n;
+ }
+ return 0;
+}
+
+/*
+ * Format of f2fs verity xattr. This points to the location of the verity
+ * descriptor within the file data rather than containing it directly because
+ * the verity descriptor *must* be encrypted when f2fs encryption is used. But,
+ * f2fs encryption does not encrypt xattrs.
+ */
+struct fsverity_descriptor_location {
+ __le32 version;
+ __le32 size;
+ __le64 pos;
+};
+
+static int f2fs_begin_enable_verity(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+ int err;
+
+ if (f2fs_verity_in_progress(inode))
+ return -EBUSY;
+
+ if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
+ return -EOPNOTSUPP;
+
+ /*
+ * Since the file was opened readonly, we have to initialize the quotas
+ * here and not rely on ->open() doing it. This must be done before
+ * evicting the inline data.
+ */
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
+
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+
+ set_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+ return 0;
+}
+
+static int f2fs_end_enable_verity(struct file *filp, const void *desc,
+ size_t desc_size, u64 merkle_tree_size)
+{
+ struct inode *inode = file_inode(filp);
+ u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size;
+ struct fsverity_descriptor_location dloc = {
+ .version = cpu_to_le32(1),
+ .size = cpu_to_le32(desc_size),
+ .pos = cpu_to_le64(desc_pos),
+ };
+ int err = 0;
+
+ if (desc != NULL) {
+ /* Succeeded; write the verity descriptor. */
+ err = pagecache_write(inode, desc, desc_size, desc_pos);
+
+ /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */
+ if (!err)
+ err = filemap_write_and_wait(inode->i_mapping);
+ }
+
+ /* If we failed, truncate anything we wrote past i_size. */
+ if (desc == NULL || err)
+ f2fs_truncate(inode);
+
+ clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+
+ if (desc != NULL && !err) {
+ err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
+ F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
+ NULL, XATTR_CREATE);
+ if (!err) {
+ file_set_verity(inode);
+ f2fs_set_inode_flags(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
+ }
+ }
+ return err;
+}
+
+static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
+ size_t buf_size)
+{
+ struct fsverity_descriptor_location dloc;
+ int res;
+ u32 size;
+ u64 pos;
+
+ /* Get the descriptor location */
+ res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_VERITY,
+ F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL);
+ if (res < 0 && res != -ERANGE)
+ return res;
+ if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) {
+ f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format");
+ return -EINVAL;
+ }
+ size = le32_to_cpu(dloc.size);
+ pos = le64_to_cpu(dloc.pos);
+
+ /* Get the descriptor */
+ if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes ||
+ pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) {
+ f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr");
+ return -EFSCORRUPTED;
+ }
+ if (buf_size) {
+ if (size > buf_size)
+ return -ERANGE;
+ res = pagecache_read(inode, buf, size, pos);
+ if (res)
+ return res;
+ }
+ return size;
+}
+
+static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
+ pgoff_t index)
+{
+ index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
+
+ return read_mapping_page(inode->i_mapping, index, NULL);
+}
+
+static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
+ u64 index, int log_blocksize)
+{
+ loff_t pos = f2fs_verity_metadata_pos(inode) + (index << log_blocksize);
+
+ return pagecache_write(inode, buf, 1 << log_blocksize, pos);
+}
+
+const struct fsverity_operations f2fs_verityops = {
+ .begin_enable_verity = f2fs_begin_enable_verity,
+ .end_enable_verity = f2fs_end_enable_verity,
+ .get_verity_descriptor = f2fs_get_verity_descriptor,
+ .read_merkle_tree_page = f2fs_read_merkle_tree_page,
+ .write_merkle_tree_block = f2fs_write_merkle_tree_block,
+};
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 77a010e..181900a 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/xattr.c
*
@@ -13,10 +14,6 @@
* suggestion of Luka Renko <luka.renko@hermes.si>.
* xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
* Red Hat Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/rwsem.h>
#include <linux/f2fs_fs.h>
@@ -24,6 +21,7 @@
#include <linux/posix_acl_xattr.h>
#include "f2fs.h"
#include "xattr.h"
+#include "segment.h"
static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
@@ -205,12 +203,17 @@
return handler;
}
-static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
- size_t len, const char *name)
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr,
+ void *last_base_addr, int index,
+ size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
+ if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
+ (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr)
+ return NULL;
+
if (entry->e_name_index != index)
continue;
if (entry->e_name_len != len)
@@ -227,11 +230,11 @@
{
struct f2fs_xattr_entry *entry;
unsigned int inline_size = inline_xattr_size(inode);
+ void *max_addr = base_addr + inline_size;
list_for_each_xattr(entry, base_addr) {
- if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
- (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
- base_addr + inline_size) {
+ if ((void *)entry + sizeof(__u32) > max_addr ||
+ (void *)XATTR_NEXT_ENTRY(entry) > max_addr) {
*last_addr = entry;
return NULL;
}
@@ -242,6 +245,13 @@
if (!memcmp(entry->e_name, name, len))
break;
}
+
+ /* inline xattr header or entry across max inline xattr size */
+ if (IS_XATTR_LAST_ENTRY(entry) &&
+ (void *)entry + sizeof(__u32) > max_addr) {
+ *last_addr = entry;
+ return NULL;
+ }
return entry;
}
@@ -291,22 +301,24 @@
static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
unsigned int index, unsigned int len,
const char *name, struct f2fs_xattr_entry **xe,
- void **base_addr)
+ void **base_addr, int *base_size)
{
- void *cur_addr, *txattr_addr, *last_addr = NULL;
+ void *cur_addr, *txattr_addr, *last_txattr_addr;
+ void *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
- unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
unsigned int inline_size = inline_xattr_size(inode);
int err = 0;
- if (!size && !inline_size)
+ if (!xnid && !inline_size)
return -ENODATA;
- txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode),
- inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS);
+ *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE;
+ txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS);
if (!txattr_addr)
return -ENOMEM;
+ last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode);
+
/* read from inline xattr */
if (inline_size) {
err = read_inline_xattr(inode, ipage, txattr_addr);
@@ -315,8 +327,10 @@
*xe = __find_inline_xattr(inode, txattr_addr, &last_addr,
index, len, name);
- if (*xe)
+ if (*xe) {
+ *base_size = inline_size;
goto check;
+ }
}
/* read from xattr node block */
@@ -331,7 +345,14 @@
else
cur_addr = txattr_addr;
- *xe = __find_xattr(cur_addr, index, len, name);
+ *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
+ if (!*xe) {
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ inode->i_ino);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
check:
if (IS_XATTR_LAST_ENTRY(*xe)) {
err = -ENODATA;
@@ -341,7 +362,7 @@
*base_addr = txattr_addr;
return 0;
out:
- kzfree(txattr_addr);
+ kvfree(txattr_addr);
return err;
}
@@ -384,7 +405,7 @@
*base_addr = txattr_addr;
return 0;
fail:
- kzfree(txattr_addr);
+ kvfree(txattr_addr);
return err;
}
@@ -418,7 +439,7 @@
}
f2fs_wait_on_page_writeback(ipage ? ipage : in_page,
- NODE, true);
+ NODE, true, true);
/* no need to use xattr node block */
if (hsize <= inline_size) {
err = f2fs_truncate_xattr_node(inode);
@@ -442,7 +463,7 @@
goto in_page_out;
}
f2fs_bug_on(sbi, new_nid);
- f2fs_wait_on_page_writeback(xpage, NODE, true);
+ f2fs_wait_on_page_writeback(xpage, NODE, true, true);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -477,6 +498,7 @@
int error = 0;
unsigned int size, len;
void *base_addr = NULL;
+ int base_size;
if (name == NULL)
return -EINVAL;
@@ -487,7 +509,7 @@
down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
- &entry, &base_addr);
+ &entry, &base_addr, &base_size);
up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
@@ -501,11 +523,16 @@
if (buffer) {
char *pval = entry->e_name + entry->e_name_len;
+
+ if (base_size - (pval - (char *)base_addr) < size) {
+ error = -ERANGE;
+ goto out;
+ }
memcpy(buffer, pval, size);
}
error = size;
out:
- kzfree(base_addr);
+ kvfree(base_addr);
return error;
}
@@ -533,7 +560,7 @@
if (!handler || (handler->list && !handler->list(dentry)))
continue;
- prefix = handler->prefix ?: handler->name;
+ prefix = xattr_prefix(handler);
prefix_len = strlen(prefix);
size = prefix_len + entry->e_name_len + 1;
if (buffer) {
@@ -551,7 +578,7 @@
}
error = buffer_size - rest;
cleanup:
- kzfree(base_addr);
+ kvfree(base_addr);
return error;
}
@@ -569,7 +596,8 @@
struct page *ipage, int flags)
{
struct f2fs_xattr_entry *here, *last;
- void *base_addr;
+ void *base_addr, *last_base_addr;
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
int found, newsize;
size_t len;
__u32 new_hsize;
@@ -593,8 +621,17 @@
if (error)
return error;
+ last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode);
+
/* find entry with wanted name. */
- here = __find_xattr(base_addr, index, len, name);
+ here = __find_xattr(base_addr, last_base_addr, index, len, name);
+ if (!here) {
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ inode->i_ino);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ error = -EFSCORRUPTED;
+ goto exit;
+ }
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
@@ -682,7 +719,7 @@
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
- kzfree(base_addr);
+ kvfree(base_addr);
return error;
}
@@ -693,6 +730,11 @@
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
+
err = dquot_initialize(inode);
if (err)
return err;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index dbcd1d1..de0c600 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/xattr.h
*
@@ -9,10 +10,6 @@
* On-disk format of extended attributes for the ext2 filesystem.
*
* (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef __F2FS_XATTR_H__
#define __F2FS_XATTR_H__
@@ -37,8 +34,10 @@
#define F2FS_XATTR_INDEX_ADVISE 7
/* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */
#define F2FS_XATTR_INDEX_ENCRYPTION 9
+#define F2FS_XATTR_INDEX_VERITY 11
#define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c"
+#define F2FS_XATTR_NAME_VERITY "v"
struct f2fs_xattr_header {
__le32 h_magic; /* magic number for identification */
@@ -74,6 +73,8 @@
entry = XATTR_NEXT_ENTRY(entry))
#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define XATTR_PADDING_SIZE (sizeof(__u32))
+#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \
+ (inline_xattr_size(i)))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
VALID_XATTR_BLOCK_SIZE)
@@ -81,6 +82,12 @@
sizeof(struct f2fs_xattr_header) - \
sizeof(struct f2fs_xattr_entry))
+#define MAX_INLINE_XATTR_SIZE \
+ (DEF_ADDRS_PER_INODE - \
+ F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \
+ DEF_INLINE_RESERVED_SIZE - \
+ MIN_INLINE_DENTRY_SIZE / sizeof(__le32))
+
/*
* On-disk structure of f2fs_xattr
* We use inline xattrs space + 1 block for xattr.