Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 652fd2e..d13c5c6 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -6,6 +6,7 @@
select CRYPTO
select CRYPTO_CRC32
select F2FS_FS_XATTR if FS_ENCRYPTION
+ select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
help
F2FS is based on Log-structured File System (LFS), which supports
versatile "flash-friendly" features. The design has been focused on
@@ -21,7 +22,7 @@
config F2FS_STAT_FS
bool "F2FS Status Information"
- depends on F2FS_FS && DEBUG_FS
+ depends on F2FS_FS
default y
help
/sys/kernel/debug/f2fs/ contains information about all the partitions
@@ -92,3 +93,47 @@
Test F2FS to inject faults such as ENOMEM, ENOSPC, and so on.
If unsure, say N.
+
+config F2FS_FS_COMPRESSION
+ bool "F2FS compression feature"
+ depends on F2FS_FS
+ help
+ Enable filesystem-level compression on f2fs regular files,
+ multiple back-end compression algorithms are supported.
+
+config F2FS_FS_LZO
+ bool "LZO compression support"
+ depends on F2FS_FS_COMPRESSION
+ select LZO_COMPRESS
+ select LZO_DECOMPRESS
+ default y
+ help
+ Support LZO compress algorithm, if unsure, say Y.
+
+config F2FS_FS_LZ4
+ bool "LZ4 compression support"
+ depends on F2FS_FS_COMPRESSION
+ select LZ4_COMPRESS
+ select LZ4_DECOMPRESS
+ default y
+ help
+ Support LZ4 compress algorithm, if unsure, say Y.
+
+config F2FS_FS_ZSTD
+ bool "ZSTD compression support"
+ depends on F2FS_FS_COMPRESSION
+ select ZSTD_COMPRESS
+ select ZSTD_DECOMPRESS
+ default y
+ help
+ Support ZSTD compress algorithm, if unsure, say Y.
+
+config F2FS_FS_LZORLE
+ bool "LZO-RLE compression support"
+ depends on F2FS_FS_COMPRESSION
+ depends on F2FS_FS_LZO
+ select LZO_COMPRESS
+ select LZO_DECOMPRESS
+ default y
+ help
+ Support LZO-RLE compress algorithm, if unsure, say Y.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 2aaecc6..ee7316b 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -9,3 +9,4 @@
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
f2fs-$(CONFIG_FS_VERITY) += verity.o
+f2fs-$(CONFIG_F2FS_FS_COMPRESSION) += compress.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 217b290..3064135 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -160,7 +160,7 @@
return (void *)f2fs_acl;
fail:
- kvfree(f2fs_acl);
+ kfree(f2fs_acl);
return ERR_PTR(-EINVAL);
}
@@ -190,7 +190,7 @@
acl = NULL;
else
acl = ERR_PTR(retval);
- kvfree(value);
+ kfree(value);
return acl;
}
@@ -240,7 +240,7 @@
error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
- kvfree(value);
+ kfree(value);
if (!error)
set_cached_acl(inode, type, acl);
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index b96823c..124868c 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/acl.h
*
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f7d27cb..9bcd77d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -50,9 +50,6 @@
return page;
}
-/*
- * We guarantee no failure on the returned page.
- */
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
bool is_meta)
{
@@ -89,6 +86,8 @@
return ERR_PTR(err);
}
+ f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE);
+
lock_page(page);
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
@@ -206,7 +205,7 @@
}
/*
- * Readahead CP/NAT/SIT/SSA pages
+ * Readahead CP/NAT/SIT/SSA/POR pages
*/
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync)
@@ -223,6 +222,7 @@
.is_por = (type == META_POR),
};
struct blk_plug plug;
+ int err;
if (unlikely(type == META_POR))
fio.op_flags &= ~REQ_META;
@@ -268,8 +268,11 @@
}
fio.page = page;
- f2fs_submit_page_bio(&fio);
- f2fs_put_page(page, 0);
+ err = f2fs_submit_page_bio(&fio);
+ f2fs_put_page(page, err ? 1 : 0);
+
+ if (!err)
+ f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE);
}
out:
blk_finish_plug(&plug);
@@ -522,7 +525,7 @@
__remove_ino_entry(sbi, ino, type);
}
-/* mode should be APPEND_INO or UPDATE_INO */
+/* mode should be APPEND_INO, UPDATE_INO or TRANS_DIR_INO */
bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
struct inode_management *im = &sbi->im[mode];
@@ -900,7 +903,7 @@
return -ENOMEM;
/*
* Finding out valid cp block involves read both
- * sets( cp pack1 and cp pack 2)
+ * sets( cp pack 1 and cp pack 2)
*/
cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
@@ -1144,7 +1147,8 @@
if (!is_journalled_quota(sbi))
return false;
- down_write(&sbi->quota_sem);
+ if (!down_write_trylock(&sbi->quota_sem))
+ return true;
if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
ret = false;
} else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
@@ -1169,10 +1173,12 @@
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
- struct blk_plug plug;
int err = 0, cnt = 0;
- blk_start_plug(&plug);
+ /*
+ * Let's flush inline_data in dirty node pages.
+ */
+ f2fs_flush_inline_data(sbi);
retry_flush_quotas:
f2fs_lock_all(sbi);
@@ -1201,7 +1207,7 @@
f2fs_unlock_all(sbi);
err = f2fs_sync_dirty_inodes(sbi, DIR_INODE);
if (err)
- goto out;
+ return err;
cond_resched();
goto retry_flush_quotas;
}
@@ -1217,7 +1223,7 @@
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
- goto out;
+ return err;
cond_resched();
goto retry_flush_quotas;
}
@@ -1233,7 +1239,7 @@
if (err) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
- goto out;
+ return err;
}
cond_resched();
goto retry_flush_nodes;
@@ -1245,8 +1251,6 @@
*/
__prepare_cp_block(sbi);
up_write(&sbi->node_change);
-out:
- blk_finish_plug(&plug);
return err;
}
@@ -1261,15 +1265,20 @@
DEFINE_WAIT(wait);
for (;;) {
- prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
-
if (!get_pages(sbi, type))
break;
if (unlikely(f2fs_cp_error(sbi)))
break;
- io_schedule_timeout(HZ/50);
+ if (type == F2FS_DIRTY_META)
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX,
+ FS_CP_META_IO);
+ else if (type == F2FS_WB_CP_DATA)
+ f2fs_submit_merged_write(sbi, DATA);
+
+ prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
+ io_schedule_timeout(DEFAULT_IO_TIMEOUT);
}
finish_wait(&sbi->cp_wait, &wait);
}
@@ -1395,10 +1404,7 @@
/* Flush all the NAT/SIT pages */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
- /*
- * modify checkpoint
- * version number is already updated
- */
+ /* start to update checkpoint, cp ver is already updated previously */
ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true));
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
@@ -1418,7 +1424,7 @@
curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
}
- /* 2 cp + n data seg summary + orphan inode blocks */
+ /* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false);
spin_lock_irqsave(&sbi->cp_lock, flags);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
@@ -1517,10 +1523,11 @@
f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
/*
- * invalidate intermediate page cache borrowed from meta inode
- * which are used for migration of encrypted inode's blocks.
+ * invalidate intermediate page cache borrowed from meta inode which are
+ * used for migration of encrypted, verity or compressed inode's blocks.
*/
- if (f2fs_sb_has_encrypt(sbi))
+ if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
+ f2fs_sb_has_compression(sbi))
invalidate_mapping_pages(META_MAPPING(sbi),
MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
@@ -1551,9 +1558,6 @@
return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0;
}
-/*
- * We guarantee that this checkpoint procedure will not fail.
- */
int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1568,7 +1572,8 @@
return 0;
f2fs_warn(sbi, "Start checkpoint disabled!");
}
- mutex_lock(&sbi->cp_mutex);
+ if (cpc->reason != CP_RESIZE)
+ mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
@@ -1621,12 +1626,16 @@
f2fs_flush_sit_entries(sbi, cpc);
- /* unlock all the fs_lock[] in do_checkpoint() */
+ /* save inmem log status */
+ f2fs_save_inmem_curseg(sbi);
+
err = do_checkpoint(sbi, cpc);
if (err)
f2fs_release_discard_addrs(sbi);
else
f2fs_clear_prefree_segments(sbi, cpc);
+
+ f2fs_restore_inmem_curseg(sbi);
stop:
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
@@ -1634,11 +1643,12 @@
if (cpc->reason & CP_RECOVERY)
f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver);
- /* do checkpoint periodically */
+ /* update CP_TIME to trigger checkpoint periodically */
f2fs_update_time(sbi, CP_TIME);
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
- mutex_unlock(&sbi->cp_mutex);
+ if (cpc->reason != CP_RESIZE)
+ mutex_unlock(&sbi->cp_mutex);
return err;
}
@@ -1656,7 +1666,7 @@
}
sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
- NR_CURSEG_TYPE - __cp_payload(sbi)) *
+ NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
F2FS_ORPHANS_PER_BLOCK;
}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
new file mode 100644
index 0000000..ec542e8
--- /dev/null
+++ b/fs/f2fs/compress.c
@@ -0,0 +1,1640 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * f2fs compress support
+ *
+ * Copyright (c) 2019 Chao Yu <chao@kernel.org>
+ */
+
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
+#include <linux/lzo.h>
+#include <linux/lz4.h>
+#include <linux/zstd.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include <trace/events/f2fs.h>
+
+static struct kmem_cache *cic_entry_slab;
+static struct kmem_cache *dic_entry_slab;
+
+static void *page_array_alloc(struct inode *inode, int nr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned int size = sizeof(struct page *) * nr;
+
+ if (likely(size <= sbi->page_array_slab_size))
+ return kmem_cache_zalloc(sbi->page_array_slab, GFP_NOFS);
+ return f2fs_kzalloc(sbi, size, GFP_NOFS);
+}
+
+static void page_array_free(struct inode *inode, void *pages, int nr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned int size = sizeof(struct page *) * nr;
+
+ if (!pages)
+ return;
+
+ if (likely(size <= sbi->page_array_slab_size))
+ kmem_cache_free(sbi->page_array_slab, pages);
+ else
+ kfree(pages);
+}
+
+struct f2fs_compress_ops {
+ int (*init_compress_ctx)(struct compress_ctx *cc);
+ void (*destroy_compress_ctx)(struct compress_ctx *cc);
+ int (*compress_pages)(struct compress_ctx *cc);
+ int (*init_decompress_ctx)(struct decompress_io_ctx *dic);
+ void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic);
+ int (*decompress_pages)(struct decompress_io_ctx *dic);
+};
+
+static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index)
+{
+ return index & (cc->cluster_size - 1);
+}
+
+static pgoff_t cluster_idx(struct compress_ctx *cc, pgoff_t index)
+{
+ return index >> cc->log_cluster_size;
+}
+
+static pgoff_t start_idx_of_cluster(struct compress_ctx *cc)
+{
+ return cc->cluster_idx << cc->log_cluster_size;
+}
+
+bool f2fs_is_compressed_page(struct page *page)
+{
+ if (!PagePrivate(page))
+ return false;
+ if (!page_private(page))
+ return false;
+ if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page))
+ return false;
+ /*
+ * page->private may be set with pid.
+ * pid_max is enough to check if it is traced.
+ */
+ if (IS_IO_TRACED_PAGE(page))
+ return false;
+
+ f2fs_bug_on(F2FS_M_SB(page->mapping),
+ *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC);
+ return true;
+}
+
+static void f2fs_set_compressed_page(struct page *page,
+ struct inode *inode, pgoff_t index, void *data)
+{
+ SetPagePrivate(page);
+ set_page_private(page, (unsigned long)data);
+
+ /* i_crypto_info and iv index */
+ page->index = index;
+ page->mapping = inode->i_mapping;
+}
+
+static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock)
+{
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (!cc->rpages[i])
+ continue;
+ if (unlock)
+ unlock_page(cc->rpages[i]);
+ else
+ put_page(cc->rpages[i]);
+ }
+}
+
+static void f2fs_put_rpages(struct compress_ctx *cc)
+{
+ f2fs_drop_rpages(cc, cc->cluster_size, false);
+}
+
+static void f2fs_unlock_rpages(struct compress_ctx *cc, int len)
+{
+ f2fs_drop_rpages(cc, len, true);
+}
+
+static void f2fs_put_rpages_wbc(struct compress_ctx *cc,
+ struct writeback_control *wbc, bool redirty, int unlock)
+{
+ unsigned int i;
+
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (!cc->rpages[i])
+ continue;
+ if (redirty)
+ redirty_page_for_writepage(wbc, cc->rpages[i]);
+ f2fs_put_page(cc->rpages[i], unlock);
+ }
+}
+
+struct page *f2fs_compress_control_page(struct page *page)
+{
+ return ((struct compress_io_ctx *)page_private(page))->rpages[0];
+}
+
+int f2fs_init_compress_ctx(struct compress_ctx *cc)
+{
+ if (cc->rpages)
+ return 0;
+
+ cc->rpages = page_array_alloc(cc->inode, cc->cluster_size);
+ return cc->rpages ? 0 : -ENOMEM;
+}
+
+void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
+{
+ page_array_free(cc->inode, cc->rpages, cc->cluster_size);
+ cc->rpages = NULL;
+ cc->nr_rpages = 0;
+ cc->nr_cpages = 0;
+ if (!reuse)
+ cc->cluster_idx = NULL_CLUSTER;
+}
+
+void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page)
+{
+ unsigned int cluster_ofs;
+
+ if (!f2fs_cluster_can_merge_page(cc, page->index))
+ f2fs_bug_on(F2FS_I_SB(cc->inode), 1);
+
+ cluster_ofs = offset_in_cluster(cc, page->index);
+ cc->rpages[cluster_ofs] = page;
+ cc->nr_rpages++;
+ cc->cluster_idx = cluster_idx(cc, page->index);
+}
+
+#ifdef CONFIG_F2FS_FS_LZO
+static int lzo_init_compress_ctx(struct compress_ctx *cc)
+{
+ cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode),
+ LZO1X_MEM_COMPRESS, GFP_NOFS);
+ if (!cc->private)
+ return -ENOMEM;
+
+ cc->clen = lzo1x_worst_compress(PAGE_SIZE << cc->log_cluster_size);
+ return 0;
+}
+
+static void lzo_destroy_compress_ctx(struct compress_ctx *cc)
+{
+ kvfree(cc->private);
+ cc->private = NULL;
+}
+
+static int lzo_compress_pages(struct compress_ctx *cc)
+{
+ int ret;
+
+ ret = lzo1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata,
+ &cc->clen, cc->private);
+ if (ret != LZO_E_OK) {
+ printk_ratelimited("%sF2FS-fs (%s): lzo compress failed, ret:%d\n",
+ KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret);
+ return -EIO;
+ }
+ return 0;
+}
+
+static int lzo_decompress_pages(struct decompress_io_ctx *dic)
+{
+ int ret;
+
+ ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen,
+ dic->rbuf, &dic->rlen);
+ if (ret != LZO_E_OK) {
+ printk_ratelimited("%sF2FS-fs (%s): lzo decompress failed, ret:%d\n",
+ KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret);
+ return -EIO;
+ }
+
+ if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) {
+ printk_ratelimited("%sF2FS-fs (%s): lzo invalid rlen:%zu, "
+ "expected:%lu\n", KERN_ERR,
+ F2FS_I_SB(dic->inode)->sb->s_id,
+ dic->rlen,
+ PAGE_SIZE << dic->log_cluster_size);
+ return -EIO;
+ }
+ return 0;
+}
+
+static const struct f2fs_compress_ops f2fs_lzo_ops = {
+ .init_compress_ctx = lzo_init_compress_ctx,
+ .destroy_compress_ctx = lzo_destroy_compress_ctx,
+ .compress_pages = lzo_compress_pages,
+ .decompress_pages = lzo_decompress_pages,
+};
+#endif
+
+#ifdef CONFIG_F2FS_FS_LZ4
+static int lz4_init_compress_ctx(struct compress_ctx *cc)
+{
+ cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode),
+ LZ4_MEM_COMPRESS, GFP_NOFS);
+ if (!cc->private)
+ return -ENOMEM;
+
+ /*
+ * we do not change cc->clen to LZ4_compressBound(inputsize) to
+ * adapt worst compress case, because lz4 compressor can handle
+ * output budget properly.
+ */
+ cc->clen = cc->rlen - PAGE_SIZE - COMPRESS_HEADER_SIZE;
+ return 0;
+}
+
+static void lz4_destroy_compress_ctx(struct compress_ctx *cc)
+{
+ kvfree(cc->private);
+ cc->private = NULL;
+}
+
+static int lz4_compress_pages(struct compress_ctx *cc)
+{
+ int len;
+
+ len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen,
+ cc->clen, cc->private);
+ if (!len)
+ return -EAGAIN;
+
+ cc->clen = len;
+ return 0;
+}
+
+static int lz4_decompress_pages(struct decompress_io_ctx *dic)
+{
+ int ret;
+
+ ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf,
+ dic->clen, dic->rlen);
+ if (ret < 0) {
+ printk_ratelimited("%sF2FS-fs (%s): lz4 decompress failed, ret:%d\n",
+ KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret);
+ return -EIO;
+ }
+
+ if (ret != PAGE_SIZE << dic->log_cluster_size) {
+ printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, "
+ "expected:%lu\n", KERN_ERR,
+ F2FS_I_SB(dic->inode)->sb->s_id,
+ dic->rlen,
+ PAGE_SIZE << dic->log_cluster_size);
+ return -EIO;
+ }
+ return 0;
+}
+
+static const struct f2fs_compress_ops f2fs_lz4_ops = {
+ .init_compress_ctx = lz4_init_compress_ctx,
+ .destroy_compress_ctx = lz4_destroy_compress_ctx,
+ .compress_pages = lz4_compress_pages,
+ .decompress_pages = lz4_decompress_pages,
+};
+#endif
+
+#ifdef CONFIG_F2FS_FS_ZSTD
+#define F2FS_ZSTD_DEFAULT_CLEVEL 1
+
+static int zstd_init_compress_ctx(struct compress_ctx *cc)
+{
+ ZSTD_parameters params;
+ ZSTD_CStream *stream;
+ void *workspace;
+ unsigned int workspace_size;
+
+ params = ZSTD_getParams(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen, 0);
+ workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams);
+
+ workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode),
+ workspace_size, GFP_NOFS);
+ if (!workspace)
+ return -ENOMEM;
+
+ stream = ZSTD_initCStream(params, 0, workspace, workspace_size);
+ if (!stream) {
+ printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initCStream failed\n",
+ KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
+ __func__);
+ kvfree(workspace);
+ return -EIO;
+ }
+
+ cc->private = workspace;
+ cc->private2 = stream;
+
+ cc->clen = cc->rlen - PAGE_SIZE - COMPRESS_HEADER_SIZE;
+ return 0;
+}
+
+static void zstd_destroy_compress_ctx(struct compress_ctx *cc)
+{
+ kvfree(cc->private);
+ cc->private = NULL;
+ cc->private2 = NULL;
+}
+
+static int zstd_compress_pages(struct compress_ctx *cc)
+{
+ ZSTD_CStream *stream = cc->private2;
+ ZSTD_inBuffer inbuf;
+ ZSTD_outBuffer outbuf;
+ int src_size = cc->rlen;
+ int dst_size = src_size - PAGE_SIZE - COMPRESS_HEADER_SIZE;
+ int ret;
+
+ inbuf.pos = 0;
+ inbuf.src = cc->rbuf;
+ inbuf.size = src_size;
+
+ outbuf.pos = 0;
+ outbuf.dst = cc->cbuf->cdata;
+ outbuf.size = dst_size;
+
+ ret = ZSTD_compressStream(stream, &outbuf, &inbuf);
+ if (ZSTD_isError(ret)) {
+ printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n",
+ KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
+ __func__, ZSTD_getErrorCode(ret));
+ return -EIO;
+ }
+
+ ret = ZSTD_endStream(stream, &outbuf);
+ if (ZSTD_isError(ret)) {
+ printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_endStream returned %d\n",
+ KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
+ __func__, ZSTD_getErrorCode(ret));
+ return -EIO;
+ }
+
+ /*
+ * there is compressed data remained in intermediate buffer due to
+ * no more space in cbuf.cdata
+ */
+ if (ret)
+ return -EAGAIN;
+
+ cc->clen = outbuf.pos;
+ return 0;
+}
+
+static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
+{
+ ZSTD_DStream *stream;
+ void *workspace;
+ unsigned int workspace_size;
+ unsigned int max_window_size =
+ MAX_COMPRESS_WINDOW_SIZE(dic->log_cluster_size);
+
+ workspace_size = ZSTD_DStreamWorkspaceBound(max_window_size);
+
+ workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode),
+ workspace_size, GFP_NOFS);
+ if (!workspace)
+ return -ENOMEM;
+
+ stream = ZSTD_initDStream(max_window_size, workspace, workspace_size);
+ if (!stream) {
+ printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n",
+ KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
+ __func__);
+ kvfree(workspace);
+ return -EIO;
+ }
+
+ dic->private = workspace;
+ dic->private2 = stream;
+
+ return 0;
+}
+
+static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic)
+{
+ kvfree(dic->private);
+ dic->private = NULL;
+ dic->private2 = NULL;
+}
+
+static int zstd_decompress_pages(struct decompress_io_ctx *dic)
+{
+ ZSTD_DStream *stream = dic->private2;
+ ZSTD_inBuffer inbuf;
+ ZSTD_outBuffer outbuf;
+ int ret;
+
+ inbuf.pos = 0;
+ inbuf.src = dic->cbuf->cdata;
+ inbuf.size = dic->clen;
+
+ outbuf.pos = 0;
+ outbuf.dst = dic->rbuf;
+ outbuf.size = dic->rlen;
+
+ ret = ZSTD_decompressStream(stream, &outbuf, &inbuf);
+ if (ZSTD_isError(ret)) {
+ printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n",
+ KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
+ __func__, ZSTD_getErrorCode(ret));
+ return -EIO;
+ }
+
+ if (dic->rlen != outbuf.pos) {
+ printk_ratelimited("%sF2FS-fs (%s): %s ZSTD invalid rlen:%zu, "
+ "expected:%lu\n", KERN_ERR,
+ F2FS_I_SB(dic->inode)->sb->s_id,
+ __func__, dic->rlen,
+ PAGE_SIZE << dic->log_cluster_size);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static const struct f2fs_compress_ops f2fs_zstd_ops = {
+ .init_compress_ctx = zstd_init_compress_ctx,
+ .destroy_compress_ctx = zstd_destroy_compress_ctx,
+ .compress_pages = zstd_compress_pages,
+ .init_decompress_ctx = zstd_init_decompress_ctx,
+ .destroy_decompress_ctx = zstd_destroy_decompress_ctx,
+ .decompress_pages = zstd_decompress_pages,
+};
+#endif
+
+#ifdef CONFIG_F2FS_FS_LZO
+#ifdef CONFIG_F2FS_FS_LZORLE
+static int lzorle_compress_pages(struct compress_ctx *cc)
+{
+ int ret;
+
+ ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata,
+ &cc->clen, cc->private);
+ if (ret != LZO_E_OK) {
+ printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n",
+ KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret);
+ return -EIO;
+ }
+ return 0;
+}
+
+static const struct f2fs_compress_ops f2fs_lzorle_ops = {
+ .init_compress_ctx = lzo_init_compress_ctx,
+ .destroy_compress_ctx = lzo_destroy_compress_ctx,
+ .compress_pages = lzorle_compress_pages,
+ .decompress_pages = lzo_decompress_pages,
+};
+#endif
+#endif
+
+static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = {
+#ifdef CONFIG_F2FS_FS_LZO
+ &f2fs_lzo_ops,
+#else
+ NULL,
+#endif
+#ifdef CONFIG_F2FS_FS_LZ4
+ &f2fs_lz4_ops,
+#else
+ NULL,
+#endif
+#ifdef CONFIG_F2FS_FS_ZSTD
+ &f2fs_zstd_ops,
+#else
+ NULL,
+#endif
+#if defined(CONFIG_F2FS_FS_LZO) && defined(CONFIG_F2FS_FS_LZORLE)
+ &f2fs_lzorle_ops,
+#else
+ NULL,
+#endif
+};
+
+bool f2fs_is_compress_backend_ready(struct inode *inode)
+{
+ if (!f2fs_compressed_file(inode))
+ return true;
+ return f2fs_cops[F2FS_I(inode)->i_compress_algorithm];
+}
+
+static mempool_t *compress_page_pool;
+static int num_compress_pages = 512;
+module_param(num_compress_pages, uint, 0444);
+MODULE_PARM_DESC(num_compress_pages,
+ "Number of intermediate compress pages to preallocate");
+
+int f2fs_init_compress_mempool(void)
+{
+ compress_page_pool = mempool_create_page_pool(num_compress_pages, 0);
+ if (!compress_page_pool)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void f2fs_destroy_compress_mempool(void)
+{
+ mempool_destroy(compress_page_pool);
+}
+
+static struct page *f2fs_compress_alloc_page(void)
+{
+ struct page *page;
+
+ page = mempool_alloc(compress_page_pool, GFP_NOFS);
+ lock_page(page);
+
+ return page;
+}
+
+static void f2fs_compress_free_page(struct page *page)
+{
+ if (!page)
+ return;
+ set_page_private(page, (unsigned long)NULL);
+ ClearPagePrivate(page);
+ page->mapping = NULL;
+ unlock_page(page);
+ mempool_free(page, compress_page_pool);
+}
+
+#define MAX_VMAP_RETRIES 3
+
+static void *f2fs_vmap(struct page **pages, unsigned int count)
+{
+ int i;
+ void *buf = NULL;
+
+ for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+ buf = vm_map_ram(pages, count, -1);
+ if (buf)
+ break;
+ vm_unmap_aliases();
+ }
+ return buf;
+}
+
+static int f2fs_compress_pages(struct compress_ctx *cc)
+{
+ struct f2fs_inode_info *fi = F2FS_I(cc->inode);
+ const struct f2fs_compress_ops *cops =
+ f2fs_cops[fi->i_compress_algorithm];
+ unsigned int max_len, new_nr_cpages;
+ struct page **new_cpages;
+ int i, ret;
+
+ trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx,
+ cc->cluster_size, fi->i_compress_algorithm);
+
+ if (cops->init_compress_ctx) {
+ ret = cops->init_compress_ctx(cc);
+ if (ret)
+ goto out;
+ }
+
+ max_len = COMPRESS_HEADER_SIZE + cc->clen;
+ cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE);
+
+ cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages);
+ if (!cc->cpages) {
+ ret = -ENOMEM;
+ goto destroy_compress_ctx;
+ }
+
+ for (i = 0; i < cc->nr_cpages; i++) {
+ cc->cpages[i] = f2fs_compress_alloc_page();
+ if (!cc->cpages[i]) {
+ ret = -ENOMEM;
+ goto out_free_cpages;
+ }
+ }
+
+ cc->rbuf = f2fs_vmap(cc->rpages, cc->cluster_size);
+ if (!cc->rbuf) {
+ ret = -ENOMEM;
+ goto out_free_cpages;
+ }
+
+ cc->cbuf = f2fs_vmap(cc->cpages, cc->nr_cpages);
+ if (!cc->cbuf) {
+ ret = -ENOMEM;
+ goto out_vunmap_rbuf;
+ }
+
+ ret = cops->compress_pages(cc);
+ if (ret)
+ goto out_vunmap_cbuf;
+
+ max_len = PAGE_SIZE * (cc->cluster_size - 1) - COMPRESS_HEADER_SIZE;
+
+ if (cc->clen > max_len) {
+ ret = -EAGAIN;
+ goto out_vunmap_cbuf;
+ }
+
+ cc->cbuf->clen = cpu_to_le32(cc->clen);
+
+ for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++)
+ cc->cbuf->reserved[i] = cpu_to_le32(0);
+
+ new_nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
+
+ /* Now we're going to cut unnecessary tail pages */
+ new_cpages = page_array_alloc(cc->inode, new_nr_cpages);
+ if (!new_cpages) {
+ ret = -ENOMEM;
+ goto out_vunmap_cbuf;
+ }
+
+ /* zero out any unused part of the last page */
+ memset(&cc->cbuf->cdata[cc->clen], 0,
+ (new_nr_cpages * PAGE_SIZE) -
+ (cc->clen + COMPRESS_HEADER_SIZE));
+
+ vm_unmap_ram(cc->cbuf, cc->nr_cpages);
+ vm_unmap_ram(cc->rbuf, cc->cluster_size);
+
+ for (i = 0; i < cc->nr_cpages; i++) {
+ if (i < new_nr_cpages) {
+ new_cpages[i] = cc->cpages[i];
+ continue;
+ }
+ f2fs_compress_free_page(cc->cpages[i]);
+ cc->cpages[i] = NULL;
+ }
+
+ if (cops->destroy_compress_ctx)
+ cops->destroy_compress_ctx(cc);
+
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ cc->cpages = new_cpages;
+ cc->nr_cpages = new_nr_cpages;
+
+ trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx,
+ cc->clen, ret);
+ return 0;
+
+out_vunmap_cbuf:
+ vm_unmap_ram(cc->cbuf, cc->nr_cpages);
+out_vunmap_rbuf:
+ vm_unmap_ram(cc->rbuf, cc->cluster_size);
+out_free_cpages:
+ for (i = 0; i < cc->nr_cpages; i++) {
+ if (cc->cpages[i])
+ f2fs_compress_free_page(cc->cpages[i]);
+ }
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ cc->cpages = NULL;
+destroy_compress_ctx:
+ if (cops->destroy_compress_ctx)
+ cops->destroy_compress_ctx(cc);
+out:
+ trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx,
+ cc->clen, ret);
+ return ret;
+}
+
+void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
+{
+ struct decompress_io_ctx *dic =
+ (struct decompress_io_ctx *)page_private(page);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
+ struct f2fs_inode_info *fi= F2FS_I(dic->inode);
+ const struct f2fs_compress_ops *cops =
+ f2fs_cops[fi->i_compress_algorithm];
+ int ret;
+ int i;
+
+ dec_page_count(sbi, F2FS_RD_DATA);
+
+ if (bio->bi_status || PageError(page))
+ dic->failed = true;
+
+ if (atomic_dec_return(&dic->pending_pages))
+ return;
+
+ trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx,
+ dic->cluster_size, fi->i_compress_algorithm);
+
+ /* submit partial compressed pages */
+ if (dic->failed) {
+ ret = -EIO;
+ goto out_free_dic;
+ }
+
+ dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
+ if (!dic->tpages) {
+ ret = -ENOMEM;
+ goto out_free_dic;
+ }
+
+ for (i = 0; i < dic->cluster_size; i++) {
+ if (dic->rpages[i]) {
+ dic->tpages[i] = dic->rpages[i];
+ continue;
+ }
+
+ dic->tpages[i] = f2fs_compress_alloc_page();
+ if (!dic->tpages[i]) {
+ ret = -ENOMEM;
+ goto out_free_dic;
+ }
+ }
+
+ if (cops->init_decompress_ctx) {
+ ret = cops->init_decompress_ctx(dic);
+ if (ret)
+ goto out_free_dic;
+ }
+
+ dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size);
+ if (!dic->rbuf) {
+ ret = -ENOMEM;
+ goto destroy_decompress_ctx;
+ }
+
+ dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages);
+ if (!dic->cbuf) {
+ ret = -ENOMEM;
+ goto out_vunmap_rbuf;
+ }
+
+ dic->clen = le32_to_cpu(dic->cbuf->clen);
+ dic->rlen = PAGE_SIZE << dic->log_cluster_size;
+
+ if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) {
+ ret = -EFSCORRUPTED;
+ goto out_vunmap_cbuf;
+ }
+
+ ret = cops->decompress_pages(dic);
+
+out_vunmap_cbuf:
+ vm_unmap_ram(dic->cbuf, dic->nr_cpages);
+out_vunmap_rbuf:
+ vm_unmap_ram(dic->rbuf, dic->cluster_size);
+destroy_decompress_ctx:
+ if (cops->destroy_decompress_ctx)
+ cops->destroy_decompress_ctx(dic);
+out_free_dic:
+ if (!verity)
+ f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
+ ret, false);
+
+ trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx,
+ dic->clen, ret);
+ if (!verity)
+ f2fs_free_dic(dic);
+}
+
+static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index)
+{
+ if (cc->cluster_idx == NULL_CLUSTER)
+ return true;
+ return cc->cluster_idx == cluster_idx(cc, index);
+}
+
+bool f2fs_cluster_is_empty(struct compress_ctx *cc)
+{
+ return cc->nr_rpages == 0;
+}
+
+static bool f2fs_cluster_is_full(struct compress_ctx *cc)
+{
+ return cc->cluster_size == cc->nr_rpages;
+}
+
+bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index)
+{
+ if (f2fs_cluster_is_empty(cc))
+ return true;
+ return is_page_in_cluster(cc, index);
+}
+
+static bool __cluster_may_compress(struct compress_ctx *cc)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
+ loff_t i_size = i_size_read(cc->inode);
+ unsigned nr_pages = DIV_ROUND_UP(i_size, PAGE_SIZE);
+ int i;
+
+ for (i = 0; i < cc->cluster_size; i++) {
+ struct page *page = cc->rpages[i];
+
+ f2fs_bug_on(sbi, !page);
+
+ if (unlikely(f2fs_cp_error(sbi)))
+ return false;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ return false;
+
+ /* beyond EOF */
+ if (page->index >= nr_pages)
+ return false;
+ }
+ return true;
+}
+
+static int __f2fs_cluster_blocks(struct compress_ctx *cc, bool compr)
+{
+ struct dnode_of_data dn;
+ int ret;
+
+ set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, start_idx_of_cluster(cc),
+ LOOKUP_NODE);
+ if (ret) {
+ if (ret == -ENOENT)
+ ret = 0;
+ goto fail;
+ }
+
+ if (dn.data_blkaddr == COMPRESS_ADDR) {
+ int i;
+
+ ret = 1;
+ for (i = 1; i < cc->cluster_size; i++) {
+ block_t blkaddr;
+
+ blkaddr = data_blkaddr(dn.inode,
+ dn.node_page, dn.ofs_in_node + i);
+ if (compr) {
+ if (__is_valid_data_blkaddr(blkaddr))
+ ret++;
+ } else {
+ if (blkaddr != NULL_ADDR)
+ ret++;
+ }
+ }
+ }
+fail:
+ f2fs_put_dnode(&dn);
+ return ret;
+}
+
+/* return # of compressed blocks in compressed cluster */
+static int f2fs_compressed_blocks(struct compress_ctx *cc)
+{
+ return __f2fs_cluster_blocks(cc, true);
+}
+
+/* return # of valid blocks in compressed cluster */
+static int f2fs_cluster_blocks(struct compress_ctx *cc)
+{
+ return __f2fs_cluster_blocks(cc, false);
+}
+
+int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index)
+{
+ struct compress_ctx cc = {
+ .inode = inode,
+ .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
+ .cluster_size = F2FS_I(inode)->i_cluster_size,
+ .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size,
+ };
+
+ return f2fs_cluster_blocks(&cc);
+}
+
+static bool cluster_may_compress(struct compress_ctx *cc)
+{
+ if (!f2fs_compressed_file(cc->inode))
+ return false;
+ if (f2fs_is_atomic_file(cc->inode))
+ return false;
+ if (f2fs_is_mmap_file(cc->inode))
+ return false;
+ if (!f2fs_cluster_is_full(cc))
+ return false;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(cc->inode))))
+ return false;
+ return __cluster_may_compress(cc);
+}
+
+static void set_cluster_writeback(struct compress_ctx *cc)
+{
+ int i;
+
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (cc->rpages[i])
+ set_page_writeback(cc->rpages[i]);
+ }
+}
+
+static void set_cluster_dirty(struct compress_ctx *cc)
+{
+ int i;
+
+ for (i = 0; i < cc->cluster_size; i++)
+ if (cc->rpages[i])
+ set_page_dirty(cc->rpages[i]);
+}
+
+static int prepare_compress_overwrite(struct compress_ctx *cc,
+ struct page **pagep, pgoff_t index, void **fsdata)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
+ struct address_space *mapping = cc->inode->i_mapping;
+ struct page *page;
+ struct dnode_of_data dn;
+ sector_t last_block_in_bio;
+ unsigned fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT;
+ pgoff_t start_idx = start_idx_of_cluster(cc);
+ int i, ret;
+ bool prealloc;
+
+retry:
+ ret = f2fs_cluster_blocks(cc);
+ if (ret <= 0)
+ return ret;
+
+ /* compressed case */
+ prealloc = (ret < cc->cluster_size);
+
+ ret = f2fs_init_compress_ctx(cc);
+ if (ret)
+ return ret;
+
+ /* keep page reference to avoid page reclaim */
+ for (i = 0; i < cc->cluster_size; i++) {
+ page = f2fs_pagecache_get_page(mapping, start_idx + i,
+ fgp_flag, GFP_NOFS);
+ if (!page) {
+ ret = -ENOMEM;
+ goto unlock_pages;
+ }
+
+ if (PageUptodate(page))
+ f2fs_put_page(page, 1);
+ else
+ f2fs_compress_ctx_add_page(cc, page);
+ }
+
+ if (!f2fs_cluster_is_empty(cc)) {
+ struct bio *bio = NULL;
+
+ ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size,
+ &last_block_in_bio, false, true);
+ f2fs_put_rpages(cc);
+ f2fs_destroy_compress_ctx(cc, true);
+ if (ret)
+ goto out;
+ if (bio)
+ f2fs_submit_bio(sbi, bio, DATA);
+
+ ret = f2fs_init_compress_ctx(cc);
+ if (ret)
+ goto out;
+ }
+
+ for (i = 0; i < cc->cluster_size; i++) {
+ f2fs_bug_on(sbi, cc->rpages[i]);
+
+ page = find_lock_page(mapping, start_idx + i);
+ if (!page) {
+ /* page can be truncated */
+ goto release_and_retry;
+ }
+
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+ f2fs_compress_ctx_add_page(cc, page);
+
+ if (!PageUptodate(page)) {
+release_and_retry:
+ f2fs_put_rpages(cc);
+ f2fs_unlock_rpages(cc, i + 1);
+ f2fs_destroy_compress_ctx(cc, true);
+ goto retry;
+ }
+ }
+
+ if (prealloc) {
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+
+ set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
+
+ for (i = cc->cluster_size - 1; i > 0; i--) {
+ ret = f2fs_get_block(&dn, start_idx + i);
+ if (ret) {
+ i = cc->cluster_size;
+ break;
+ }
+
+ if (dn.data_blkaddr != NEW_ADDR)
+ break;
+ }
+
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ }
+
+ if (likely(!ret)) {
+ *fsdata = cc->rpages;
+ *pagep = cc->rpages[offset_in_cluster(cc, index)];
+ return cc->cluster_size;
+ }
+
+unlock_pages:
+ f2fs_put_rpages(cc);
+ f2fs_unlock_rpages(cc, i);
+ f2fs_destroy_compress_ctx(cc, true);
+out:
+ return ret;
+}
+
+int f2fs_prepare_compress_overwrite(struct inode *inode,
+ struct page **pagep, pgoff_t index, void **fsdata)
+{
+ struct compress_ctx cc = {
+ .inode = inode,
+ .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
+ .cluster_size = F2FS_I(inode)->i_cluster_size,
+ .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size,
+ .rpages = NULL,
+ .nr_rpages = 0,
+ };
+
+ return prepare_compress_overwrite(&cc, pagep, index, fsdata);
+}
+
+bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
+ pgoff_t index, unsigned copied)
+
+{
+ struct compress_ctx cc = {
+ .inode = inode,
+ .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
+ .cluster_size = F2FS_I(inode)->i_cluster_size,
+ .rpages = fsdata,
+ };
+ bool first_index = (index == cc.rpages[0]->index);
+
+ if (copied)
+ set_cluster_dirty(&cc);
+
+ f2fs_put_rpages_wbc(&cc, NULL, false, 1);
+ f2fs_destroy_compress_ctx(&cc, false);
+
+ return first_index;
+}
+
+int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock)
+{
+ void *fsdata = NULL;
+ struct page *pagep;
+ int log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
+ pgoff_t start_idx = from >> (PAGE_SHIFT + log_cluster_size) <<
+ log_cluster_size;
+ int err;
+
+ err = f2fs_is_compressed_cluster(inode, start_idx);
+ if (err < 0)
+ return err;
+
+ /* truncate normal cluster */
+ if (!err)
+ return f2fs_do_truncate_blocks(inode, from, lock);
+
+ /* truncate compressed cluster */
+ err = f2fs_prepare_compress_overwrite(inode, &pagep,
+ start_idx, &fsdata);
+
+ /* should not be a normal cluster */
+ f2fs_bug_on(F2FS_I_SB(inode), err == 0);
+
+ if (err <= 0)
+ return err;
+
+ if (err > 0) {
+ struct page **rpages = fsdata;
+ int cluster_size = F2FS_I(inode)->i_cluster_size;
+ int i;
+
+ for (i = cluster_size - 1; i >= 0; i--) {
+ loff_t start = rpages[i]->index << PAGE_SHIFT;
+
+ if (from <= start) {
+ zero_user_segment(rpages[i], 0, PAGE_SIZE);
+ } else {
+ zero_user_segment(rpages[i], from - start,
+ PAGE_SIZE);
+ break;
+ }
+ }
+
+ f2fs_compress_write_end(inode, fsdata, start_idx, true);
+ }
+ return 0;
+}
+
+static int f2fs_write_compressed_pages(struct compress_ctx *cc,
+ int *submitted,
+ struct writeback_control *wbc,
+ enum iostat_type io_type)
+{
+ struct inode *inode = cc->inode;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_io_info fio = {
+ .sbi = sbi,
+ .ino = cc->inode->i_ino,
+ .type = DATA,
+ .op = REQ_OP_WRITE,
+ .op_flags = wbc_to_write_flags(wbc),
+ .old_blkaddr = NEW_ADDR,
+ .page = NULL,
+ .encrypted_page = NULL,
+ .compressed_page = NULL,
+ .submitted = false,
+ .io_type = io_type,
+ .io_wbc = wbc,
+ .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode),
+ };
+ struct dnode_of_data dn;
+ struct node_info ni;
+ struct compress_io_ctx *cic;
+ pgoff_t start_idx = start_idx_of_cluster(cc);
+ unsigned int last_index = cc->cluster_size - 1;
+ loff_t psize;
+ int i, err;
+
+ if (IS_NOQUOTA(inode)) {
+ /*
+ * We need to wait for node_write to avoid block allocation during
+ * checkpoint. This can only happen to quota writes which can cause
+ * the below discard race condition.
+ */
+ down_read(&sbi->node_write);
+ } else if (!f2fs_trylock_op(sbi)) {
+ goto out_free;
+ }
+
+ set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
+
+ err = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
+ if (err)
+ goto out_unlock_op;
+
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (data_blkaddr(dn.inode, dn.node_page,
+ dn.ofs_in_node + i) == NULL_ADDR)
+ goto out_put_dnode;
+ }
+
+ psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT;
+
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ if (err)
+ goto out_put_dnode;
+
+ fio.version = ni.version;
+
+ cic = kmem_cache_zalloc(cic_entry_slab, GFP_NOFS);
+ if (!cic)
+ goto out_put_dnode;
+
+ cic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
+ cic->inode = inode;
+ atomic_set(&cic->pending_pages, cc->nr_cpages);
+ cic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
+ if (!cic->rpages)
+ goto out_put_cic;
+
+ cic->nr_rpages = cc->cluster_size;
+
+ for (i = 0; i < cc->nr_cpages; i++) {
+ f2fs_set_compressed_page(cc->cpages[i], inode,
+ cc->rpages[i + 1]->index, cic);
+ fio.compressed_page = cc->cpages[i];
+
+ fio.old_blkaddr = data_blkaddr(dn.inode, dn.node_page,
+ dn.ofs_in_node + i + 1);
+
+ /* wait for GCed page writeback via META_MAPPING */
+ f2fs_wait_on_block_writeback(inode, fio.old_blkaddr);
+
+ if (fio.encrypted) {
+ fio.page = cc->rpages[i + 1];
+ err = f2fs_encrypt_one_page(&fio);
+ if (err)
+ goto out_destroy_crypt;
+ cc->cpages[i] = fio.encrypted_page;
+ }
+ }
+
+ set_cluster_writeback(cc);
+
+ for (i = 0; i < cc->cluster_size; i++)
+ cic->rpages[i] = cc->rpages[i];
+
+ for (i = 0; i < cc->cluster_size; i++, dn.ofs_in_node++) {
+ block_t blkaddr;
+
+ blkaddr = f2fs_data_blkaddr(&dn);
+ fio.page = cc->rpages[i];
+ fio.old_blkaddr = blkaddr;
+
+ /* cluster header */
+ if (i == 0) {
+ if (blkaddr == COMPRESS_ADDR)
+ fio.compr_blocks++;
+ if (__is_valid_data_blkaddr(blkaddr))
+ f2fs_invalidate_blocks(sbi, blkaddr);
+ f2fs_update_data_blkaddr(&dn, COMPRESS_ADDR);
+ goto unlock_continue;
+ }
+
+ if (fio.compr_blocks && __is_valid_data_blkaddr(blkaddr))
+ fio.compr_blocks++;
+
+ if (i > cc->nr_cpages) {
+ if (__is_valid_data_blkaddr(blkaddr)) {
+ f2fs_invalidate_blocks(sbi, blkaddr);
+ f2fs_update_data_blkaddr(&dn, NEW_ADDR);
+ }
+ goto unlock_continue;
+ }
+
+ f2fs_bug_on(fio.sbi, blkaddr == NULL_ADDR);
+
+ if (fio.encrypted)
+ fio.encrypted_page = cc->cpages[i - 1];
+ else
+ fio.compressed_page = cc->cpages[i - 1];
+
+ cc->cpages[i - 1] = NULL;
+ f2fs_outplace_write_data(&dn, &fio);
+ (*submitted)++;
+unlock_continue:
+ inode_dec_dirty_pages(cc->inode);
+ unlock_page(fio.page);
+ }
+
+ if (fio.compr_blocks)
+ f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
+ f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
+
+ set_inode_flag(cc->inode, FI_APPEND_WRITE);
+ if (cc->cluster_idx == 0)
+ set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+
+ f2fs_put_dnode(&dn);
+ if (IS_NOQUOTA(inode))
+ up_read(&sbi->node_write);
+ else
+ f2fs_unlock_op(sbi);
+
+ spin_lock(&fi->i_size_lock);
+ if (fi->last_disk_size < psize)
+ fi->last_disk_size = psize;
+ spin_unlock(&fi->i_size_lock);
+
+ f2fs_put_rpages(cc);
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ cc->cpages = NULL;
+ f2fs_destroy_compress_ctx(cc, false);
+ return 0;
+
+out_destroy_crypt:
+ page_array_free(cc->inode, cic->rpages, cc->cluster_size);
+
+ for (--i; i >= 0; i--)
+ fscrypt_finalize_bounce_page(&cc->cpages[i]);
+out_put_cic:
+ kmem_cache_free(cic_entry_slab, cic);
+out_put_dnode:
+ f2fs_put_dnode(&dn);
+out_unlock_op:
+ if (IS_NOQUOTA(inode))
+ up_read(&sbi->node_write);
+ else
+ f2fs_unlock_op(sbi);
+out_free:
+ for (i = 0; i < cc->nr_cpages; i++) {
+ if (!cc->cpages[i])
+ continue;
+ f2fs_compress_free_page(cc->cpages[i]);
+ cc->cpages[i] = NULL;
+ }
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ cc->cpages = NULL;
+ return -EAGAIN;
+}
+
+void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
+{
+ struct f2fs_sb_info *sbi = bio->bi_private;
+ struct compress_io_ctx *cic =
+ (struct compress_io_ctx *)page_private(page);
+ int i;
+
+ if (unlikely(bio->bi_status))
+ mapping_set_error(cic->inode->i_mapping, -EIO);
+
+ f2fs_compress_free_page(page);
+
+ dec_page_count(sbi, F2FS_WB_DATA);
+
+ if (atomic_dec_return(&cic->pending_pages))
+ return;
+
+ for (i = 0; i < cic->nr_rpages; i++) {
+ WARN_ON(!cic->rpages[i]);
+ clear_cold_data(cic->rpages[i]);
+ end_page_writeback(cic->rpages[i]);
+ }
+
+ page_array_free(cic->inode, cic->rpages, cic->nr_rpages);
+ kmem_cache_free(cic_entry_slab, cic);
+}
+
+static int f2fs_write_raw_pages(struct compress_ctx *cc,
+ int *submitted,
+ struct writeback_control *wbc,
+ enum iostat_type io_type)
+{
+ struct address_space *mapping = cc->inode->i_mapping;
+ int _submitted, compr_blocks, ret, i;
+
+ compr_blocks = f2fs_compressed_blocks(cc);
+
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (!cc->rpages[i])
+ continue;
+
+ redirty_page_for_writepage(wbc, cc->rpages[i]);
+ unlock_page(cc->rpages[i]);
+ }
+
+ if (compr_blocks < 0)
+ return compr_blocks;
+
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (!cc->rpages[i])
+ continue;
+retry_write:
+ lock_page(cc->rpages[i]);
+
+ if (cc->rpages[i]->mapping != mapping) {
+continue_unlock:
+ unlock_page(cc->rpages[i]);
+ continue;
+ }
+
+ if (!PageDirty(cc->rpages[i]))
+ goto continue_unlock;
+
+ if (!clear_page_dirty_for_io(cc->rpages[i]))
+ goto continue_unlock;
+
+ ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
+ NULL, NULL, wbc, io_type,
+ compr_blocks, false);
+ if (ret) {
+ if (ret == AOP_WRITEPAGE_ACTIVATE) {
+ unlock_page(cc->rpages[i]);
+ ret = 0;
+ } else if (ret == -EAGAIN) {
+ /*
+ * for quota file, just redirty left pages to
+ * avoid deadlock caused by cluster update race
+ * from foreground operation.
+ */
+ if (IS_NOQUOTA(cc->inode))
+ return 0;
+ ret = 0;
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC,
+ DEFAULT_IO_TIMEOUT);
+ goto retry_write;
+ }
+ return ret;
+ }
+
+ *submitted += _submitted;
+ }
+
+ f2fs_balance_fs(F2FS_M_SB(mapping), true);
+
+ return 0;
+}
+
+int f2fs_write_multi_pages(struct compress_ctx *cc,
+ int *submitted,
+ struct writeback_control *wbc,
+ enum iostat_type io_type)
+{
+ int err;
+
+ *submitted = 0;
+ if (cluster_may_compress(cc)) {
+ err = f2fs_compress_pages(cc);
+ if (err == -EAGAIN) {
+ goto write;
+ } else if (err) {
+ f2fs_put_rpages_wbc(cc, wbc, true, 1);
+ goto destroy_out;
+ }
+
+ err = f2fs_write_compressed_pages(cc, submitted,
+ wbc, io_type);
+ if (!err)
+ return 0;
+ f2fs_bug_on(F2FS_I_SB(cc->inode), err != -EAGAIN);
+ }
+write:
+ f2fs_bug_on(F2FS_I_SB(cc->inode), *submitted);
+
+ err = f2fs_write_raw_pages(cc, submitted, wbc, io_type);
+ f2fs_put_rpages_wbc(cc, wbc, false, 0);
+destroy_out:
+ f2fs_destroy_compress_ctx(cc, false);
+ return err;
+}
+
+struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
+{
+ struct decompress_io_ctx *dic;
+ pgoff_t start_idx = start_idx_of_cluster(cc);
+ int i;
+
+ dic = kmem_cache_zalloc(dic_entry_slab, GFP_NOFS);
+ if (!dic)
+ return ERR_PTR(-ENOMEM);
+
+ dic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
+ if (!dic->rpages) {
+ kmem_cache_free(dic_entry_slab, dic);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
+ dic->inode = cc->inode;
+ atomic_set(&dic->pending_pages, cc->nr_cpages);
+ dic->cluster_idx = cc->cluster_idx;
+ dic->cluster_size = cc->cluster_size;
+ dic->log_cluster_size = cc->log_cluster_size;
+ dic->nr_cpages = cc->nr_cpages;
+ dic->failed = false;
+
+ for (i = 0; i < dic->cluster_size; i++)
+ dic->rpages[i] = cc->rpages[i];
+ dic->nr_rpages = cc->cluster_size;
+
+ dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages);
+ if (!dic->cpages)
+ goto out_free;
+
+ for (i = 0; i < dic->nr_cpages; i++) {
+ struct page *page;
+
+ page = f2fs_compress_alloc_page();
+ if (!page)
+ goto out_free;
+
+ f2fs_set_compressed_page(page, cc->inode,
+ start_idx + i + 1, dic);
+ dic->cpages[i] = page;
+ }
+
+ return dic;
+
+out_free:
+ f2fs_free_dic(dic);
+ return ERR_PTR(-ENOMEM);
+}
+
+void f2fs_free_dic(struct decompress_io_ctx *dic)
+{
+ int i;
+
+ if (dic->tpages) {
+ for (i = 0; i < dic->cluster_size; i++) {
+ if (dic->rpages[i])
+ continue;
+ if (!dic->tpages[i])
+ continue;
+ f2fs_compress_free_page(dic->tpages[i]);
+ }
+ page_array_free(dic->inode, dic->tpages, dic->cluster_size);
+ }
+
+ if (dic->cpages) {
+ for (i = 0; i < dic->nr_cpages; i++) {
+ if (!dic->cpages[i])
+ continue;
+ f2fs_compress_free_page(dic->cpages[i]);
+ }
+ page_array_free(dic->inode, dic->cpages, dic->nr_cpages);
+ }
+
+ page_array_free(dic->inode, dic->rpages, dic->nr_rpages);
+ kmem_cache_free(dic_entry_slab, dic);
+}
+
+void f2fs_decompress_end_io(struct page **rpages,
+ unsigned int cluster_size, bool err, bool verity)
+{
+ int i;
+
+ for (i = 0; i < cluster_size; i++) {
+ struct page *rpage = rpages[i];
+
+ if (!rpage)
+ continue;
+
+ if (err || PageError(rpage))
+ goto clear_uptodate;
+
+ if (!verity || fsverity_verify_page(rpage)) {
+ SetPageUptodate(rpage);
+ goto unlock;
+ }
+clear_uptodate:
+ ClearPageUptodate(rpage);
+ ClearPageError(rpage);
+unlock:
+ unlock_page(rpage);
+ }
+}
+
+int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ char slab_name[32];
+
+ sprintf(slab_name, "f2fs_page_array_entry-%u:%u", MAJOR(dev), MINOR(dev));
+
+ sbi->page_array_slab_size = sizeof(struct page *) <<
+ F2FS_OPTION(sbi).compress_log_size;
+
+ sbi->page_array_slab = f2fs_kmem_cache_create(slab_name,
+ sbi->page_array_slab_size);
+ if (!sbi->page_array_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi)
+{
+ kmem_cache_destroy(sbi->page_array_slab);
+}
+
+static int __init f2fs_init_cic_cache(void)
+{
+ cic_entry_slab = f2fs_kmem_cache_create("f2fs_cic_entry",
+ sizeof(struct compress_io_ctx));
+ if (!cic_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+static void f2fs_destroy_cic_cache(void)
+{
+ kmem_cache_destroy(cic_entry_slab);
+}
+
+static int __init f2fs_init_dic_cache(void)
+{
+ dic_entry_slab = f2fs_kmem_cache_create("f2fs_dic_entry",
+ sizeof(struct decompress_io_ctx));
+ if (!dic_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+static void f2fs_destroy_dic_cache(void)
+{
+ kmem_cache_destroy(dic_entry_slab);
+}
+
+int __init f2fs_init_compress_cache(void)
+{
+ int err;
+
+ err = f2fs_init_cic_cache();
+ if (err)
+ goto out;
+ err = f2fs_init_dic_cache();
+ if (err)
+ goto free_cic;
+ return 0;
+free_cic:
+ f2fs_destroy_cic_cache();
+out:
+ return -ENOMEM;
+}
+
+void f2fs_destroy_compress_cache(void)
+{
+ f2fs_destroy_dic_cache();
+ f2fs_destroy_cic_cache();
+}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 1679f9c..1b11a42 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -14,11 +14,13 @@
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
+#include <linux/blk-crypto.h>
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>
+#include <linux/fiemap.h>
#include "f2fs.h"
#include "node.h"
@@ -29,7 +31,45 @@
#define NUM_PREALLOC_POST_READ_CTXS 128
static struct kmem_cache *bio_post_read_ctx_cache;
+static struct kmem_cache *bio_entry_slab;
static mempool_t *bio_post_read_ctx_pool;
+static struct bio_set f2fs_bioset;
+
+#define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE
+
+int __init f2fs_init_bioset(void)
+{
+ if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
+ 0, BIOSET_NEED_BVECS))
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_destroy_bioset(void)
+{
+ bioset_exit(&f2fs_bioset);
+}
+
+static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask,
+ unsigned int nr_iovecs)
+{
+ return bio_alloc_bioset(gfp_mask, nr_iovecs, &f2fs_bioset);
+}
+
+struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio)
+{
+ if (noio) {
+ /* No failure on bio allocation */
+ return __f2fs_bio_alloc(GFP_NOIO, npages);
+ }
+
+ if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
+ f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO);
+ return NULL;
+ }
+
+ return __f2fs_bio_alloc(GFP_KERNEL, npages);
+}
static bool __is_cp_guaranteed(struct page *page)
{
@@ -40,11 +80,14 @@
if (!mapping)
return false;
+ if (f2fs_is_compressed_page(page))
+ return false;
+
inode = mapping->host;
sbi = F2FS_I_SB(inode);
if (inode->i_ino == F2FS_META_INO(sbi) ||
- inode->i_ino == F2FS_NODE_INO(sbi) ||
+ inode->i_ino == F2FS_NODE_INO(sbi) ||
S_ISDIR(inode->i_mode) ||
(S_ISREG(inode->i_mode) &&
(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
@@ -72,19 +115,20 @@
/* postprocessing steps for read bios */
enum bio_post_read_step {
- STEP_INITIAL = 0,
STEP_DECRYPT,
+ STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */
+ STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */
STEP_VERITY,
};
struct bio_post_read_ctx {
struct bio *bio;
+ struct f2fs_sb_info *sbi;
struct work_struct work;
- unsigned int cur_step;
unsigned int enabled_steps;
};
-static void __read_end_io(struct bio *bio)
+static void __read_end_io(struct bio *bio, bool compr, bool verity)
{
struct page *page;
struct bio_vec *bv;
@@ -93,6 +137,15 @@
bio_for_each_segment_all(bv, bio, iter_all) {
page = bv->bv_page;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (compr && f2fs_is_compressed_page(page)) {
+ f2fs_decompress_pages(bio, page, verity);
+ continue;
+ }
+ if (verity)
+ continue;
+#endif
+
/* PG_error was set if any post_read step failed */
if (bio->bi_status || PageError(page)) {
ClearPageUptodate(page);
@@ -104,31 +157,133 @@
dec_page_count(F2FS_P_SB(page), __read_io_type(page));
unlock_page(page);
}
- if (bio->bi_private)
- mempool_free(bio->bi_private, bio_post_read_ctx_pool);
- bio_put(bio);
+}
+
+static void f2fs_release_read_bio(struct bio *bio);
+static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity)
+{
+ if (!compr)
+ __read_end_io(bio, false, verity);
+ f2fs_release_read_bio(bio);
+}
+
+static void f2fs_decompress_bio(struct bio *bio, bool verity)
+{
+ __read_end_io(bio, true, verity);
}
static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
-static void decrypt_work(struct work_struct *work)
+static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx)
{
- struct bio_post_read_ctx *ctx =
- container_of(work, struct bio_post_read_ctx, work);
-
fscrypt_decrypt_bio(ctx->bio);
-
- bio_post_read_processing(ctx);
}
-static void verity_work(struct work_struct *work)
+static void f2fs_decompress_work(struct bio_post_read_ctx *ctx)
+{
+ f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY));
+}
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
+{
+ f2fs_decompress_end_io(rpages, cluster_size, false, true);
+}
+
+static void f2fs_verify_bio(struct bio *bio)
+{
+ struct bio_vec *bv;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bv, bio, iter_all) {
+ struct page *page = bv->bv_page;
+ struct decompress_io_ctx *dic;
+
+ dic = (struct decompress_io_ctx *)page_private(page);
+
+ if (dic) {
+ if (atomic_dec_return(&dic->verity_pages))
+ continue;
+ f2fs_verify_pages(dic->rpages,
+ dic->cluster_size);
+ f2fs_free_dic(dic);
+ continue;
+ }
+
+ if (bio->bi_status || PageError(page))
+ goto clear_uptodate;
+
+ if (fsverity_verify_page(page)) {
+ SetPageUptodate(page);
+ goto unlock;
+ }
+clear_uptodate:
+ ClearPageUptodate(page);
+ ClearPageError(page);
+unlock:
+ dec_page_count(F2FS_P_SB(page), __read_io_type(page));
+ unlock_page(page);
+ }
+}
+#endif
+
+static void f2fs_verity_work(struct work_struct *work)
+{
+ struct bio_post_read_ctx *ctx =
+ container_of(work, struct bio_post_read_ctx, work);
+ struct bio *bio = ctx->bio;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ unsigned int enabled_steps = ctx->enabled_steps;
+#endif
+
+ /*
+ * fsverity_verify_bio() may call readpages() again, and while verity
+ * will be disabled for this, decryption may still be needed, resulting
+ * in another bio_post_read_ctx being allocated. So to prevent
+ * deadlocks we need to release the current ctx to the mempool first.
+ * This assumes that verity is the last post-read step.
+ */
+ mempool_free(ctx, bio_post_read_ctx_pool);
+ bio->bi_private = NULL;
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ /* previous step is decompression */
+ if (enabled_steps & (1 << STEP_DECOMPRESS)) {
+ f2fs_verify_bio(bio);
+ f2fs_release_read_bio(bio);
+ return;
+ }
+#endif
+
+ fsverity_verify_bio(bio);
+ __f2fs_read_end_io(bio, false, false);
+}
+
+static void f2fs_post_read_work(struct work_struct *work)
{
struct bio_post_read_ctx *ctx =
container_of(work, struct bio_post_read_ctx, work);
- fsverity_verify_bio(ctx->bio);
+ if (ctx->enabled_steps & (1 << STEP_DECRYPT))
+ f2fs_decrypt_work(ctx);
- bio_post_read_processing(ctx);
+ if (ctx->enabled_steps & (1 << STEP_DECOMPRESS))
+ f2fs_decompress_work(ctx);
+
+ if (ctx->enabled_steps & (1 << STEP_VERITY)) {
+ INIT_WORK(&ctx->work, f2fs_verity_work);
+ fsverity_enqueue_verify_work(&ctx->work);
+ return;
+ }
+
+ __f2fs_read_end_io(ctx->bio,
+ ctx->enabled_steps & (1 << STEP_DECOMPRESS), false);
+}
+
+static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi,
+ struct work_struct *work)
+{
+ queue_work(sbi->post_read_wq, work);
}
static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
@@ -138,31 +293,26 @@
* verity may require reading metadata pages that need decryption, and
* we shouldn't recurse to the same workqueue.
*/
- switch (++ctx->cur_step) {
- case STEP_DECRYPT:
- if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
- INIT_WORK(&ctx->work, decrypt_work);
- fscrypt_enqueue_decrypt_work(&ctx->work);
- return;
- }
- ctx->cur_step++;
- /* fall-through */
- case STEP_VERITY:
- if (ctx->enabled_steps & (1 << STEP_VERITY)) {
- INIT_WORK(&ctx->work, verity_work);
- fsverity_enqueue_verify_work(&ctx->work);
- return;
- }
- ctx->cur_step++;
- /* fall-through */
- default:
- __read_end_io(ctx->bio);
+
+ if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
+ ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
+ INIT_WORK(&ctx->work, f2fs_post_read_work);
+ f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
+ return;
}
+
+ if (ctx->enabled_steps & (1 << STEP_VERITY)) {
+ INIT_WORK(&ctx->work, f2fs_verity_work);
+ fsverity_enqueue_verify_work(&ctx->work);
+ return;
+ }
+
+ __f2fs_read_end_io(ctx->bio, false, false);
}
static bool f2fs_bio_post_read_required(struct bio *bio)
{
- return bio->bi_private && !bio->bi_status;
+ return bio->bi_private;
}
static void f2fs_read_end_io(struct bio *bio)
@@ -177,12 +327,11 @@
if (f2fs_bio_post_read_required(bio)) {
struct bio_post_read_ctx *ctx = bio->bi_private;
- ctx->cur_step = STEP_INITIAL;
bio_post_read_processing(ctx);
return;
}
- __read_end_io(bio);
+ __f2fs_read_end_io(bio, false, false);
}
static void f2fs_write_end_io(struct bio *bio)
@@ -213,6 +362,13 @@
fscrypt_finalize_bounce_page(&page);
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (f2fs_is_compressed_page(page)) {
+ f2fs_compress_write_end_io(bio, page);
+ continue;
+ }
+#endif
+
if (unlikely(bio->bi_status)) {
mapping_set_error(page->mapping, -EIO);
if (type == F2FS_WB_CP_DATA)
@@ -235,9 +391,6 @@
bio_put(bio);
}
-/*
- * Return true, if pre_bio's bdev is same as its target device.
- */
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio)
{
@@ -274,6 +427,9 @@
return 0;
}
+/*
+ * Return true, if pre_bio's bdev is same as its target device.
+ */
static bool __same_bdev(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio)
{
@@ -281,9 +437,6 @@
return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}
-/*
- * Low-level block read/write IO operations.
- */
static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
struct f2fs_sb_info *sbi = fio->sbi;
@@ -307,6 +460,33 @@
return bio;
}
+static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
+ pgoff_t first_idx,
+ const struct f2fs_io_info *fio,
+ gfp_t gfp_mask)
+{
+ /*
+ * The f2fs garbage collector sets ->encrypted_page when it wants to
+ * read/write raw data without encryption.
+ */
+ if (!fio || !fio->encrypted_page)
+ fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
+}
+
+static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
+ pgoff_t next_idx,
+ const struct f2fs_io_info *fio)
+{
+ /*
+ * The f2fs garbage collector sets ->encrypted_page when it wants to
+ * read/write raw data without encryption.
+ */
+ if (fio && fio->encrypted_page)
+ return !bio_has_crypt_ctx(bio);
+
+ return fscrypt_mergeable_bio(bio, inode, next_idx);
+}
+
static inline void __submit_bio(struct f2fs_sb_info *sbi,
struct bio *bio, enum page_type type)
{
@@ -316,7 +496,7 @@
if (type != DATA && type != NODE)
goto submit_io;
- if (test_opt(sbi, LFS) && current->plug)
+ if (f2fs_lfs_mode(sbi) && current->plug)
blk_finish_plug(current->plug);
if (!F2FS_IO_ALIGNED(sbi))
@@ -337,7 +517,7 @@
zero_user_segment(page, 0, PAGE_SIZE);
SetPagePrivate(page);
- set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
+ set_page_private(page, DUMMY_WRITTEN_PAGE);
lock_page(page);
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
f2fs_bug_on(sbi, 1);
@@ -357,6 +537,40 @@
submit_bio(bio);
}
+void f2fs_submit_bio(struct f2fs_sb_info *sbi,
+ struct bio *bio, enum page_type type)
+{
+ __submit_bio(sbi, bio, type);
+}
+
+static void __attach_io_flag(struct f2fs_io_info *fio)
+{
+ struct f2fs_sb_info *sbi = fio->sbi;
+ unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
+ unsigned int io_flag, fua_flag, meta_flag;
+
+ if (fio->type == DATA)
+ io_flag = sbi->data_io_flag;
+ else if (fio->type == NODE)
+ io_flag = sbi->node_io_flag;
+ else
+ return;
+
+ fua_flag = io_flag & temp_mask;
+ meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
+
+ /*
+ * data/node io flag bits per temp:
+ * REQ_META | REQ_FUA |
+ * 5 | 4 | 3 | 2 | 1 | 0 |
+ * Cold | Warm | Hot | Cold | Warm | Hot |
+ */
+ if ((1 << fio->temp) & meta_flag)
+ fio->op_flags |= REQ_META;
+ if ((1 << fio->temp) & fua_flag)
+ fio->op_flags |= REQ_FUA;
+}
+
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
struct f2fs_io_info *fio = &io->fio;
@@ -364,6 +578,7 @@
if (!io->bio)
return;
+ __attach_io_flag(fio);
bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
if (is_read_io(fio->op))
@@ -379,7 +594,6 @@
struct page *page, nid_t ino)
{
struct bio_vec *bvec;
- struct page *target;
struct bvec_iter_all iter_all;
if (!bio)
@@ -389,10 +603,18 @@
return true;
bio_for_each_segment_all(bvec, bio, iter_all) {
+ struct page *target = bvec->bv_page;
- target = bvec->bv_page;
- if (fscrypt_is_bounce_page(target))
+ if (fscrypt_is_bounce_page(target)) {
target = fscrypt_pagecache_page(target);
+ if (IS_ERR(target))
+ continue;
+ }
+ if (f2fs_is_compressed_page(target)) {
+ target = f2fs_compress_control_page(target);
+ if (IS_ERR(target))
+ continue;
+ }
if (inode && inode == target->mapping->host)
return true;
@@ -490,6 +712,9 @@
/* Allocate a new bio */
bio = __bio_alloc(fio, 1);
+ f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
+ fio->page->index, fio, GFP_NOIO);
+
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
return -EFAULT;
@@ -498,6 +723,7 @@
if (fio->io_wbc && !is_read_io(fio->op))
wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
+ __attach_io_flag(fio);
bio_set_op_attrs(bio, fio->op, fio->op_flags);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
@@ -544,6 +770,134 @@
return io_type_is_mergeable(io, fio);
}
+static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
+ struct page *page, enum temp_type temp)
+{
+ struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
+ struct bio_entry *be;
+
+ be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
+ be->bio = bio;
+ bio_get(bio);
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
+ f2fs_bug_on(sbi, 1);
+
+ down_write(&io->bio_list_lock);
+ list_add_tail(&be->list, &io->bio_list);
+ up_write(&io->bio_list_lock);
+}
+
+static void del_bio_entry(struct bio_entry *be)
+{
+ list_del(&be->list);
+ kmem_cache_free(bio_entry_slab, be);
+}
+
+static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
+ struct page *page)
+{
+ struct f2fs_sb_info *sbi = fio->sbi;
+ enum temp_type temp;
+ bool found = false;
+ int ret = -EAGAIN;
+
+ for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
+ struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
+ struct list_head *head = &io->bio_list;
+ struct bio_entry *be;
+
+ down_write(&io->bio_list_lock);
+ list_for_each_entry(be, head, list) {
+ if (be->bio != *bio)
+ continue;
+
+ found = true;
+
+ f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
+ *fio->last_block,
+ fio->new_blkaddr));
+ if (f2fs_crypt_mergeable_bio(*bio,
+ fio->page->mapping->host,
+ fio->page->index, fio) &&
+ bio_add_page(*bio, page, PAGE_SIZE, 0) ==
+ PAGE_SIZE) {
+ ret = 0;
+ break;
+ }
+
+ /* page can't be merged into bio; submit the bio */
+ del_bio_entry(be);
+ __submit_bio(sbi, *bio, DATA);
+ break;
+ }
+ up_write(&io->bio_list_lock);
+ }
+
+ if (ret) {
+ bio_put(*bio);
+ *bio = NULL;
+ }
+
+ return ret;
+}
+
+void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
+ struct bio **bio, struct page *page)
+{
+ enum temp_type temp;
+ bool found = false;
+ struct bio *target = bio ? *bio : NULL;
+
+ for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
+ struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
+ struct list_head *head = &io->bio_list;
+ struct bio_entry *be;
+
+ if (list_empty(head))
+ continue;
+
+ down_read(&io->bio_list_lock);
+ list_for_each_entry(be, head, list) {
+ if (target)
+ found = (target == be->bio);
+ else
+ found = __has_merged_page(be->bio, NULL,
+ page, 0);
+ if (found)
+ break;
+ }
+ up_read(&io->bio_list_lock);
+
+ if (!found)
+ continue;
+
+ found = false;
+
+ down_write(&io->bio_list_lock);
+ list_for_each_entry(be, head, list) {
+ if (target)
+ found = (target == be->bio);
+ else
+ found = __has_merged_page(be->bio, NULL,
+ page, 0);
+ if (found) {
+ target = be->bio;
+ del_bio_entry(be);
+ break;
+ }
+ }
+ up_write(&io->bio_list_lock);
+ }
+
+ if (found)
+ __submit_bio(sbi, target, DATA);
+ if (bio && *bio) {
+ bio_put(*bio);
+ *bio = NULL;
+ }
+}
+
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio = *fio->bio;
@@ -558,20 +912,20 @@
f2fs_trace_ios(fio, 0);
if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
- fio->new_blkaddr)) {
- __submit_bio(fio->sbi, bio, fio->type);
- bio = NULL;
- }
+ fio->new_blkaddr))
+ f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
alloc_new:
if (!bio) {
bio = __bio_alloc(fio, BIO_MAX_PAGES);
+ __attach_io_flag(fio);
+ f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
+ fio->page->index, fio, GFP_NOIO);
bio_set_op_attrs(bio, fio->op, fio->op_flags);
- }
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
- __submit_bio(fio->sbi, bio, fio->type);
- bio = NULL;
- goto alloc_new;
+ add_bio_entry(fio->sbi, bio, page, fio->temp);
+ } else {
+ if (add_ipu_page(fio, &bio, page))
+ goto alloc_new;
}
if (fio->io_wbc)
@@ -585,19 +939,6 @@
return 0;
}
-static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio,
- struct page *page)
-{
- if (!bio)
- return;
-
- if (!__has_merged_page(*bio, NULL, page, 0))
- return;
-
- __submit_bio(sbi, *bio, DATA);
- *bio = NULL;
-}
-
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
@@ -623,15 +964,23 @@
verify_fio_blkaddr(fio);
- bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+ if (fio->encrypted_page)
+ bio_page = fio->encrypted_page;
+ else if (fio->compressed_page)
+ bio_page = fio->compressed_page;
+ else
+ bio_page = fio->page;
/* set submitted = true as a return value */
fio->submitted = true;
inc_page_count(sbi, WB_DATA_TYPE(bio_page));
- if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio,
- io->last_block_in_bio, fio->new_blkaddr))
+ if (io->bio &&
+ (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
+ fio->new_blkaddr) ||
+ !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
+ bio_page->index, fio)))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
@@ -643,6 +992,8 @@
goto skip;
}
io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
+ f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
+ bio_page->index, fio, GFP_NOIO);
io->fio = *fio;
}
@@ -676,33 +1027,37 @@
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
unsigned nr_pages, unsigned op_flag,
- pgoff_t first_idx)
+ pgoff_t first_idx, bool for_write,
+ bool for_verity)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
struct bio_post_read_ctx *ctx;
unsigned int post_read_steps = 0;
- bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
+ bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES),
+ for_write);
if (!bio)
return ERR_PTR(-ENOMEM);
+
+ f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
+
f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
- if (f2fs_encrypted_file(inode))
+ if (fscrypt_inode_uses_fs_layer_crypto(inode))
post_read_steps |= 1 << STEP_DECRYPT;
-
- if (f2fs_need_verity(inode, first_idx))
+ if (f2fs_compressed_file(inode))
+ post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
+ if (for_verity && f2fs_need_verity(inode, first_idx))
post_read_steps |= 1 << STEP_VERITY;
if (post_read_steps) {
+ /* Due to the mempool, this never fails. */
ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
- if (!ctx) {
- bio_put(bio);
- return ERR_PTR(-ENOMEM);
- }
ctx->bio = bio;
+ ctx->sbi = sbi;
ctx->enabled_steps = post_read_steps;
bio->bi_private = ctx;
}
@@ -710,14 +1065,22 @@
return bio;
}
+static void f2fs_release_read_bio(struct bio *bio)
+{
+ if (bio->bi_private)
+ mempool_free(bio->bi_private, bio_post_read_ctx_pool);
+ bio_put(bio);
+}
+
/* This can handle encryption stuffs */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
- block_t blkaddr)
+ block_t blkaddr, int op_flags, bool for_write)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
- bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index);
+ bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
+ page->index, for_write, true);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -730,6 +1093,7 @@
}
ClearPageError(page);
inc_page_count(sbi, F2FS_RD_DATA);
+ f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
__submit_bio(sbi, bio, DATA);
return 0;
}
@@ -789,8 +1153,7 @@
f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
for (; count > 0; dn->ofs_in_node++) {
- block_t blkaddr = datablock_addr(dn->inode,
- dn->node_page, dn->ofs_in_node);
+ block_t blkaddr = f2fs_data_blkaddr(dn);
if (blkaddr == NULL_ADDR) {
dn->data_blkaddr = NEW_ADDR;
__set_data_blkaddr(dn);
@@ -832,7 +1195,7 @@
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
- struct extent_info ei = {0,0,0};
+ struct extent_info ei = {0, 0, 0};
struct inode *inode = dn->inode;
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
@@ -904,7 +1267,8 @@
return page;
}
- err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
+ err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
+ op_flags, for_write);
if (err)
goto put_err;
return page;
@@ -1042,8 +1406,7 @@
if (err)
return err;
- dn->data_blkaddr = datablock_addr(dn->inode,
- dn->node_page, dn->ofs_in_node);
+ dn->data_blkaddr = f2fs_data_blkaddr(dn);
if (dn->data_blkaddr != NULL_ADDR)
goto alloc;
@@ -1054,7 +1417,7 @@
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
old_blkaddr = dn->data_blkaddr;
f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
- &sum, seg_type, NULL, false);
+ &sum, seg_type, NULL);
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
@@ -1114,7 +1477,7 @@
return err;
}
-void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (lock)
@@ -1130,13 +1493,9 @@
}
/*
- * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
- * f2fs_map_blocks structure.
- * If original data blocks are allocated, then give them to blockdev.
- * Otherwise,
- * a. preallocate requested block addresses
- * b. do not use extent cache for better performance
- * c. give the block addresses to blockdev
+ * f2fs_map_blocks() tries to find or build mapping relationship which
+ * maps continuous logical blocks to physical blocks, and return such
+ * info via f2fs_map_blocks structure.
*/
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag)
@@ -1164,7 +1523,7 @@
end = pgofs + maxblocks;
if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
- if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+ if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
map->m_may_create)
goto next_dnode;
@@ -1183,7 +1542,7 @@
next_dnode:
if (map->m_may_create)
- __do_map_lock(sbi, flag, true);
+ f2fs_do_map_lock(sbi, flag, true);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -1223,7 +1582,7 @@
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
next_block:
- blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
+ blkaddr = f2fs_data_blkaddr(&dn);
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
@@ -1233,7 +1592,7 @@
if (__is_valid_data_blkaddr(blkaddr)) {
/* use out-place-update for driect IO under LFS mode */
- if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+ if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
map->m_may_create) {
err = __allocate_data_block(&dn, map->m_seg_type);
if (err)
@@ -1346,7 +1705,7 @@
f2fs_put_dnode(&dn);
if (map->m_may_create) {
- __do_map_lock(sbi, flag, false);
+ f2fs_do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
goto next_dnode;
@@ -1372,7 +1731,7 @@
f2fs_put_dnode(&dn);
unlock_out:
if (map->m_may_create) {
- __do_map_lock(sbi, flag, false);
+ f2fs_do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
out:
@@ -1459,10 +1818,6 @@
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
- /* Block number less than F2FS MAX BLOCKS */
- if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
- return -EFBIG;
-
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP, NULL,
NO_CHECK_TYPE, create);
@@ -1519,6 +1874,7 @@
flags |= FIEMAP_EXTENT_LAST;
err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
+ trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
if (err || err == 1)
return err;
}
@@ -1542,12 +1898,33 @@
flags = FIEMAP_EXTENT_LAST;
}
- if (phys)
+ if (phys) {
err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
+ trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
+ }
return (err < 0 ? err : 0);
}
+static loff_t max_inode_blocks(struct inode *inode)
+{
+ loff_t result = ADDRS_PER_INODE(inode);
+ loff_t leaf_count = ADDRS_PER_BLOCK(inode);
+
+ /* two direct node blocks */
+ result += (leaf_count * 2);
+
+ /* two indirect node blocks */
+ leaf_count *= NIDS_PER_BLOCK;
+ result += (leaf_count * 2);
+
+ /* one double indirect node block */
+ leaf_count *= NIDS_PER_BLOCK;
+ result += leaf_count;
+
+ return result;
+}
+
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
@@ -1557,6 +1934,8 @@
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
int ret = 0;
+ bool compr_cluster = false;
+ unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
ret = f2fs_precache_extents(inode);
@@ -1564,7 +1943,7 @@
return ret;
}
- ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
+ ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
if (ret)
return ret;
@@ -1591,6 +1970,9 @@
memset(&map_bh, 0, sizeof(struct buffer_head));
map_bh.b_size = len;
+ if (compr_cluster)
+ map_bh.b_size = blk_to_logical(inode, cluster_size - 1);
+
ret = get_data_block(inode, start_blk, &map_bh, 0,
F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
if (ret)
@@ -1601,7 +1983,7 @@
start_blk = next_pgofs;
if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
- F2FS_I_SB(inode)->max_file_blocks))
+ max_inode_blocks(inode)))
goto prep_next;
flags |= FIEMAP_EXTENT_LAST;
@@ -1613,11 +1995,39 @@
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size, flags);
+ trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
+ if (ret)
+ goto out;
+ size = 0;
}
- if (start_blk > last_blk || ret)
+ if (start_blk > last_blk)
goto out;
+ if (compr_cluster) {
+ compr_cluster = false;
+
+
+ logical = blk_to_logical(inode, start_blk - 1);
+ phys = blk_to_logical(inode, map_bh.b_blocknr);
+ size = blk_to_logical(inode, cluster_size);
+
+ flags |= FIEMAP_EXTENT_ENCODED;
+
+ start_blk += cluster_size - 1;
+
+ if (start_blk > last_blk)
+ goto out;
+
+ goto prep_next;
+ }
+
+ if (map_bh.b_blocknr == COMPRESS_ADDR) {
+ compr_cluster = true;
+ start_blk++;
+ goto prep_next;
+ }
+
logical = blk_to_logical(inode, start_blk);
phys = blk_to_logical(inode, map_bh.b_blocknr);
size = map_bh.b_size;
@@ -1728,15 +2138,17 @@
* This page will go to BIO. Do we need to send this
* BIO off first?
*/
- if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio,
- *last_block_in_bio, block_nr)) {
+ if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
+ *last_block_in_bio, block_nr) ||
+ !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
submit_and_realloc:
__submit_bio(F2FS_I_SB(inode), bio, DATA);
bio = NULL;
}
if (bio == NULL) {
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
- is_readahead ? REQ_RAHEAD : 0, page->index);
+ is_readahead ? REQ_RAHEAD : 0, page->index,
+ false, true);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
bio = NULL;
@@ -1754,6 +2166,7 @@
goto submit_and_realloc;
inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
+ f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
ClearPageError(page);
*last_block_in_bio = block_nr;
goto out;
@@ -1768,6 +2181,189 @@
return ret;
}
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
+ unsigned nr_pages, sector_t *last_block_in_bio,
+ bool is_readahead, bool for_write)
+{
+ struct dnode_of_data dn;
+ struct inode *inode = cc->inode;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct bio *bio = *bio_ret;
+ unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
+ sector_t last_block_in_file;
+ const unsigned blkbits = inode->i_blkbits;
+ const unsigned blocksize = 1 << blkbits;
+ struct decompress_io_ctx *dic = NULL;
+ struct bio_post_read_ctx *ctx;
+ bool for_verity = false;
+ int i;
+ int ret = 0;
+
+ f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
+
+ last_block_in_file = (f2fs_readpage_limit(inode) +
+ blocksize - 1) >> blkbits;
+
+ /* get rid of pages beyond EOF */
+ for (i = 0; i < cc->cluster_size; i++) {
+ struct page *page = cc->rpages[i];
+
+ if (!page)
+ continue;
+ if ((sector_t)page->index >= last_block_in_file) {
+ zero_user_segment(page, 0, PAGE_SIZE);
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+ } else if (!PageUptodate(page)) {
+ continue;
+ }
+ unlock_page(page);
+ if (for_write)
+ put_page(page);
+ cc->rpages[i] = NULL;
+ cc->nr_rpages--;
+ }
+
+ /* we are done since all pages are beyond EOF */
+ if (f2fs_cluster_is_empty(cc))
+ goto out;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
+ if (ret)
+ goto out;
+
+ f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
+
+ for (i = 1; i < cc->cluster_size; i++) {
+ block_t blkaddr;
+
+ blkaddr = data_blkaddr(dn.inode, dn.node_page,
+ dn.ofs_in_node + i);
+
+ if (!__is_valid_data_blkaddr(blkaddr))
+ break;
+
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
+ ret = -EFAULT;
+ goto out_put_dnode;
+ }
+ cc->nr_cpages++;
+ }
+
+ /* nothing to decompress */
+ if (cc->nr_cpages == 0) {
+ ret = 0;
+ goto out_put_dnode;
+ }
+
+ dic = f2fs_alloc_dic(cc);
+ if (IS_ERR(dic)) {
+ ret = PTR_ERR(dic);
+ goto out_put_dnode;
+ }
+
+ /*
+ * It's possible to enable fsverity on the fly when handling a cluster,
+ * which requires complicated error handling. Instead of adding more
+ * complexity, let's give a rule where end_io post-processes fsverity
+ * per cluster. In order to do that, we need to submit bio, if previous
+ * bio sets a different post-process policy.
+ */
+ if (fsverity_active(cc->inode)) {
+ atomic_set(&dic->verity_pages, cc->nr_cpages);
+ for_verity = true;
+
+ if (bio) {
+ ctx = bio->bi_private;
+ if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
+ __submit_bio(sbi, bio, DATA);
+ bio = NULL;
+ }
+ }
+ }
+
+ for (i = 0; i < dic->nr_cpages; i++) {
+ struct page *page = dic->cpages[i];
+ block_t blkaddr;
+
+ blkaddr = data_blkaddr(dn.inode, dn.node_page,
+ dn.ofs_in_node + i + 1);
+
+ if (bio && (!page_is_mergeable(sbi, bio,
+ *last_block_in_bio, blkaddr) ||
+ !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
+submit_and_realloc:
+ __submit_bio(sbi, bio, DATA);
+ bio = NULL;
+ }
+
+ if (!bio) {
+ bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
+ is_readahead ? REQ_RAHEAD : 0,
+ page->index, for_write, for_verity);
+ if (IS_ERR(bio)) {
+ unsigned int remained = dic->nr_cpages - i;
+ bool release = false;
+
+ ret = PTR_ERR(bio);
+ dic->failed = true;
+
+ if (for_verity) {
+ if (!atomic_sub_return(remained,
+ &dic->verity_pages))
+ release = true;
+ } else {
+ if (!atomic_sub_return(remained,
+ &dic->pending_pages))
+ release = true;
+ }
+
+ if (release) {
+ f2fs_decompress_end_io(dic->rpages,
+ cc->cluster_size, true,
+ false);
+ f2fs_free_dic(dic);
+ }
+
+ f2fs_put_dnode(&dn);
+ *bio_ret = NULL;
+ return ret;
+ }
+ }
+
+ f2fs_wait_on_block_writeback(inode, blkaddr);
+
+ if (bio_add_page(bio, page, blocksize, 0) < blocksize)
+ goto submit_and_realloc;
+
+ /* tag STEP_DECOMPRESS to handle IO in wq */
+ ctx = bio->bi_private;
+ if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
+ ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
+
+ inc_page_count(sbi, F2FS_RD_DATA);
+ f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
+ ClearPageError(page);
+ *last_block_in_bio = blkaddr;
+ }
+
+ f2fs_put_dnode(&dn);
+
+ *bio_ret = bio;
+ return 0;
+
+out_put_dnode:
+ f2fs_put_dnode(&dn);
+out:
+ f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false);
+ *bio_ret = bio;
+ return ret;
+}
+#endif
+
/*
* This function was originally taken from fs/mpage.c, and customized for f2fs.
* Major change was from block_size == page_size in f2fs by default.
@@ -1777,15 +2373,28 @@
* use ->readpage() or do the necessary surgery to decouple ->readpages()
* from read-ahead.
*/
-static int f2fs_mpage_readpages(struct address_space *mapping,
- struct list_head *pages, struct page *page,
- unsigned nr_pages, bool is_readahead)
+static int f2fs_mpage_readpages(struct inode *inode,
+ struct readahead_control *rac, struct page *page)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
- struct inode *inode = mapping->host;
struct f2fs_map_blocks map;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ struct compress_ctx cc = {
+ .inode = inode,
+ .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
+ .cluster_size = F2FS_I(inode)->i_cluster_size,
+ .cluster_idx = NULL_CLUSTER,
+ .rpages = NULL,
+ .cpages = NULL,
+ .nr_rpages = 0,
+ .nr_cpages = 0,
+ };
+#endif
+ unsigned nr_pages = rac ? readahead_count(rac) : 1;
+ unsigned max_nr_pages = nr_pages;
int ret = 0;
+ bool drop_ra = false;
map.m_pblk = 0;
map.m_lblk = 0;
@@ -1796,33 +2405,92 @@
map.m_seg_type = NO_CHECK_TYPE;
map.m_may_create = false;
- for (; nr_pages; nr_pages--) {
- if (pages) {
- page = list_last_entry(pages, struct page, lru);
+ /*
+ * Two readahead threads for same address range can cause race condition
+ * which fragments sequential read IOs. So let's avoid each other.
+ */
+ if (rac && readahead_count(rac)) {
+ if (READ_ONCE(F2FS_I(inode)->ra_offset) == readahead_index(rac))
+ drop_ra = true;
+ else
+ WRITE_ONCE(F2FS_I(inode)->ra_offset,
+ readahead_index(rac));
+ }
+ for (; nr_pages; nr_pages--) {
+ if (rac) {
+ page = readahead_page(rac);
prefetchw(&page->flags);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping,
- page_index(page),
- readahead_gfp_mask(mapping)))
- goto next_page;
+ if (drop_ra) {
+ f2fs_put_page(page, 1);
+ continue;
+ }
}
- ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio,
- &last_block_in_bio, is_readahead);
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (f2fs_compressed_file(inode)) {
+ /* there are remained comressed pages, submit them */
+ if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
+ ret = f2fs_read_multi_pages(&cc, &bio,
+ max_nr_pages,
+ &last_block_in_bio,
+ rac != NULL, false);
+ f2fs_destroy_compress_ctx(&cc, false);
+ if (ret)
+ goto set_error_page;
+ }
+ ret = f2fs_is_compressed_cluster(inode, page->index);
+ if (ret < 0)
+ goto set_error_page;
+ else if (!ret)
+ goto read_single_page;
+
+ ret = f2fs_init_compress_ctx(&cc);
+ if (ret)
+ goto set_error_page;
+
+ f2fs_compress_ctx_add_page(&cc, page);
+
+ goto next_page;
+ }
+read_single_page:
+#endif
+
+ ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
+ &bio, &last_block_in_bio, rac);
if (ret) {
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+set_error_page:
+#endif
SetPageError(page);
zero_user_segment(page, 0, PAGE_SIZE);
unlock_page(page);
}
+#ifdef CONFIG_F2FS_FS_COMPRESSION
next_page:
- if (pages)
+#endif
+ if (rac)
put_page(page);
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (f2fs_compressed_file(inode)) {
+ /* last page */
+ if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
+ ret = f2fs_read_multi_pages(&cc, &bio,
+ max_nr_pages,
+ &last_block_in_bio,
+ rac != NULL, false);
+ f2fs_destroy_compress_ctx(&cc, false);
+ }
+ }
+#endif
}
- BUG_ON(pages && !list_empty(pages));
if (bio)
__submit_bio(F2FS_I_SB(inode), bio, DATA);
- return pages ? 0 : ret;
+
+ if (rac && readahead_count(rac) && !drop_ra)
+ WRITE_ONCE(F2FS_I(inode)->ra_offset, -1);
+ return ret;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
@@ -1832,52 +2500,60 @@
trace_f2fs_readpage(page, DATA);
+ if (!f2fs_is_compress_backend_ready(inode)) {
+ unlock_page(page);
+ return -EOPNOTSUPP;
+ }
+
/* If the file has inline data, try to read it directly */
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(page_file_mapping(page),
- NULL, page, 1, false);
+ ret = f2fs_mpage_readpages(inode, NULL, page);
return ret;
}
-static int f2fs_read_data_pages(struct file *file,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void f2fs_readahead(struct readahead_control *rac)
{
- struct inode *inode = mapping->host;
- struct page *page = list_last_entry(pages, struct page, lru);
+ struct inode *inode = rac->mapping->host;
- trace_f2fs_readpages(inode, page, nr_pages);
+ trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
+
+ if (!f2fs_is_compress_backend_ready(inode))
+ return;
/* If the file has inline data, skip readpages */
if (f2fs_has_inline_data(inode))
- return 0;
+ return;
- return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
+ f2fs_mpage_readpages(inode, rac, NULL);
}
-static int encrypt_one_page(struct f2fs_io_info *fio)
+int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
- struct page *mpage;
+ struct page *mpage, *page;
gfp_t gfp_flags = GFP_NOFS;
if (!f2fs_encrypted_file(inode))
return 0;
+ page = fio->compressed_page ? fio->compressed_page : fio->page;
+
/* wait for GCed page writeback via META_MAPPING */
f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
+ if (fscrypt_inode_uses_inline_crypto(inode))
+ return 0;
+
retry_encrypt:
- fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page,
- PAGE_SIZE, 0,
- gfp_flags);
+ fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
+ PAGE_SIZE, 0, gfp_flags);
if (IS_ERR(fio->encrypted_page)) {
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
f2fs_flush_merged_writes(fio->sbi);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
}
@@ -1948,7 +2624,7 @@
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- if (test_opt(sbi, LFS))
+ if (f2fs_lfs_mode(sbi))
return true;
if (S_ISDIR(inode->i_mode))
return true;
@@ -2032,7 +2708,7 @@
if (ipu_force ||
(__is_valid_data_blkaddr(fio->old_blkaddr) &&
need_inplace_update(fio))) {
- err = encrypt_one_page(fio);
+ err = f2fs_encrypt_one_page(fio);
if (err)
goto out_writepage;
@@ -2043,7 +2719,7 @@
f2fs_unlock_op(fio->sbi);
err = f2fs_inplace_write_data(fio);
if (err) {
- if (f2fs_encrypted_file(inode))
+ if (fscrypt_inode_uses_fs_layer_crypto(inode))
fscrypt_finalize_bounce_page(&fio->encrypted_page);
if (PageWriteback(page))
end_page_writeback(page);
@@ -2068,13 +2744,16 @@
fio->version = ni.version;
- err = encrypt_one_page(fio);
+ err = f2fs_encrypt_one_page(fio);
if (err)
goto out_writepage;
set_page_writeback(page);
ClearPageError(page);
+ if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
+ f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
+
/* LFS mode write path */
f2fs_outplace_write_data(&dn, fio);
trace_f2fs_do_write_data_page(page, OPU);
@@ -2089,16 +2768,18 @@
return err;
}
-static int __write_data_page(struct page *page, bool *submitted,
+int f2fs_write_single_data_page(struct page *page, int *submitted,
struct bio **bio,
sector_t *last_block,
struct writeback_control *wbc,
- enum iostat_type io_type)
+ enum iostat_type io_type,
+ int compr_blocks,
+ bool allow_balance)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
loff_t i_size = i_size_read(inode);
- const pgoff_t end_index = ((unsigned long long) i_size)
+ const pgoff_t end_index = ((unsigned long long)i_size)
>> PAGE_SHIFT;
loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
unsigned offset = 0;
@@ -2114,6 +2795,7 @@
.page = page,
.encrypted_page = NULL,
.submitted = false,
+ .compr_blocks = compr_blocks,
.need_lock = LOCK_RETRY,
.io_type = io_type,
.io_wbc = wbc,
@@ -2138,7 +2820,9 @@
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
- if (page->index < end_index || f2fs_verity_in_progress(inode))
+ if (page->index < end_index ||
+ f2fs_verity_in_progress(inode) ||
+ compr_blocks)
goto write;
/*
@@ -2159,10 +2843,22 @@
f2fs_available_free_memory(sbi, BASE_CHECK))))
goto redirty_out;
- /* Dentry blocks are controlled by checkpoint */
- if (S_ISDIR(inode->i_mode)) {
+ /* Dentry/quota blocks are controlled by checkpoint */
+ if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
+ /*
+ * We need to wait for node_write to avoid block allocation during
+ * checkpoint. This can only happen to quota writes which can cause
+ * the below discard race condition.
+ */
+ if (IS_NOQUOTA(inode))
+ down_read(&sbi->node_write);
+
fio.need_lock = LOCK_DONE;
err = f2fs_do_write_data_page(&fio);
+
+ if (IS_NOQUOTA(inode))
+ up_read(&sbi->node_write);
+
goto done;
}
@@ -2191,10 +2887,10 @@
if (err) {
file_set_keep_isize(inode);
} else {
- down_write(&F2FS_I(inode)->i_sem);
+ spin_lock(&F2FS_I(inode)->i_size_lock);
if (F2FS_I(inode)->last_disk_size < psize)
F2FS_I(inode)->last_disk_size = psize;
- up_write(&F2FS_I(inode)->i_sem);
+ spin_unlock(&F2FS_I(inode)->i_size_lock);
}
done:
@@ -2214,22 +2910,19 @@
f2fs_remove_dirty_inode(inode);
submitted = NULL;
}
-
unlock_page(page);
if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
- !F2FS_I(inode)->cp_task) {
- f2fs_submit_ipu_bio(sbi, bio, page);
+ !F2FS_I(inode)->cp_task && allow_balance)
f2fs_balance_fs(sbi, need_balance_fs);
- }
if (unlikely(f2fs_cp_error(sbi))) {
- f2fs_submit_ipu_bio(sbi, bio, page);
f2fs_submit_merged_write(sbi, DATA);
+ f2fs_submit_merged_ipu_write(sbi, bio, NULL);
submitted = NULL;
}
if (submitted)
- *submitted = fio.submitted;
+ *submitted = fio.submitted ? 1 : 0;
return 0;
@@ -2250,7 +2943,23 @@
static int f2fs_write_data_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO);
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ struct inode *inode = page->mapping->host;
+
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ goto out;
+
+ if (f2fs_compressed_file(inode)) {
+ if (f2fs_is_compressed_cluster(inode, page->index)) {
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
+ }
+ }
+out:
+#endif
+
+ return f2fs_write_single_data_page(page, NULL, NULL, NULL,
+ wbc, FS_DATA_IO, 0, true);
}
/*
@@ -2263,20 +2972,36 @@
enum iostat_type io_type)
{
int ret = 0;
- int done = 0;
+ int done = 0, retry = 0;
struct pagevec pvec;
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
struct bio *bio = NULL;
sector_t last_block;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ struct inode *inode = mapping->host;
+ struct compress_ctx cc = {
+ .inode = inode,
+ .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
+ .cluster_size = F2FS_I(inode)->i_cluster_size,
+ .cluster_idx = NULL_CLUSTER,
+ .rpages = NULL,
+ .nr_rpages = 0,
+ .cpages = NULL,
+ .rbuf = NULL,
+ .cbuf = NULL,
+ .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
+ .private = NULL,
+ };
+#endif
int nr_pages;
- pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
- int cycled;
int range_whole = 0;
xa_mark_t tag;
int nwritten = 0;
+ int submitted = 0;
+ int i;
pagevec_init(&pvec);
@@ -2287,31 +3012,24 @@
clear_inode_flag(mapping->host, FI_HOT_DATA);
if (wbc->range_cyclic) {
- writeback_index = mapping->writeback_index; /* prev offset */
- index = writeback_index;
- if (index == 0)
- cycled = 1;
- else
- cycled = 0;
+ index = mapping->writeback_index; /* prev offset */
end = -1;
} else {
index = wbc->range_start >> PAGE_SHIFT;
end = wbc->range_end >> PAGE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
- cycled = 1; /* ignore range_cyclic tests */
}
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
retry:
+ retry = 0;
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
done_index = index;
- while (!done && (index <= end)) {
- int i;
-
+ while (!done && !retry && (index <= end)) {
nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
tag);
if (nr_pages == 0)
@@ -2319,15 +3037,62 @@
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- bool submitted = false;
+ bool need_readd;
+readd:
+ need_readd = false;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (f2fs_compressed_file(inode)) {
+ ret = f2fs_init_compress_ctx(&cc);
+ if (ret) {
+ done = 1;
+ break;
+ }
+ if (!f2fs_cluster_can_merge_page(&cc,
+ page->index)) {
+ ret = f2fs_write_multi_pages(&cc,
+ &submitted, wbc, io_type);
+ if (!ret)
+ need_readd = true;
+ goto result;
+ }
+
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto lock_page;
+
+ if (f2fs_cluster_is_empty(&cc)) {
+ void *fsdata = NULL;
+ struct page *pagep;
+ int ret2;
+
+ ret2 = f2fs_prepare_compress_overwrite(
+ inode, &pagep,
+ page->index, &fsdata);
+ if (ret2 < 0) {
+ ret = ret2;
+ done = 1;
+ break;
+ } else if (ret2 &&
+ !f2fs_compress_write_end(inode,
+ fsdata, page->index,
+ 1)) {
+ retry = 1;
+ break;
+ }
+ } else {
+ goto lock_page;
+ }
+ }
+#endif
/* give a priority to WB_SYNC threads */
if (atomic_read(&sbi->wb_sync_req[DATA]) &&
wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
-
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+lock_page:
+#endif
done_index = page->index;
retry_write:
lock_page(page);
@@ -2344,62 +3109,90 @@
}
if (PageWriteback(page)) {
- if (wbc->sync_mode != WB_SYNC_NONE) {
+ if (wbc->sync_mode != WB_SYNC_NONE)
f2fs_wait_on_page_writeback(page,
DATA, true, true);
- f2fs_submit_ipu_bio(sbi, &bio, page);
- } else {
+ else
goto continue_unlock;
- }
}
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- ret = __write_data_page(page, &submitted, &bio,
- &last_block, wbc, io_type);
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (f2fs_compressed_file(inode)) {
+ get_page(page);
+ f2fs_compress_ctx_add_page(&cc, page);
+ continue;
+ }
+#endif
+ ret = f2fs_write_single_data_page(page, &submitted,
+ &bio, &last_block, wbc, io_type,
+ 0, true);
+ if (ret == AOP_WRITEPAGE_ACTIVATE)
+ unlock_page(page);
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+result:
+#endif
+ nwritten += submitted;
+ wbc->nr_to_write -= submitted;
+
if (unlikely(ret)) {
/*
* keep nr_to_write, since vfs uses this to
* get # of written pages.
*/
if (ret == AOP_WRITEPAGE_ACTIVATE) {
- unlock_page(page);
ret = 0;
- continue;
+ goto next;
} else if (ret == -EAGAIN) {
ret = 0;
if (wbc->sync_mode == WB_SYNC_ALL) {
cond_resched();
congestion_wait(BLK_RW_ASYNC,
- HZ/50);
+ DEFAULT_IO_TIMEOUT);
goto retry_write;
}
- continue;
+ goto next;
}
done_index = page->index + 1;
done = 1;
break;
- } else if (submitted) {
- nwritten++;
}
- if (--wbc->nr_to_write <= 0 &&
+ if (wbc->nr_to_write <= 0 &&
wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
+next:
+ if (need_readd)
+ goto readd;
}
pagevec_release(&pvec);
cond_resched();
}
-
- if (!cycled && !done) {
- cycled = 1;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ /* flush remained pages in compress cluster */
+ if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
+ ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
+ nwritten += submitted;
+ wbc->nr_to_write -= submitted;
+ if (ret) {
+ done = 1;
+ retry = 0;
+ }
+ }
+ if (f2fs_compressed_file(inode))
+ f2fs_destroy_compress_ctx(&cc, false);
+#endif
+ if (retry) {
index = 0;
- end = writeback_index - 1;
+ end = -1;
goto retry;
}
+ if (wbc->range_cyclic && !done)
+ done_index = 0;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
@@ -2408,7 +3201,7 @@
NULL, 0, DATA);
/* submit cached bio of IPU write */
if (bio)
- __submit_bio(sbi, bio, DATA);
+ f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
return ret;
}
@@ -2416,13 +3209,17 @@
static inline bool __should_serialize_io(struct inode *inode,
struct writeback_control *wbc)
{
+ /* to avoid deadlock in path of data flush */
+ if (F2FS_I(inode)->cp_task)
+ return false;
+
if (!S_ISREG(inode->i_mode))
return false;
if (IS_NOQUOTA(inode))
return false;
- /* to avoid deadlock in path of data flush */
- if (F2FS_I(inode)->cp_task)
- return false;
+
+ if (f2fs_compressed_file(inode))
+ return true;
if (wbc->sync_mode != WB_SYNC_ALL)
return true;
if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
@@ -2513,14 +3310,16 @@
struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);
+ if (IS_NOQUOTA(inode))
+ return;
+
/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
if (to > i_size && !f2fs_verity_in_progress(inode)) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size);
- if (!IS_NOQUOTA(inode))
- f2fs_truncate_blocks(inode, i_size, true);
+ f2fs_truncate_blocks(inode, i_size, true);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -2557,9 +3356,10 @@
if (f2fs_has_inline_data(inode) ||
(pos & PAGE_MASK) >= i_size_read(inode)) {
- __do_map_lock(sbi, flag, true);
+ f2fs_do_map_lock(sbi, flag, true);
locked = true;
}
+
restart:
/* check inline_data */
ipage = f2fs_get_node_page(sbi, inode->i_ino);
@@ -2593,7 +3393,7 @@
err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err || dn.data_blkaddr == NULL_ADDR) {
f2fs_put_dnode(&dn);
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
true);
WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
locked = true;
@@ -2609,7 +3409,7 @@
f2fs_put_dnode(&dn);
unlock_out:
if (locked)
- __do_map_lock(sbi, flag, false);
+ f2fs_do_map_lock(sbi, flag, false);
return err;
}
@@ -2650,6 +3450,24 @@
if (err)
goto fail;
}
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (f2fs_compressed_file(inode)) {
+ int ret;
+
+ *fsdata = NULL;
+
+ ret = f2fs_prepare_compress_overwrite(inode, pagep,
+ index, fsdata);
+ if (ret < 0) {
+ err = ret;
+ goto fail;
+ } else if (ret) {
+ return 0;
+ }
+ }
+#endif
+
repeat:
/*
* Do not use grab_cache_page_write_begin() to avoid deadlock due to
@@ -2662,6 +3480,8 @@
goto fail;
}
+ /* TODO: cluster can be compressed due to race with .writepage */
+
*pagep = page;
err = prepare_write_begin(sbi, page, pos, len,
@@ -2701,7 +3521,7 @@
err = -EFSCORRUPTED;
goto fail;
}
- err = f2fs_submit_page_read(inode, page, blkaddr);
+ err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
if (err)
goto fail;
@@ -2745,6 +3565,20 @@
else
SetPageUptodate(page);
}
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ /* overwrite compressed file */
+ if (f2fs_compressed_file(inode) && fsdata) {
+ f2fs_compress_write_end(inode, fsdata, page->index, copied);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+
+ if (pos + copied > i_size_read(inode) &&
+ !f2fs_verity_in_progress(inode))
+ f2fs_i_size_write(inode, pos + copied);
+ return copied;
+ }
+#endif
+
if (!copied)
goto unlock_out;
@@ -2792,7 +3626,7 @@
bio->bi_private = dio->orig_private;
bio->bi_end_io = dio->orig_end_io;
- kvfree(dio);
+ kfree(dio);
bio_endio(bio);
}
@@ -2875,7 +3709,8 @@
err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
iter, rw == WRITE ? get_data_block_dio_write :
get_data_block_dio, NULL, f2fs_dio_submit_bio,
- DIO_LOCKING | DIO_SKIP_HOLES);
+ rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
+ DIO_SKIP_HOLES);
if (do_opu)
up_read(&fi->i_gc_rwsem[READ]);
@@ -2890,9 +3725,18 @@
err);
if (!do_opu)
set_inode_flag(inode, FI_UPDATE_WRITE);
+ } else if (err == -EIOCBQUEUED) {
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
+ count - iov_iter_count(iter));
} else if (err < 0) {
f2fs_write_failed(mapping, offset + count);
}
+ } else {
+ if (err > 0)
+ f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
+ else if (err == -EIOCBQUEUED)
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
+ count - iov_iter_count(iter));
}
out:
@@ -2976,18 +3820,64 @@
return 0;
}
+
+static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
+{
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ struct dnode_of_data dn;
+ sector_t start_idx, blknr = 0;
+ int ret;
+
+ start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
+ if (ret)
+ return 0;
+
+ if (dn.data_blkaddr != COMPRESS_ADDR) {
+ dn.ofs_in_node += block - start_idx;
+ blknr = f2fs_data_blkaddr(&dn);
+ if (!__is_valid_data_blkaddr(blknr))
+ blknr = 0;
+ }
+
+ f2fs_put_dnode(&dn);
+ return blknr;
+#else
+ return 0;
+#endif
+}
+
+
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
+ struct buffer_head tmp = {
+ .b_size = i_blocksize(inode),
+ };
+ sector_t blknr = 0;
if (f2fs_has_inline_data(inode))
- return 0;
+ goto out;
/* make sure allocating whole blocks */
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
filemap_write_and_wait(mapping);
- return generic_block_bmap(mapping, block, get_data_block_bmap);
+ /* Block number less than F2FS MAX BLOCKS */
+ if (unlikely(block >= F2FS_I_SB(inode)->max_file_blocks))
+ goto out;
+
+ if (f2fs_compressed_file(inode)) {
+ blknr = f2fs_bmap_compress(inode, block);
+ } else {
+ if (!get_data_block_bmap(inode, block, &tmp, 0))
+ blknr = tmp.b_blocknr;
+ }
+out:
+ trace_f2fs_bmap(inode, block, blknr);
+ return blknr;
}
#ifdef CONFIG_MIGRATION
@@ -3047,6 +3937,83 @@
#endif
#ifdef CONFIG_SWAP
+static int check_swap_activate_fast(struct swap_info_struct *sis,
+ struct file *swap_file, sector_t *span)
+{
+ struct address_space *mapping = swap_file->f_mapping;
+ struct inode *inode = mapping->host;
+ sector_t cur_lblock;
+ sector_t last_lblock;
+ sector_t pblock;
+ sector_t lowest_pblock = -1;
+ sector_t highest_pblock = 0;
+ int nr_extents = 0;
+ unsigned long nr_pblocks;
+ unsigned long len;
+ int ret;
+
+ /*
+ * Map all the blocks into the extent list. This code doesn't try
+ * to be very smart.
+ */
+ cur_lblock = 0;
+ last_lblock = logical_to_blk(inode, i_size_read(inode));
+ len = i_size_read(inode);
+
+ while (cur_lblock <= last_lblock && cur_lblock < sis->max) {
+ struct buffer_head map_bh;
+ pgoff_t next_pgofs;
+
+ cond_resched();
+
+ memset(&map_bh, 0, sizeof(struct buffer_head));
+ map_bh.b_size = len - cur_lblock;
+
+ ret = get_data_block(inode, cur_lblock, &map_bh, 0,
+ F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
+ if (ret)
+ goto err_out;
+
+ /* hole */
+ if (!buffer_mapped(&map_bh))
+ goto err_out;
+
+ pblock = map_bh.b_blocknr;
+ nr_pblocks = logical_to_blk(inode, map_bh.b_size);
+
+ if (cur_lblock + nr_pblocks >= sis->max)
+ nr_pblocks = sis->max - cur_lblock;
+
+ if (cur_lblock) { /* exclude the header page */
+ if (pblock < lowest_pblock)
+ lowest_pblock = pblock;
+ if (pblock + nr_pblocks - 1 > highest_pblock)
+ highest_pblock = pblock + nr_pblocks - 1;
+ }
+
+ /*
+ * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
+ */
+ ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
+ if (ret < 0)
+ goto out;
+ nr_extents += ret;
+ cur_lblock += nr_pblocks;
+ }
+ ret = nr_extents;
+ *span = 1 + highest_pblock - lowest_pblock;
+ if (cur_lblock == 0)
+ cur_lblock = 1; /* force Empty message */
+ sis->max = cur_lblock;
+ sis->pages = cur_lblock - 1;
+ sis->highest_bit = cur_lblock - 1;
+out:
+ return ret;
+err_out:
+ pr_err("swapon: swapfile has holes\n");
+ return -EINVAL;
+}
+
/* Copied from generic_swapfile_activate() to check any holes */
static int check_swap_activate(struct swap_info_struct *sis,
struct file *swap_file, sector_t *span)
@@ -3063,6 +4030,9 @@
int nr_extents = 0;
int ret;
+ if (PAGE_SIZE == F2FS_BLKSIZE)
+ return check_swap_activate_fast(sis, swap_file, span);
+
blkbits = inode->i_blkbits;
blocks_per_page = PAGE_SIZE >> blkbits;
@@ -3077,12 +4047,16 @@
page_no < sis->max) {
unsigned block_in_page;
sector_t first_block;
+ sector_t block = 0;
+ int err = 0;
cond_resched();
- first_block = bmap(inode, probe_block);
- if (first_block == 0)
+ block = probe_block;
+ err = bmap(inode, &block);
+ if (err || !block)
goto bad_bmap;
+ first_block = block;
/*
* It must be PAGE_SIZE aligned on-disk
@@ -3094,11 +4068,13 @@
for (block_in_page = 1; block_in_page < blocks_per_page;
block_in_page++) {
- sector_t block;
- block = bmap(inode, probe_block + block_in_page);
- if (block == 0)
+ block = probe_block + block_in_page;
+ err = bmap(inode, &block);
+
+ if (err || !block)
goto bad_bmap;
+
if (block != first_block + block_in_page) {
/* Discontiguity */
probe_block++;
@@ -3152,10 +4128,19 @@
if (f2fs_readonly(F2FS_I_SB(inode)->sb))
return -EROFS;
+ if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
+ f2fs_err(F2FS_I_SB(inode),
+ "Swapfile not supported in LFS mode");
+ return -EINVAL;
+ }
+
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
+ if (!f2fs_disable_compressed_file(inode))
+ return -EINVAL;
+
ret = check_swap_activate(sis, file, span);
if (ret < 0)
return ret;
@@ -3186,7 +4171,7 @@
const struct address_space_operations f2fs_dblock_aops = {
.readpage = f2fs_read_data_page,
- .readpages = f2fs_read_data_pages,
+ .readahead = f2fs_readahead,
.writepage = f2fs_write_data_page,
.writepages = f2fs_write_data_pages,
.write_begin = f2fs_write_begin,
@@ -3234,8 +4219,43 @@
return -ENOMEM;
}
-void __exit f2fs_destroy_post_read_processing(void)
+void f2fs_destroy_post_read_processing(void)
{
mempool_destroy(bio_post_read_ctx_pool);
kmem_cache_destroy(bio_post_read_ctx_cache);
}
+
+int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
+{
+ if (!f2fs_sb_has_encrypt(sbi) &&
+ !f2fs_sb_has_verity(sbi) &&
+ !f2fs_sb_has_compression(sbi))
+ return 0;
+
+ sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
+ WQ_UNBOUND | WQ_HIGHPRI,
+ num_online_cpus());
+ if (!sbi->post_read_wq)
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
+{
+ if (sbi->post_read_wq)
+ destroy_workqueue(sbi->post_read_wq);
+}
+
+int __init f2fs_init_bio_entry_cache(void)
+{
+ bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
+ sizeof(struct bio_entry));
+ if (!bio_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_destroy_bio_entry_cache(void)
+{
+ kmem_cache_destroy(bio_entry_slab);
+}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index d8d6444..197c914 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -21,9 +21,45 @@
#include "gc.h"
static LIST_HEAD(f2fs_stat_list);
-static struct dentry *f2fs_debugfs_root;
static DEFINE_MUTEX(f2fs_stat_mutex);
+#ifdef CONFIG_DEBUG_FS
+static struct dentry *f2fs_debugfs_root;
+#endif
+/*
+ * This function calculates BDF of every segments
+ */
+void f2fs_update_sit_info(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ unsigned long long blks_per_sec, hblks_per_sec, total_vblocks;
+ unsigned long long bimodal, dist;
+ unsigned int segno, vblocks;
+ int ndirty = 0;
+
+ bimodal = 0;
+ total_vblocks = 0;
+ blks_per_sec = BLKS_PER_SEC(sbi);
+ hblks_per_sec = blks_per_sec / 2;
+ for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+ vblocks = get_valid_blocks(sbi, segno, true);
+ dist = abs(vblocks - hblks_per_sec);
+ bimodal += dist * dist;
+
+ if (vblocks > 0 && vblocks < blks_per_sec) {
+ total_vblocks += vblocks;
+ ndirty++;
+ }
+ }
+ dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100);
+ si->bimodal = div64_u64(bimodal, dist);
+ if (si->dirty_count)
+ si->avg_vblocks = div_u64(total_vblocks, ndirty);
+ else
+ si->avg_vblocks = 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
static void update_general_status(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
@@ -56,7 +92,7 @@
si->nquota_files = sbi->nquota_files;
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
- si->aw_cnt = atomic_read(&sbi->aw_cnt);
+ si->aw_cnt = sbi->atomic_files;
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
@@ -94,6 +130,8 @@
si->inline_xattr = atomic_read(&sbi->inline_xattr);
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
+ si->compr_inode = atomic_read(&sbi->compr_inode);
+ si->compr_blocks = atomic64_read(&sbi->compr_blocks);
si->append = sbi->im[APPEND_INO].ino_num;
si->update = sbi->im[UPDATE_INO].ino_num;
si->orphans = sbi->im[ORPHAN_INO].ino_num;
@@ -114,7 +152,6 @@
si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
si->avail_nids = NM_I(sbi)->available_nids;
si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
- si->bg_gc = sbi->bg_gc;
si->io_skip_bggc = sbi->io_skip_bggc;
si->other_skip_bggc = sbi->other_skip_bggc;
si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC];
@@ -127,7 +164,7 @@
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
si->util_invalid = 50 - si->util_free - si->util_valid;
- for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
+ for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
si->curseg[i] = curseg->segno;
si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
@@ -137,6 +174,26 @@
for (i = META_CP; i < META_MAX; i++)
si->meta_count[i] = atomic_read(&sbi->meta_count[i]);
+ for (i = 0; i < NO_CHECK_TYPE; i++) {
+ si->dirty_seg[i] = 0;
+ si->full_seg[i] = 0;
+ si->valid_blks[i] = 0;
+ }
+
+ for (i = 0; i < MAIN_SEGS(sbi); i++) {
+ int blks = get_seg_entry(sbi, i)->valid_blocks;
+ int type = get_seg_entry(sbi, i)->type;
+
+ if (!blks)
+ continue;
+
+ if (blks == sbi->blocks_per_seg)
+ si->full_seg[type]++;
+ else
+ si->dirty_seg[type]++;
+ si->valid_blks[type] += blks;
+ }
+
for (i = 0; i < 2; i++) {
si->segment_count[i] = sbi->segment_count[i];
si->block_count[i] = sbi->block_count[i];
@@ -146,39 +203,6 @@
}
/*
- * This function calculates BDF of every segments
- */
-static void update_sit_info(struct f2fs_sb_info *sbi)
-{
- struct f2fs_stat_info *si = F2FS_STAT(sbi);
- unsigned long long blks_per_sec, hblks_per_sec, total_vblocks;
- unsigned long long bimodal, dist;
- unsigned int segno, vblocks;
- int ndirty = 0;
-
- bimodal = 0;
- total_vblocks = 0;
- blks_per_sec = BLKS_PER_SEC(sbi);
- hblks_per_sec = blks_per_sec / 2;
- for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
- vblocks = get_valid_blocks(sbi, segno, true);
- dist = abs(vblocks - hblks_per_sec);
- bimodal += dist * dist;
-
- if (vblocks > 0 && vblocks < blks_per_sec) {
- total_vblocks += vblocks;
- ndirty++;
- }
- }
- dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100);
- si->bimodal = div64_u64(bimodal, dist);
- if (si->dirty_count)
- si->avg_vblocks = div_u64(total_vblocks, ndirty);
- else
- si->avg_vblocks = 0;
-}
-
-/*
* This function calculates memory footprint.
*/
static void update_mem_info(struct f2fs_sb_info *sbi)
@@ -298,6 +322,9 @@
si->ssa_area_segs, si->main_area_segs);
seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
si->overp_segs, si->rsvd_segs);
+ seq_printf(s, "Current Time Sec: %llu / Mounted Time Sec: %llu\n\n",
+ ktime_get_boottime_seconds(),
+ SIT_I(si->sbi)->mounted_time);
if (test_opt(si->sbi, DISCARD))
seq_printf(s, "Utilization: %u%% (%u valid blocks, %u discard blocks)\n",
si->utilization, si->valid_count, si->discard_blks);
@@ -316,35 +343,65 @@
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
si->inline_dir);
+ seq_printf(s, " - Compressed Inode: %u, Blocks: %llu\n",
+ si->compr_inode, si->compr_blocks);
seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
si->orphans, si->append, si->update);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
- seq_printf(s, " - COLD data: %d, %d, %d\n",
+ seq_printf(s, " TYPE %8s %8s %8s %10s %10s %10s\n",
+ "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk");
+ seq_printf(s, " - COLD data: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_COLD_DATA],
si->cursec[CURSEG_COLD_DATA],
- si->curzone[CURSEG_COLD_DATA]);
- seq_printf(s, " - WARM data: %d, %d, %d\n",
+ si->curzone[CURSEG_COLD_DATA],
+ si->dirty_seg[CURSEG_COLD_DATA],
+ si->full_seg[CURSEG_COLD_DATA],
+ si->valid_blks[CURSEG_COLD_DATA]);
+ seq_printf(s, " - WARM data: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_WARM_DATA],
si->cursec[CURSEG_WARM_DATA],
- si->curzone[CURSEG_WARM_DATA]);
- seq_printf(s, " - HOT data: %d, %d, %d\n",
+ si->curzone[CURSEG_WARM_DATA],
+ si->dirty_seg[CURSEG_WARM_DATA],
+ si->full_seg[CURSEG_WARM_DATA],
+ si->valid_blks[CURSEG_WARM_DATA]);
+ seq_printf(s, " - HOT data: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_HOT_DATA],
si->cursec[CURSEG_HOT_DATA],
- si->curzone[CURSEG_HOT_DATA]);
- seq_printf(s, " - Dir dnode: %d, %d, %d\n",
+ si->curzone[CURSEG_HOT_DATA],
+ si->dirty_seg[CURSEG_HOT_DATA],
+ si->full_seg[CURSEG_HOT_DATA],
+ si->valid_blks[CURSEG_HOT_DATA]);
+ seq_printf(s, " - Dir dnode: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_HOT_NODE],
si->cursec[CURSEG_HOT_NODE],
- si->curzone[CURSEG_HOT_NODE]);
- seq_printf(s, " - File dnode: %d, %d, %d\n",
+ si->curzone[CURSEG_HOT_NODE],
+ si->dirty_seg[CURSEG_HOT_NODE],
+ si->full_seg[CURSEG_HOT_NODE],
+ si->valid_blks[CURSEG_HOT_NODE]);
+ seq_printf(s, " - File dnode: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_WARM_NODE],
si->cursec[CURSEG_WARM_NODE],
- si->curzone[CURSEG_WARM_NODE]);
- seq_printf(s, " - Indir nodes: %d, %d, %d\n",
+ si->curzone[CURSEG_WARM_NODE],
+ si->dirty_seg[CURSEG_WARM_NODE],
+ si->full_seg[CURSEG_WARM_NODE],
+ si->valid_blks[CURSEG_WARM_NODE]);
+ seq_printf(s, " - Indir nodes: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_COLD_NODE],
si->cursec[CURSEG_COLD_NODE],
- si->curzone[CURSEG_COLD_NODE]);
+ si->curzone[CURSEG_COLD_NODE],
+ si->dirty_seg[CURSEG_COLD_NODE],
+ si->full_seg[CURSEG_COLD_NODE],
+ si->valid_blks[CURSEG_COLD_NODE]);
+ seq_printf(s, " - Pinned file: %8d %8d %8d\n",
+ si->curseg[CURSEG_COLD_DATA_PINNED],
+ si->cursec[CURSEG_COLD_DATA_PINNED],
+ si->curzone[CURSEG_COLD_DATA_PINNED]);
+ seq_printf(s, " - ATGC data: %8d %8d %8d\n",
+ si->curseg[CURSEG_ALL_DATA_ATGC],
+ si->cursec[CURSEG_ALL_DATA_ATGC],
+ si->curzone[CURSEG_ALL_DATA_ATGC]);
seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
si->main_area_segs - si->dirty_count -
si->prefree_count - si->free_segs,
@@ -442,7 +499,7 @@
si->block_count[LFS], si->segment_count[LFS]);
/* segment usage info */
- update_sit_info(si->sbi);
+ f2fs_update_sit_info(si->sbi);
seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n",
si->bimodal, si->avg_vblocks);
@@ -462,6 +519,7 @@
}
DEFINE_SHOW_ATTRIBUTE(stat);
+#endif
int f2fs_build_stats(struct f2fs_sb_info *sbi)
{
@@ -492,11 +550,12 @@
atomic_set(&sbi->inline_xattr, 0);
atomic_set(&sbi->inline_inode, 0);
atomic_set(&sbi->inline_dir, 0);
+ atomic_set(&sbi->compr_inode, 0);
+ atomic64_set(&sbi->compr_blocks, 0);
atomic_set(&sbi->inplace_count, 0);
for (i = META_CP; i < META_MAX; i++)
atomic_set(&sbi->meta_count[i], 0);
- atomic_set(&sbi->aw_cnt, 0);
atomic_set(&sbi->vw_cnt, 0);
atomic_set(&sbi->max_aw_cnt, 0);
atomic_set(&sbi->max_vw_cnt, 0);
@@ -516,19 +575,23 @@
list_del(&si->stat_list);
mutex_unlock(&f2fs_stat_mutex);
- kvfree(si);
+ kfree(si);
}
void __init f2fs_create_root_stats(void)
{
+#ifdef CONFIG_DEBUG_FS
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL,
&stat_fops);
+#endif
}
void f2fs_destroy_root_stats(void)
{
+#ifdef CONFIG_DEBUG_FS
debugfs_remove_recursive(f2fs_debugfs_root);
f2fs_debugfs_root = NULL;
+#endif
}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 99c4a86..6694298 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -70,6 +70,112 @@
return DT_UNKNOWN;
}
+/* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */
+int f2fs_init_casefolded_name(const struct inode *dir,
+ struct f2fs_filename *fname)
+{
+#ifdef CONFIG_UNICODE
+ struct super_block *sb = dir->i_sb;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ if (IS_CASEFOLDED(dir)) {
+ fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN,
+ GFP_NOFS);
+ if (!fname->cf_name.name)
+ return -ENOMEM;
+ fname->cf_name.len = utf8_casefold(sb->s_encoding,
+ fname->usr_fname,
+ fname->cf_name.name,
+ F2FS_NAME_LEN);
+ if ((int)fname->cf_name.len <= 0) {
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+ if (sb_has_strict_encoding(sb))
+ return -EINVAL;
+ /* fall back to treating name as opaque byte sequence */
+ }
+ }
+#endif
+ return 0;
+}
+
+static int __f2fs_setup_filename(const struct inode *dir,
+ const struct fscrypt_name *crypt_name,
+ struct f2fs_filename *fname)
+{
+ int err;
+
+ memset(fname, 0, sizeof(*fname));
+
+ fname->usr_fname = crypt_name->usr_fname;
+ fname->disk_name = crypt_name->disk_name;
+#ifdef CONFIG_FS_ENCRYPTION
+ fname->crypto_buf = crypt_name->crypto_buf;
+#endif
+ if (crypt_name->is_nokey_name) {
+ /* hash was decoded from the no-key name */
+ fname->hash = cpu_to_le32(crypt_name->hash);
+ } else {
+ err = f2fs_init_casefolded_name(dir, fname);
+ if (err) {
+ f2fs_free_filename(fname);
+ return err;
+ }
+ f2fs_hash_filename(dir, fname);
+ }
+ return 0;
+}
+
+/*
+ * Prepare to search for @iname in @dir. This is similar to
+ * fscrypt_setup_filename(), but this also handles computing the casefolded name
+ * and the f2fs dirhash if needed, then packing all the information about this
+ * filename up into a 'struct f2fs_filename'.
+ */
+int f2fs_setup_filename(struct inode *dir, const struct qstr *iname,
+ int lookup, struct f2fs_filename *fname)
+{
+ struct fscrypt_name crypt_name;
+ int err;
+
+ err = fscrypt_setup_filename(dir, iname, lookup, &crypt_name);
+ if (err)
+ return err;
+
+ return __f2fs_setup_filename(dir, &crypt_name, fname);
+}
+
+/*
+ * Prepare to look up @dentry in @dir. This is similar to
+ * fscrypt_prepare_lookup(), but this also handles computing the casefolded name
+ * and the f2fs dirhash if needed, then packing all the information about this
+ * filename up into a 'struct f2fs_filename'.
+ */
+int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
+ struct f2fs_filename *fname)
+{
+ struct fscrypt_name crypt_name;
+ int err;
+
+ err = fscrypt_prepare_lookup(dir, dentry, &crypt_name);
+ if (err)
+ return err;
+
+ return __f2fs_setup_filename(dir, &crypt_name, fname);
+}
+
+void f2fs_free_filename(struct f2fs_filename *fname)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+ kfree(fname->crypto_buf.name);
+ fname->crypto_buf.name = NULL;
+#endif
+#ifdef CONFIG_UNICODE
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+#endif
+}
+
static unsigned long dir_block_index(unsigned int level,
int dir_level, unsigned int idx)
{
@@ -84,23 +190,16 @@
static struct f2fs_dir_entry *find_in_block(struct inode *dir,
struct page *dentry_page,
- struct fscrypt_name *fname,
- f2fs_hash_t namehash,
- int *max_slots,
- struct page **res_page)
+ const struct f2fs_filename *fname,
+ int *max_slots)
{
struct f2fs_dentry_block *dentry_blk;
- struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(dir, &d, dentry_blk);
- de = f2fs_find_target_dentry(fname, namehash, max_slots, &d);
- if (de)
- *res_page = dentry_page;
-
- return de;
+ return f2fs_find_target_dentry(&d, fname, max_slots);
}
#ifdef CONFIG_UNICODE
@@ -109,102 +208,55 @@
* being searched for.
*/
static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
- const struct qstr *entry, bool quick)
+ const u8 *de_name, u32 de_name_len)
{
- const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
- const struct unicode_map *um = sbi->s_encoding;
+ const struct super_block *sb = dir->i_sb;
+ const struct unicode_map *um = sb->s_encoding;
+ struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
- if (quick)
- res = utf8_strncasecmp_folded(um, name, entry);
- else
- res = utf8_strncasecmp(um, name, entry);
+ res = utf8_strncasecmp_folded(um, name, &entry);
if (res < 0) {
/*
* In strict mode, ignore invalid names. In non-strict mode,
* fall back to treating them as opaque byte sequences.
*/
- if (f2fs_has_strict_mode(sbi) || name->len != entry->len)
+ if (sb_has_strict_encoding(sb) || name->len != entry.len)
return false;
- return !memcmp(name->name, entry->name, name->len);
+ return !memcmp(name->name, entry.name, name->len);
}
return res == 0;
}
+#endif /* CONFIG_UNICODE */
-static void f2fs_fname_setup_ci_filename(struct inode *dir,
- const struct qstr *iname,
- struct fscrypt_str *cf_name)
+static inline bool f2fs_match_name(const struct inode *dir,
+ const struct f2fs_filename *fname,
+ const u8 *de_name, u32 de_name_len)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
-
- if (!IS_CASEFOLDED(dir)) {
- cf_name->name = NULL;
- return;
- }
-
- cf_name->name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, GFP_NOFS);
- if (!cf_name->name)
- return;
-
- cf_name->len = utf8_casefold(sbi->s_encoding,
- iname, cf_name->name,
- F2FS_NAME_LEN);
- if ((int)cf_name->len <= 0) {
- kvfree(cf_name->name);
- cf_name->name = NULL;
- }
-}
-#endif
-
-static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d,
- struct f2fs_dir_entry *de,
- struct fscrypt_name *fname,
- struct fscrypt_str *cf_str,
- unsigned long bit_pos,
- f2fs_hash_t namehash)
-{
-#ifdef CONFIG_UNICODE
- struct inode *parent = d->inode;
- struct f2fs_sb_info *sbi = F2FS_I_SB(parent);
- struct qstr entry;
-#endif
-
- if (de->hash_code != namehash)
- return false;
+ struct fscrypt_name f;
#ifdef CONFIG_UNICODE
- entry.name = d->filename[bit_pos];
- entry.len = de->name_len;
+ if (fname->cf_name.name) {
+ struct qstr cf = FSTR_TO_QSTR(&fname->cf_name);
- if (sbi->s_encoding && IS_CASEFOLDED(parent)) {
- if (cf_str->name) {
- struct qstr cf = {.name = cf_str->name,
- .len = cf_str->len};
- return f2fs_match_ci_name(parent, &cf, &entry, true);
- }
- return f2fs_match_ci_name(parent, fname->usr_fname, &entry,
- false);
+ return f2fs_match_ci_name(dir, &cf, de_name, de_name_len);
}
#endif
- if (fscrypt_match_name(fname, d->filename[bit_pos],
- le16_to_cpu(de->name_len)))
- return true;
- return false;
+ f.usr_fname = fname->usr_fname;
+ f.disk_name = fname->disk_name;
+#ifdef CONFIG_FS_ENCRYPTION
+ f.crypto_buf = fname->crypto_buf;
+#endif
+ return fscrypt_match_name(&f, de_name, de_name_len);
}
-struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
- f2fs_hash_t namehash, int *max_slots,
- struct f2fs_dentry_ptr *d)
+struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
+ const struct f2fs_filename *fname, int *max_slots)
{
struct f2fs_dir_entry *de;
- struct fscrypt_str cf_str = { .name = NULL, .len = 0 };
unsigned long bit_pos = 0;
int max_len = 0;
-#ifdef CONFIG_UNICODE
- f2fs_fname_setup_ci_filename(d->inode, fname->usr_fname, &cf_str);
-#endif
-
if (max_slots)
*max_slots = 0;
while (bit_pos < d->max) {
@@ -221,7 +273,9 @@
continue;
}
- if (f2fs_match_name(d, de, fname, &cf_str, bit_pos, namehash))
+ if (de->hash_code == fname->hash &&
+ f2fs_match_name(d->inode, fname, d->filename[bit_pos],
+ le16_to_cpu(de->name_len)))
goto found;
if (max_slots && max_len > *max_slots)
@@ -235,33 +289,27 @@
found:
if (max_slots && max_len > *max_slots)
*max_slots = max_len;
-
-#ifdef CONFIG_UNICODE
- kvfree(cf_str.name);
-#endif
return de;
}
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
unsigned int level,
- struct fscrypt_name *fname,
+ const struct f2fs_filename *fname,
struct page **res_page)
{
- struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
- int s = GET_DENTRY_SLOTS(name.len);
+ int s = GET_DENTRY_SLOTS(fname->disk_name.len);
unsigned int nbucket, nblock;
unsigned int bidx, end_block;
struct page *dentry_page;
struct f2fs_dir_entry *de = NULL;
bool room = false;
int max_slots;
- f2fs_hash_t namehash = f2fs_dentry_hash(dir, &name, fname);
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
- le32_to_cpu(namehash) % nbucket);
+ le32_to_cpu(fname->hash) % nbucket);
end_block = bidx + nblock;
for (; bidx < end_block; bidx++) {
@@ -277,18 +325,19 @@
}
}
- de = find_in_block(dir, dentry_page, fname, namehash,
- &max_slots, res_page);
- if (de)
+ de = find_in_block(dir, dentry_page, fname, &max_slots);
+ if (de) {
+ *res_page = dentry_page;
break;
+ }
if (max_slots >= s)
room = true;
f2fs_put_page(dentry_page, 0);
}
- if (!de && room && F2FS_I(dir)->chash != namehash) {
- F2FS_I(dir)->chash = namehash;
+ if (!de && room && F2FS_I(dir)->chash != fname->hash) {
+ F2FS_I(dir)->chash = fname->hash;
F2FS_I(dir)->clevel = level;
}
@@ -296,7 +345,8 @@
}
struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
- struct fscrypt_name *fname, struct page **res_page)
+ const struct f2fs_filename *fname,
+ struct page **res_page)
{
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
@@ -343,18 +393,10 @@
const struct qstr *child, struct page **res_page)
{
struct f2fs_dir_entry *de = NULL;
- struct fscrypt_name fname;
+ struct f2fs_filename fname;
int err;
-#ifdef CONFIG_UNICODE
- if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) &&
- utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) {
- *res_page = ERR_PTR(-EINVAL);
- return NULL;
- }
-#endif
-
- err = fscrypt_setup_filename(dir, child, 1, &fname);
+ err = f2fs_setup_filename(dir, child, 1, &fname);
if (err) {
if (err == -ENOENT)
*res_page = NULL;
@@ -365,7 +407,7 @@
de = __f2fs_find_entry(dir, &fname, res_page);
- fscrypt_free_filename(&fname);
+ f2fs_free_filename(&fname);
return de;
}
@@ -406,7 +448,8 @@
f2fs_put_page(page, 1);
}
-static void init_dent_inode(const struct qstr *name, struct page *ipage)
+static void init_dent_inode(const struct f2fs_filename *fname,
+ struct page *ipage)
{
struct f2fs_inode *ri;
@@ -414,16 +457,16 @@
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
- ri->i_namelen = cpu_to_le32(name->len);
- memcpy(ri->i_name, name->name, name->len);
+ ri->i_namelen = cpu_to_le32(fname->disk_name.len);
+ memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len);
set_page_dirty(ipage);
}
void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d)
{
- struct qstr dot = QSTR_INIT(".", 1);
- struct qstr dotdot = QSTR_INIT("..", 2);
+ struct fscrypt_str dot = FSTR_INIT(".", 1);
+ struct fscrypt_str dotdot = FSTR_INIT("..", 2);
/* update dirent of "." */
f2fs_update_dentry(inode->i_ino, inode->i_mode, d, &dot, 0, 0);
@@ -457,11 +500,9 @@
}
struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
- const struct qstr *new_name, const struct qstr *orig_name,
- struct page *dpage)
+ const struct f2fs_filename *fname, struct page *dpage)
{
struct page *page;
- int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir));
int err;
if (is_inode_flag_set(inode, FI_NEW_INODE)) {
@@ -484,13 +525,13 @@
if (err)
goto put_error;
- err = f2fs_init_security(inode, dir, orig_name, page);
+ err = f2fs_init_security(inode, dir,
+ fname ? fname->usr_fname : NULL, page);
if (err)
goto put_error;
- if ((IS_ENCRYPTED(dir) || dummy_encrypt) &&
- f2fs_may_encrypt(inode)) {
- err = fscrypt_inherit_context(dir, inode, page, false);
+ if (IS_ENCRYPTED(inode)) {
+ err = fscrypt_set_context(inode, page);
if (err)
goto put_error;
}
@@ -500,8 +541,8 @@
return page;
}
- if (new_name) {
- init_dent_inode(new_name, page);
+ if (fname) {
+ init_dent_inode(fname, page);
if (IS_ENCRYPTED(dir))
file_set_enc_name(inode);
}
@@ -568,9 +609,23 @@
goto next;
}
+bool f2fs_has_enough_room(struct inode *dir, struct page *ipage,
+ const struct f2fs_filename *fname)
+{
+ struct f2fs_dentry_ptr d;
+ unsigned int bit_pos;
+ int slots = GET_DENTRY_SLOTS(fname->disk_name.len);
+
+ make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ipage));
+
+ bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max);
+
+ return bit_pos < d.max;
+}
+
void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
- const struct qstr *name, f2fs_hash_t name_hash,
- unsigned int bit_pos)
+ const struct fscrypt_str *name, f2fs_hash_t name_hash,
+ unsigned int bit_pos)
{
struct f2fs_dir_entry *de;
int slots = GET_DENTRY_SLOTS(name->len);
@@ -590,15 +645,13 @@
}
}
-int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
- const struct qstr *orig_name,
- struct inode *inode, nid_t ino, umode_t mode)
+int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname,
+ struct inode *inode, nid_t ino, umode_t mode)
{
unsigned int bit_pos;
unsigned int level;
unsigned int current_depth;
unsigned long bidx, block;
- f2fs_hash_t dentry_hash;
unsigned int nbucket, nblock;
struct page *dentry_page = NULL;
struct f2fs_dentry_block *dentry_blk = NULL;
@@ -607,11 +660,10 @@
int slots, err = 0;
level = 0;
- slots = GET_DENTRY_SLOTS(new_name->len);
- dentry_hash = f2fs_dentry_hash(dir, new_name, NULL);
+ slots = GET_DENTRY_SLOTS(fname->disk_name.len);
current_depth = F2FS_I(dir)->i_current_depth;
- if (F2FS_I(dir)->chash == dentry_hash) {
+ if (F2FS_I(dir)->chash == fname->hash) {
level = F2FS_I(dir)->clevel;
F2FS_I(dir)->chash = 0;
}
@@ -633,7 +685,7 @@
nblock = bucket_blocks(level);
bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
- (le32_to_cpu(dentry_hash) % nbucket));
+ (le32_to_cpu(fname->hash) % nbucket));
for (block = bidx; block <= (bidx + nblock - 1); block++) {
dentry_page = f2fs_get_new_data_page(dir, NULL, block, true);
@@ -657,8 +709,7 @@
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = f2fs_init_inode_metadata(inode, dir, new_name,
- orig_name, NULL);
+ page = f2fs_init_inode_metadata(inode, dir, fname, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -666,7 +717,8 @@
}
make_dentry_ptr_block(NULL, &d, dentry_blk);
- f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos);
+ f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash,
+ bit_pos);
set_page_dirty(dentry_page);
@@ -690,21 +742,15 @@
return err;
}
-int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname,
- struct inode *inode, nid_t ino, umode_t mode)
+int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname,
+ struct inode *inode, nid_t ino, umode_t mode)
{
- struct qstr new_name;
int err = -EAGAIN;
- new_name.name = fname_name(fname);
- new_name.len = fname_len(fname);
-
if (f2fs_has_inline_dentry(dir))
- err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname,
- inode, ino, mode);
+ err = f2fs_add_inline_entry(dir, fname, inode, ino, mode);
if (err == -EAGAIN)
- err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname,
- inode, ino, mode);
+ err = f2fs_add_regular_entry(dir, fname, inode, ino, mode);
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
return err;
@@ -717,17 +763,17 @@
int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
- struct fscrypt_name fname;
+ struct f2fs_filename fname;
struct page *page = NULL;
struct f2fs_dir_entry *de = NULL;
int err;
- err = fscrypt_setup_filename(dir, name, 0, &fname);
+ err = f2fs_setup_filename(dir, name, 0, &fname);
if (err)
return err;
/*
- * An immature stakable filesystem shows a race condition between lookup
+ * An immature stackable filesystem shows a race condition between lookup
* and create. If we have same task when doing lookup and create, it's
* definitely fine as expected by VFS normally. Otherwise, let's just
* verify on-disk dentry one more time, which guarantees filesystem
@@ -745,7 +791,7 @@
} else {
err = f2fs_add_dentry(dir, &fname, inode, ino, mode);
}
- fscrypt_free_filename(&fname);
+ f2fs_free_filename(&fname);
return err;
}
@@ -755,7 +801,7 @@
int err = 0;
down_write(&F2FS_I(inode)->i_sem);
- page = f2fs_init_inode_metadata(inode, dir, NULL, NULL, NULL);
+ page = f2fs_init_inode_metadata(inode, dir, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -826,12 +872,6 @@
0);
set_page_dirty(page);
- dir->i_ctime = dir->i_mtime = current_time(dir);
- f2fs_mark_inode_dirty_sync(dir, false);
-
- if (inode)
- f2fs_drop_nlink(dir, inode);
-
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!f2fs_truncate_hole(dir, page->index, page->index + 1)) {
f2fs_clear_page_cache_dirty_tag(page);
@@ -843,6 +883,12 @@
f2fs_remove_dirty_inode(dir);
}
f2fs_put_page(page, 1);
+
+ dir->i_ctime = dir->i_mtime = current_time(dir);
+ f2fs_mark_inode_dirty_sync(dir, false);
+
+ if (inode)
+ f2fs_drop_nlink(dir, inode);
}
bool f2fs_empty_dir(struct inode *dir)
@@ -981,10 +1027,10 @@
if (IS_ENCRYPTED(inode)) {
err = fscrypt_get_encryption_info(inode);
- if (err && err != -ENOKEY)
+ if (err)
goto out;
- err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr);
+ err = fscrypt_fname_alloc_buffer(F2FS_NAME_LEN, &fstr);
if (err < 0)
goto out;
}
@@ -1059,75 +1105,8 @@
};
#ifdef CONFIG_UNICODE
-static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
- const struct dentry *parent = READ_ONCE(dentry->d_parent);
- const struct inode *dir = READ_ONCE(parent->d_inode);
- const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- struct qstr entry = QSTR_INIT(str, len);
- char strbuf[DNAME_INLINE_LEN];
- int res;
-
- if (!dir || !IS_CASEFOLDED(dir))
- goto fallback;
-
- /*
- * If the dentry name is stored in-line, then it may be concurrently
- * modified by a rename. If this happens, the VFS will eventually retry
- * the lookup, so it doesn't matter what ->d_compare() returns.
- * However, it's unsafe to call utf8_strncasecmp() with an unstable
- * string. Therefore, we have to copy the name into a temporary buffer.
- */
- if (len <= DNAME_INLINE_LEN - 1) {
- memcpy(strbuf, str, len);
- strbuf[len] = 0;
- entry.name = strbuf;
- /* prevent compiler from optimizing out the temporary buffer */
- barrier();
- }
-
- res = utf8_strncasecmp(sbi->s_encoding, name, &entry);
- if (res >= 0)
- return res;
-
- if (f2fs_has_strict_mode(sbi))
- return -EINVAL;
-fallback:
- if (len != name->len)
- return 1;
- return !!memcmp(str, name->name, len);
-}
-
-static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- const struct unicode_map *um = sbi->s_encoding;
- const struct inode *inode = READ_ONCE(dentry->d_inode);
- unsigned char *norm;
- int len, ret = 0;
-
- if (!inode || !IS_CASEFOLDED(inode))
- return 0;
-
- norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC);
- if (!norm)
- return -ENOMEM;
-
- len = utf8_casefold(um, str, norm, PATH_MAX);
- if (len < 0) {
- if (f2fs_has_strict_mode(sbi))
- ret = -EINVAL;
- goto out;
- }
- str->hash = full_name_hash(dentry, norm, len);
-out:
- kvfree(norm);
- return ret;
-}
-
const struct dentry_operations f2fs_dentry_ops = {
- .d_hash = f2fs_d_hash,
- .d_compare = f2fs_d_compare,
+ .d_hash = generic_ci_d_hash,
+ .d_compare = generic_ci_d_compare,
};
#endif
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index e600784..3ebf976 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -58,6 +58,29 @@
return re;
}
+struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
+ struct rb_root_cached *root,
+ struct rb_node **parent,
+ unsigned long long key, bool *leftmost)
+{
+ struct rb_node **p = &root->rb_root.rb_node;
+ struct rb_entry *re;
+
+ while (*p) {
+ *parent = *p;
+ re = rb_entry(*parent, struct rb_entry, rb_node);
+
+ if (key < re->key) {
+ p = &(*p)->rb_left;
+ } else {
+ p = &(*p)->rb_right;
+ *leftmost = false;
+ }
+ }
+
+ return p;
+}
+
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root_cached *root,
struct rb_node **parent,
@@ -166,7 +189,7 @@
}
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root)
+ struct rb_root_cached *root, bool check_key)
{
#ifdef CONFIG_F2FS_CHECK_FS
struct rb_node *cur = rb_first_cached(root), *next;
@@ -183,13 +206,23 @@
cur_re = rb_entry(cur, struct rb_entry, rb_node);
next_re = rb_entry(next, struct rb_entry, rb_node);
+ if (check_key) {
+ if (cur_re->key > next_re->key) {
+ f2fs_info(sbi, "inconsistent rbtree, "
+ "cur(%llu) next(%llu)",
+ cur_re->key, next_re->key);
+ return false;
+ }
+ goto next;
+ }
+
if (cur_re->ofs + cur_re->len > next_re->ofs) {
f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
cur_re->ofs, cur_re->len,
next_re->ofs, next_re->len);
return false;
}
-
+next:
cur = next;
}
#endif
@@ -325,9 +358,10 @@
}
/* return true, if inode page is changed */
-static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
@@ -335,16 +369,18 @@
if (!f2fs_may_extent_tree(inode)) {
/* drop largest extent */
if (i_ext && i_ext->len) {
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
i_ext->len = 0;
- return true;
+ set_page_dirty(ipage);
+ return;
}
- return false;
+ return;
}
et = __grab_extent_tree(inode);
if (!i_ext || !i_ext->len)
- return false;
+ return;
get_extent_info(&ei, i_ext);
@@ -360,17 +396,14 @@
}
out:
write_unlock(&et->lock);
- return false;
}
-bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
{
- bool ret = __f2fs_init_extent_tree(inode, i_ext);
+ __f2fs_init_extent_tree(inode, ipage);
if (!F2FS_I(inode)->extent_tree)
set_inode_flag(inode, FI_NO_EXTENT);
-
- return ret;
}
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 031a17b..6c4bf22 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/f2fs.h
*
@@ -22,6 +22,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/quotaops.h>
+#include <linux/part_stat.h>
#include <crypto/hash.h>
#include <linux/fscrypt.h>
@@ -74,7 +75,6 @@
/*
* For mount options
*/
-#define F2FS_MOUNT_BG_GC 0x00000001
#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002
#define F2FS_MOUNT_DISCARD 0x00000004
#define F2FS_MOUNT_NOHEAP 0x00000008
@@ -88,11 +88,8 @@
#define F2FS_MOUNT_NOBARRIER 0x00000800
#define F2FS_MOUNT_FASTBOOT 0x00001000
#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
-#define F2FS_MOUNT_FORCE_FG_GC 0x00004000
#define F2FS_MOUNT_DATA_FLUSH 0x00008000
#define F2FS_MOUNT_FAULT_INJECTION 0x00010000
-#define F2FS_MOUNT_ADAPTIVE 0x00020000
-#define F2FS_MOUNT_LFS 0x00040000
#define F2FS_MOUNT_USRQUOTA 0x00080000
#define F2FS_MOUNT_GRPQUOTA 0x00100000
#define F2FS_MOUNT_PRJQUOTA 0x00200000
@@ -101,6 +98,7 @@
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
#define F2FS_MOUNT_NORECOVERY 0x04000000
+#define F2FS_MOUNT_ATGC 0x08000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -117,6 +115,8 @@
*/
typedef u32 nid_t;
+#define COMPRESS_EXT_NUM 16
+
struct f2fs_mount_info {
unsigned int opt;
int write_io_size_bits; /* Write IO size bits */
@@ -137,11 +137,19 @@
int whint_mode;
int alloc_mode; /* segment allocation policy */
int fsync_mode; /* fsync policy */
- bool test_dummy_encryption; /* test dummy encryption */
+ int fs_mode; /* fs mode: LFS or ADAPTIVE */
+ int bggc_mode; /* bggc mode: off, on or sync */
+ struct fscrypt_dummy_policy dummy_enc_policy; /* test dummy encryption */
block_t unusable_cap_perc; /* percentage for cap */
block_t unusable_cap; /* Amount of space allowed to be
* unusable when disabling checkpoint
*/
+
+ /* For compression */
+ unsigned char compress_algorithm; /* algorithm type */
+ unsigned compress_log_size; /* cluster log size */
+ unsigned char compress_ext_cnt; /* extension count */
+ unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
};
#define F2FS_FEATURE_ENCRYPT 0x0001
@@ -157,6 +165,7 @@
#define F2FS_FEATURE_VERITY 0x0400
#define F2FS_FEATURE_SB_CHKSUM 0x0800
#define F2FS_FEATURE_CASEFOLD 0x1000
+#define F2FS_FEATURE_COMPRESSION 0x2000
#define __F2FS_HAS_FEATURE(raw_super, mask) \
((raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -187,6 +196,7 @@
#define CP_DISCARD 0x00000010
#define CP_TRIMMED 0x00000020
#define CP_PAUSE 0x00000040
+#define CP_RESIZE 0x00000080
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
@@ -324,8 +334,8 @@
bool io_aware; /* issue discard in idle time */
bool sync; /* submit discard with REQ_SYNC flag */
bool ordered; /* issue discard by lba order */
+ bool timeout; /* discard timeout for put_super */
unsigned int granularity; /* discard granularity */
- int timeout; /* discard timeout for put_super */
};
struct discard_cmd_control {
@@ -392,88 +402,6 @@
return size <= MAX_SIT_JENTRIES(journal);
}
-/*
- * ioctl commands
- */
-#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
-#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
-#define F2FS_IOC_GETVERSION FS_IOC_GETVERSION
-
-#define F2FS_IOCTL_MAGIC 0xf5
-#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
-#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
-#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
-#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
-#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
-#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32)
-#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
-#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \
- struct f2fs_defragment)
-#define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \
- struct f2fs_move_range)
-#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \
- struct f2fs_flush_device)
-#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \
- struct f2fs_gc_range)
-#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32)
-#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32)
-#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32)
-#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15)
-#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64)
-
-#define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL
-#define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL
-
-#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
-#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
-#define F2FS_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
-
-/*
- * should be same as XFS_IOC_GOINGDOWN.
- * Flags for going down operation used by FS_IOC_GOINGDOWN
- */
-#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */
-#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */
-#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */
-#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */
-#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */
-#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */
-
-#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
-/*
- * ioctl commands in 32 bit emulation
- */
-#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
-#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
-#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION
-#endif
-
-#define F2FS_IOC_FSGETXATTR FS_IOC_FSGETXATTR
-#define F2FS_IOC_FSSETXATTR FS_IOC_FSSETXATTR
-
-struct f2fs_gc_range {
- u32 sync;
- u64 start;
- u64 len;
-};
-
-struct f2fs_defragment {
- u64 start;
- u64 len;
-};
-
-struct f2fs_move_range {
- u32 dst_fd; /* destination fd */
- u64 pos_in; /* start position in src_fd */
- u64 pos_out; /* start position in dst_fd */
- u64 len; /* size to move */
-};
-
-struct f2fs_flush_device {
- u32 dev_num; /* device number to flush */
- u32 segments; /* # of segments to flush */
-};
-
/* for inline stuff */
#define DEF_INLINE_RESERVED_SIZE 1
static inline int get_extra_isize(struct inode *inode);
@@ -498,6 +426,42 @@
* For INODE and NODE manager
*/
/* for directory operations */
+
+struct f2fs_filename {
+ /*
+ * The filename the user specified. This is NULL for some
+ * filesystem-internal operations, e.g. converting an inline directory
+ * to a non-inline one, or roll-forward recovering an encrypted dentry.
+ */
+ const struct qstr *usr_fname;
+
+ /*
+ * The on-disk filename. For encrypted directories, this is encrypted.
+ * This may be NULL for lookups in an encrypted dir without the key.
+ */
+ struct fscrypt_str disk_name;
+
+ /* The dirhash of this filename */
+ f2fs_hash_t hash;
+
+#ifdef CONFIG_FS_ENCRYPTION
+ /*
+ * For lookups in encrypted directories: either the buffer backing
+ * disk_name, or a buffer that holds the decoded no-key name.
+ */
+ struct fscrypt_str crypto_buf;
+#endif
+#ifdef CONFIG_UNICODE
+ /*
+ * For casefolded directories: the casefolded name, but it's left NULL
+ * if the original name is not valid Unicode or if the filesystem is
+ * doing an internal operation where usr_fname is also NULL. In these
+ * cases we fall back to treating the name as an opaque byte sequence.
+ */
+ struct fscrypt_str cf_name;
+#endif
+};
+
struct f2fs_dentry_ptr {
struct inode *inode;
void *bitmap;
@@ -552,6 +516,9 @@
#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
+/* congestion wait timeout value, default: 20ms */
+#define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20))
+
/* maximum retry quota flush count */
#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8
@@ -567,8 +534,13 @@
struct rb_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
- unsigned int ofs; /* start offset of the entry */
- unsigned int len; /* length of the entry */
+ union {
+ struct {
+ unsigned int ofs; /* start offset of the entry */
+ unsigned int len; /* length of the entry */
+ };
+ unsigned long long key; /* 64-bits key */
+ } __packed;
};
struct extent_info {
@@ -668,6 +640,44 @@
MAX_GC_FAILURE
};
+/* used for f2fs_inode_info->flags */
+enum {
+ FI_NEW_INODE, /* indicate newly allocated inode */
+ FI_DIRTY_INODE, /* indicate inode is dirty or not */
+ FI_AUTO_RECOVER, /* indicate inode is recoverable */
+ FI_DIRTY_DIR, /* indicate directory has dirty pages */
+ FI_INC_LINK, /* need to increment i_nlink */
+ FI_ACL_MODE, /* indicate acl mode */
+ FI_NO_ALLOC, /* should not allocate any blocks */
+ FI_FREE_NID, /* free allocated nide */
+ FI_NO_EXTENT, /* not to use the extent cache */
+ FI_INLINE_XATTR, /* used for inline xattr */
+ FI_INLINE_DATA, /* used for inline data*/
+ FI_INLINE_DENTRY, /* used for inline dentry */
+ FI_APPEND_WRITE, /* inode has appended data */
+ FI_UPDATE_WRITE, /* inode has in-place-update data */
+ FI_NEED_IPU, /* used for ipu per file */
+ FI_ATOMIC_FILE, /* indicate atomic file */
+ FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
+ FI_VOLATILE_FILE, /* indicate volatile file */
+ FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
+ FI_DROP_CACHE, /* drop dirty page cache */
+ FI_DATA_EXIST, /* indicate data exists */
+ FI_INLINE_DOTS, /* indicate inline dot dentries */
+ FI_DO_DEFRAG, /* indicate defragment is running */
+ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
+ FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
+ FI_HOT_DATA, /* indicate file is hot */
+ FI_EXTRA_ATTR, /* indicate file has extra attribute */
+ FI_PROJ_INHERIT, /* indicate file inherits projectid */
+ FI_PIN_FILE, /* indicate file should not be gced */
+ FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
+ FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
+ FI_COMPRESSED_FILE, /* indicate file's data can be compressed */
+ FI_MMAP_FILE, /* indicate file was mmapped */
+ FI_MAX, /* max flag, never be used */
+};
+
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
@@ -680,7 +690,7 @@
umode_t i_acl_mode; /* keep file acl mode temporarily */
/* Use below internally in f2fs*/
- unsigned long flags; /* use to pass per-file flags */
+ unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */
struct rw_semaphore i_sem; /* protect fi info */
atomic_t dirty_pages; /* # of dirty pages */
f2fs_hash_t chash; /* hash value of given file name */
@@ -689,6 +699,7 @@
struct task_struct *cp_task; /* separate cp/wb IO stats*/
nid_t i_xattr_nid; /* node id that contains xattrs */
loff_t last_disk_size; /* lastly written file size */
+ spinlock_t i_size_lock; /* protect last_disk_size */
#ifdef CONFIG_QUOTA
struct dquot *i_dquot[MAXQUOTAS];
@@ -702,6 +713,7 @@
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
+ pgoff_t ra_offset; /* ongoing readahead offset */
struct extent_tree *extent_tree; /* cached extent_tree entry */
/* avoid racing between foreground op and gc */
@@ -714,6 +726,12 @@
int i_inline_xattr_size; /* inline xattr size */
struct timespec64 i_crtime; /* inode creation time */
struct timespec64 i_disk_time[4];/* inode disk times */
+
+ /* for file compress */
+ atomic_t i_compr_blocks; /* # of compressed blocks */
+ unsigned char i_compress_algorithm; /* algorithm type */
+ unsigned char i_log_cluster_size; /* log of cluster size */
+ unsigned int i_cluster_size; /* cluster size */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -888,7 +906,9 @@
*/
#define NR_CURSEG_DATA_TYPE (3)
#define NR_CURSEG_NODE_TYPE (3)
-#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
+#define NR_CURSEG_INMEM_TYPE (2)
+#define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
+#define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
enum {
CURSEG_HOT_DATA = 0, /* directory entry blocks */
@@ -897,7 +917,11 @@
CURSEG_HOT_NODE, /* direct node blocks of directory files */
CURSEG_WARM_NODE, /* direct node blocks of normal files */
CURSEG_COLD_NODE, /* indirect node blocks */
- NO_CHECK_TYPE,
+ NR_PERSISTENT_LOG, /* number of persistent log */
+ CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
+ /* pinned file that needs consecutive block address */
+ CURSEG_ALL_DATA_ATGC, /* SSR alloctor in hot/warm/cold data area */
+ NO_CHECK_TYPE, /* number of persistent & inmem log */
};
struct flush_cmd {
@@ -931,6 +955,7 @@
unsigned int segment_count; /* total # of segments */
unsigned int main_segments; /* # of segments in main area */
unsigned int reserved_segments; /* # of reserved segments */
+ unsigned int additional_reserved_segments;/* reserved segs for IO align feature */
unsigned int ovp_segments; /* # of overprovision segments */
/* a threshold to reclaim prefree segments */
@@ -1025,6 +1050,7 @@
enum cp_reason_type {
CP_NO_NEEDED,
CP_NON_REGULAR,
+ CP_COMPRESSED,
CP_HARDLINK,
CP_SB_NEED_CP,
CP_WRONG_PINO,
@@ -1036,8 +1062,9 @@
};
enum iostat_type {
- APP_DIRECT_IO, /* app direct IOs */
- APP_BUFFERED_IO, /* app buffered IOs */
+ /* WRITE IO */
+ APP_DIRECT_IO, /* app direct write IOs */
+ APP_BUFFERED_IO, /* app buffered write IOs */
APP_WRITE_IO, /* app write IOs */
APP_MAPPED_IO, /* app mapped IOs */
FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */
@@ -1048,6 +1075,19 @@
FS_CP_DATA_IO, /* data IOs from checkpoint */
FS_CP_NODE_IO, /* node IOs from checkpoint */
FS_CP_META_IO, /* meta IOs from checkpoint */
+
+ /* READ IO */
+ APP_DIRECT_READ_IO, /* app direct read IOs */
+ APP_BUFFERED_READ_IO, /* app buffered read IOs */
+ APP_READ_IO, /* app read IOs */
+ APP_MAPPED_READ_IO, /* app mapped read IOs */
+ FS_DATA_READ_IO, /* data read IOs */
+ FS_GDATA_READ_IO, /* data read IOs from background gc */
+ FS_CDATA_READ_IO, /* compressed data read IOs */
+ FS_NODE_READ_IO, /* node read IOs */
+ FS_META_READ_IO, /* meta read IOs */
+
+ /* other */
FS_DISCARD, /* discard */
NR_IO_TYPE,
};
@@ -1063,12 +1103,15 @@
block_t old_blkaddr; /* old block address before Cow */
struct page *page; /* page to be written */
struct page *encrypted_page; /* encrypted page */
+ struct page *compressed_page; /* compressed page */
struct list_head list; /* serialize IOs */
bool submitted; /* indicate IO submission */
int need_lock; /* indicate we need to lock cp_rwsem */
bool in_list; /* indicate fio is in io_list */
bool is_por; /* indicate IO is from recovery or not */
bool retry; /* need to reallocate block address */
+ int compr_blocks; /* # of compressed block addresses */
+ bool encrypted; /* indicate file is encrypted */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
struct bio **bio; /* bio for ipu */
@@ -1076,6 +1119,11 @@
unsigned char version; /* version of the node */
};
+struct bio_entry {
+ struct bio *bio;
+ struct list_head list;
+};
+
#define is_read_io(rw) ((rw) == READ)
struct f2fs_bio_info {
struct f2fs_sb_info *sbi; /* f2fs superblock */
@@ -1085,6 +1133,8 @@
struct rw_semaphore io_rwsem; /* blocking op for bio */
spinlock_t io_lock; /* serialize DATA/NODE IOs */
struct list_head io_list; /* track fios */
+ struct list_head bio_list; /* bio entry list head */
+ struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */
};
#define FDEV(i) (sbi->devs[i])
@@ -1098,6 +1148,7 @@
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int nr_blkz; /* Total number of zones */
unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
+ block_t *zone_capacity_blocks; /* Array of zone capacity in blks */
#endif
};
@@ -1117,6 +1168,18 @@
unsigned long ino_num; /* number of entries */
};
+/* for GC_AT */
+struct atgc_management {
+ bool atgc_enabled; /* ATGC is enabled or not */
+ struct rb_root_cached root; /* root of victim rb-tree */
+ struct list_head victim_list; /* linked with all victim entries */
+ unsigned int victim_count; /* victim count in rb-tree */
+ unsigned int candidate_ratio; /* candidate ratio */
+ unsigned int max_candidate_count; /* max candidate count */
+ unsigned int age_weight; /* age weight, vblock_weight = 100 - age_weight */
+ unsigned long long age_threshold; /* age threshold */
+};
+
/* For s_flag in struct f2fs_sb_info */
enum {
SBI_IS_DIRTY, /* dirty flag for checkpoint */
@@ -1149,7 +1212,23 @@
GC_NORMAL,
GC_IDLE_CB,
GC_IDLE_GREEDY,
- GC_URGENT,
+ GC_IDLE_AT,
+ GC_URGENT_HIGH,
+ GC_URGENT_LOW,
+};
+
+enum {
+ BGGC_MODE_ON, /* background gc is on */
+ BGGC_MODE_OFF, /* background gc is off */
+ BGGC_MODE_SYNC, /*
+ * background gc is on, migrating blocks
+ * like foreground gc
+ */
+};
+
+enum {
+ FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */
+ FS_MODE_LFS, /* use lfs allocation only */
};
enum {
@@ -1169,13 +1248,101 @@
FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */
};
-#ifdef CONFIG_FS_ENCRYPTION
-#define DUMMY_ENCRYPTION_ENABLED(sbi) \
- (unlikely(F2FS_OPTION(sbi).test_dummy_encryption))
+/*
+ * this value is set in page as a private data which indicate that
+ * the page is atomically written, and it is in inmem_pages list.
+ */
+#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
+#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
+
+#define IS_ATOMIC_WRITTEN_PAGE(page) \
+ (page_private(page) == ATOMIC_WRITTEN_PAGE)
+#define IS_DUMMY_WRITTEN_PAGE(page) \
+ (page_private(page) == DUMMY_WRITTEN_PAGE)
+
+#ifdef CONFIG_F2FS_IO_TRACE
+#define IS_IO_TRACED_PAGE(page) \
+ (page_private(page) > 0 && \
+ page_private(page) < (unsigned long)PID_MAX_LIMIT)
#else
-#define DUMMY_ENCRYPTION_ENABLED(sbi) (0)
+#define IS_IO_TRACED_PAGE(page) (0)
#endif
+/* For compression */
+enum compress_algorithm_type {
+ COMPRESS_LZO,
+ COMPRESS_LZ4,
+ COMPRESS_ZSTD,
+ COMPRESS_LZORLE,
+ COMPRESS_MAX,
+};
+
+#define COMPRESS_DATA_RESERVED_SIZE 5
+struct compress_data {
+ __le32 clen; /* compressed data size */
+ __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */
+ u8 cdata[]; /* compressed data */
+};
+
+#define COMPRESS_HEADER_SIZE (sizeof(struct compress_data))
+
+#define F2FS_COMPRESSED_PAGE_MAGIC 0xF5F2C000
+
+/* compress context */
+struct compress_ctx {
+ struct inode *inode; /* inode the context belong to */
+ pgoff_t cluster_idx; /* cluster index number */
+ unsigned int cluster_size; /* page count in cluster */
+ unsigned int log_cluster_size; /* log of cluster size */
+ struct page **rpages; /* pages store raw data in cluster */
+ unsigned int nr_rpages; /* total page number in rpages */
+ struct page **cpages; /* pages store compressed data in cluster */
+ unsigned int nr_cpages; /* total page number in cpages */
+ void *rbuf; /* virtual mapped address on rpages */
+ struct compress_data *cbuf; /* virtual mapped address on cpages */
+ size_t rlen; /* valid data length in rbuf */
+ size_t clen; /* valid data length in cbuf */
+ void *private; /* payload buffer for specified compression algorithm */
+ void *private2; /* extra payload buffer */
+};
+
+/* compress context for write IO path */
+struct compress_io_ctx {
+ u32 magic; /* magic number to indicate page is compressed */
+ struct inode *inode; /* inode the context belong to */
+ struct page **rpages; /* pages store raw data in cluster */
+ unsigned int nr_rpages; /* total page number in rpages */
+ atomic_t pending_pages; /* in-flight compressed page count */
+};
+
+/* decompress io context for read IO path */
+struct decompress_io_ctx {
+ u32 magic; /* magic number to indicate page is compressed */
+ struct inode *inode; /* inode the context belong to */
+ pgoff_t cluster_idx; /* cluster index number */
+ unsigned int cluster_size; /* page count in cluster */
+ unsigned int log_cluster_size; /* log of cluster size */
+ struct page **rpages; /* pages store raw data in cluster */
+ unsigned int nr_rpages; /* total page number in rpages */
+ struct page **cpages; /* pages store compressed data in cluster */
+ unsigned int nr_cpages; /* total page number in cpages */
+ struct page **tpages; /* temp pages to pad holes in cluster */
+ void *rbuf; /* virtual mapped address on rpages */
+ struct compress_data *cbuf; /* virtual mapped address on cpages */
+ size_t rlen; /* valid data length in rbuf */
+ size_t clen; /* valid data length in cbuf */
+ atomic_t pending_pages; /* in-flight compressed page count */
+ atomic_t verity_pages; /* in-flight page count for verity */
+ bool failed; /* indicate IO error during decompression */
+ void *private; /* payload buffer for specified decompression algorithm */
+ void *private2; /* extra payload buffer */
+};
+
+#define NULL_CLUSTER ((unsigned int)(~0))
+#define MIN_COMPRESS_LOG_SIZE 2
+#define MAX_COMPRESS_LOG_SIZE 8
+#define MAX_COMPRESS_WINDOW_SIZE(log_size) ((PAGE_SIZE) << (log_size))
+
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
@@ -1184,10 +1351,6 @@
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
struct mutex writepages; /* mutex for writepages() */
-#ifdef CONFIG_UNICODE
- struct unicode_map *s_encoding;
- __u16 s_encoding_flags;
-#endif
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
@@ -1220,7 +1383,7 @@
unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
long interval_time[MAX_TIME]; /* to store thresholds */
- struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
+ struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
spinlock_t fsync_node_lock; /* for node entry lock */
struct list_head fsync_node_list; /* node list head */
@@ -1257,7 +1420,6 @@
unsigned int segs_per_sec; /* segments per section */
unsigned int secs_per_zone; /* sections per zone */
unsigned int total_sections; /* total section count */
- struct mutex resize_mutex; /* for resize exclusion */
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
loff_t max_file_blocks; /* max block index of file */
@@ -1291,18 +1453,24 @@
struct f2fs_mount_info mount_opt; /* mount options */
/* for cleaning operations */
- struct mutex gc_mutex; /* mutex for GC */
+ struct rw_semaphore gc_lock; /*
+ * semaphore for GC, avoid
+ * race between GC and GC or CP
+ */
struct f2fs_gc_kthread *gc_thread; /* GC thread */
+ struct atgc_management am; /* atgc management */
unsigned int cur_victim_sec; /* current victim section num */
unsigned int gc_mode; /* current GC state */
unsigned int next_victim_seg[2]; /* next segment in victim section */
+
/* for skip statistic */
- unsigned int atomic_files; /* # of opened atomic file */
+ unsigned int atomic_files; /* # of opened atomic file */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
+ struct rw_semaphore pin_sem;
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
@@ -1326,11 +1494,11 @@
atomic_t inline_xattr; /* # of inline_xattr inodes */
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
- atomic_t aw_cnt; /* # of atomic writes */
+ atomic_t compr_inode; /* # of compressed inodes */
+ atomic64_t compr_blocks; /* # of compressed blocks */
atomic_t vw_cnt; /* # of volatile writes */
atomic_t max_aw_cnt; /* max # of atomic writes */
atomic_t max_vw_cnt; /* max # of volatile writes */
- int bg_gc; /* background gc calls */
unsigned int io_skip_bggc; /* skip background gc for in-flight IO */
unsigned int other_skip_bggc; /* skip background gc for other reasons */
unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
@@ -1339,8 +1507,15 @@
/* For app/fs IO statistics */
spinlock_t iostat_lock;
- unsigned long long write_iostat[NR_IO_TYPE];
+ unsigned long long rw_iostat[NR_IO_TYPE];
+ unsigned long long prev_rw_iostat[NR_IO_TYPE];
bool iostat_enable;
+ unsigned long iostat_next_period;
+ unsigned int iostat_period_ms;
+
+ /* to attach REQ_META|REQ_FUA flags */
+ unsigned int data_io_flag;
+ unsigned int node_io_flag;
/* For sysfs suppport */
struct kobject s_kobj;
@@ -1364,6 +1539,16 @@
/* Precomputed FS UUID checksum for seeding other checksums */
__u32 s_chksum_seed;
+
+ struct workqueue_struct *post_read_wq; /* post read workqueue */
+
+ struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
+ unsigned int inline_xattr_slab_size; /* default inline xattr slab size */
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ struct kmem_cache *page_array_slab; /* page array entry */
+ unsigned int page_array_slab_size; /* default page array slab size */
+#endif
};
struct f2fs_private_dio {
@@ -1800,6 +1985,11 @@
if (!__allow_reserved_blocks(sbi, inode, true))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (F2FS_IO_ALIGNED(sbi))
+ avail_user_block_count -= sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments;
+
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
if (avail_user_block_count > sbi->unusable_block_count)
avail_user_block_count -= sbi->unusable_block_count;
@@ -2045,6 +2235,11 @@
if (!__allow_reserved_blocks(sbi, inode, false))
valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (F2FS_IO_ALIGNED(sbi))
+ valid_block_count += sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments;
+
user_block_count = sbi->user_block_count;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
user_block_count -= sbi->unusable_block_count;
@@ -2104,7 +2299,7 @@
dquot_free_inode(inode);
} else {
if (unlikely(inode->i_blocks == 0)) {
- f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ f2fs_warn(sbi, "dec_valid_node_count: inconsistent i_blocks, ino:%lu, iblocks:%llu",
inode->i_ino,
(unsigned long long)inode->i_blocks);
set_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -2221,29 +2416,9 @@
return entry;
}
-static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
- int npages, bool no_fail)
-{
- struct bio *bio;
-
- if (no_fail) {
- /* No failure on bio allocation */
- bio = bio_alloc(GFP_NOIO, npages);
- if (!bio)
- bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
- return bio;
- }
- if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
- f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO);
- return NULL;
- }
-
- return bio_alloc(GFP_KERNEL, npages);
-}
-
static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
{
- if (sbi->gc_mode == GC_URGENT)
+ if (sbi->gc_mode == GC_URGENT_HIGH)
return true;
if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
@@ -2261,6 +2436,10 @@
atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
return false;
+ if (sbi->gc_mode == GC_URGENT_LOW &&
+ (type == DISCARD_TIME || type == GC_TIME))
+ return true;
+
return f2fs_time_over(sbi, type);
}
@@ -2292,7 +2471,7 @@
}
static inline int f2fs_has_extra_attr(struct inode *inode);
-static inline block_t datablock_addr(struct inode *inode,
+static inline block_t data_blkaddr(struct inode *inode,
struct page *node_page, unsigned int offset)
{
struct f2fs_node *raw_node;
@@ -2302,9 +2481,9 @@
raw_node = F2FS_NODE(node_page);
- /* from GC path only */
if (is_inode) {
if (!inode)
+ /* from GC path only */
base = offset_in_addr(&raw_node->i);
else if (f2fs_has_extra_attr(inode))
base = get_extra_isize(inode);
@@ -2314,6 +2493,11 @@
return le32_to_cpu(addr_array[base + offset]);
}
+static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn)
+{
+ return data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node);
+}
+
static inline int f2fs_test_bit(unsigned int nr, char *addr)
{
int mask;
@@ -2377,11 +2561,13 @@
/*
* On-disk inode flags (f2fs_inode::i_flags)
*/
+#define F2FS_COMPR_FL 0x00000004 /* Compress file */
#define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */
#define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
#define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */
#define F2FS_NODUMP_FL 0x00000040 /* do not dump file */
#define F2FS_NOATIME_FL 0x00000080 /* do not update atime */
+#define F2FS_NOCOMP_FL 0x00000400 /* Don't compress */
#define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */
#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
@@ -2390,7 +2576,7 @@
/* Flags that should be inherited by new inodes from their parent. */
#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \
F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
- F2FS_CASEFOLD_FL)
+ F2FS_CASEFOLD_FL | F2FS_COMPR_FL | F2FS_NOCOMP_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
@@ -2409,41 +2595,6 @@
return flags & F2FS_OTHER_FLMASK;
}
-/* used for f2fs_inode_info->flags */
-enum {
- FI_NEW_INODE, /* indicate newly allocated inode */
- FI_DIRTY_INODE, /* indicate inode is dirty or not */
- FI_AUTO_RECOVER, /* indicate inode is recoverable */
- FI_DIRTY_DIR, /* indicate directory has dirty pages */
- FI_INC_LINK, /* need to increment i_nlink */
- FI_ACL_MODE, /* indicate acl mode */
- FI_NO_ALLOC, /* should not allocate any blocks */
- FI_FREE_NID, /* free allocated nide */
- FI_NO_EXTENT, /* not to use the extent cache */
- FI_INLINE_XATTR, /* used for inline xattr */
- FI_INLINE_DATA, /* used for inline data*/
- FI_INLINE_DENTRY, /* used for inline dentry */
- FI_APPEND_WRITE, /* inode has appended data */
- FI_UPDATE_WRITE, /* inode has in-place-update data */
- FI_NEED_IPU, /* used for ipu per file */
- FI_ATOMIC_FILE, /* indicate atomic file */
- FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
- FI_VOLATILE_FILE, /* indicate volatile file */
- FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
- FI_DROP_CACHE, /* drop dirty page cache */
- FI_DATA_EXIST, /* indicate data exists */
- FI_INLINE_DOTS, /* indicate inline dot dentries */
- FI_DO_DEFRAG, /* indicate defragment is running */
- FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
- FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
- FI_HOT_DATA, /* indicate file is hot */
- FI_EXTRA_ATTR, /* indicate file has extra attribute */
- FI_PROJ_INHERIT, /* indicate file inherits projectid */
- FI_PIN_FILE, /* indicate file should not be gced */
- FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
- FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
-};
-
static inline void __mark_inode_dirty_flag(struct inode *inode,
int flag, bool set)
{
@@ -2454,7 +2605,7 @@
case FI_NEW_INODE:
if (set)
return;
- /* fall through */
+ fallthrough;
case FI_DATA_EXIST:
case FI_INLINE_DOTS:
case FI_PIN_FILE:
@@ -2464,20 +2615,18 @@
static inline void set_inode_flag(struct inode *inode, int flag)
{
- if (!test_bit(flag, &F2FS_I(inode)->flags))
- set_bit(flag, &F2FS_I(inode)->flags);
+ set_bit(flag, F2FS_I(inode)->flags);
__mark_inode_dirty_flag(inode, flag, true);
}
static inline int is_inode_flag_set(struct inode *inode, int flag)
{
- return test_bit(flag, &F2FS_I(inode)->flags);
+ return test_bit(flag, F2FS_I(inode)->flags);
}
static inline void clear_inode_flag(struct inode *inode, int flag)
{
- if (test_bit(flag, &F2FS_I(inode)->flags))
- clear_bit(flag, &F2FS_I(inode)->flags);
+ clear_bit(flag, F2FS_I(inode)->flags);
__mark_inode_dirty_flag(inode, flag, false);
}
@@ -2568,19 +2717,19 @@
struct f2fs_inode_info *fi = F2FS_I(inode);
if (ri->i_inline & F2FS_INLINE_XATTR)
- set_bit(FI_INLINE_XATTR, &fi->flags);
+ set_bit(FI_INLINE_XATTR, fi->flags);
if (ri->i_inline & F2FS_INLINE_DATA)
- set_bit(FI_INLINE_DATA, &fi->flags);
+ set_bit(FI_INLINE_DATA, fi->flags);
if (ri->i_inline & F2FS_INLINE_DENTRY)
- set_bit(FI_INLINE_DENTRY, &fi->flags);
+ set_bit(FI_INLINE_DENTRY, fi->flags);
if (ri->i_inline & F2FS_DATA_EXIST)
- set_bit(FI_DATA_EXIST, &fi->flags);
+ set_bit(FI_DATA_EXIST, fi->flags);
if (ri->i_inline & F2FS_INLINE_DOTS)
- set_bit(FI_INLINE_DOTS, &fi->flags);
+ set_bit(FI_INLINE_DOTS, fi->flags);
if (ri->i_inline & F2FS_EXTRA_ATTR)
- set_bit(FI_EXTRA_ATTR, &fi->flags);
+ set_bit(FI_EXTRA_ATTR, fi->flags);
if (ri->i_inline & F2FS_PIN_FILE)
- set_bit(FI_PIN_FILE, &fi->flags);
+ set_bit(FI_PIN_FILE, fi->flags);
}
static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri)
@@ -2613,16 +2762,27 @@
return is_inode_flag_set(inode, FI_INLINE_XATTR);
}
+static inline int f2fs_compressed_file(struct inode *inode)
+{
+ return S_ISREG(inode->i_mode) &&
+ is_inode_flag_set(inode, FI_COMPRESSED_FILE);
+}
+
static inline unsigned int addrs_per_inode(struct inode *inode)
{
unsigned int addrs = CUR_ADDRS_PER_INODE(inode) -
get_inline_xattr_addrs(inode);
- return ALIGN_DOWN(addrs, 1);
+
+ if (!f2fs_compressed_file(inode))
+ return addrs;
+ return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size);
}
static inline unsigned int addrs_per_block(struct inode *inode)
{
- return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1);
+ if (!f2fs_compressed_file(inode))
+ return DEF_ADDRS_PER_BLOCK;
+ return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, F2FS_I(inode)->i_cluster_size);
}
static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
@@ -2655,6 +2815,11 @@
return is_inode_flag_set(inode, FI_INLINE_DOTS);
}
+static inline int f2fs_is_mmap_file(struct inode *inode)
+{
+ return is_inode_flag_set(inode, FI_MMAP_FILE);
+}
+
static inline bool f2fs_is_pinned_file(struct inode *inode)
{
return is_inode_flag_set(inode, FI_PIN_FILE);
@@ -2749,9 +2914,9 @@
if (!f2fs_is_time_consistent(inode))
return false;
- down_read(&F2FS_I(inode)->i_sem);
+ spin_lock(&F2FS_I(inode)->i_size_lock);
ret = F2FS_I(inode)->last_disk_size == i_size_read(inode);
- up_read(&F2FS_I(inode)->i_sem);
+ spin_unlock(&F2FS_I(inode)->i_size_lock);
return ret;
}
@@ -2766,12 +2931,12 @@
return is_set_ckpt_flags(sbi, CP_ERROR_FLAG);
}
-static inline bool is_dot_dotdot(const struct qstr *str)
+static inline bool is_dot_dotdot(const u8 *name, size_t len)
{
- if (str->len == 1 && str->name[0] == '.')
+ if (len == 1 && name[0] == '.')
return true;
- if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
+ if (len == 2 && name[0] == '.' && name[1] == '.')
return true;
return false;
@@ -2782,7 +2947,8 @@
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
if (!test_opt(sbi, EXTENT_CACHE) ||
- is_inode_flag_set(inode, FI_NO_EXTENT))
+ is_inode_flag_set(inode, FI_NO_EXTENT) ||
+ is_inode_flag_set(inode, FI_COMPRESSED_FILE))
return false;
/*
@@ -2853,29 +3019,45 @@
sizeof((f2fs_inode)->field)) \
<= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \
+#define DEFAULT_IOSTAT_PERIOD_MS 3000
+#define MIN_IOSTAT_PERIOD_MS 100
+/* maximum period of iostat tracing is 1 day */
+#define MAX_IOSTAT_PERIOD_MS 8640000
+
static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
{
int i;
spin_lock(&sbi->iostat_lock);
- for (i = 0; i < NR_IO_TYPE; i++)
- sbi->write_iostat[i] = 0;
+ for (i = 0; i < NR_IO_TYPE; i++) {
+ sbi->rw_iostat[i] = 0;
+ sbi->prev_rw_iostat[i] = 0;
+ }
spin_unlock(&sbi->iostat_lock);
}
+extern void f2fs_record_iostat(struct f2fs_sb_info *sbi);
+
static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
enum iostat_type type, unsigned long long io_bytes)
{
if (!sbi->iostat_enable)
return;
spin_lock(&sbi->iostat_lock);
- sbi->write_iostat[type] += io_bytes;
+ sbi->rw_iostat[type] += io_bytes;
if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
- sbi->write_iostat[APP_BUFFERED_IO] =
- sbi->write_iostat[APP_WRITE_IO] -
- sbi->write_iostat[APP_DIRECT_IO];
+ sbi->rw_iostat[APP_BUFFERED_IO] =
+ sbi->rw_iostat[APP_WRITE_IO] -
+ sbi->rw_iostat[APP_DIRECT_IO];
+
+ if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
+ sbi->rw_iostat[APP_BUFFERED_READ_IO] =
+ sbi->rw_iostat[APP_READ_IO] -
+ sbi->rw_iostat[APP_DIRECT_READ_IO];
spin_unlock(&sbi->iostat_lock);
+
+ f2fs_record_iostat(sbi);
}
#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
@@ -2896,7 +3078,8 @@
static inline bool __is_valid_data_blkaddr(block_t blkaddr)
{
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR ||
+ blkaddr == COMPRESS_ADDR)
return false;
return true;
}
@@ -2907,19 +3090,12 @@
if (PagePrivate(page))
return;
- get_page(page);
- SetPagePrivate(page);
- set_page_private(page, data);
+ attach_page_private(page, (void *)data);
}
static inline void f2fs_clear_page_private(struct page *page)
{
- if (!PagePrivate(page))
- return;
-
- set_page_private(page, 0);
- ClearPagePrivate(page);
- f2fs_put_page(page, 0);
+ detach_page_private(page);
}
/*
@@ -2927,6 +3103,7 @@
*/
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
+int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
int f2fs_getattr(const struct path *path, struct kstat *stat,
@@ -2966,22 +3143,28 @@
* dir.c
*/
unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de);
-struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
- f2fs_hash_t namehash, int *max_slots,
- struct f2fs_dentry_ptr *d);
+int f2fs_init_casefolded_name(const struct inode *dir,
+ struct f2fs_filename *fname);
+int f2fs_setup_filename(struct inode *dir, const struct qstr *iname,
+ int lookup, struct f2fs_filename *fname);
+int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
+ struct f2fs_filename *fname);
+void f2fs_free_filename(struct f2fs_filename *fname);
+struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
+ const struct f2fs_filename *fname, int *max_slots);
int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct fscrypt_str *fstr);
void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d);
struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
- const struct qstr *new_name,
- const struct qstr *orig_name, struct page *dpage);
+ const struct f2fs_filename *fname, struct page *dpage);
void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth);
int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots);
void f2fs_drop_nlink(struct inode *dir, struct inode *inode);
struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
- struct fscrypt_name *fname, struct page **res_page);
+ const struct f2fs_filename *fname,
+ struct page **res_page);
struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
const struct qstr *child, struct page **res_page);
struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p);
@@ -2989,13 +3172,14 @@
struct page **page);
void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode);
+bool f2fs_has_enough_room(struct inode *dir, struct page *ipage,
+ const struct f2fs_filename *fname);
void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
- const struct qstr *name, f2fs_hash_t name_hash,
+ const struct fscrypt_str *name, f2fs_hash_t name_hash,
unsigned int bit_pos);
-int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
- const struct qstr *orig_name,
+int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname,
struct inode *inode, nid_t ino, umode_t mode);
-int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname,
+int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname,
struct inode *inode, nid_t ino, umode_t mode);
int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode);
@@ -3027,8 +3211,7 @@
/*
* hash.c
*/
-f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
- const struct qstr *name_info, struct fscrypt_name *fname);
+void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname);
/*
* node.c
@@ -3060,6 +3243,7 @@
struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
struct page *f2fs_get_node_page_ra(struct page *parent, int start);
int f2fs_move_node_page(struct page *node_page, int gc_type);
+void f2fs_flush_inline_data(struct f2fs_sb_info *sbi);
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic,
unsigned int *seq_id);
@@ -3092,7 +3276,7 @@
void f2fs_drop_inmem_page(struct inode *inode, struct page *page);
int f2fs_commit_inmem_pages(struct inode *inode);
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
-void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
+void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg);
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino);
int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi);
int f2fs_flush_device_cache(struct f2fs_sb_info *sbi);
@@ -3109,8 +3293,15 @@
int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
-void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno);
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
+ unsigned int *newseg, bool new_sec, int dir);
+void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
+void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -3126,7 +3317,8 @@
int f2fs_inplace_write_data(struct f2fs_io_info *fio);
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
- bool recover_curseg, bool recover_newaddr);
+ bool recover_curseg, bool recover_newaddr,
+ bool from_gc);
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
block_t old_addr, block_t new_addr,
unsigned char version, bool recover_curseg,
@@ -3134,7 +3326,7 @@
void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
- struct f2fs_io_info *fio, bool add_list);
+ struct f2fs_io_info *fio);
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool ordered, bool locked);
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
@@ -3145,6 +3337,8 @@
int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
unsigned int val, int alloc);
void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi);
+int f2fs_check_write_pointer(struct f2fs_sb_info *sbi);
int f2fs_build_segment_manager(struct f2fs_sb_info *sbi);
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
int __init f2fs_create_segment_manager_caches(void);
@@ -3152,6 +3346,10 @@
int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp);
+unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno);
+unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno);
/*
* checkpoint.c
@@ -3195,12 +3393,19 @@
/*
* data.c
*/
-int f2fs_init_post_read_processing(void);
-void f2fs_destroy_post_read_processing(void);
+int __init f2fs_init_bioset(void);
+void f2fs_destroy_bioset(void);
+struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio);
+int f2fs_init_bio_entry_cache(void);
+void f2fs_destroy_bio_entry_cache(void);
+void f2fs_submit_bio(struct f2fs_sb_info *sbi,
+ struct bio *bio, enum page_type type);
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct inode *inode, struct page *page,
nid_t ino, enum page_type type);
+void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
+ struct bio **bio, struct page *page);
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
int f2fs_merge_page_bio(struct f2fs_io_info *fio);
@@ -3223,13 +3428,19 @@
struct page *f2fs_get_new_data_page(struct inode *inode,
struct page *ipage, pgoff_t index, bool new_i_size);
int f2fs_do_write_data_page(struct f2fs_io_info *fio);
-void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
+void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
+int f2fs_encrypt_one_page(struct f2fs_io_info *fio);
bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio);
bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio);
+int f2fs_write_single_data_page(struct page *page, int *submitted,
+ struct bio **bio, sector_t *last_block,
+ struct writeback_control *wbc,
+ enum iostat_type io_type,
+ int compr_blocks, bool allow_balance);
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length);
int f2fs_release_page(struct page *page, gfp_t wait);
@@ -3239,6 +3450,10 @@
#endif
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
void f2fs_clear_page_cache_dirty_tag(struct page *page);
+int f2fs_init_post_read_processing(void);
+void f2fs_destroy_post_read_processing(void);
+int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
+void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
/*
* gc.c
@@ -3246,16 +3461,20 @@
int f2fs_start_gc_thread(struct f2fs_sb_info *sbi);
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
+int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force,
unsigned int segno);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
+int __init f2fs_create_garbage_collection_cache(void);
+void f2fs_destroy_garbage_collection_cache(void);
/*
* recovery.c
*/
int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only);
bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi);
+int __init f2fs_create_recovery_cache(void);
+void f2fs_destroy_recovery_cache(void);
/*
* debug.c
@@ -3285,6 +3504,8 @@
int nr_discard_cmd;
unsigned int undiscard_blks;
int inline_xattr, inline_inode, inline_dir, append, update, orphans;
+ int compr_inode;
+ unsigned long long compr_blocks;
int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt;
unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
unsigned int bimodal, avg_vblocks;
@@ -3300,6 +3521,9 @@
int curseg[NR_CURSEG_TYPE];
int cursec[NR_CURSEG_TYPE];
int curzone[NR_CURSEG_TYPE];
+ unsigned int dirty_seg[NR_CURSEG_TYPE];
+ unsigned int full_seg[NR_CURSEG_TYPE];
+ unsigned int valid_blks[NR_CURSEG_TYPE];
unsigned int meta_count[META_MAX];
unsigned int segment_count[2];
@@ -3316,7 +3540,7 @@
#define stat_inc_cp_count(si) ((si)->cp_count++)
#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++)
#define stat_inc_call_count(si) ((si)->call_count++)
-#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
+#define stat_inc_bggc_count(si) ((si)->bg_gc++)
#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++)
#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++)
#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++)
@@ -3355,6 +3579,20 @@
if (f2fs_has_inline_dentry(inode)) \
(atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \
} while (0)
+#define stat_inc_compr_inode(inode) \
+ do { \
+ if (f2fs_compressed_file(inode)) \
+ (atomic_inc(&F2FS_I_SB(inode)->compr_inode)); \
+ } while (0)
+#define stat_dec_compr_inode(inode) \
+ do { \
+ if (f2fs_compressed_file(inode)) \
+ (atomic_dec(&F2FS_I_SB(inode)->compr_inode)); \
+ } while (0)
+#define stat_add_compr_blocks(inode, blocks) \
+ (atomic64_add(blocks, &F2FS_I_SB(inode)->compr_blocks))
+#define stat_sub_compr_blocks(inode, blocks) \
+ (atomic64_sub(blocks, &F2FS_I_SB(inode)->compr_blocks))
#define stat_inc_meta_count(sbi, blkaddr) \
do { \
if (blkaddr < SIT_I(sbi)->sit_base_addr) \
@@ -3372,13 +3610,9 @@
((sbi)->block_count[(curseg)->alloc_type]++)
#define stat_inc_inplace_blocks(sbi) \
(atomic_inc(&(sbi)->inplace_count))
-#define stat_inc_atomic_write(inode) \
- (atomic_inc(&F2FS_I_SB(inode)->aw_cnt))
-#define stat_dec_atomic_write(inode) \
- (atomic_dec(&F2FS_I_SB(inode)->aw_cnt))
#define stat_update_max_atomic_write(inode) \
do { \
- int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \
+ int cur = F2FS_I_SB(inode)->atomic_files; \
int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \
if (cur > max) \
atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \
@@ -3430,6 +3664,7 @@
void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
void __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
+void f2fs_update_sit_info(struct f2fs_sb_info *sbi);
#else
#define stat_inc_cp_count(si) do { } while (0)
#define stat_inc_bg_cp_count(si) do { } while (0)
@@ -3439,8 +3674,8 @@
#define stat_other_skip_bggc_count(sbi) do { } while (0)
#define stat_inc_dirty_inode(sbi, type) do { } while (0)
#define stat_dec_dirty_inode(sbi, type) do { } while (0)
-#define stat_inc_total_hit(sb) do { } while (0)
-#define stat_inc_rbtree_node_hit(sb) do { } while (0)
+#define stat_inc_total_hit(sbi) do { } while (0)
+#define stat_inc_rbtree_node_hit(sbi) do { } while (0)
#define stat_inc_largest_node_hit(sbi) do { } while (0)
#define stat_inc_cached_node_hit(sbi) do { } while (0)
#define stat_inc_inline_xattr(inode) do { } while (0)
@@ -3449,6 +3684,10 @@
#define stat_dec_inline_inode(inode) do { } while (0)
#define stat_inc_inline_dir(inode) do { } while (0)
#define stat_dec_inline_dir(inode) do { } while (0)
+#define stat_inc_compr_inode(inode) do { } while (0)
+#define stat_dec_compr_inode(inode) do { } while (0)
+#define stat_add_compr_blocks(inode, blocks) do { } while (0)
+#define stat_sub_compr_blocks(inode, blocks) do { } while (0)
#define stat_inc_atomic_write(inode) do { } while (0)
#define stat_dec_atomic_write(inode) do { } while (0)
#define stat_update_max_atomic_write(inode) do { } while (0)
@@ -3468,6 +3707,7 @@
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
static inline void __init f2fs_create_root_stats(void) { }
static inline void f2fs_destroy_root_stats(void) { }
+static inline void f2fs_update_sit_info(struct f2fs_sb_info *sbi) {}
#endif
extern const struct file_operations f2fs_dir_operations;
@@ -3496,14 +3736,15 @@
int f2fs_read_inline_data(struct inode *inode, struct page *page);
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
int f2fs_convert_inline_inode(struct inode *inode);
+int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry);
int f2fs_write_inline_data(struct inode *inode, struct page *page);
int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
- struct fscrypt_name *fname, struct page **res_page);
+ const struct f2fs_filename *fname,
+ struct page **res_page);
int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage);
-int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
- const struct qstr *orig_name,
+int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
struct inode *inode, nid_t ino, umode_t mode);
void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry,
struct page *page, struct inode *dir,
@@ -3530,6 +3771,10 @@
*/
struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
struct rb_entry *cached_re, unsigned int ofs);
+struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
+ struct rb_root_cached *root,
+ struct rb_node **parent,
+ unsigned long long key, bool *left_most);
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root_cached *root,
struct rb_node **parent,
@@ -3540,9 +3785,9 @@
struct rb_node ***insert_p, struct rb_node **insert_parent,
bool force, bool *leftmost);
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root);
+ struct rb_root_cached *root, bool check_key);
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
-bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
+void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
void f2fs_drop_extent_tree(struct inode *inode);
unsigned int f2fs_destroy_extent_node(struct inode *inode);
void f2fs_destroy_extent_tree(struct inode *inode);
@@ -3588,7 +3833,100 @@
*/
static inline bool f2fs_post_read_required(struct inode *inode)
{
- return f2fs_encrypted_file(inode) || fsverity_active(inode);
+ return f2fs_encrypted_file(inode) || fsverity_active(inode) ||
+ f2fs_compressed_file(inode);
+}
+
+/*
+ * compress.c
+ */
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+bool f2fs_is_compressed_page(struct page *page);
+struct page *f2fs_compress_control_page(struct page *page);
+int f2fs_prepare_compress_overwrite(struct inode *inode,
+ struct page **pagep, pgoff_t index, void **fsdata);
+bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
+ pgoff_t index, unsigned copied);
+int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock);
+void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
+bool f2fs_is_compress_backend_ready(struct inode *inode);
+int f2fs_init_compress_mempool(void);
+void f2fs_destroy_compress_mempool(void);
+void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity);
+bool f2fs_cluster_is_empty(struct compress_ctx *cc);
+bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
+void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
+int f2fs_write_multi_pages(struct compress_ctx *cc,
+ int *submitted,
+ struct writeback_control *wbc,
+ enum iostat_type io_type);
+int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index);
+int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
+ unsigned nr_pages, sector_t *last_block_in_bio,
+ bool is_readahead, bool for_write);
+struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
+void f2fs_free_dic(struct decompress_io_ctx *dic);
+void f2fs_decompress_end_io(struct page **rpages,
+ unsigned int cluster_size, bool err, bool verity);
+int f2fs_init_compress_ctx(struct compress_ctx *cc);
+void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
+void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
+int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
+void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
+int __init f2fs_init_compress_cache(void);
+void f2fs_destroy_compress_cache(void);
+#else
+static inline bool f2fs_is_compressed_page(struct page *page) { return false; }
+static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
+{
+ if (!f2fs_compressed_file(inode))
+ return true;
+ /* not support compression */
+ return false;
+}
+static inline struct page *f2fs_compress_control_page(struct page *page)
+{
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-EINVAL);
+}
+static inline int f2fs_init_compress_mempool(void) { return 0; }
+static inline void f2fs_destroy_compress_mempool(void) { }
+static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
+static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
+static inline int __init f2fs_init_compress_cache(void) { return 0; }
+static inline void f2fs_destroy_compress_cache(void) { }
+#endif
+
+static inline void set_compress_context(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ F2FS_I(inode)->i_compress_algorithm =
+ F2FS_OPTION(sbi).compress_algorithm;
+ F2FS_I(inode)->i_log_cluster_size =
+ F2FS_OPTION(sbi).compress_log_size;
+ F2FS_I(inode)->i_cluster_size =
+ 1 << F2FS_I(inode)->i_log_cluster_size;
+ F2FS_I(inode)->i_flags |= F2FS_COMPR_FL;
+ set_inode_flag(inode, FI_COMPRESSED_FILE);
+ stat_inc_compr_inode(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
+}
+
+static inline bool f2fs_disable_compressed_file(struct inode *inode)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+
+ if (!f2fs_compressed_file(inode))
+ return true;
+ if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))
+ return false;
+
+ fi->i_flags &= ~F2FS_COMPR_FL;
+ stat_dec_compr_inode(inode);
+ clear_inode_flag(inode, FI_COMPRESSED_FILE);
+ f2fs_mark_inode_dirty_sync(inode, true);
+ return true;
}
#define F2FS_FEATURE_FUNCS(name, flagname) \
@@ -3609,6 +3947,7 @@
F2FS_FEATURE_FUNCS(verity, VERITY);
F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
F2FS_FEATURE_FUNCS(casefold, CASEFOLD);
+F2FS_FEATURE_FUNCS(compression, COMPRESSION);
#ifdef CONFIG_BLK_DEV_ZONED
static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
@@ -3663,31 +4002,38 @@
return false;
}
-
-static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
+static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
{
- clear_opt(sbi, ADAPTIVE);
- clear_opt(sbi, LFS);
-
- switch (mt) {
- case F2FS_MOUNT_ADAPTIVE:
- set_opt(sbi, ADAPTIVE);
- break;
- case F2FS_MOUNT_LFS:
- set_opt(sbi, LFS);
- break;
- }
+ return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
}
-static inline bool f2fs_may_encrypt(struct inode *inode)
+static inline bool f2fs_may_compress(struct inode *inode)
{
-#ifdef CONFIG_FS_ENCRYPTION
- umode_t mode = inode->i_mode;
+ if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) ||
+ f2fs_is_atomic_file(inode) ||
+ f2fs_is_volatile_file(inode))
+ return false;
+ return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
+}
- return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
-#else
- return false;
-#endif
+static inline void f2fs_i_compr_blocks_update(struct inode *inode,
+ u64 blocks, bool add)
+{
+ int diff = F2FS_I(inode)->i_cluster_size - blocks;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+
+ /* don't update i_compr_blocks if saved blocks were released */
+ if (!add && !atomic_read(&fi->i_compr_blocks))
+ return;
+
+ if (add) {
+ atomic_add(diff, &fi->i_compr_blocks);
+ stat_add_compr_blocks(inode, diff);
+ } else {
+ atomic_sub(diff, &fi->i_compr_blocks);
+ stat_sub_compr_blocks(inode, diff);
+ }
+ f2fs_mark_inode_dirty_sync(inode, true);
}
static inline int block_unaligned_IO(struct inode *inode,
@@ -3707,7 +4053,7 @@
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int rw = iov_iter_rw(iter);
- return (test_opt(sbi, LFS) && (rw == WRITE) &&
+ return (f2fs_lfs_mode(sbi) && (rw == WRITE) &&
!block_unaligned_IO(inode, iocb, iter));
}
@@ -3727,7 +4073,7 @@
*/
if (f2fs_sb_has_blkzoned(sbi))
return true;
- if (test_opt(sbi, LFS) && (rw == WRITE)) {
+ if (f2fs_lfs_mode(sbi) && (rw == WRITE)) {
if (block_unaligned_IO(inode, iocb, iter))
return true;
if (F2FS_IO_ALIGNED(sbi))
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 516007b..1fbaab1 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -21,6 +21,7 @@
#include <linux/uuid.h>
#include <linux/file.h>
#include <linux/nls.h>
+#include <linux/sched/signal.h>
#include "f2fs.h"
#include "node.h"
@@ -30,6 +31,7 @@
#include "gc.h"
#include "trace.h"
#include <trace/events/f2fs.h>
+#include <uapi/linux/f2fs.h>
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
{
@@ -40,6 +42,10 @@
ret = filemap_fault(vmf);
up_read(&F2FS_I(inode)->i_mmap_sem);
+ if (!ret)
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO,
+ F2FS_BLKSIZE);
+
trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret);
return ret;
@@ -51,7 +57,11 @@
struct inode *inode = file_inode(vmf->vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
- int err;
+ bool need_alloc = true;
+ int err = 0;
+
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return VM_FAULT_SIGBUS;
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -63,8 +73,25 @@
goto err;
}
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (f2fs_compressed_file(inode)) {
+ int ret = f2fs_is_compressed_cluster(inode, page->index);
+
+ if (ret < 0) {
+ err = ret;
+ goto err;
+ } else if (ret) {
+ if (ret < F2FS_I(inode)->i_cluster_size) {
+ err = -EAGAIN;
+ goto err;
+ }
+ need_alloc = false;
+ }
+ }
+#endif
/* should do out of any locked page */
- f2fs_balance_fs(sbi, true);
+ if (need_alloc)
+ f2fs_balance_fs(sbi, true);
sb_start_pagefault(inode->i_sb);
@@ -81,18 +108,27 @@
goto out_sem;
}
- /* block allocation */
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_get_block(&dn, page->index);
- f2fs_put_dnode(&dn);
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ if (need_alloc) {
+ /* block allocation */
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_get_block(&dn, page->index);
+ f2fs_put_dnode(&dn);
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ }
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (!need_alloc) {
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
+ f2fs_put_dnode(&dn);
+ }
+#endif
if (err) {
unlock_page(page);
goto out_sem;
}
- /* fill the page */
f2fs_wait_on_page_writeback(page, DATA, false, true);
/* wait for GCed page writeback via META_MAPPING */
@@ -138,9 +174,11 @@
{
struct dentry *dentry;
- inode = igrab(inode);
- dentry = d_find_any_alias(inode);
- iput(inode);
+ /*
+ * Make sure to get the non-deleted alias. The alias associated with
+ * the open file descriptor being fsync()'ed may be deleted already.
+ */
+ dentry = d_find_alias(inode);
if (!dentry)
return 0;
@@ -156,6 +194,8 @@
if (!S_ISREG(inode->i_mode))
cp_reason = CP_NON_REGULAR;
+ else if (f2fs_compressed_file(inode))
+ cp_reason = CP_COMPRESSED;
else if (inode->i_nlink != 1)
cp_reason = CP_HARDLINK;
else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
@@ -219,8 +259,7 @@
};
unsigned int seq_id = 0;
- if (unlikely(f2fs_readonly(inode->i_sb) ||
- is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
trace_f2fs_sync_file_enter(inode);
@@ -234,7 +273,7 @@
ret = file_write_and_wait_range(file, start, end);
clear_inode_flag(inode, FI_NEED_IPU);
- if (ret) {
+ if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
return ret;
}
@@ -340,32 +379,15 @@
return f2fs_do_sync_file(file, start, end, datasync, false);
}
-static pgoff_t __get_first_dirty_index(struct address_space *mapping,
- pgoff_t pgofs, int whence)
-{
- struct page *page;
- int nr_pages;
-
- if (whence != SEEK_DATA)
- return 0;
-
- /* find first dirty page index */
- nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY,
- 1, &page);
- if (!nr_pages)
- return ULONG_MAX;
- pgofs = page->index;
- put_page(page);
- return pgofs;
-}
-
-static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
- pgoff_t dirty, pgoff_t pgofs, int whence)
+static bool __found_offset(struct address_space *mapping, block_t blkaddr,
+ pgoff_t index, int whence)
{
switch (whence) {
case SEEK_DATA:
- if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- __is_valid_data_blkaddr(blkaddr))
+ if (__is_valid_data_blkaddr(blkaddr))
+ return true;
+ if (blkaddr == NEW_ADDR &&
+ xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
return true;
break;
case SEEK_HOLE:
@@ -381,7 +403,7 @@
struct inode *inode = file->f_mapping->host;
loff_t maxbytes = inode->i_sb->s_maxbytes;
struct dnode_of_data dn;
- pgoff_t pgofs, end_offset, dirty;
+ pgoff_t pgofs, end_offset;
loff_t data_ofs = offset;
loff_t isize;
int err = 0;
@@ -393,16 +415,18 @@
goto fail;
/* handle inline data case */
- if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
- if (whence == SEEK_HOLE)
+ if (f2fs_has_inline_data(inode)) {
+ if (whence == SEEK_HOLE) {
data_ofs = isize;
- goto found;
+ goto found;
+ } else if (whence == SEEK_DATA) {
+ data_ofs = offset;
+ goto found;
+ }
}
pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
- dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
-
for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
@@ -426,8 +450,7 @@
data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
block_t blkaddr;
- blkaddr = datablock_addr(dn.inode,
- dn.node_page, dn.ofs_in_node);
+ blkaddr = f2fs_data_blkaddr(&dn);
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
@@ -436,7 +459,7 @@
goto fail;
}
- if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+ if (__found_offset(file->f_mapping, blkaddr,
pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
@@ -486,6 +509,9 @@
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
+ if (!f2fs_is_compress_backend_ready(inode))
+ return -EOPNOTSUPP;
+
/* we don't need to use inline_data strictly */
err = f2fs_convert_inline_inode(inode);
if (err)
@@ -493,6 +519,7 @@
file_accessed(file);
vma->vm_ops = &f2fs_file_vm_ops;
+ set_inode_flag(inode, FI_MMAP_FILE);
return 0;
}
@@ -503,6 +530,9 @@
if (err)
return err;
+ if (!f2fs_is_compress_backend_ready(inode))
+ return -EOPNOTSUPP;
+
err = fsverity_file_open(inode, filp);
if (err)
return err;
@@ -519,6 +549,10 @@
int nr_free = 0, ofs = dn->ofs_in_node, len = count;
__le32 *addr;
int base = 0;
+ bool compressed_cluster = false;
+ int cluster_index = 0, valid_blocks = 0;
+ int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
+ bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
base = get_extra_isize(dn->inode);
@@ -526,26 +560,45 @@
raw_node = F2FS_NODE(dn->node_page);
addr = blkaddr_in_node(raw_node) + base + ofs;
- for (; count > 0; count--, addr++, dn->ofs_in_node++) {
+ /* Assumption: truncateion starts with cluster */
+ for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
block_t blkaddr = le32_to_cpu(*addr);
+ if (f2fs_compressed_file(dn->inode) &&
+ !(cluster_index & (cluster_size - 1))) {
+ if (compressed_cluster)
+ f2fs_i_compr_blocks_update(dn->inode,
+ valid_blocks, false);
+ compressed_cluster = (blkaddr == COMPRESS_ADDR);
+ valid_blocks = 0;
+ }
+
if (blkaddr == NULL_ADDR)
continue;
dn->data_blkaddr = NULL_ADDR;
f2fs_set_data_blkaddr(dn);
- if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr,
+ if (__is_valid_data_blkaddr(blkaddr)) {
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
DATA_GENERIC_ENHANCE))
- continue;
+ continue;
+ if (compressed_cluster)
+ valid_blocks++;
+ }
- f2fs_invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
- nr_free++;
+
+ f2fs_invalidate_blocks(sbi, blkaddr);
+
+ if (!released || blkaddr != COMPRESS_ADDR)
+ nr_free++;
}
+ if (compressed_cluster)
+ f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);
+
if (nr_free) {
pgoff_t fofs;
/*
@@ -603,7 +656,7 @@
return 0;
}
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
+int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
@@ -668,6 +721,36 @@
return err;
}
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
+{
+ u64 free_from = from;
+ int err;
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ /*
+ * for compressed file, only support cluster size
+ * aligned truncation.
+ */
+ if (f2fs_compressed_file(inode))
+ free_from = round_up(from,
+ F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
+#endif
+
+ err = f2fs_do_truncate_blocks(inode, free_from, lock);
+ if (err)
+ return err;
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (from != free_from) {
+ err = f2fs_truncate_partial_cluster(inode, from, lock);
+ if (err)
+ return err;
+ }
+#endif
+
+ return 0;
+}
+
int f2fs_truncate(struct inode *inode)
{
int err;
@@ -723,6 +806,8 @@
}
flags = fi->i_flags;
+ if (flags & F2FS_COMPR_FL)
+ stat->attributes |= STATX_ATTR_COMPRESSED;
if (flags & F2FS_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
if (IS_ENCRYPTED(inode))
@@ -731,11 +816,15 @@
stat->attributes |= STATX_ATTR_IMMUTABLE;
if (flags & F2FS_NODUMP_FL)
stat->attributes |= STATX_ATTR_NODUMP;
+ if (IS_VERITY(inode))
+ stat->attributes |= STATX_ATTR_VERITY;
- stat->attributes_mask |= (STATX_ATTR_APPEND |
+ stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
+ STATX_ATTR_APPEND |
STATX_ATTR_ENCRYPTED |
STATX_ATTR_IMMUTABLE |
- STATX_ATTR_NODUMP);
+ STATX_ATTR_NODUMP |
+ STATX_ATTR_VERITY);
generic_fillattr(inode, stat);
@@ -783,6 +872,18 @@
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
+ if (unlikely(IS_APPEND(inode) &&
+ (attr->ia_valid & (ATTR_MODE | ATTR_UID |
+ ATTR_GID | ATTR_TIMES_SET))))
+ return -EPERM;
+
+ if ((attr->ia_valid & ATTR_SIZE) &&
+ !f2fs_is_compress_backend_ready(inode))
+ return -EOPNOTSUPP;
+
err = setattr_prepare(dentry, attr);
if (err)
return err;
@@ -853,10 +954,10 @@
if (err)
return err;
- down_write(&F2FS_I(inode)->i_sem);
+ spin_lock(&F2FS_I(inode)->i_size_lock);
inode->i_mtime = inode->i_ctime = current_time(inode);
F2FS_I(inode)->last_disk_size = i_size_read(inode);
- up_write(&F2FS_I(inode)->i_sem);
+ spin_unlock(&F2FS_I(inode)->i_size_lock);
}
__setattr_copy(inode, attr);
@@ -883,9 +984,7 @@
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
.set_acl = f2fs_set_acl,
-#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
-#endif
.fiemap = f2fs_fiemap,
};
@@ -1021,8 +1120,8 @@
} else if (ret == -ENOENT) {
if (dn.max_level == 0)
return -ENOENT;
- done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - dn.ofs_in_node,
- len);
+ done = min((pgoff_t)ADDRS_PER_BLOCK(inode) -
+ dn.ofs_in_node, len);
blkaddr += done;
do_replace += done;
goto next;
@@ -1031,8 +1130,7 @@
done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
dn.ofs_in_node, len);
for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
- *blkaddr = datablock_addr(dn.inode,
- dn.node_page, dn.ofs_in_node);
+ *blkaddr = f2fs_data_blkaddr(&dn);
if (__is_valid_data_blkaddr(*blkaddr) &&
!f2fs_is_valid_blkaddr(sbi, *blkaddr,
@@ -1043,7 +1141,7 @@
if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
- if (test_opt(sbi, LFS)) {
+ if (f2fs_lfs_mode(sbi)) {
f2fs_put_dnode(&dn);
return -EOPNOTSUPP;
}
@@ -1121,8 +1219,7 @@
ADDRS_PER_PAGE(dn.node_page, dst_inode) -
dn.ofs_in_node, len - i);
do {
- dn.data_blkaddr = datablock_addr(dn.inode,
- dn.node_page, dn.ofs_in_node);
+ dn.data_blkaddr = f2fs_data_blkaddr(&dn);
f2fs_truncate_data_blocks_range(&dn, 1);
if (do_replace[i]) {
@@ -1185,13 +1282,13 @@
src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
array_size(olen, sizeof(block_t)),
- GFP_KERNEL);
+ GFP_NOFS);
if (!src_blkaddr)
return -ENOMEM;
do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
array_size(olen, sizeof(int)),
- GFP_KERNEL);
+ GFP_NOFS);
if (!do_replace) {
kvfree(src_blkaddr);
return -ENOMEM;
@@ -1279,8 +1376,6 @@
truncate_pagecache(inode, offset);
new_size = i_size_read(inode) - len;
- truncate_pagecache(inode, new_size);
-
ret = f2fs_truncate_blocks(inode, new_size, true);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
@@ -1298,8 +1393,7 @@
int ret;
for (; index < end; index++, dn->ofs_in_node++) {
- if (datablock_addr(dn->inode, dn->node_page,
- dn->ofs_in_node) == NULL_ADDR)
+ if (f2fs_data_blkaddr(dn) == NULL_ADDR)
count++;
}
@@ -1310,8 +1404,7 @@
dn->ofs_in_node = ofs_in_node;
for (index = start; index < end; index++, dn->ofs_in_node++) {
- dn->data_blkaddr = datablock_addr(dn->inode,
- dn->node_page, dn->ofs_in_node);
+ dn->data_blkaddr = f2fs_data_blkaddr(dn);
/*
* f2fs_reserve_new_blocks will not guarantee entire block
* allocation.
@@ -1520,9 +1613,10 @@
struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
.m_may_create = true };
- pgoff_t pg_end;
+ pgoff_t pg_start, pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
+ block_t expanded = 0;
int err;
err = inode_newsize_ok(inode, (len + offset));
@@ -1535,27 +1629,62 @@
f2fs_balance_fs(sbi, true);
+ pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
off_end = (offset + len) & (PAGE_SIZE - 1);
- map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT;
- map.m_len = pg_end - map.m_lblk;
+ map.m_lblk = pg_start;
+ map.m_len = pg_end - pg_start;
if (off_end)
map.m_len++;
- if (f2fs_is_pinned_file(inode))
- map.m_seg_type = CURSEG_COLD_DATA;
+ if (!map.m_len)
+ return 0;
- err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ?
- F2FS_GET_BLOCK_PRE_DIO :
- F2FS_GET_BLOCK_PRE_AIO));
+ if (f2fs_is_pinned_file(inode)) {
+ block_t sec_blks = BLKS_PER_SEC(sbi);
+ block_t sec_len = roundup(map.m_len, sec_blks);
+
+ map.m_len = sec_blks;
+next_alloc:
+ if (has_not_enough_free_secs(sbi, 0,
+ GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
+ down_write(&sbi->gc_lock);
+ err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
+ if (err && err != -ENODATA && err != -EAGAIN)
+ goto out_err;
+ }
+
+ down_write(&sbi->pin_sem);
+
+ f2fs_lock_op(sbi);
+ f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED);
+ f2fs_unlock_op(sbi);
+
+ map.m_seg_type = CURSEG_COLD_DATA_PINNED;
+ err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+
+ up_write(&sbi->pin_sem);
+
+ expanded += map.m_len;
+ sec_len -= map.m_len;
+ map.m_lblk += map.m_len;
+ if (!err && sec_len)
+ goto next_alloc;
+
+ map.m_len = expanded;
+ } else {
+ err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+ expanded = map.m_len;
+ }
+out_err:
if (err) {
pgoff_t last_off;
- if (!map.m_len)
+ if (!expanded)
return err;
- last_off = map.m_lblk + map.m_len - 1;
+ last_off = pg_start + expanded - 1;
/* update new size to the failed position */
new_size = (last_off == pg_end) ? offset + len :
@@ -1584,6 +1713,8 @@
return -EIO;
if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
return -ENOSPC;
+ if (!f2fs_is_compress_backend_ready(inode))
+ return -EOPNOTSUPP;
/* f2fs only support ->fallocate for regular file */
if (!S_ISREG(inode->i_mode))
@@ -1593,6 +1724,11 @@
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
return -EOPNOTSUPP;
+ if (f2fs_compressed_file(inode) &&
+ (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)))
+ return -EOPNOTSUPP;
+
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
FALLOC_FL_INSERT_RANGE))
@@ -1670,19 +1806,52 @@
static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
+ u32 masked_flags = fi->i_flags & mask;
+
+ f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask));
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode))
return -EPERM;
- if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) {
+ if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) {
if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
return -EOPNOTSUPP;
if (!f2fs_empty_dir(inode))
return -ENOTEMPTY;
}
+ if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) {
+ if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
+ return -EOPNOTSUPP;
+ if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL))
+ return -EINVAL;
+ }
+
+ if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
+ if (masked_flags & F2FS_COMPR_FL) {
+ if (!f2fs_disable_compressed_file(inode))
+ return -EINVAL;
+ }
+ if (iflags & F2FS_NOCOMP_FL)
+ return -EINVAL;
+ if (iflags & F2FS_COMPR_FL) {
+ if (!f2fs_may_compress(inode))
+ return -EINVAL;
+ if (S_ISREG(inode->i_mode) && inode->i_size)
+ return -EINVAL;
+
+ set_compress_context(inode);
+ }
+ }
+ if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) {
+ if (masked_flags & F2FS_COMPR_FL)
+ return -EINVAL;
+ }
+
fi->i_flags = iflags | (fi->i_flags & ~mask);
+ f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) &&
+ (fi->i_flags & F2FS_NOCOMP_FL));
if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
@@ -1708,11 +1877,13 @@
u32 iflag;
u32 fsflag;
} f2fs_fsflags_map[] = {
+ { F2FS_COMPR_FL, FS_COMPR_FL },
{ F2FS_SYNC_FL, FS_SYNC_FL },
{ F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL },
{ F2FS_APPEND_FL, FS_APPEND_FL },
{ F2FS_NODUMP_FL, FS_NODUMP_FL },
{ F2FS_NOATIME_FL, FS_NOATIME_FL },
+ { F2FS_NOCOMP_FL, FS_NOCOMP_FL },
{ F2FS_INDEX_FL, FS_INDEX_FL },
{ F2FS_DIRSYNC_FL, FS_DIRSYNC_FL },
{ F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL },
@@ -1720,11 +1891,13 @@
};
#define F2FS_GETTABLE_FS_FL ( \
+ FS_COMPR_FL | \
FS_SYNC_FL | \
FS_IMMUTABLE_FL | \
FS_APPEND_FL | \
FS_NODUMP_FL | \
FS_NOATIME_FL | \
+ FS_NOCOMP_FL | \
FS_INDEX_FL | \
FS_DIRSYNC_FL | \
FS_PROJINHERIT_FL | \
@@ -1735,11 +1908,13 @@
FS_CASEFOLD_FL)
#define F2FS_SETTABLE_FS_FL ( \
+ FS_COMPR_FL | \
FS_SYNC_FL | \
FS_IMMUTABLE_FL | \
FS_APPEND_FL | \
FS_NODUMP_FL | \
FS_NOATIME_FL | \
+ FS_NOCOMP_FL | \
FS_DIRSYNC_FL | \
FS_PROJINHERIT_FL | \
FS_CASEFOLD_FL)
@@ -1860,6 +2035,8 @@
inode_lock(inode);
+ f2fs_disable_compressed_file(inode);
+
if (f2fs_is_atomic_file(inode)) {
if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
ret = -EINVAL;
@@ -1898,7 +2075,6 @@
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
- stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
out:
inode_unlock(inode);
@@ -2273,6 +2449,14 @@
return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
}
+static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg)
+{
+ if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
+ return -EOPNOTSUPP;
+
+ return fscrypt_ioctl_get_nonce(filp, (void __user *)arg);
+}
+
static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -2294,40 +2478,33 @@
return ret;
if (!sync) {
- if (!mutex_trylock(&sbi->gc_mutex)) {
+ if (!down_write_trylock(&sbi->gc_lock)) {
ret = -EBUSY;
goto out;
}
} else {
- mutex_lock(&sbi->gc_mutex);
+ down_write(&sbi->gc_lock);
}
- ret = f2fs_gc(sbi, sync, true, NULL_SEGNO);
+ ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO);
out:
mnt_drop_write_file(filp);
return ret;
}
-static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
+static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
{
- struct inode *inode = file_inode(filp);
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_gc_range range;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
u64 end;
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
-
- if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
- sizeof(range)))
- return -EFAULT;
-
if (f2fs_readonly(sbi->sb))
return -EROFS;
- end = range.start + range.len;
- if (end < range.start || range.start < MAIN_BLKADDR(sbi) ||
+ end = range->start + range->len;
+ if (end < range->start || range->start < MAIN_BLKADDR(sbi) ||
end >= MAX_BLKADDR(sbi))
return -EINVAL;
@@ -2336,24 +2513,40 @@
return ret;
do_more:
- if (!range.sync) {
- if (!mutex_trylock(&sbi->gc_mutex)) {
+ if (!range->sync) {
+ if (!down_write_trylock(&sbi->gc_lock)) {
ret = -EBUSY;
goto out;
}
} else {
- mutex_lock(&sbi->gc_mutex);
+ down_write(&sbi->gc_lock);
}
- ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
- range.start += BLKS_PER_SEC(sbi);
- if (range.start <= end)
+ ret = f2fs_gc(sbi, range->sync, true, false,
+ GET_SEGNO(sbi, range->start));
+ if (ret) {
+ if (ret == -EBUSY)
+ ret = -EAGAIN;
+ goto out;
+ }
+ range->start += BLKS_PER_SEC(sbi);
+ if (range->start <= end)
goto do_more;
out:
mnt_drop_write_file(filp);
return ret;
}
+static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
+{
+ struct f2fs_gc_range range;
+
+ if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
+ sizeof(range)))
+ return -EFAULT;
+ return __f2fs_ioc_gc_range(filp, &range);
+}
+
static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -2598,6 +2791,9 @@
if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
return -EOPNOTSUPP;
+ if (pos_out < 0 || pos_in < 0)
+ return -EINVAL;
+
if (src == dst) {
if (pos_in == pos_out)
return 0;
@@ -2687,9 +2883,9 @@
return ret;
}
-static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
+static int __f2fs_ioc_move_range(struct file *filp,
+ struct f2fs_move_range *range)
{
- struct f2fs_move_range range;
struct fd dst;
int err;
@@ -2697,11 +2893,7 @@
!(filp->f_mode & FMODE_WRITE))
return -EBADF;
- if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
- sizeof(range)))
- return -EFAULT;
-
- dst = fdget(range.dst_fd);
+ dst = fdget(range->dst_fd);
if (!dst.file)
return -EBADF;
@@ -2714,21 +2906,25 @@
if (err)
goto err_out;
- err = f2fs_move_file_range(filp, range.pos_in, dst.file,
- range.pos_out, range.len);
+ err = f2fs_move_file_range(filp, range->pos_in, dst.file,
+ range->pos_out, range->len);
mnt_drop_write_file(filp);
- if (err)
- goto err_out;
-
- if (copy_to_user((struct f2fs_move_range __user *)arg,
- &range, sizeof(range)))
- err = -EFAULT;
err_out:
fdput(dst);
return err;
}
+static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
+{
+ struct f2fs_move_range range;
+
+ if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
+ sizeof(range)))
+ return -EFAULT;
+ return __f2fs_ioc_move_range(filp, &range);
+}
+
static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -2773,14 +2969,14 @@
end_segno = min(start_segno + range.segments, dev_end_segno);
while (start_segno < end_segno) {
- if (!mutex_trylock(&sbi->gc_mutex)) {
+ if (!down_write_trylock(&sbi->gc_lock)) {
ret = -EBUSY;
goto out;
}
sm->last_victim[GC_CB] = end_segno + 1;
sm->last_victim[GC_GREEDY] = end_segno + 1;
sm->last_victim[ALLOC_NEXT] = end_segno + 1;
- ret = f2fs_gc(sbi, true, true, start_segno);
+ ret = f2fs_gc(sbi, true, true, true, start_segno);
if (ret == -EAGAIN)
ret = 0;
else if (ret < 0)
@@ -3068,10 +3264,16 @@
ret = -EAGAIN;
goto out;
}
+
ret = f2fs_convert_inline_inode(inode);
if (ret)
goto out;
+ if (!f2fs_disable_compressed_file(inode)) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
set_inode_flag(inode, FI_PIN_FILE);
ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
done:
@@ -3134,7 +3336,6 @@
{
struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
__u64 block_count;
- int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -3146,9 +3347,7 @@
sizeof(block_count)))
return -EFAULT;
- ret = f2fs_resize_fs(sbi, block_count);
-
- return ret;
+ return f2fs_resize_fs(sbi, block_count);
}
static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
@@ -3175,7 +3374,7 @@
return fsverity_ioctl_measure(filp, (void __user *)arg);
}
-static int f2fs_get_volume_name(struct file *filp, unsigned long arg)
+static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -3197,11 +3396,11 @@
min(FSLABEL_MAX, count)))
err = -EFAULT;
- kvfree(vbuf);
+ kfree(vbuf);
return err;
}
-static int f2fs_set_volume_name(struct file *filp, unsigned long arg)
+static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -3237,19 +3436,539 @@
return err;
}
-long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg)
{
- if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
- return -EIO;
- if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
- return -ENOSPC;
+ struct inode *inode = file_inode(filp);
+ __u64 blocks;
+ if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
+ return -EOPNOTSUPP;
+
+ if (!f2fs_compressed_file(inode))
+ return -EINVAL;
+
+ blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
+ return put_user(blocks, (u64 __user *)arg);
+}
+
+static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ unsigned int released_blocks = 0;
+ int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
+ block_t blkaddr;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ dn->ofs_in_node + i);
+
+ if (!__is_valid_data_blkaddr(blkaddr))
+ continue;
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE)))
+ return -EFSCORRUPTED;
+ }
+
+ while (count) {
+ int compr_blocks = 0;
+
+ for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
+ blkaddr = f2fs_data_blkaddr(dn);
+
+ if (i == 0) {
+ if (blkaddr == COMPRESS_ADDR)
+ continue;
+ dn->ofs_in_node += cluster_size;
+ goto next;
+ }
+
+ if (__is_valid_data_blkaddr(blkaddr))
+ compr_blocks++;
+
+ if (blkaddr != NEW_ADDR)
+ continue;
+
+ dn->data_blkaddr = NULL_ADDR;
+ f2fs_set_data_blkaddr(dn);
+ }
+
+ f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false);
+ dec_valid_block_count(sbi, dn->inode,
+ cluster_size - compr_blocks);
+
+ released_blocks += cluster_size - compr_blocks;
+next:
+ count -= cluster_size;
+ }
+
+ return released_blocks;
+}
+
+static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ pgoff_t page_idx = 0, last_idx;
+ unsigned int released_blocks = 0;
+ int ret;
+ int writecount;
+
+ if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
+ return -EOPNOTSUPP;
+
+ if (!f2fs_compressed_file(inode))
+ return -EINVAL;
+
+ if (f2fs_readonly(sbi->sb))
+ return -EROFS;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+ inode_lock(inode);
+
+ writecount = atomic_read(&inode->i_writecount);
+ if ((filp->f_mode & FMODE_WRITE && writecount != 1) ||
+ (!(filp->f_mode & FMODE_WRITE) && writecount)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (IS_IMMUTABLE(inode)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
+ if (ret)
+ goto out;
+
+ F2FS_I(inode)->i_flags |= F2FS_IMMUTABLE_FL;
+ f2fs_set_inode_flags(inode);
+ inode->i_ctime = current_time(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
+
+ if (!atomic_read(&F2FS_I(inode)->i_compr_blocks))
+ goto out;
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+
+ while (page_idx < last_idx) {
+ struct dnode_of_data dn;
+ pgoff_t end_offset, count;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
+ if (ret) {
+ if (ret == -ENOENT) {
+ page_idx = f2fs_get_next_page_offset(&dn,
+ page_idx);
+ ret = 0;
+ continue;
+ }
+ break;
+ }
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+ count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
+ count = round_up(count, F2FS_I(inode)->i_cluster_size);
+
+ ret = release_compress_blocks(&dn, count);
+
+ f2fs_put_dnode(&dn);
+
+ if (ret < 0)
+ break;
+
+ page_idx += count;
+ released_blocks += ret;
+ }
+
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+out:
+ inode_unlock(inode);
+
+ mnt_drop_write_file(filp);
+
+ if (ret >= 0) {
+ ret = put_user(released_blocks, (u64 __user *)arg);
+ } else if (released_blocks &&
+ atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
+ "iblocks=%llu, released=%u, compr_blocks=%u, "
+ "run fsck to fix.",
+ __func__, inode->i_ino, inode->i_blocks,
+ released_blocks,
+ atomic_read(&F2FS_I(inode)->i_compr_blocks));
+ }
+
+ return ret;
+}
+
+static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ unsigned int reserved_blocks = 0;
+ int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
+ block_t blkaddr;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ dn->ofs_in_node + i);
+
+ if (!__is_valid_data_blkaddr(blkaddr))
+ continue;
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE)))
+ return -EFSCORRUPTED;
+ }
+
+ while (count) {
+ int compr_blocks = 0;
+ blkcnt_t reserved;
+ int ret;
+
+ for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
+ blkaddr = f2fs_data_blkaddr(dn);
+
+ if (i == 0) {
+ if (blkaddr == COMPRESS_ADDR)
+ continue;
+ dn->ofs_in_node += cluster_size;
+ goto next;
+ }
+
+ if (__is_valid_data_blkaddr(blkaddr)) {
+ compr_blocks++;
+ continue;
+ }
+
+ dn->data_blkaddr = NEW_ADDR;
+ f2fs_set_data_blkaddr(dn);
+ }
+
+ reserved = cluster_size - compr_blocks;
+ ret = inc_valid_block_count(sbi, dn->inode, &reserved);
+ if (ret)
+ return ret;
+
+ if (reserved != cluster_size - compr_blocks)
+ return -ENOSPC;
+
+ f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
+
+ reserved_blocks += reserved;
+next:
+ count -= cluster_size;
+ }
+
+ return reserved_blocks;
+}
+
+static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ pgoff_t page_idx = 0, last_idx;
+ unsigned int reserved_blocks = 0;
+ int ret;
+
+ if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
+ return -EOPNOTSUPP;
+
+ if (!f2fs_compressed_file(inode))
+ return -EINVAL;
+
+ if (f2fs_readonly(sbi->sb))
+ return -EROFS;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
+ goto out;
+
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+ inode_lock(inode);
+
+ if (!IS_IMMUTABLE(inode)) {
+ ret = -EINVAL;
+ goto unlock_inode;
+ }
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+
+ while (page_idx < last_idx) {
+ struct dnode_of_data dn;
+ pgoff_t end_offset, count;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
+ if (ret) {
+ if (ret == -ENOENT) {
+ page_idx = f2fs_get_next_page_offset(&dn,
+ page_idx);
+ ret = 0;
+ continue;
+ }
+ break;
+ }
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+ count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
+ count = round_up(count, F2FS_I(inode)->i_cluster_size);
+
+ ret = reserve_compress_blocks(&dn, count);
+
+ f2fs_put_dnode(&dn);
+
+ if (ret < 0)
+ break;
+
+ page_idx += count;
+ reserved_blocks += ret;
+ }
+
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+
+ if (ret >= 0) {
+ F2FS_I(inode)->i_flags &= ~F2FS_IMMUTABLE_FL;
+ f2fs_set_inode_flags(inode);
+ inode->i_ctime = current_time(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
+ }
+unlock_inode:
+ inode_unlock(inode);
+out:
+ mnt_drop_write_file(filp);
+
+ if (ret >= 0) {
+ ret = put_user(reserved_blocks, (u64 __user *)arg);
+ } else if (reserved_blocks &&
+ atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
+ "iblocks=%llu, reserved=%u, compr_blocks=%u, "
+ "run fsck to fix.",
+ __func__, inode->i_ino, inode->i_blocks,
+ reserved_blocks,
+ atomic_read(&F2FS_I(inode)->i_compr_blocks));
+ }
+
+ return ret;
+}
+
+static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
+ pgoff_t off, block_t block, block_t len, u32 flags)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ sector_t sector = SECTOR_FROM_BLOCK(block);
+ sector_t nr_sects = SECTOR_FROM_BLOCK(len);
+ int ret = 0;
+
+ if (!q)
+ return -ENXIO;
+
+ if (flags & F2FS_TRIM_FILE_DISCARD)
+ ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
+ blk_queue_secure_erase(q) ?
+ BLKDEV_DISCARD_SECURE : 0);
+
+ if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
+ if (IS_ENCRYPTED(inode))
+ ret = fscrypt_zeroout_range(inode, off, block, len);
+ else
+ ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
+ GFP_NOFS, 0);
+ }
+
+ return ret;
+}
+
+static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct address_space *mapping = inode->i_mapping;
+ struct block_device *prev_bdev = NULL;
+ struct f2fs_sectrim_range range;
+ pgoff_t index, pg_end, prev_index = 0;
+ block_t prev_block = 0, len = 0;
+ loff_t end_addr;
+ bool to_end = false;
+ int ret = 0;
+
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
+ if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg,
+ sizeof(range)))
+ return -EFAULT;
+
+ if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) ||
+ !S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ if (((range.flags & F2FS_TRIM_FILE_DISCARD) &&
+ !f2fs_hw_support_discard(sbi)) ||
+ ((range.flags & F2FS_TRIM_FILE_ZEROOUT) &&
+ IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
+ return -EOPNOTSUPP;
+
+ file_start_write(filp);
+ inode_lock(inode);
+
+ if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
+ range.start >= inode->i_size) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (range.len == 0)
+ goto err;
+
+ if (inode->i_size - range.start > range.len) {
+ end_addr = range.start + range.len;
+ } else {
+ end_addr = range.len == (u64)-1 ?
+ sbi->sb->s_maxbytes : inode->i_size;
+ to_end = true;
+ }
+
+ if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) ||
+ (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ index = F2FS_BYTES_TO_BLK(range.start);
+ pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE);
+
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ goto err;
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ ret = filemap_write_and_wait_range(mapping, range.start,
+ to_end ? LLONG_MAX : end_addr - 1);
+ if (ret)
+ goto out;
+
+ truncate_inode_pages_range(mapping, range.start,
+ to_end ? -1 : end_addr - 1);
+
+ while (index < pg_end) {
+ struct dnode_of_data dn;
+ pgoff_t end_offset, count;
+ int i;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (ret) {
+ if (ret == -ENOENT) {
+ index = f2fs_get_next_page_offset(&dn, index);
+ continue;
+ }
+ goto out;
+ }
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+ count = min(end_offset - dn.ofs_in_node, pg_end - index);
+ for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
+ struct block_device *cur_bdev;
+ block_t blkaddr = f2fs_data_blkaddr(&dn);
+
+ if (!__is_valid_data_blkaddr(blkaddr))
+ continue;
+
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ ret = -EFSCORRUPTED;
+ f2fs_put_dnode(&dn);
+ goto out;
+ }
+
+ cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
+ if (f2fs_is_multi_device(sbi)) {
+ int di = f2fs_target_device_index(sbi, blkaddr);
+
+ blkaddr -= FDEV(di).start_blk;
+ }
+
+ if (len) {
+ if (prev_bdev == cur_bdev &&
+ index == prev_index + len &&
+ blkaddr == prev_block + len) {
+ len++;
+ } else {
+ ret = f2fs_secure_erase(prev_bdev,
+ inode, prev_index, prev_block,
+ len, range.flags);
+ if (ret) {
+ f2fs_put_dnode(&dn);
+ goto out;
+ }
+
+ len = 0;
+ }
+ }
+
+ if (!len) {
+ prev_bdev = cur_bdev;
+ prev_index = index;
+ prev_block = blkaddr;
+ len = 1;
+ }
+ }
+
+ f2fs_put_dnode(&dn);
+
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
+ }
+ cond_resched();
+ }
+
+ if (len)
+ ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
+ prev_block, len, range.flags);
+out:
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+err:
+ inode_unlock(inode);
+ file_end_write(filp);
+
+ return ret;
+}
+
+static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
switch (cmd) {
- case F2FS_IOC_GETFLAGS:
+ case FS_IOC_GETFLAGS:
return f2fs_ioc_getflags(filp, arg);
- case F2FS_IOC_SETFLAGS:
+ case FS_IOC_SETFLAGS:
return f2fs_ioc_setflags(filp, arg);
- case F2FS_IOC_GETVERSION:
+ case FS_IOC_GETVERSION:
return f2fs_ioc_getversion(filp, arg);
case F2FS_IOC_START_ATOMIC_WRITE:
return f2fs_ioc_start_atomic_write(filp);
@@ -3265,11 +3984,11 @@
return f2fs_ioc_shutdown(filp, arg);
case FITRIM:
return f2fs_ioc_fitrim(filp, arg);
- case F2FS_IOC_SET_ENCRYPTION_POLICY:
+ case FS_IOC_SET_ENCRYPTION_POLICY:
return f2fs_ioc_set_encryption_policy(filp, arg);
- case F2FS_IOC_GET_ENCRYPTION_POLICY:
+ case FS_IOC_GET_ENCRYPTION_POLICY:
return f2fs_ioc_get_encryption_policy(filp, arg);
- case F2FS_IOC_GET_ENCRYPTION_PWSALT:
+ case FS_IOC_GET_ENCRYPTION_PWSALT:
return f2fs_ioc_get_encryption_pwsalt(filp, arg);
case FS_IOC_GET_ENCRYPTION_POLICY_EX:
return f2fs_ioc_get_encryption_policy_ex(filp, arg);
@@ -3281,6 +4000,8 @@
return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
return f2fs_ioc_get_encryption_key_status(filp, arg);
+ case FS_IOC_GET_ENCRYPTION_NONCE:
+ return f2fs_ioc_get_encryption_nonce(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT:
return f2fs_ioc_gc(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
@@ -3295,9 +4016,9 @@
return f2fs_ioc_flush_device(filp, arg);
case F2FS_IOC_GET_FEATURES:
return f2fs_ioc_get_features(filp, arg);
- case F2FS_IOC_FSGETXATTR:
+ case FS_IOC_FSGETXATTR:
return f2fs_ioc_fsgetxattr(filp, arg);
- case F2FS_IOC_FSSETXATTR:
+ case FS_IOC_FSSETXATTR:
return f2fs_ioc_fssetxattr(filp, arg);
case F2FS_IOC_GET_PIN_FILE:
return f2fs_ioc_get_pin_file(filp, arg);
@@ -3311,15 +4032,50 @@
return f2fs_ioc_enable_verity(filp, arg);
case FS_IOC_MEASURE_VERITY:
return f2fs_ioc_measure_verity(filp, arg);
- case F2FS_IOC_GET_VOLUME_NAME:
- return f2fs_get_volume_name(filp, arg);
- case F2FS_IOC_SET_VOLUME_NAME:
- return f2fs_set_volume_name(filp, arg);
+ case FS_IOC_GETFSLABEL:
+ return f2fs_ioc_getfslabel(filp, arg);
+ case FS_IOC_SETFSLABEL:
+ return f2fs_ioc_setfslabel(filp, arg);
+ case F2FS_IOC_GET_COMPRESS_BLOCKS:
+ return f2fs_get_compress_blocks(filp, arg);
+ case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
+ return f2fs_release_compress_blocks(filp, arg);
+ case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
+ return f2fs_reserve_compress_blocks(filp, arg);
+ case F2FS_IOC_SEC_TRIM_FILE:
+ return f2fs_sec_trim_file(filp, arg);
default:
return -ENOTTY;
}
}
+long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
+ return -EIO;
+ if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
+ return -ENOSPC;
+
+ return __f2fs_ioctl(filp, cmd, arg);
+}
+
+static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ int ret;
+
+ if (!f2fs_is_compress_backend_ready(inode))
+ return -EOPNOTSUPP;
+
+ ret = generic_file_read_iter(iocb, iter);
+
+ if (ret > 0)
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret);
+
+ return ret;
+}
+
static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
@@ -3331,6 +4087,11 @@
goto out;
}
+ if (!f2fs_is_compress_backend_ready(inode)) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
if (iocb->ki_flags & IOCB_NOWAIT) {
if (!inode_trylock(inode)) {
ret = -EAGAIN;
@@ -3340,6 +4101,11 @@
inode_lock(inode);
}
+ if (unlikely(IS_IMMUTABLE(inode))) {
+ ret = -EPERM;
+ goto unlock;
+ }
+
ret = generic_write_checks(iocb, from);
if (ret > 0) {
bool preallocated = false;
@@ -3404,6 +4170,7 @@
if (ret > 0)
f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
}
+unlock:
inode_unlock(inode);
out:
trace_f2fs_file_write_iter(inode, iocb->ki_pos,
@@ -3414,60 +4181,123 @@
}
#ifdef CONFIG_COMPAT
+struct compat_f2fs_gc_range {
+ u32 sync;
+ compat_u64 start;
+ compat_u64 len;
+};
+#define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\
+ struct compat_f2fs_gc_range)
+
+static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
+{
+ struct compat_f2fs_gc_range __user *urange;
+ struct f2fs_gc_range range;
+ int err;
+
+ urange = compat_ptr(arg);
+ err = get_user(range.sync, &urange->sync);
+ err |= get_user(range.start, &urange->start);
+ err |= get_user(range.len, &urange->len);
+ if (err)
+ return -EFAULT;
+
+ return __f2fs_ioc_gc_range(file, &range);
+}
+
+struct compat_f2fs_move_range {
+ u32 dst_fd;
+ compat_u64 pos_in;
+ compat_u64 pos_out;
+ compat_u64 len;
+};
+#define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \
+ struct compat_f2fs_move_range)
+
+static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
+{
+ struct compat_f2fs_move_range __user *urange;
+ struct f2fs_move_range range;
+ int err;
+
+ urange = compat_ptr(arg);
+ err = get_user(range.dst_fd, &urange->dst_fd);
+ err |= get_user(range.pos_in, &urange->pos_in);
+ err |= get_user(range.pos_out, &urange->pos_out);
+ err |= get_user(range.len, &urange->len);
+ if (err)
+ return -EFAULT;
+
+ return __f2fs_ioc_move_range(file, &range);
+}
+
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
+ return -EIO;
+ if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
+ return -ENOSPC;
+
switch (cmd) {
- case F2FS_IOC32_GETFLAGS:
- cmd = F2FS_IOC_GETFLAGS;
+ case FS_IOC32_GETFLAGS:
+ cmd = FS_IOC_GETFLAGS;
break;
- case F2FS_IOC32_SETFLAGS:
- cmd = F2FS_IOC_SETFLAGS;
+ case FS_IOC32_SETFLAGS:
+ cmd = FS_IOC_SETFLAGS;
break;
- case F2FS_IOC32_GETVERSION:
- cmd = F2FS_IOC_GETVERSION;
+ case FS_IOC32_GETVERSION:
+ cmd = FS_IOC_GETVERSION;
break;
+ case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
+ return f2fs_compat_ioc_gc_range(file, arg);
+ case F2FS_IOC32_MOVE_RANGE:
+ return f2fs_compat_ioc_move_range(file, arg);
case F2FS_IOC_START_ATOMIC_WRITE:
case F2FS_IOC_COMMIT_ATOMIC_WRITE:
case F2FS_IOC_START_VOLATILE_WRITE:
case F2FS_IOC_RELEASE_VOLATILE_WRITE:
case F2FS_IOC_ABORT_VOLATILE_WRITE:
case F2FS_IOC_SHUTDOWN:
- case F2FS_IOC_SET_ENCRYPTION_POLICY:
- case F2FS_IOC_GET_ENCRYPTION_PWSALT:
- case F2FS_IOC_GET_ENCRYPTION_POLICY:
+ case FITRIM:
+ case FS_IOC_SET_ENCRYPTION_POLICY:
+ case FS_IOC_GET_ENCRYPTION_PWSALT:
+ case FS_IOC_GET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_POLICY_EX:
case FS_IOC_ADD_ENCRYPTION_KEY:
case FS_IOC_REMOVE_ENCRYPTION_KEY:
case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
+ case FS_IOC_GET_ENCRYPTION_NONCE:
case F2FS_IOC_GARBAGE_COLLECT:
- case F2FS_IOC_GARBAGE_COLLECT_RANGE:
case F2FS_IOC_WRITE_CHECKPOINT:
case F2FS_IOC_DEFRAGMENT:
- case F2FS_IOC_MOVE_RANGE:
case F2FS_IOC_FLUSH_DEVICE:
case F2FS_IOC_GET_FEATURES:
- case F2FS_IOC_FSGETXATTR:
- case F2FS_IOC_FSSETXATTR:
+ case FS_IOC_FSGETXATTR:
+ case FS_IOC_FSSETXATTR:
case F2FS_IOC_GET_PIN_FILE:
case F2FS_IOC_SET_PIN_FILE:
case F2FS_IOC_PRECACHE_EXTENTS:
case F2FS_IOC_RESIZE_FS:
case FS_IOC_ENABLE_VERITY:
case FS_IOC_MEASURE_VERITY:
- case F2FS_IOC_GET_VOLUME_NAME:
- case F2FS_IOC_SET_VOLUME_NAME:
+ case FS_IOC_GETFSLABEL:
+ case FS_IOC_SETFSLABEL:
+ case F2FS_IOC_GET_COMPRESS_BLOCKS:
+ case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
+ case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
+ case F2FS_IOC_SEC_TRIM_FILE:
break;
default:
return -ENOIOCTLCMD;
}
- return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
+ return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
const struct file_operations f2fs_file_operations = {
.llseek = f2fs_llseek,
- .read_iter = generic_file_read_iter,
+ .read_iter = f2fs_file_read_iter,
.write_iter = f2fs_file_write_iter,
.open = f2fs_file_open,
.release = f2fs_release_file,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 4b6c362..6b240b7 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -13,6 +13,7 @@
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>
+#include <linux/sched/signal.h>
#include "f2fs.h"
#include "node.h"
@@ -20,6 +21,11 @@
#include "gc.h"
#include <trace/events/f2fs.h>
+static struct kmem_cache *victim_entry_slab;
+
+static unsigned int count_bits(const unsigned long *addr,
+ unsigned int offset, unsigned int len);
+
static int gc_thread_func(void *data)
{
struct f2fs_sb_info *sbi = data;
@@ -31,6 +37,8 @@
set_freezable();
do {
+ bool sync_mode;
+
wait_event_interruptible_timeout(*wq,
kthread_should_stop() || freezing(current) ||
gc_th->gc_wake,
@@ -76,20 +84,20 @@
* invalidated soon after by user update or deletion.
* So, I'd like to wait some time to collect dirty segments.
*/
- if (sbi->gc_mode == GC_URGENT) {
+ if (sbi->gc_mode == GC_URGENT_HIGH) {
wait_ms = gc_th->urgent_sleep_time;
- mutex_lock(&sbi->gc_mutex);
+ down_write(&sbi->gc_lock);
goto do_gc;
}
- if (!mutex_trylock(&sbi->gc_mutex)) {
+ if (!down_write_trylock(&sbi->gc_lock)) {
stat_other_skip_bggc_count(sbi);
goto next;
}
if (!is_idle(sbi, GC_TIME)) {
increase_sleep_time(gc_th, &wait_ms);
- mutex_unlock(&sbi->gc_mutex);
+ up_write(&sbi->gc_lock);
stat_io_skip_bggc_count(sbi);
goto next;
}
@@ -99,17 +107,19 @@
else
increase_sleep_time(gc_th, &wait_ms);
do_gc:
- stat_inc_bggc_count(sbi);
+ stat_inc_bggc_count(sbi->stat_info);
+
+ sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC;
/* if return value is not zero, no victim was selected */
- if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO))
+ if (f2fs_gc(sbi, sync_mode, true, false, NULL_SEGNO))
wait_ms = gc_th->no_gc_sleep_time;
trace_f2fs_background_gc(sbi->sb, wait_ms,
prefree_segments(sbi), free_segments(sbi));
/* balancing f2fs's metadata periodically */
- f2fs_balance_fs_bg(sbi);
+ f2fs_balance_fs_bg(sbi, true);
next:
sb_end_write(sbi->sb);
@@ -142,7 +152,7 @@
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
err = PTR_ERR(gc_th->f2fs_gc_task);
- kvfree(gc_th);
+ kfree(gc_th);
sbi->gc_thread = NULL;
}
out:
@@ -155,23 +165,36 @@
if (!gc_th)
return;
kthread_stop(gc_th->f2fs_gc_task);
- kvfree(gc_th);
+ kfree(gc_th);
sbi->gc_thread = NULL;
}
static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
{
- int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+ int gc_mode;
+
+ if (gc_type == BG_GC) {
+ if (sbi->am.atgc_enabled)
+ gc_mode = GC_AT;
+ else
+ gc_mode = GC_CB;
+ } else {
+ gc_mode = GC_GREEDY;
+ }
switch (sbi->gc_mode) {
case GC_IDLE_CB:
gc_mode = GC_CB;
break;
case GC_IDLE_GREEDY:
- case GC_URGENT:
+ case GC_URGENT_HIGH:
gc_mode = GC_GREEDY;
break;
+ case GC_IDLE_AT:
+ gc_mode = GC_AT;
+ break;
}
+
return gc_mode;
}
@@ -182,19 +205,34 @@
if (p->alloc_mode == SSR) {
p->gc_mode = GC_GREEDY;
- p->dirty_segmap = dirty_i->dirty_segmap[type];
+ p->dirty_bitmap = dirty_i->dirty_segmap[type];
+ p->max_search = dirty_i->nr_dirty[type];
+ p->ofs_unit = 1;
+ } else if (p->alloc_mode == AT_SSR) {
+ p->gc_mode = GC_GREEDY;
+ p->dirty_bitmap = dirty_i->dirty_segmap[type];
p->max_search = dirty_i->nr_dirty[type];
p->ofs_unit = 1;
} else {
p->gc_mode = select_gc_type(sbi, gc_type);
- p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
- p->max_search = dirty_i->nr_dirty[DIRTY];
p->ofs_unit = sbi->segs_per_sec;
+ if (__is_large_section(sbi)) {
+ p->dirty_bitmap = dirty_i->dirty_secmap;
+ p->max_search = count_bits(p->dirty_bitmap,
+ 0, MAIN_SECS(sbi));
+ } else {
+ p->dirty_bitmap = dirty_i->dirty_segmap[DIRTY];
+ p->max_search = dirty_i->nr_dirty[DIRTY];
+ }
}
- /* we need to check every dirty segments in the FG_GC case */
+ /*
+ * adjust candidates range, should select all dirty segments for
+ * foreground GC and urgent GC cases.
+ */
if (gc_type != FG_GC &&
- (sbi->gc_mode != GC_URGENT) &&
+ (sbi->gc_mode != GC_URGENT_HIGH) &&
+ (p->gc_mode != GC_AT && p->alloc_mode != AT_SSR) &&
p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
@@ -212,10 +250,16 @@
/* SSR allocates in a segment unit */
if (p->alloc_mode == SSR)
return sbi->blocks_per_seg;
+ else if (p->alloc_mode == AT_SSR)
+ return UINT_MAX;
+
+ /* LFS */
if (p->gc_mode == GC_GREEDY)
return 2 * sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
+ else if (p->gc_mode == GC_AT)
+ return UINT_MAX;
else /* No other gc_mode */
return 0;
}
@@ -249,13 +293,14 @@
unsigned char age = 0;
unsigned char u;
unsigned int i;
+ unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno);
- for (i = 0; i < sbi->segs_per_sec; i++)
+ for (i = 0; i < usable_segs_per_sec; i++)
mtime += get_seg_entry(sbi, start + i)->mtime;
vblocks = get_valid_blocks(sbi, segno, true);
- mtime = div_u64(mtime, sbi->segs_per_sec);
- vblocks = div_u64(vblocks, sbi->segs_per_sec);
+ mtime = div_u64(mtime, usable_segs_per_sec);
+ vblocks = div_u64(vblocks, usable_segs_per_sec);
u = (vblocks * 100) >> sbi->log_blocks_per_seg;
@@ -280,8 +325,11 @@
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
return get_valid_blocks(sbi, segno, true);
- else
+ else if (p->gc_mode == GC_CB)
return get_cb_cost(sbi, segno);
+
+ f2fs_bug_on(sbi, 1);
+ return 0;
}
static unsigned int count_bits(const unsigned long *addr,
@@ -296,6 +344,269 @@
return sum;
}
+static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi,
+ unsigned long long mtime, unsigned int segno,
+ struct rb_node *parent, struct rb_node **p,
+ bool left_most)
+{
+ struct atgc_management *am = &sbi->am;
+ struct victim_entry *ve;
+
+ ve = f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS);
+
+ ve->mtime = mtime;
+ ve->segno = segno;
+
+ rb_link_node(&ve->rb_node, parent, p);
+ rb_insert_color_cached(&ve->rb_node, &am->root, left_most);
+
+ list_add_tail(&ve->list, &am->victim_list);
+
+ am->victim_count++;
+
+ return ve;
+}
+
+static void insert_victim_entry(struct f2fs_sb_info *sbi,
+ unsigned long long mtime, unsigned int segno)
+{
+ struct atgc_management *am = &sbi->am;
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ bool left_most = true;
+
+ p = f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, mtime, &left_most);
+ attach_victim_entry(sbi, mtime, segno, parent, p, left_most);
+}
+
+static void add_victim_entry(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p, unsigned int segno)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+ unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
+ unsigned long long mtime = 0;
+ unsigned int i;
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (p->gc_mode == GC_AT &&
+ get_valid_blocks(sbi, segno, true) == 0)
+ return;
+ }
+
+ for (i = 0; i < sbi->segs_per_sec; i++)
+ mtime += get_seg_entry(sbi, start + i)->mtime;
+ mtime = div_u64(mtime, sbi->segs_per_sec);
+
+ /* Handle if the system time has changed by the user */
+ if (mtime < sit_i->min_mtime)
+ sit_i->min_mtime = mtime;
+ if (mtime > sit_i->max_mtime)
+ sit_i->max_mtime = mtime;
+ if (mtime < sit_i->dirty_min_mtime)
+ sit_i->dirty_min_mtime = mtime;
+ if (mtime > sit_i->dirty_max_mtime)
+ sit_i->dirty_max_mtime = mtime;
+
+ /* don't choose young section as candidate */
+ if (sit_i->dirty_max_mtime - mtime < p->age_threshold)
+ return;
+
+ insert_victim_entry(sbi, mtime, segno);
+}
+
+static struct rb_node *lookup_central_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct atgc_management *am = &sbi->am;
+ struct rb_node *parent = NULL;
+ bool left_most;
+
+ f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, p->age, &left_most);
+
+ return parent;
+}
+
+static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct atgc_management *am = &sbi->am;
+ struct rb_root_cached *root = &am->root;
+ struct rb_node *node;
+ struct rb_entry *re;
+ struct victim_entry *ve;
+ unsigned long long total_time;
+ unsigned long long age, u, accu;
+ unsigned long long max_mtime = sit_i->dirty_max_mtime;
+ unsigned long long min_mtime = sit_i->dirty_min_mtime;
+ unsigned int sec_blocks = BLKS_PER_SEC(sbi);
+ unsigned int vblocks;
+ unsigned int dirty_threshold = max(am->max_candidate_count,
+ am->candidate_ratio *
+ am->victim_count / 100);
+ unsigned int age_weight = am->age_weight;
+ unsigned int cost;
+ unsigned int iter = 0;
+
+ if (max_mtime < min_mtime)
+ return;
+
+ max_mtime += 1;
+ total_time = max_mtime - min_mtime;
+
+ accu = div64_u64(ULLONG_MAX, total_time);
+ accu = min_t(unsigned long long, div_u64(accu, 100),
+ DEFAULT_ACCURACY_CLASS);
+
+ node = rb_first_cached(root);
+next:
+ re = rb_entry_safe(node, struct rb_entry, rb_node);
+ if (!re)
+ return;
+
+ ve = (struct victim_entry *)re;
+
+ if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
+ goto skip;
+
+ /* age = 10000 * x% * 60 */
+ age = div64_u64(accu * (max_mtime - ve->mtime), total_time) *
+ age_weight;
+
+ vblocks = get_valid_blocks(sbi, ve->segno, true);
+ f2fs_bug_on(sbi, !vblocks || vblocks == sec_blocks);
+
+ /* u = 10000 * x% * 40 */
+ u = div64_u64(accu * (sec_blocks - vblocks), sec_blocks) *
+ (100 - age_weight);
+
+ f2fs_bug_on(sbi, age + u >= UINT_MAX);
+
+ cost = UINT_MAX - (age + u);
+ iter++;
+
+ if (cost < p->min_cost ||
+ (cost == p->min_cost && age > p->oldest_age)) {
+ p->min_cost = cost;
+ p->oldest_age = age;
+ p->min_segno = ve->segno;
+ }
+skip:
+ if (iter < dirty_threshold) {
+ node = rb_next(node);
+ goto next;
+ }
+}
+
+/*
+ * select candidates around source section in range of
+ * [target - dirty_threshold, target + dirty_threshold]
+ */
+static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct atgc_management *am = &sbi->am;
+ struct rb_node *node;
+ struct rb_entry *re;
+ struct victim_entry *ve;
+ unsigned long long age;
+ unsigned long long max_mtime = sit_i->dirty_max_mtime;
+ unsigned long long min_mtime = sit_i->dirty_min_mtime;
+ unsigned int seg_blocks = sbi->blocks_per_seg;
+ unsigned int vblocks;
+ unsigned int dirty_threshold = max(am->max_candidate_count,
+ am->candidate_ratio *
+ am->victim_count / 100);
+ unsigned int cost;
+ unsigned int iter = 0;
+ int stage = 0;
+
+ if (max_mtime < min_mtime)
+ return;
+ max_mtime += 1;
+next_stage:
+ node = lookup_central_victim(sbi, p);
+next_node:
+ re = rb_entry_safe(node, struct rb_entry, rb_node);
+ if (!re) {
+ if (stage == 0)
+ goto skip_stage;
+ return;
+ }
+
+ ve = (struct victim_entry *)re;
+
+ if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
+ goto skip_node;
+
+ age = max_mtime - ve->mtime;
+
+ vblocks = get_seg_entry(sbi, ve->segno)->ckpt_valid_blocks;
+ f2fs_bug_on(sbi, !vblocks);
+
+ /* rare case */
+ if (vblocks == seg_blocks)
+ goto skip_node;
+
+ iter++;
+
+ age = max_mtime - abs(p->age - age);
+ cost = UINT_MAX - vblocks;
+
+ if (cost < p->min_cost ||
+ (cost == p->min_cost && age > p->oldest_age)) {
+ p->min_cost = cost;
+ p->oldest_age = age;
+ p->min_segno = ve->segno;
+ }
+skip_node:
+ if (iter < dirty_threshold) {
+ if (stage == 0)
+ node = rb_prev(node);
+ else if (stage == 1)
+ node = rb_next(node);
+ goto next_node;
+ }
+skip_stage:
+ if (stage < 1) {
+ stage++;
+ iter = 0;
+ goto next_stage;
+ }
+}
+static void lookup_victim_by_age(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &sbi->am.root, true));
+
+ if (p->gc_mode == GC_AT)
+ atgc_lookup_victim(sbi, p);
+ else if (p->alloc_mode == AT_SSR)
+ atssr_lookup_victim(sbi, p);
+ else
+ f2fs_bug_on(sbi, 1);
+}
+
+static void release_victim_entry(struct f2fs_sb_info *sbi)
+{
+ struct atgc_management *am = &sbi->am;
+ struct victim_entry *ve, *tmp;
+
+ list_for_each_entry_safe(ve, tmp, &am->victim_list, list) {
+ list_del(&ve->list);
+ kmem_cache_free(victim_entry_slab, ve);
+ am->victim_count--;
+ }
+
+ am->root = RB_ROOT_CACHED;
+
+ f2fs_bug_on(sbi, am->victim_count);
+ f2fs_bug_on(sbi, !list_empty(&am->victim_list));
+}
+
/*
* This function is called from two paths.
* One is garbage collection and the other is SSR segment selection.
@@ -305,31 +616,51 @@
* which has minimum valid blocks and removes it from dirty seglist.
*/
static int get_victim_by_default(struct f2fs_sb_info *sbi,
- unsigned int *result, int gc_type, int type, char alloc_mode)
+ unsigned int *result, int gc_type, int type,
+ char alloc_mode, unsigned long long age)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
unsigned int last_segment;
- unsigned int nsearched = 0;
+ unsigned int nsearched;
+ bool is_atgc;
+ int ret = 0;
mutex_lock(&dirty_i->seglist_lock);
last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
p.alloc_mode = alloc_mode;
- select_policy(sbi, gc_type, type, &p);
+ p.age = age;
+ p.age_threshold = sbi->am.age_threshold;
+retry:
+ select_policy(sbi, gc_type, type, &p);
p.min_segno = NULL_SEGNO;
+ p.oldest_age = 0;
p.min_cost = get_max_cost(sbi, &p);
+ is_atgc = (p.gc_mode == GC_AT || p.alloc_mode == AT_SSR);
+ nsearched = 0;
+
+ if (is_atgc)
+ SIT_I(sbi)->dirty_min_mtime = ULLONG_MAX;
+
if (*result != NULL_SEGNO) {
- if (get_valid_blocks(sbi, *result, false) &&
- !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
+ if (!get_valid_blocks(sbi, *result, false)) {
+ ret = -ENODATA;
+ goto out;
+ }
+
+ if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
+ ret = -EBUSY;
+ else
p.min_segno = *result;
goto out;
}
+ ret = -ENODATA;
if (p.max_search == 0)
goto out;
@@ -357,10 +688,14 @@
}
while (1) {
- unsigned long cost;
- unsigned int segno;
+ unsigned long cost, *dirty_bitmap;
+ unsigned int unit_no, segno;
- segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
+ dirty_bitmap = p.dirty_bitmap;
+ unit_no = find_next_bit(dirty_bitmap,
+ last_segment / p.ofs_unit,
+ p.offset / p.ofs_unit);
+ segno = unit_no * p.ofs_unit;
if (segno >= last_segment) {
if (sm->last_victim[p.gc_mode]) {
last_segment =
@@ -373,14 +708,7 @@
}
p.offset = segno + p.ofs_unit;
- if (p.ofs_unit > 1) {
- p.offset -= segno % p.ofs_unit;
- nsearched += count_bits(p.dirty_segmap,
- p.offset - p.ofs_unit,
- p.ofs_unit);
- } else {
- nsearched++;
- }
+ nsearched++;
#ifdef CONFIG_F2FS_CHECK_FS
/*
@@ -396,14 +724,35 @@
if (sec_usage_check(sbi, secno))
goto next;
+
/* Don't touch checkpointed data */
- if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
- get_ckpt_valid_blocks(sbi, segno) &&
- p.alloc_mode != SSR))
- goto next;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (p.alloc_mode == LFS) {
+ /*
+ * LFS is set to find source section during GC.
+ * The victim should have no checkpointed data.
+ */
+ if (get_ckpt_valid_blocks(sbi, segno, true))
+ goto next;
+ } else {
+ /*
+ * SSR | AT_SSR are set to find target segment
+ * for writes which can be full by checkpointed
+ * and newly written blocks.
+ */
+ if (!f2fs_segment_has_free_slot(sbi, segno))
+ goto next;
+ }
+ }
+
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
+ if (is_atgc) {
+ add_victim_entry(sbi, &p, segno);
+ goto next;
+ }
+
cost = get_gc_cost(sbi, segno, &p);
if (p.min_cost > cost) {
@@ -413,14 +762,28 @@
next:
if (nsearched >= p.max_search) {
if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
- sm->last_victim[p.gc_mode] = last_victim + 1;
+ sm->last_victim[p.gc_mode] =
+ last_victim + p.ofs_unit;
else
- sm->last_victim[p.gc_mode] = segno + 1;
+ sm->last_victim[p.gc_mode] = segno + p.ofs_unit;
sm->last_victim[p.gc_mode] %=
(MAIN_SECS(sbi) * sbi->segs_per_sec);
break;
}
}
+
+ /* get victim for GC_AT/AT_SSR */
+ if (is_atgc) {
+ lookup_victim_by_age(sbi, &p);
+ release_victim_entry(sbi);
+ }
+
+ if (is_atgc && p.min_segno == NULL_SEGNO &&
+ sm->elapsed_time < p.age_threshold) {
+ p.age_threshold = 0;
+ goto retry;
+ }
+
if (p.min_segno != NULL_SEGNO) {
got_it:
*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
@@ -432,6 +795,7 @@
else
set_bit(secno, dirty_i->victim_secmap);
}
+ ret = 0;
}
out:
@@ -441,7 +805,7 @@
prefree_segments(sbi), free_segments(sbi));
mutex_unlock(&dirty_i->seglist_lock);
- return (p.min_segno == NULL_SEGNO) ? 0 : 1;
+ return ret;
}
static const struct victim_selection default_v_ops = {
@@ -512,6 +876,7 @@
int phase = 0;
bool fggc = (gc_type == FG_GC);
int submitted = 0;
+ unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
start_addr = START_BLOCK(sbi, segno);
@@ -521,7 +886,7 @@
if (fggc && phase == 2)
atomic_inc(&sbi->wb_sync_req[NODE]);
- for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
+ for (off = 0; off < usable_blks_in_seg; off++, entry++) {
nid_t nid = le32_to_cpu(entry->nid);
struct page *node_page;
struct node_info ni;
@@ -633,8 +998,11 @@
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
+ if (f2fs_check_nid_range(sbi, dni->ino))
+ return false;
+
*nofs = ofs_of_node(node_page);
- source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
+ source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
if (source_blkaddr != blkaddr) {
@@ -730,6 +1098,10 @@
goto put_encrypted_page;
f2fs_put_page(fio.encrypted_page, 0);
f2fs_put_page(page, 1);
+
+ f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE);
+
return 0;
put_encrypted_page:
f2fs_put_page(fio.encrypted_page, 1);
@@ -762,7 +1134,9 @@
struct page *page, *mpage;
block_t newaddr;
int err = 0;
- bool lfs_mode = test_opt(fio.sbi, LFS);
+ bool lfs_mode = f2fs_lfs_mode(fio.sbi);
+ int type = fio.sbi->am.atgc_enabled ?
+ CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
/* do not read out */
page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
@@ -821,8 +1195,10 @@
mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
fio.old_blkaddr, false);
- if (!mpage)
+ if (!mpage) {
+ err = -ENOMEM;
goto up_out;
+ }
fio.encrypted_page = mpage;
@@ -833,6 +1209,10 @@
f2fs_put_page(mpage, 1);
goto up_out;
}
+
+ f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(fio.sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE);
+
lock_page(mpage);
if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) ||
!PageUptodate(mpage))) {
@@ -843,7 +1223,7 @@
}
f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
- &sum, CURSEG_COLD_DATA, NULL, false);
+ &sum, type, NULL);
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -893,7 +1273,7 @@
recover_block:
if (err)
f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
- true, true);
+ true, true, true);
up_out:
if (lfs_mode)
up_write(&fio.sbi->io_order_lock);
@@ -970,7 +1350,8 @@
if (err) {
clear_cold_data(page);
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC,
+ DEFAULT_IO_TIMEOUT);
goto retry;
}
if (is_dirty)
@@ -990,7 +1371,8 @@
* the victim data block is ignored.
*/
static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
- struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
+ struct gc_inode_list *gc_list, unsigned int segno, int gc_type,
+ bool force_migrate)
{
struct super_block *sb = sbi->sb;
struct f2fs_summary *entry;
@@ -998,13 +1380,14 @@
int off;
int phase = 0;
int submitted = 0;
+ unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
start_addr = START_BLOCK(sbi, segno);
next_step:
entry = sum;
- for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
+ for (off = 0; off < usable_blks_in_seg; off++, entry++) {
struct page *data_page;
struct inode *inode;
struct node_info dni; /* dnode info for the data */
@@ -1018,8 +1401,8 @@
* race condition along with SSR block allocation.
*/
if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) ||
- get_valid_blocks(sbi, segno, false) ==
- sbi->blocks_per_seg)
+ (!force_migrate && get_valid_blocks(sbi, segno, true) ==
+ BLKS_PER_SEC(sbi)))
return submitted;
if (check_valid_map(sbi, segno, off) == 0)
@@ -1049,8 +1432,10 @@
if (phase == 3) {
inode = f2fs_iget(sb, dni.ino);
- if (IS_ERR(inode) || is_bad_inode(inode))
+ if (IS_ERR(inode) || is_bad_inode(inode)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
continue;
+ }
if (!down_write_trylock(
&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
@@ -1147,14 +1532,15 @@
down_write(&sit_i->sentry_lock);
ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
- NO_CHECK_TYPE, LFS);
+ NO_CHECK_TYPE, LFS, 0);
up_write(&sit_i->sentry_lock);
return ret;
}
static int do_garbage_collect(struct f2fs_sb_info *sbi,
unsigned int start_segno,
- struct gc_inode_list *gc_list, int gc_type)
+ struct gc_inode_list *gc_list, int gc_type,
+ bool force_migrate)
{
struct page *sum_page;
struct f2fs_summary_block *sum;
@@ -1169,6 +1555,17 @@
if (__is_large_section(sbi))
end_segno = rounddown(end_segno, sbi->segs_per_sec);
+ /*
+ * zone-capacity can be less than zone-size in zoned devices,
+ * resulting in less than expected usable segments in the zone,
+ * calculate the end segno in the zone which can be garbage collected
+ */
+ if (f2fs_sb_has_blkzoned(sbi))
+ end_segno -= sbi->segs_per_sec -
+ f2fs_usable_segs_in_sec(sbi, segno);
+
+ sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type);
+
/* readahead multi ssa blocks those have contiguous address */
if (__is_large_section(sbi))
f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
@@ -1203,7 +1600,7 @@
if (get_valid_blocks(sbi, segno, false) == 0)
goto freed;
- if (__is_large_section(sbi) &&
+ if (gc_type == BG_GC && __is_large_section(sbi) &&
migrated >= sbi->migration_granularity)
goto skip;
if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi)))
@@ -1230,15 +1627,16 @@
gc_type);
else
submitted += gc_data_segment(sbi, sum->entries, gc_list,
- segno, gc_type);
+ segno, gc_type,
+ force_migrate);
stat_inc_seg_count(sbi, type, gc_type);
+ migrated++;
freed:
if (gc_type == FG_GC &&
get_valid_blocks(sbi, segno, false) == 0)
seg_freed++;
- migrated++;
if (__is_large_section(sbi) && segno + 1 < end_segno)
sbi->next_victim_seg[gc_type] = segno + 1;
@@ -1258,7 +1656,7 @@
}
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
- bool background, unsigned int segno)
+ bool background, bool force, unsigned int segno)
{
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0, seg_freed = 0, total_freed = 0;
@@ -1316,13 +1714,13 @@
ret = -EINVAL;
goto stop;
}
- if (!__get_victim(sbi, &segno, gc_type)) {
- ret = -ENODATA;
+ ret = __get_victim(sbi, &segno, gc_type);
+ if (ret)
goto stop;
- }
- seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
- if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
+ seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, force);
+ if (gc_type == FG_GC &&
+ seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
sec_freed++;
total_freed += seg_freed;
@@ -1370,7 +1768,7 @@
reserved_segments(sbi),
prefree_segments(sbi));
- mutex_unlock(&sbi->gc_mutex);
+ up_write(&sbi->gc_lock);
put_gc_inode(&gc_list);
@@ -1379,6 +1777,38 @@
return ret;
}
+int __init f2fs_create_garbage_collection_cache(void)
+{
+ victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry",
+ sizeof(struct victim_entry));
+ if (!victim_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_destroy_garbage_collection_cache(void)
+{
+ kmem_cache_destroy(victim_entry_slab);
+}
+
+static void init_atgc_management(struct f2fs_sb_info *sbi)
+{
+ struct atgc_management *am = &sbi->am;
+
+ if (test_opt(sbi, ATGC) &&
+ SIT_I(sbi)->elapsed_time >= DEF_GC_THREAD_AGE_THRESHOLD)
+ am->atgc_enabled = true;
+
+ am->root = RB_ROOT_CACHED;
+ INIT_LIST_HEAD(&am->victim_list);
+ am->victim_count = 0;
+
+ am->candidate_ratio = DEF_GC_THREAD_CANDIDATE_RATIO;
+ am->max_candidate_count = DEF_GC_THREAD_MAX_CANDIDATE_COUNT;
+ am->age_weight = DEF_GC_THREAD_AGE_WEIGHT;
+ am->age_threshold = DEF_GC_THREAD_AGE_THRESHOLD;
+}
+
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
{
DIRTY_I(sbi)->v_ops = &default_v_ops;
@@ -1389,18 +1819,37 @@
if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
+
+ init_atgc_management(sbi);
}
-static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
- unsigned int end)
+static int free_segment_range(struct f2fs_sb_info *sbi,
+ unsigned int secs, bool gc_only)
{
- int type;
- unsigned int segno, next_inuse;
+ unsigned int segno, next_inuse, start, end;
+ struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
+ int gc_mode, gc_type;
int err = 0;
+ int type;
+
+ /* Force block allocation for GC */
+ MAIN_SECS(sbi) -= secs;
+ start = MAIN_SECS(sbi) * sbi->segs_per_sec;
+ end = MAIN_SEGS(sbi) - 1;
+
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+ for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
+ if (SIT_I(sbi)->last_victim[gc_mode] >= start)
+ SIT_I(sbi)->last_victim[gc_mode] = 0;
+
+ for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
+ if (sbi->next_victim_seg[gc_type] >= start)
+ sbi->next_victim_seg[gc_type] = NULL_SEGNO;
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
/* Move out cursegs from the target range */
- for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
- allocate_segment_for_resize(sbi, type, start, end);
+ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
+ f2fs_allocate_segment_for_resize(sbi, type, start, end);
/* do GC to move out valid blocks in the range */
for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
@@ -1409,18 +1858,24 @@
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
- mutex_lock(&sbi->gc_mutex);
- do_garbage_collect(sbi, segno, &gc_list, FG_GC);
- mutex_unlock(&sbi->gc_mutex);
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
put_gc_inode(&gc_list);
- if (get_valid_blocks(sbi, segno, true))
- return -EAGAIN;
+ if (!gc_only && get_valid_blocks(sbi, segno, true)) {
+ err = -EAGAIN;
+ goto out;
+ }
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto out;
+ }
}
+ if (gc_only)
+ goto out;
- err = f2fs_sync_fs(sbi->sb, 1);
+ err = f2fs_write_checkpoint(sbi, &cpc);
if (err)
- return err;
+ goto out;
next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start);
if (next_inuse <= end) {
@@ -1428,44 +1883,77 @@
next_inuse);
f2fs_bug_on(sbi, 1);
}
+out:
+ MAIN_SECS(sbi) += secs;
return err;
}
static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
{
struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi);
- int section_count = le32_to_cpu(raw_sb->section_count);
- int segment_count = le32_to_cpu(raw_sb->segment_count);
- int segment_count_main = le32_to_cpu(raw_sb->segment_count_main);
- long long block_count = le64_to_cpu(raw_sb->block_count);
+ int section_count;
+ int segment_count;
+ int segment_count_main;
+ long long block_count;
int segs = secs * sbi->segs_per_sec;
+ down_write(&sbi->sb_lock);
+
+ section_count = le32_to_cpu(raw_sb->section_count);
+ segment_count = le32_to_cpu(raw_sb->segment_count);
+ segment_count_main = le32_to_cpu(raw_sb->segment_count_main);
+ block_count = le64_to_cpu(raw_sb->block_count);
+
raw_sb->section_count = cpu_to_le32(section_count + secs);
raw_sb->segment_count = cpu_to_le32(segment_count + segs);
raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
raw_sb->block_count = cpu_to_le64(block_count +
(long long)segs * sbi->blocks_per_seg);
+ if (f2fs_is_multi_device(sbi)) {
+ int last_dev = sbi->s_ndevs - 1;
+ int dev_segs =
+ le32_to_cpu(raw_sb->devs[last_dev].total_segments);
+
+ raw_sb->devs[last_dev].total_segments =
+ cpu_to_le32(dev_segs + segs);
+ }
+
+ up_write(&sbi->sb_lock);
}
static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
{
int segs = secs * sbi->segs_per_sec;
+ long long blks = (long long)segs * sbi->blocks_per_seg;
long long user_block_count =
le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
+ MAIN_SECS(sbi) += secs;
FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
- F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count +
- (long long)segs * sbi->blocks_per_seg);
+ F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks);
+
+ if (f2fs_is_multi_device(sbi)) {
+ int last_dev = sbi->s_ndevs - 1;
+
+ FDEV(last_dev).total_segments =
+ (int)FDEV(last_dev).total_segments + segs;
+ FDEV(last_dev).end_blk =
+ (long long)FDEV(last_dev).end_blk + blks;
+#ifdef CONFIG_BLK_DEV_ZONED
+ FDEV(last_dev).nr_blkz = (int)FDEV(last_dev).nr_blkz +
+ (int)(blks >> sbi->log_blocks_per_blkz);
+#endif
+ }
}
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
{
__u64 old_block_count, shrunk_blocks;
+ struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
unsigned int secs;
- int gc_mode, gc_type;
int err = 0;
__u32 rem;
@@ -1473,6 +1961,15 @@
if (block_count > old_block_count)
return -EINVAL;
+ if (f2fs_is_multi_device(sbi)) {
+ int last_dev = sbi->s_ndevs - 1;
+ __u64 last_segs = FDEV(last_dev).total_segments;
+
+ if (block_count + last_segs * sbi->blocks_per_seg <=
+ old_block_count)
+ return -EINVAL;
+ }
+
/* new fs size should align to section size */
div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem);
if (rem)
@@ -1491,10 +1988,40 @@
return -EINVAL;
}
- freeze_bdev(sbi->sb->s_bdev);
-
shrunk_blocks = old_block_count - block_count;
secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
+
+ /* stop other GC */
+ if (!down_write_trylock(&sbi->gc_lock))
+ return -EAGAIN;
+
+ /* stop CP to protect MAIN_SEC in free_segment_range */
+ f2fs_lock_op(sbi);
+
+ spin_lock(&sbi->stat_lock);
+ if (shrunk_blocks + valid_user_blocks(sbi) +
+ sbi->current_reserved_blocks + sbi->unusable_block_count +
+ F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
+ err = -ENOSPC;
+ spin_unlock(&sbi->stat_lock);
+
+ if (err)
+ goto out_unlock;
+
+ err = free_segment_range(sbi, secs, true);
+
+out_unlock:
+ f2fs_unlock_op(sbi);
+ up_write(&sbi->gc_lock);
+ if (err)
+ return err;
+
+ set_sbi_flag(sbi, SBI_IS_RESIZEFS);
+
+ freeze_super(sbi->sb);
+ down_write(&sbi->gc_lock);
+ mutex_lock(&sbi->cp_mutex);
+
spin_lock(&sbi->stat_lock);
if (shrunk_blocks + valid_user_blocks(sbi) +
sbi->current_reserved_blocks + sbi->unusable_block_count +
@@ -1503,69 +2030,44 @@
else
sbi->user_block_count -= shrunk_blocks;
spin_unlock(&sbi->stat_lock);
- if (err) {
- thaw_bdev(sbi->sb->s_bdev, sbi->sb);
- return err;
- }
-
- mutex_lock(&sbi->resize_mutex);
- set_sbi_flag(sbi, SBI_IS_RESIZEFS);
-
- mutex_lock(&DIRTY_I(sbi)->seglist_lock);
-
- MAIN_SECS(sbi) -= secs;
-
- for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
- if (SIT_I(sbi)->last_victim[gc_mode] >=
- MAIN_SECS(sbi) * sbi->segs_per_sec)
- SIT_I(sbi)->last_victim[gc_mode] = 0;
-
- for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
- if (sbi->next_victim_seg[gc_type] >=
- MAIN_SECS(sbi) * sbi->segs_per_sec)
- sbi->next_victim_seg[gc_type] = NULL_SEGNO;
-
- mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
-
- err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec,
- MAIN_SEGS(sbi) - 1);
if (err)
- goto out;
+ goto out_err;
+
+ err = free_segment_range(sbi, secs, false);
+ if (err)
+ goto recover_out;
update_sb_metadata(sbi, -secs);
err = f2fs_commit_super(sbi, false);
if (err) {
update_sb_metadata(sbi, secs);
- goto out;
+ goto recover_out;
}
- mutex_lock(&sbi->cp_mutex);
update_fs_metadata(sbi, -secs);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
set_sbi_flag(sbi, SBI_IS_DIRTY);
- mutex_unlock(&sbi->cp_mutex);
- err = f2fs_sync_fs(sbi->sb, 1);
+ err = f2fs_write_checkpoint(sbi, &cpc);
if (err) {
- mutex_lock(&sbi->cp_mutex);
update_fs_metadata(sbi, secs);
- mutex_unlock(&sbi->cp_mutex);
update_sb_metadata(sbi, secs);
f2fs_commit_super(sbi, false);
}
-out:
+recover_out:
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");
- MAIN_SECS(sbi) += secs;
spin_lock(&sbi->stat_lock);
sbi->user_block_count += shrunk_blocks;
spin_unlock(&sbi->stat_lock);
}
+out_err:
+ mutex_unlock(&sbi->cp_mutex);
+ up_write(&sbi->gc_lock);
+ thaw_super(sbi->sb);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
- mutex_unlock(&sbi->resize_mutex);
- thaw_bdev(sbi->sb->s_bdev, sbi->sb);
return err;
}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index bbac9d3..0c8dae1 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/gc.h
*
@@ -14,6 +14,14 @@
#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
+
+/* choose candidates from sections which has age of more than 7 days */
+#define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7)
+#define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */
+#define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */
+#define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */
+#define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */
+
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
@@ -41,16 +49,69 @@
struct radix_tree_root iroot;
};
+struct victim_info {
+ unsigned long long mtime; /* mtime of section */
+ unsigned int segno; /* section No. */
+};
+
+struct victim_entry {
+ struct rb_node rb_node; /* rb node located in rb-tree */
+ union {
+ struct {
+ unsigned long long mtime; /* mtime of section */
+ unsigned int segno; /* segment No. */
+ };
+ struct victim_info vi; /* victim info */
+ };
+ struct list_head list;
+};
+
/*
* inline functions
*/
+
+/*
+ * On a Zoned device zone-capacity can be less than zone-size and if
+ * zone-capacity is not aligned to f2fs segment size(2MB), then the segment
+ * starting just before zone-capacity has some blocks spanning across the
+ * zone-capacity, these blocks are not usable.
+ * Such spanning segments can be in free list so calculate the sum of usable
+ * blocks in currently free segments including normal and spanning segments.
+ */
+static inline block_t free_segs_blk_count_zoned(struct f2fs_sb_info *sbi)
+{
+ block_t free_seg_blks = 0;
+ struct free_segmap_info *free_i = FREE_I(sbi);
+ int j;
+
+ spin_lock(&free_i->segmap_lock);
+ for (j = 0; j < MAIN_SEGS(sbi); j++)
+ if (!test_bit(j, free_i->free_segmap))
+ free_seg_blks += f2fs_usable_blks_in_seg(sbi, j);
+ spin_unlock(&free_i->segmap_lock);
+
+ return free_seg_blks;
+}
+
+static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return free_segs_blk_count_zoned(sbi);
+
+ return free_segments(sbi) << sbi->log_blocks_per_seg;
+}
+
static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
{
- if (free_segments(sbi) < overprovision_segments(sbi))
+ block_t free_blks, ovp_blks;
+
+ free_blks = free_segs_blk_count(sbi);
+ ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+
+ if (free_blks < ovp_blks)
return 0;
- else
- return (free_segments(sbi) - overprovision_segments(sbi))
- << sbi->log_blocks_per_seg;
+
+ return free_blks - ovp_blks;
}
static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 5bc4dcd..de841aa 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -12,7 +12,6 @@
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
-#include <linux/cryptohash.h>
#include <linux/pagemap.h>
#include <linux/unicode.h>
@@ -68,22 +67,9 @@
*buf++ = pad;
}
-static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info,
- struct fscrypt_name *fname)
+static u32 TEA_hash_name(const u8 *p, size_t len)
{
- __u32 hash;
- f2fs_hash_t f2fs_hash;
- const unsigned char *p;
__u32 in[8], buf[4];
- const unsigned char *name = name_info->name;
- size_t len = name_info->len;
-
- /* encrypted bigname case */
- if (fname && !fname->disk_name.name)
- return cpu_to_le32(fname->hash);
-
- if (is_dot_dotdot(name_info))
- return 0;
/* Initialize the default seed for the hash checksum functions */
buf[0] = 0x67452301;
@@ -91,7 +77,6 @@
buf[2] = 0x98badcfe;
buf[3] = 0x10325476;
- p = name;
while (1) {
str2hashbuf(p, len, in, 4);
TEA_transform(buf, in);
@@ -100,41 +85,43 @@
break;
len -= 16;
}
- hash = buf[0];
- f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
- return f2fs_hash;
+ return buf[0] & ~F2FS_HASH_COL_BIT;
}
-f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
- const struct qstr *name_info, struct fscrypt_name *fname)
+/*
+ * Compute @fname->hash. For all directories, @fname->disk_name must be set.
+ * For casefolded directories, @fname->usr_fname must be set, and also
+ * @fname->cf_name if the filename is valid Unicode.
+ */
+void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
{
-#ifdef CONFIG_UNICODE
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
- const struct unicode_map *um = sbi->s_encoding;
- int r, dlen;
- unsigned char *buff;
- struct qstr folded;
+ const u8 *name = fname->disk_name.name;
+ size_t len = fname->disk_name.len;
- if (!name_info->len || !IS_CASEFOLDED(dir))
- goto opaque_seq;
+ WARN_ON_ONCE(!name);
- buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
-
- dlen = utf8_casefold(um, name_info, buff, PATH_MAX);
- if (dlen < 0) {
- kvfree(buff);
- goto opaque_seq;
+ if (is_dot_dotdot(name, len)) {
+ fname->hash = 0;
+ return;
}
- folded.name = buff;
- folded.len = dlen;
- r = __f2fs_dentry_hash(&folded, fname);
- kvfree(buff);
- return r;
-
-opaque_seq:
+#ifdef CONFIG_UNICODE
+ if (IS_CASEFOLDED(dir)) {
+ /*
+ * If the casefolded name is provided, hash it instead of the
+ * on-disk name. If the casefolded name is *not* provided, that
+ * should only be because the name wasn't valid Unicode, so fall
+ * back to treating the name as an opaque byte sequence.
+ */
+ WARN_ON_ONCE(!fname->usr_fname->name);
+ if (fname->cf_name.name) {
+ name = fname->cf_name.name;
+ len = fname->cf_name.len;
+ } else {
+ name = fname->usr_fname->name;
+ len = fname->usr_fname->len;
+ }
+ }
#endif
- return __f2fs_dentry_hash(name_info, fname);
+ fname->hash = cpu_to_le32(TEA_hash_name(name, len));
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index c6bd669..1d7dafd 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -8,9 +8,11 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
+#include <linux/fiemap.h>
#include "f2fs.h"
#include "node.h"
+#include <trace/events/f2fs.h>
bool f2fs_may_inline_data(struct inode *inode)
{
@@ -315,15 +317,14 @@
}
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
- struct fscrypt_name *fname, struct page **res_page)
+ const struct f2fs_filename *fname,
+ struct page **res_page)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
- struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
struct page *ipage;
void *inline_dentry;
- f2fs_hash_t namehash;
ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage)) {
@@ -331,12 +332,10 @@
return NULL;
}
- namehash = f2fs_dentry_hash(dir, &name, fname);
-
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
- de = f2fs_find_target_dentry(fname, namehash, NULL, &d);
+ de = f2fs_find_target_dentry(&d, fname, NULL);
unlock_page(ipage);
if (de)
*res_page = ipage;
@@ -378,7 +377,7 @@
struct f2fs_dentry_ptr src, dst;
int err;
- page = f2fs_grab_cache_page(dir->i_mapping, 0, false);
+ page = f2fs_grab_cache_page(dir->i_mapping, 0, true);
if (!page) {
f2fs_put_page(ipage, 1);
return -ENOMEM;
@@ -453,7 +452,7 @@
while (bit_pos < d.max) {
struct f2fs_dir_entry *de;
- struct qstr new_name;
+ struct f2fs_filename fname;
nid_t ino;
umode_t fake_mode;
@@ -469,14 +468,19 @@
continue;
}
- new_name.name = d.filename[bit_pos];
- new_name.len = le16_to_cpu(de->name_len);
+ /*
+ * We only need the disk_name and hash to move the dentry.
+ * We don't need the original or casefolded filenames.
+ */
+ memset(&fname, 0, sizeof(fname));
+ fname.disk_name.name = d.filename[bit_pos];
+ fname.disk_name.len = le16_to_cpu(de->name_len);
+ fname.hash = de->hash_code;
ino = le32_to_cpu(de->ino);
fake_mode = f2fs_get_de_type(de) << S_SHIFT;
- err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL,
- ino, fake_mode);
+ err = f2fs_add_regular_entry(dir, &fname, NULL, ino, fake_mode);
if (err)
goto punch_dentry_pages;
@@ -525,7 +529,7 @@
!f2fs_has_inline_xattr(dir))
F2FS_I(dir)->i_inline_xattr_size = 0;
- kvfree(backup_dentry);
+ kfree(backup_dentry);
return 0;
recover:
lock_page(ipage);
@@ -536,11 +540,11 @@
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
- kvfree(backup_dentry);
+ kfree(backup_dentry);
return err;
}
-static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
+static int do_convert_inline_dir(struct inode *dir, struct page *ipage,
void *inline_dentry)
{
if (!F2FS_I(dir)->i_dir_level)
@@ -549,17 +553,55 @@
return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry);
}
-int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
- const struct qstr *orig_name,
- struct inode *inode, nid_t ino, umode_t mode)
+int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct page *ipage;
+ struct f2fs_filename fname;
+ void *inline_dentry = NULL;
+ int err = 0;
+
+ if (!f2fs_has_inline_dentry(dir))
+ return 0;
+
+ f2fs_lock_op(sbi);
+
+ err = f2fs_setup_filename(dir, &dentry->d_name, 0, &fname);
+ if (err)
+ goto out;
+
+ ipage = f2fs_get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto out_fname;
+ }
+
+ if (f2fs_has_enough_room(dir, ipage, &fname)) {
+ f2fs_put_page(ipage, 1);
+ goto out_fname;
+ }
+
+ inline_dentry = inline_data_addr(dir, ipage);
+
+ err = do_convert_inline_dir(dir, ipage, inline_dentry);
+ if (!err)
+ f2fs_put_page(ipage, 1);
+out_fname:
+ f2fs_free_filename(&fname);
+out:
+ f2fs_unlock_op(sbi);
+ return err;
+}
+
+int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
+ struct inode *inode, nid_t ino, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
unsigned int bit_pos;
- f2fs_hash_t name_hash;
void *inline_dentry = NULL;
struct f2fs_dentry_ptr d;
- int slots = GET_DENTRY_SLOTS(new_name->len);
+ int slots = GET_DENTRY_SLOTS(fname->disk_name.len);
struct page *page = NULL;
int err = 0;
@@ -572,7 +614,7 @@
bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max);
if (bit_pos >= d.max) {
- err = f2fs_convert_inline_dir(dir, ipage, inline_dentry);
+ err = do_convert_inline_dir(dir, ipage, inline_dentry);
if (err)
return err;
err = -EAGAIN;
@@ -581,8 +623,7 @@
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = f2fs_init_inode_metadata(inode, dir, new_name,
- orig_name, ipage);
+ page = f2fs_init_inode_metadata(inode, dir, fname, ipage);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -591,8 +632,8 @@
f2fs_wait_on_page_writeback(ipage, NODE, true, true);
- name_hash = f2fs_dentry_hash(dir, new_name, NULL);
- f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
+ f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash,
+ bit_pos);
set_page_dirty(ipage);
@@ -746,6 +787,7 @@
byteaddr += (char *)inline_data_addr(inode, ipage) -
(char *)F2FS_INODE(ipage);
err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
+ trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err);
out:
f2fs_put_page(ipage, 1);
return err;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 502bd49..a35fcf4 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -200,6 +200,7 @@
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_inode *ri = F2FS_INODE(node_page);
unsigned long long iblocks;
iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
@@ -286,6 +287,46 @@
return false;
}
+ if ((fi->i_flags & F2FS_CASEFOLD_FL) && !f2fs_sb_has_casefold(sbi)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has casefold flag, but casefold feature is off",
+ __func__, inode->i_ino);
+ return false;
+ }
+
+ if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) &&
+ fi->i_flags & F2FS_COMPR_FL &&
+ F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
+ i_log_cluster_size)) {
+ if (ri->i_compress_algorithm >= COMPRESS_MAX) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
+ "compress algorithm: %u, run fsck to fix",
+ __func__, inode->i_ino,
+ ri->i_compress_algorithm);
+ return false;
+ }
+ if (le64_to_cpu(ri->i_compr_blocks) >
+ SECTOR_TO_BLOCK(inode->i_blocks)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has inconsistent "
+ "i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix",
+ __func__, inode->i_ino,
+ le64_to_cpu(ri->i_compr_blocks),
+ SECTOR_TO_BLOCK(inode->i_blocks));
+ return false;
+ }
+ if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
+ ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
+ "log cluster size: %u, run fsck to fix",
+ __func__, inode->i_ino,
+ ri->i_log_cluster_size);
+ return false;
+ }
+ }
+
return true;
}
@@ -331,13 +372,12 @@
fi->i_flags = le32_to_cpu(ri->i_flags);
if (S_ISREG(inode->i_mode))
fi->i_flags &= ~F2FS_PROJINHERIT_FL;
- fi->flags = 0;
+ bitmap_zero(fi->flags, FI_MAX);
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
- if (f2fs_init_extent_tree(inode, &ri->i_ext))
- set_page_dirty(node_page);
+ f2fs_init_extent_tree(inode, node_page);
get_inline_info(inode, ri);
@@ -371,6 +411,7 @@
/* try to recover cold bit for non-dir inode */
if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) {
+ f2fs_wait_on_page_writeback(node_page, NODE, true, true);
set_cold_node(node_page, false);
set_page_dirty(node_page);
}
@@ -407,6 +448,19 @@
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
}
+ if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) &&
+ (fi->i_flags & F2FS_COMPR_FL)) {
+ if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
+ i_log_cluster_size)) {
+ atomic_set(&fi->i_compr_blocks,
+ le64_to_cpu(ri->i_compr_blocks));
+ fi->i_compress_algorithm = ri->i_compress_algorithm;
+ fi->i_log_cluster_size = ri->i_log_cluster_size;
+ fi->i_cluster_size = 1 << fi->i_log_cluster_size;
+ set_inode_flag(inode, FI_COMPRESSED_FILE);
+ }
+ }
+
F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
@@ -416,6 +470,8 @@
stat_inc_inline_xattr(inode);
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
+ stat_inc_compr_inode(inode);
+ stat_add_compr_blocks(inode, atomic_read(&fi->i_compr_blocks));
return 0;
}
@@ -455,7 +511,7 @@
inode->i_op = &f2fs_dir_inode_operations;
inode->i_fop = &f2fs_dir_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
- inode_nohighmem(inode);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (S_ISLNK(inode->i_mode)) {
if (file_is_encrypt(inode))
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
@@ -490,7 +546,7 @@
inode = f2fs_iget(sb, ino);
if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
goto retry;
}
}
@@ -569,6 +625,18 @@
ri->i_crtime_nsec =
cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec);
}
+
+ if (f2fs_sb_has_compression(F2FS_I_SB(inode)) &&
+ F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
+ i_log_cluster_size)) {
+ ri->i_compr_blocks =
+ cpu_to_le64(atomic_read(
+ &F2FS_I(inode)->i_compr_blocks));
+ ri->i_compress_algorithm =
+ F2FS_I(inode)->i_compress_algorithm;
+ ri->i_log_cluster_size =
+ F2FS_I(inode)->i_log_cluster_size;
+ }
}
__set_inode_rdev(inode, ri);
@@ -711,6 +779,9 @@
stat_dec_inline_xattr(inode);
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
+ stat_dec_compr_inode(inode);
+ stat_sub_compr_blocks(inode,
+ atomic_read(&F2FS_I(inode)->i_compr_blocks));
if (likely(!f2fs_cp_error(sbi) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
@@ -718,7 +789,7 @@
else
f2fs_inode_synced(inode);
- /* ino == 0, if f2fs_new_inode() was failed t*/
+ /* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */
if (inode->i_ino)
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino,
inode->i_ino);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 81a18ba..6ae2bea 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -28,6 +28,7 @@
nid_t ino;
struct inode *inode;
bool nid_free = false;
+ bool encrypt = false;
int xattr_size = 0;
int err;
@@ -69,15 +70,17 @@
F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
F2FS_DEF_PROJID);
+ err = fscrypt_prepare_new_inode(dir, inode, &encrypt);
+ if (err)
+ goto fail_drop;
+
err = dquot_initialize(inode);
if (err)
goto fail_drop;
set_inode_flag(inode, FI_NEW_INODE);
- /* If the directory encrypted, then we should encrypt the inode. */
- if ((IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
- f2fs_may_encrypt(inode))
+ if (encrypt)
f2fs_set_encrypted_inode(inode);
if (f2fs_sb_has_extra_attr(sbi)) {
@@ -119,6 +122,13 @@
if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
+ if (f2fs_sb_has_compression(sbi)) {
+ /* Inherit the compression flag in directory */
+ if ((F2FS_I(dir)->i_flags & F2FS_COMPR_FL) &&
+ f2fs_may_compress(inode))
+ set_compress_context(inode);
+ }
+
f2fs_set_inode_flags(inode);
trace_f2fs_new_inode(inode, 0);
@@ -143,12 +153,16 @@
return ERR_PTR(err);
}
-static inline int is_extension_exist(const unsigned char *s, const char *sub)
+static inline int is_extension_exist(const unsigned char *s, const char *sub,
+ bool tmp_ext)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
int i;
+ if (sublen == 1 && *sub == '*')
+ return 1;
+
/*
* filename format of multimedia file should be defined as:
* "filename + '.' + extension + (optional: '.' + temp extension)".
@@ -156,6 +170,13 @@
if (slen < sublen + 2)
return 0;
+ if (!tmp_ext) {
+ /* file has no temp extension */
+ if (s[slen - sublen - 1] != '.')
+ return 0;
+ return !strncasecmp(s + slen - sublen, sub, sublen);
+ }
+
for (i = 1; i < slen - sublen; i++) {
if (s[i] != '.')
continue;
@@ -167,7 +188,7 @@
}
/*
- * Set multimedia files as cold files for hot/cold data separation
+ * Set file's temperature for hot/cold data separation
*/
static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode,
const unsigned char *name)
@@ -181,7 +202,7 @@
hot_count = sbi->raw_super->hot_ext_count;
for (i = 0; i < cold_count + hot_count; i++) {
- if (is_extension_exist(name, extlist[i]))
+ if (is_extension_exist(name, extlist[i], true))
break;
}
@@ -262,6 +283,45 @@
return 0;
}
+static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode,
+ const unsigned char *name)
+{
+ __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
+ unsigned char (*ext)[F2FS_EXTENSION_LEN];
+ unsigned int ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
+ int i, cold_count, hot_count;
+
+ if (!f2fs_sb_has_compression(sbi) ||
+ is_inode_flag_set(inode, FI_COMPRESSED_FILE) ||
+ F2FS_I(inode)->i_flags & F2FS_NOCOMP_FL ||
+ !f2fs_may_compress(inode))
+ return;
+
+ down_read(&sbi->sb_lock);
+
+ cold_count = le32_to_cpu(sbi->raw_super->extension_count);
+ hot_count = sbi->raw_super->hot_ext_count;
+
+ for (i = cold_count; i < cold_count + hot_count; i++) {
+ if (is_extension_exist(name, extlist[i], false)) {
+ up_read(&sbi->sb_lock);
+ return;
+ }
+ }
+
+ up_read(&sbi->sb_lock);
+
+ ext = F2FS_OPTION(sbi).extensions;
+
+ for (i = 0; i < ext_cnt; i++) {
+ if (!is_extension_exist(name, ext[i], false))
+ continue;
+
+ set_compress_context(inode);
+ return;
+ }
+}
+
static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
@@ -286,6 +346,8 @@
if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
set_file_temperature(sbi, inode, dentry->d_name.name);
+ set_compress_inode(sbi, inode, dentry->d_name.name);
+
inode->i_op = &f2fs_file_inode_operations;
inode->i_fop = &f2fs_file_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
@@ -433,7 +495,7 @@
nid_t ino = -1;
int err = 0;
unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir));
- struct fscrypt_name fname;
+ struct f2fs_filename fname;
trace_f2fs_lookup_start(dir, dentry, flags);
@@ -442,19 +504,20 @@
goto out;
}
- err = fscrypt_prepare_lookup(dir, dentry, &fname);
+ err = f2fs_prepare_lookup(dir, dentry, &fname);
if (err == -ENOENT)
goto out_splice;
if (err)
goto out;
de = __f2fs_find_entry(dir, &fname, &page);
- fscrypt_free_filename(&fname);
+ f2fs_free_filename(&fname);
if (!de) {
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out;
}
+ err = -ENOENT;
goto out_splice;
}
@@ -500,7 +563,7 @@
#endif
new = d_splice_alias(inode, dentry);
err = PTR_ERR_OR_ZERO(new);
- trace_f2fs_lookup_end(dir, dentry, ino, err);
+ trace_f2fs_lookup_end(dir, dentry, ino, !new ? -ENOENT : err);
return new;
out_iput:
iput(inode);
@@ -515,19 +578,21 @@
struct inode *inode = d_inode(dentry);
struct f2fs_dir_entry *de;
struct page *page;
- int err = -ENOENT;
+ int err;
trace_f2fs_unlink_enter(dir, dentry);
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto fail;
+ }
err = dquot_initialize(dir);
if (err)
- return err;
+ goto fail;
err = dquot_initialize(inode);
if (err)
- return err;
+ goto fail;
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de) {
@@ -550,7 +615,7 @@
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
* invalidating the dentries here, alongside with returning the
- * negative dentries at f2fs_lookup(), when it is better
+ * negative dentries at f2fs_lookup(), when it is better
* supported by the VFS for the CI case.
*/
if (IS_CASEFOLDED(dir))
@@ -655,7 +720,7 @@
f2fs_handle_failed_inode(inode);
out_free_encrypted_link:
if (disk_link.name != (unsigned char *)symname)
- kvfree(disk_link.name);
+ kfree(disk_link.name);
return err;
}
@@ -679,7 +744,7 @@
inode->i_op = &f2fs_dir_inode_operations;
inode->i_fop = &f2fs_dir_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
- inode_nohighmem(inode);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
set_inode_flag(inode, FI_INC_LINK);
f2fs_lock_op(sbi);
@@ -829,12 +894,6 @@
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
- if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) {
- int err = fscrypt_get_encryption_info(dir);
- if (err)
- return err;
- }
-
return __f2fs_tmpfile(dir, dentry, mode, NULL);
}
@@ -854,12 +913,11 @@
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
struct inode *whiteout = NULL;
- struct page *old_dir_page;
+ struct page *old_dir_page = NULL;
struct page *old_page, *new_page = NULL;
struct f2fs_dir_entry *old_dir_entry = NULL;
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
- bool is_old_inline = f2fs_has_inline_dentry(old_dir);
int err;
if (unlikely(f2fs_cp_error(sbi)))
@@ -872,6 +930,20 @@
F2FS_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
+ /*
+ * If new_inode is null, the below renaming flow will
+ * add a link in old_dir which can conver inline_dir.
+ * After then, if we failed to get the entry due to other
+ * reasons like ENOMEM, we had to remove the new entry.
+ * Instead of adding such the error handling routine, let's
+ * simply convert first here.
+ */
+ if (old_dir == new_dir && !new_inode) {
+ err = f2fs_try_convert_inline_dir(old_dir, new_dentry);
+ if (err)
+ return err;
+ }
+
if (flags & RENAME_WHITEOUT) {
err = f2fs_create_whiteout(old_dir, &whiteout);
if (err)
@@ -933,6 +1005,7 @@
goto put_out_dir;
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
+ new_page = NULL;
new_inode->i_ctime = current_time(new_inode);
down_write(&F2FS_I(new_inode)->i_sem);
@@ -958,28 +1031,6 @@
if (old_dir_entry)
f2fs_i_links_write(new_dir, true);
-
- /*
- * old entry and new entry can locate in the same inline
- * dentry in inode, when attaching new entry in inline dentry,
- * it could force inline dentry conversion, after that,
- * old_entry and old_page will point to wrong address, in
- * order to avoid this, let's do the check and update here.
- */
- if (is_old_inline && !f2fs_has_inline_dentry(old_dir)) {
- f2fs_put_page(old_page, 0);
- old_page = NULL;
-
- old_entry = f2fs_find_entry(old_dir,
- &old_dentry->d_name, &old_page);
- if (!old_entry) {
- err = -ENOENT;
- if (IS_ERR(old_page))
- err = PTR_ERR(old_page);
- f2fs_unlock_op(sbi);
- goto out_dir;
- }
- }
}
down_write(&F2FS_I(old_inode)->i_sem);
@@ -994,6 +1045,7 @@
f2fs_mark_inode_dirty_sync(old_inode, false);
f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
+ old_page = NULL;
if (whiteout) {
set_inode_flag(whiteout, FI_INC_LINK);
@@ -1033,8 +1085,7 @@
put_out_dir:
f2fs_unlock_op(sbi);
- if (new_page)
- f2fs_put_page(new_page, 0);
+ f2fs_put_page(new_page, 0);
out_dir:
if (old_dir_entry)
f2fs_put_page(old_dir_page, 0);
@@ -1266,12 +1317,10 @@
}
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
- .get_link = f2fs_encrypted_get_link,
+ .get_link = f2fs_encrypted_get_link,
.getattr = f2fs_encrypted_symlink_getattr,
.setattr = f2fs_setattr,
-#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
-#endif
};
const struct inode_operations f2fs_dir_inode_operations = {
@@ -1289,27 +1338,21 @@
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
.set_acl = f2fs_set_acl,
-#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
-#endif
.fiemap = f2fs_fiemap,
};
const struct inode_operations f2fs_symlink_inode_operations = {
- .get_link = f2fs_get_link,
+ .get_link = f2fs_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
-#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
-#endif
};
const struct inode_operations f2fs_special_inode_operations = {
.getattr = f2fs_getattr,
- .setattr = f2fs_setattr,
+ .setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
.set_acl = f2fs_set_acl,
-#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
-#endif
};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4cb182c..7e62580 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -514,9 +514,6 @@
return nr - nr_shrink;
}
-/*
- * This function always returns success
- */
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct node_info *ni)
{
@@ -625,10 +622,10 @@
switch (dn->max_level) {
case 3:
base += 2 * indirect_blks;
- /* fall through */
+ fallthrough;
case 2:
base += 2 * direct_blks;
- /* fall through */
+ fallthrough;
case 1:
base += direct_index;
break;
@@ -720,8 +717,7 @@
/*
* Caller should call f2fs_put_dnode(dn).
* Also, it should grab and release a rwsem by calling f2fs_lock_op() and
- * f2fs_unlock_op() only if ro is not set RDONLY_NODE.
- * In the case of RDONLY_NODE, we don't need to care about mutex.
+ * f2fs_unlock_op() only if mode is set with ALLOC_NODE.
*/
int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
{
@@ -813,8 +809,7 @@
dn->nid = nids[level];
dn->ofs_in_node = offset[level];
dn->node_page = npage[level];
- dn->data_blkaddr = datablock_addr(dn->inode,
- dn->node_page, dn->ofs_in_node);
+ dn->data_blkaddr = f2fs_data_blkaddr(dn);
return 0;
release_pages:
@@ -879,7 +874,7 @@
/* get direct node */
page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
- if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
+ if (PTR_ERR(page) == -ENOENT)
return 1;
else if (IS_ERR(page))
return PTR_ERR(page);
@@ -1050,8 +1045,10 @@
trace_f2fs_truncate_inode_blocks_enter(inode, from);
level = get_node_path(inode, from, offset, noffset);
- if (level < 0)
+ if (level < 0) {
+ trace_f2fs_truncate_inode_blocks_exit(inode, level);
return level;
+ }
page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
@@ -1192,8 +1189,9 @@
}
if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
- f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
- inode->i_ino, (unsigned long long)inode->i_blocks);
+ f2fs_warn(F2FS_I_SB(inode),
+ "f2fs_remove_inode_page: inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino, (unsigned long long)inode->i_blocks);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
}
@@ -1308,7 +1306,13 @@
}
fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
- return f2fs_submit_page_bio(&fio);
+
+ err = f2fs_submit_page_bio(&fio);
+
+ if (!err)
+ f2fs_update_iostat(sbi, FS_NODE_READ_IO, F2FS_BLKSIZE);
+
+ return err;
}
/*
@@ -1385,6 +1389,7 @@
nid, nid_of_node(page), ino_of_node(page),
ofs_of_node(page), cpver_of_node(page),
next_blkaddr_of_node(page));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
err = -EINVAL;
out_err:
ClearPageUptodate(page);
@@ -1522,8 +1527,15 @@
trace_f2fs_writepage(page, NODE);
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) {
+ ClearPageUptodate(page);
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ unlock_page(page);
+ return 0;
+ }
goto redirty_out;
+ }
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
@@ -1721,7 +1733,7 @@
set_dentry_mark(page,
f2fs_need_dentry_mark(sbi, ino));
}
- /* may be written by other thread */
+ /* may be written by other thread */
if (!PageDirty(page))
set_page_dirty(page);
}
@@ -1809,6 +1821,51 @@
return true;
}
+void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
+{
+ pgoff_t index = 0;
+ struct pagevec pvec;
+ int nr_pages;
+
+ pagevec_init(&pvec);
+
+ while ((nr_pages = pagevec_lookup_tag(&pvec,
+ NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) {
+ int i;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ if (!IS_DNODE(page))
+ continue;
+
+ lock_page(page);
+
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+continue_unlock:
+ unlock_page(page);
+ continue;
+ }
+
+ if (!PageDirty(page)) {
+ /* someone wrote it for us */
+ goto continue_unlock;
+ }
+
+ /* flush inline_data, if it's async context. */
+ if (is_inline_node(page)) {
+ clear_inline_node(page);
+ unlock_page(page);
+ flush_inline_data(sbi, ino_of_node(page));
+ continue;
+ }
+ unlock_page(page);
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+}
+
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type)
@@ -1872,6 +1929,10 @@
goto continue_unlock;
}
+ /* flush inline_data/inode, if it's async context. */
+ if (!do_balance)
+ goto write_node;
+
/* flush inline_data */
if (is_inline_node(page)) {
clear_inline_node(page);
@@ -1886,7 +1947,7 @@
if (flush_dirty_inode(page))
goto lock_node;
}
-
+write_node:
f2fs_wait_on_page_writeback(page, NODE, true, true);
if (!clear_page_dirty_for_io(page))
@@ -1984,7 +2045,7 @@
goto skip_write;
/* balancing f2fs's metadata in background */
- f2fs_balance_fs_bg(sbi);
+ f2fs_balance_fs_bg(sbi, true);
/* collect a number of dirty node pages and write together */
if (wbc->sync_mode != WB_SYNC_ALL &&
@@ -2045,7 +2106,7 @@
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
#ifdef CONFIG_MIGRATION
- .migratepage = f2fs_migrate_page,
+ .migratepage = f2fs_migrate_page,
#endif
};
@@ -2056,7 +2117,7 @@
}
static int __insert_free_nid(struct f2fs_sb_info *sbi,
- struct free_nid *i, enum nid_state state)
+ struct free_nid *i)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2064,10 +2125,8 @@
if (err)
return err;
- f2fs_bug_on(sbi, state != i->state);
- nm_i->nid_cnt[state]++;
- if (state == FREE_NID)
- list_add_tail(&i->list, &nm_i->free_nid_list);
+ nm_i->nid_cnt[FREE_NID]++;
+ list_add_tail(&i->list, &nm_i->free_nid_list);
return 0;
}
@@ -2189,7 +2248,7 @@
}
}
ret = true;
- err = __insert_free_nid(sbi, i, FREE_NID);
+ err = __insert_free_nid(sbi, i);
err_out:
if (update) {
update_free_nid_bitmap(sbi, nid, ret, build);
@@ -2493,7 +2552,6 @@
int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct free_nid *i, *next;
int nr = nr_shrink;
if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
@@ -2502,17 +2560,23 @@
if (!mutex_trylock(&nm_i->build_lock))
return 0;
- spin_lock(&nm_i->nid_list_lock);
- list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
- if (nr_shrink <= 0 ||
- nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
- break;
+ while (nr_shrink && nm_i->nid_cnt[FREE_NID] > MAX_FREE_NIDS) {
+ struct free_nid *i, *next;
+ unsigned int batch = SHRINK_NID_BATCH_SIZE;
- __remove_free_nid(sbi, i, FREE_NID);
- kmem_cache_free(free_nid_slab, i);
- nr_shrink--;
+ spin_lock(&nm_i->nid_list_lock);
+ list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
+ if (!nr_shrink || !batch ||
+ nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
+ break;
+ __remove_free_nid(sbi, i, FREE_NID);
+ kmem_cache_free(free_nid_slab, i);
+ nr_shrink--;
+ batch--;
+ }
+ spin_unlock(&nm_i->nid_list_lock);
}
- spin_unlock(&nm_i->nid_list_lock);
+
mutex_unlock(&nm_i->build_lock);
return nr - nr_shrink;
@@ -2612,7 +2676,7 @@
retry:
ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
if (!ipage) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
goto retry;
}
@@ -3052,9 +3116,6 @@
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
- if (!version_bitmap)
- return -EFAULT;
-
nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
GFP_KERNEL);
if (!nm_i->nat_bitmap)
@@ -3204,27 +3265,27 @@
kvfree(nm_i->nat_bitmap_mir);
#endif
sbi->nm_info = NULL;
- kvfree(nm_i);
+ kfree(nm_i);
}
int __init f2fs_create_node_manager_caches(void)
{
- nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
+ nat_entry_slab = f2fs_kmem_cache_create("f2fs_nat_entry",
sizeof(struct nat_entry));
if (!nat_entry_slab)
goto fail;
- free_nid_slab = f2fs_kmem_cache_create("free_nid",
+ free_nid_slab = f2fs_kmem_cache_create("f2fs_free_nid",
sizeof(struct free_nid));
if (!free_nid_slab)
goto destroy_nat_entry;
- nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
+ nat_entry_set_slab = f2fs_kmem_cache_create("f2fs_nat_entry_set",
sizeof(struct nat_entry_set));
if (!nat_entry_set_slab)
goto destroy_free_nid;
- fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry",
+ fsync_node_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_node_entry",
sizeof(struct fsync_node_entry));
if (!fsync_node_entry_slab)
goto destroy_nat_entry_set;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 4a2e7ea..f84541b 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/node.h
*
@@ -15,6 +15,9 @@
#define FREE_NID_PAGES 8
#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
+/* size of free nid batch when shrinking */
+#define SHRINK_NID_BATCH_SIZE 8
+
#define DEF_RA_NID_PAGES 0 /* # of nid pages to be readaheaded */
/* maximum readahead size for node during getting data blocks */
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 5f230e9..72ce131 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -107,13 +107,51 @@
kmem_cache_free(fsync_entry_slab, entry);
}
+static int init_recovered_filename(const struct inode *dir,
+ struct f2fs_inode *raw_inode,
+ struct f2fs_filename *fname,
+ struct qstr *usr_fname)
+{
+ int err;
+
+ memset(fname, 0, sizeof(*fname));
+ fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen);
+ fname->disk_name.name = raw_inode->i_name;
+
+ if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN))
+ return -ENAMETOOLONG;
+
+ if (!IS_ENCRYPTED(dir)) {
+ usr_fname->name = fname->disk_name.name;
+ usr_fname->len = fname->disk_name.len;
+ fname->usr_fname = usr_fname;
+ }
+
+ /* Compute the hash of the filename */
+ if (IS_CASEFOLDED(dir)) {
+ err = f2fs_init_casefolded_name(dir, fname);
+ if (err)
+ return err;
+ f2fs_hash_filename(dir, fname);
+#ifdef CONFIG_UNICODE
+ /* Case-sensitive match is fine for recovery */
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+#endif
+ } else {
+ f2fs_hash_filename(dir, fname);
+ }
+ return 0;
+}
+
static int recover_dentry(struct inode *inode, struct page *ipage,
struct list_head *dir_list)
{
struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
- struct fscrypt_name fname;
+ struct f2fs_filename fname;
+ struct qstr usr_fname;
struct page *page;
struct inode *dir, *einode;
struct fsync_inode_entry *entry;
@@ -132,16 +170,9 @@
}
dir = entry->inode;
-
- memset(&fname, 0, sizeof(struct fscrypt_name));
- fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
- fname.disk_name.name = raw_inode->i_name;
-
- if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
- WARN_ON(1);
- err = -ENAMETOOLONG;
+ err = init_recovered_filename(dir, raw_inode, &fname, &usr_fname);
+ if (err)
goto out;
- }
retry:
de = __f2fs_find_entry(dir, &fname, &page);
if (de && inode->i_ino == le32_to_cpu(de->ino))
@@ -496,8 +527,7 @@
return 0;
truncate_out:
- if (datablock_addr(tdn.inode, tdn.node_page,
- tdn.ofs_in_node) == blkaddr)
+ if (f2fs_data_blkaddr(&tdn) == blkaddr)
f2fs_truncate_data_blocks_range(&tdn, 1);
if (dn->inode->i_ino == nid && !dn->inode_page_locked)
unlock_page(dn->inode_page);
@@ -541,7 +571,7 @@
err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
goto retry_dn;
}
goto out;
@@ -566,8 +596,8 @@
for (; start < end; start++, dn.ofs_in_node++) {
block_t src, dest;
- src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
- dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
+ src = f2fs_data_blkaddr(&dn);
+ dest = data_blkaddr(dn.inode, page, dn.ofs_in_node);
if (__is_valid_data_blkaddr(src) &&
!f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
@@ -624,7 +654,8 @@
err = check_index_in_prev_nodes(sbi, dest, &dn);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC,
+ DEFAULT_IO_TIMEOUT);
goto retry_prev;
}
goto err;
@@ -729,6 +760,7 @@
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
+ bool fix_curseg_write_pointer = false;
#ifdef CONFIG_QUOTA
int quota_enabled;
#endif
@@ -745,13 +777,6 @@
quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif
- fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
- sizeof(struct fsync_inode_entry));
- if (!fsync_entry_slab) {
- err = -ENOMEM;
- goto out;
- }
-
INIT_LIST_HEAD(&inode_list);
INIT_LIST_HEAD(&tmp_inode_list);
INIT_LIST_HEAD(&dir_list);
@@ -780,6 +805,8 @@
sbi->sb->s_flags = s_flags;
}
skip:
+ fix_curseg_write_pointer = !check_only || list_empty(&inode_list);
+
destroy_fsync_dnodes(&inode_list, err);
destroy_fsync_dnodes(&tmp_inode_list, err);
@@ -790,9 +817,22 @@
if (err) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
truncate_inode_pages_final(META_MAPPING(sbi));
- } else {
- clear_sbi_flag(sbi, SBI_POR_DOING);
}
+
+ /*
+ * If fsync data succeeds or there is no fsync data to recover,
+ * and the f2fs is not read only, check and fix zoned block devices'
+ * write pointer consistency.
+ */
+ if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) &&
+ f2fs_sb_has_blkzoned(sbi)) {
+ err = f2fs_fix_curseg_write_pointer(sbi);
+ ret = err;
+ }
+
+ if (!err)
+ clear_sbi_flag(sbi, SBI_POR_DOING);
+
mutex_unlock(&sbi->cp_mutex);
/* let's drop all the directory inodes for clean checkpoint */
@@ -809,8 +849,6 @@
}
}
- kmem_cache_destroy(fsync_entry_slab);
-out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
if (quota_enabled)
@@ -820,3 +858,17 @@
return ret ? ret: err;
}
+
+int __init f2fs_create_recovery_cache(void)
+{
+ fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
+ sizeof(struct fsync_inode_entry));
+ if (!fsync_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_destroy_recovery_cache(void)
+{
+ kmem_cache_destroy(fsync_entry_slab);
+}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 78c54bb..d04b449 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -172,9 +172,9 @@
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
- if (test_opt(sbi, LFS))
+ if (f2fs_lfs_mode(sbi))
return false;
- if (sbi->gc_mode == GC_URGENT)
+ if (sbi->gc_mode == GC_URGENT_HIGH)
return true;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return true;
@@ -189,7 +189,7 @@
f2fs_trace_pid(page);
- f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
+ f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
@@ -245,7 +245,8 @@
LOOKUP_NODE);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC,
+ DEFAULT_IO_TIMEOUT);
cond_resched();
goto retry;
}
@@ -312,7 +313,7 @@
skip:
iput(inode);
}
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
cond_resched();
if (gc_failure) {
if (++looped >= count)
@@ -326,24 +327,27 @@
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- while (!list_empty(&fi->inmem_pages)) {
+ do {
mutex_lock(&fi->inmem_lock);
+ if (list_empty(&fi->inmem_pages)) {
+ fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
+
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (!list_empty(&fi->inmem_ilist))
+ list_del_init(&fi->inmem_ilist);
+ if (f2fs_is_atomic_file(inode)) {
+ clear_inode_flag(inode, FI_ATOMIC_FILE);
+ sbi->atomic_files--;
+ }
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+
+ mutex_unlock(&fi->inmem_lock);
+ break;
+ }
__revoke_inmem_pages(inode, &fi->inmem_pages,
true, false, true);
mutex_unlock(&fi->inmem_lock);
- }
-
- fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
- stat_dec_atomic_write(inode);
-
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (!list_empty(&fi->inmem_ilist))
- list_del_init(&fi->inmem_ilist);
- if (f2fs_is_atomic_file(inode)) {
- clear_inode_flag(inode, FI_ATOMIC_FILE);
- sbi->atomic_files--;
- }
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ } while (1);
}
void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
@@ -416,7 +420,8 @@
err = f2fs_do_write_data_page(&fio);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC,
+ DEFAULT_IO_TIMEOUT);
cond_resched();
goto retry;
}
@@ -495,7 +500,7 @@
/* balance_fs_bg is able to be pending */
if (need && excess_cached_nats(sbi))
- f2fs_balance_fs_bg(sbi);
+ f2fs_balance_fs_bg(sbi, false);
if (!f2fs_is_checkpoint_ready(sbi))
return;
@@ -505,12 +510,12 @@
* dir/node pages without enough free segments.
*/
if (has_not_enough_free_secs(sbi, 0, 0)) {
- mutex_lock(&sbi->gc_mutex);
- f2fs_gc(sbi, false, false, NULL_SEGNO);
+ down_write(&sbi->gc_lock);
+ f2fs_gc(sbi, false, false, false, NULL_SEGNO);
}
}
-void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
+void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
{
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return;
@@ -539,7 +544,7 @@
excess_dirty_nats(sbi) ||
excess_dirty_nodes(sbi) ||
f2fs_time_over(sbi, CP_TIME)) {
- if (test_opt(sbi, DATA_FLUSH)) {
+ if (test_opt(sbi, DATA_FLUSH) && from_bg) {
struct blk_plug plug;
mutex_lock(&sbi->flush_lock);
@@ -727,7 +732,7 @@
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
- kvfree(fcc);
+ kfree(fcc);
SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -746,7 +751,7 @@
kthread_stop(flush_thread);
}
if (free) {
- kvfree(fcc);
+ kfree(fcc);
SM_I(sbi)->fcc_info = NULL;
}
}
@@ -758,6 +763,9 @@
if (!f2fs_is_multi_device(sbi))
return 0;
+ if (test_opt(sbi, NOBARRIER))
+ return 0;
+
for (i = 1; i < sbi->s_ndevs; i++) {
if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
continue;
@@ -795,6 +803,18 @@
}
if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]++;
+
+ if (__is_large_section(sbi)) {
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+ block_t valid_blocks =
+ get_valid_blocks(sbi, segno, true);
+
+ f2fs_bug_on(sbi, unlikely(!valid_blocks ||
+ valid_blocks == BLKS_PER_SEC(sbi)));
+
+ if (!IS_CURSEC(sbi, secno))
+ set_bit(secno, dirty_i->dirty_secmap);
+ }
}
}
@@ -802,6 +822,7 @@
enum dirty_type dirty_type)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ block_t valid_blocks;
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
dirty_i->nr_dirty[dirty_type]--;
@@ -813,13 +834,26 @@
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
- if (get_valid_blocks(sbi, segno, true) == 0) {
+ valid_blocks = get_valid_blocks(sbi, segno, true);
+ if (valid_blocks == 0) {
clear_bit(GET_SEC_FROM_SEG(sbi, segno),
dirty_i->victim_secmap);
#ifdef CONFIG_F2FS_CHECK_FS
clear_bit(segno, SIT_I(sbi)->invalid_segmap);
#endif
}
+ if (__is_large_section(sbi)) {
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+
+ if (!valid_blocks ||
+ valid_blocks == BLKS_PER_SEC(sbi)) {
+ clear_bit(secno, dirty_i->dirty_secmap);
+ return;
+ }
+
+ if (!IS_CURSEC(sbi, secno))
+ set_bit(secno, dirty_i->dirty_secmap);
+ }
}
}
@@ -832,20 +866,22 @@
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned short valid_blocks, ckpt_valid_blocks;
+ unsigned int usable_blocks;
if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
return;
+ usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
mutex_lock(&dirty_i->seglist_lock);
valid_blocks = get_valid_blocks(sbi, segno, false);
- ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
+ ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
- ckpt_valid_blocks == sbi->blocks_per_seg)) {
+ ckpt_valid_blocks == usable_blocks)) {
__locate_dirty_segment(sbi, segno, PRE);
__remove_dirty_segment(sbi, segno, DIRTY);
- } else if (valid_blocks < sbi->blocks_per_seg) {
+ } else if (valid_blocks < usable_blocks) {
__locate_dirty_segment(sbi, segno, DIRTY);
} else {
/* Recovery routine with SSR needs this */
@@ -888,9 +924,11 @@
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
se = get_seg_entry(sbi, segno);
if (IS_NODESEG(se->type))
- holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+ holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
+ se->valid_blocks;
else
- holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+ holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
+ se->valid_blocks;
}
mutex_unlock(&dirty_i->seglist_lock);
@@ -922,7 +960,7 @@
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
if (get_valid_blocks(sbi, segno, false))
continue;
- if (get_ckpt_valid_blocks(sbi, segno))
+ if (get_ckpt_valid_blocks(sbi, segno, false))
continue;
mutex_unlock(&dirty_i->seglist_lock);
return segno;
@@ -1028,9 +1066,9 @@
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
unsigned long flags;
- dc->error = blk_status_to_errno(bio->bi_status);
-
spin_lock_irqsave(&dc->lock, flags);
+ if (!dc->error)
+ dc->error = blk_status_to_errno(bio->bi_status);
dc->bio_ref--;
if (!dc->bio_ref && dc->state == D_SUBMIT) {
dc->state = D_DONE;
@@ -1079,7 +1117,7 @@
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
- dpolicy->timeout = 0;
+ dpolicy->timeout = false;
if (discard_type == DPOLICY_BG) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -1100,10 +1138,10 @@
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_UMOUNT) {
- dpolicy->max_requests = UINT_MAX;
dpolicy->io_aware = false;
/* we need to issue all to keep CP_TRIMMED_FLAG */
dpolicy->granularity = 1;
+ dpolicy->timeout = true;
}
}
@@ -1213,12 +1251,14 @@
len = total_len;
}
- if (!err && len)
+ if (!err && len) {
+ dcc->undiscard_blks -= len;
__update_discard_tree_range(sbi, bdev, lstart, start, len);
+ }
return err;
}
-static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
+static void __insert_discard_tree(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len,
struct rb_node **insert_p,
@@ -1227,7 +1267,6 @@
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct rb_node **p;
struct rb_node *parent = NULL;
- struct discard_cmd *dc = NULL;
bool leftmost = true;
if (insert_p && insert_parent) {
@@ -1239,12 +1278,8 @@
p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
lstart, &leftmost);
do_insert:
- dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
+ __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
p, leftmost);
- if (!dc)
- return NULL;
-
- return dc;
}
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
@@ -1461,6 +1496,8 @@
return issued;
}
+static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy);
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
@@ -1469,15 +1506,17 @@
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
- int i, issued = 0;
+ int i, issued;
bool io_interrupted = false;
- if (dpolicy->timeout != 0)
- f2fs_update_time(sbi, dpolicy->timeout);
+ if (dpolicy->timeout)
+ f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
+retry:
+ issued = 0;
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
- if (dpolicy->timeout != 0 &&
- f2fs_time_over(sbi, dpolicy->timeout))
+ if (dpolicy->timeout &&
+ f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
break;
if (i + 1 < dpolicy->granularity)
@@ -1493,13 +1532,13 @@
goto next;
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root));
+ &dcc->root, false));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
- if (dpolicy->timeout != 0 &&
- f2fs_time_over(sbi, dpolicy->timeout))
+ if (dpolicy->timeout &&
+ f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
break;
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
@@ -1521,6 +1560,11 @@
break;
}
+ if (dpolicy->type == DPOLICY_UMOUNT && issued) {
+ __wait_all_discard_cmd(sbi, dpolicy);
+ goto retry;
+ }
+
if (!issued && io_interrupted)
issued = -1;
@@ -1678,7 +1722,6 @@
__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
dcc->discard_granularity);
- dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
__issue_discard_cmd(sbi, &dpolicy);
dropped = __drop_discard_cmd(sbi);
@@ -1727,7 +1770,7 @@
continue;
}
- if (sbi->gc_mode == GC_URGENT)
+ if (sbi->gc_mode == GC_URGENT_HIGH)
__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
sb_start_intwrite(sbi->sb);
@@ -1781,7 +1824,8 @@
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
+ return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
+ sector, nr_sects, GFP_NOFS);
}
/* For conventional zones, use regular discard if supported */
@@ -1925,7 +1969,7 @@
mutex_lock(&dirty_i->seglist_lock);
for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
- __set_test_and_free(sbi, segno);
+ __set_test_and_free(sbi, segno, false);
mutex_unlock(&dirty_i->seglist_lock);
}
@@ -1940,7 +1984,7 @@
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
- bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
+ bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
mutex_lock(&dirty_i->seglist_lock);
@@ -1972,7 +2016,7 @@
(end - 1) <= cpc->trim_end)
continue;
- if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
+ if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
continue;
@@ -2068,7 +2112,7 @@
"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(dcc->f2fs_issue_discard)) {
err = PTR_ERR(dcc->f2fs_issue_discard);
- kvfree(dcc);
+ kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
return err;
}
@@ -2092,7 +2136,7 @@
if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
f2fs_issue_discard_timeout(sbi);
- kvfree(dcc);
+ kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
@@ -2117,6 +2161,39 @@
__mark_sit_entry_dirty(sbi, segno);
}
+static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ unsigned int segno = GET_SEGNO(sbi, blkaddr);
+
+ if (segno == NULL_SEGNO)
+ return 0;
+ return get_seg_entry(sbi, segno)->mtime;
+}
+
+static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
+ unsigned long long old_mtime)
+{
+ struct seg_entry *se;
+ unsigned int segno = GET_SEGNO(sbi, blkaddr);
+ unsigned long long ctime = get_mtime(sbi, false);
+ unsigned long long mtime = old_mtime ? old_mtime : ctime;
+
+ if (segno == NULL_SEGNO)
+ return;
+
+ se = get_seg_entry(sbi, segno);
+
+ if (!se->mtime)
+ se->mtime = mtime;
+ else
+ se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
+ se->valid_blocks + 1);
+
+ if (ctime > SIT_I(sbi)->max_mtime)
+ SIT_I(sbi)->max_mtime = ctime;
+}
+
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
struct seg_entry *se;
@@ -2133,13 +2210,10 @@
new_vblocks = se->valid_blocks + del;
offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
- f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
- (new_vblocks > sbi->blocks_per_seg)));
+ f2fs_bug_on(sbi, (new_vblocks < 0 ||
+ (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
se->valid_blocks = new_vblocks;
- se->mtime = get_mtime(sbi, false);
- if (se->mtime > SIT_I(sbi)->max_mtime)
- SIT_I(sbi)->max_mtime = se->mtime;
/* Update valid block bitmap */
if (del > 0) {
@@ -2224,7 +2298,7 @@
struct sit_info *sit_i = SIT_I(sbi);
f2fs_bug_on(sbi, addr == NULL_ADDR);
- if (addr == NEW_ADDR)
+ if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
return;
invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
@@ -2232,6 +2306,7 @@
/* add it into sit main buffer */
down_write(&sit_i->sentry_lock);
+ update_segment_mtime(sbi, addr, 0);
update_sit_entry(sbi, addr, -1);
/* add it into dirty seglist */
@@ -2358,9 +2433,9 @@
f2fs_put_page(page, 1);
}
-static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
+static int is_next_segment_free(struct f2fs_sb_info *sbi,
+ struct curseg_info *curseg, int type)
{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno = curseg->segno + 1;
struct free_segmap_info *free_i = FREE_I(sbi);
@@ -2464,7 +2539,9 @@
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
struct summary_footer *sum_footer;
+ unsigned short seg_type = curseg->seg_type;
+ curseg->inited = true;
curseg->segno = curseg->next_segno;
curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
curseg->next_blkoff = 0;
@@ -2472,24 +2549,36 @@
sum_footer = &(curseg->sum_blk->footer);
memset(sum_footer, 0, sizeof(struct summary_footer));
- if (IS_DATASEG(type))
+
+ sanity_check_seg_type(sbi, seg_type);
+
+ if (IS_DATASEG(seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
- if (IS_NODESEG(type))
+ if (IS_NODESEG(seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
- __set_sit_entry_type(sbi, type, curseg->segno, modified);
+ __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
}
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned short seg_type = curseg->seg_type;
+
+ sanity_check_seg_type(sbi, seg_type);
+
/* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
- return CURSEG_I(sbi, type)->segno;
+ return curseg->segno;
+
+ /* inmem log may not locate on any segment after mount */
+ if (!curseg->inited)
+ return 0;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return 0;
if (test_opt(sbi, NOHEAP) &&
- (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
+ (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
return 0;
if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
@@ -2499,7 +2588,7 @@
if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
return 0;
- return CURSEG_I(sbi, type)->segno;
+ return curseg->segno;
}
/*
@@ -2509,12 +2598,14 @@
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned short seg_type = curseg->seg_type;
unsigned int segno = curseg->segno;
int dir = ALLOC_LEFT;
- write_sum_page(sbi, curseg->sum_blk,
+ if (curseg->inited)
+ write_sum_page(sbi, curseg->sum_blk,
GET_SUM_BLOCK(sbi, segno));
- if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
+ if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
dir = ALLOC_RIGHT;
if (test_opt(sbi, NOHEAP))
@@ -2559,11 +2650,28 @@
seg->next_blkoff++;
}
+bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
+{
+ struct seg_entry *se = get_seg_entry(sbi, segno);
+ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+ unsigned long *target_map = SIT_I(sbi)->tmp_map;
+ unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+ unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+ int i, pos;
+
+ for (i = 0; i < entries; i++)
+ target_map[i] = ckpt_map[i] | cur_map[i];
+
+ pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, 0);
+
+ return pos < sbi->blocks_per_seg;
+}
+
/*
* This function always allocates a used segment(from dirty seglist) by SSR
* manner, so it should recover the existing segment information of valid blocks
*/
-static void change_curseg(struct f2fs_sb_info *sbi, int type)
+static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2571,8 +2679,10 @@
struct f2fs_summary_block *sum_node;
struct page *sum_page;
- write_sum_page(sbi, curseg->sum_blk,
- GET_SUM_BLOCK(sbi, curseg->segno));
+ if (flush)
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, curseg->segno));
+
__set_test_and_inuse(sbi, new_segno);
mutex_lock(&dirty_i->seglist_lock);
@@ -2595,23 +2705,129 @@
f2fs_put_page(sum_page, 1);
}
-static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+ int alloc_mode, unsigned long long age);
+
+static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
+ int target_type, int alloc_mode,
+ unsigned long long age)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ curseg->seg_type = target_type;
+
+ if (get_ssr_segment(sbi, type, alloc_mode, age)) {
+ struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
+
+ curseg->seg_type = se->type;
+ change_curseg(sbi, type, true);
+ } else {
+ /* allocate cold segment by default */
+ curseg->seg_type = CURSEG_COLD_DATA;
+ new_curseg(sbi, type, true);
+ }
+ stat_inc_seg_type(sbi, curseg);
+}
+
+static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
+
+ if (!sbi->am.atgc_enabled)
+ return;
+
+ down_read(&SM_I(sbi)->curseg_lock);
+
+ mutex_lock(&curseg->curseg_mutex);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
+
+ up_write(&SIT_I(sbi)->sentry_lock);
+ mutex_unlock(&curseg->curseg_mutex);
+
+ up_read(&SM_I(sbi)->curseg_lock);
+
+}
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_init_atgc_curseg(sbi);
+}
+
+static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ if (!curseg->inited)
+ goto out;
+
+ if (get_valid_blocks(sbi, curseg->segno, false)) {
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, curseg->segno));
+ } else {
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+ __set_test_and_free(sbi, curseg->segno, true);
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+ }
+out:
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+ if (sbi->am.atgc_enabled)
+ __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ if (!curseg->inited)
+ goto out;
+ if (get_valid_blocks(sbi, curseg->segno, false))
+ goto out;
+
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+ __set_test_and_inuse(sbi, curseg->segno);
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+out:
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+ if (sbi->am.atgc_enabled)
+ __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+ int alloc_mode, unsigned long long age)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
unsigned segno = NULL_SEGNO;
+ unsigned short seg_type = curseg->seg_type;
int i, cnt;
bool reversed = false;
+ sanity_check_seg_type(sbi, seg_type);
+
/* f2fs_need_SSR() already forces to do this */
- if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
+ if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
/* For node segments, let's do SSR more intensively */
- if (IS_NODESEG(type)) {
- if (type >= CURSEG_WARM_NODE) {
+ if (IS_NODESEG(seg_type)) {
+ if (seg_type >= CURSEG_WARM_NODE) {
reversed = true;
i = CURSEG_COLD_NODE;
} else {
@@ -2619,7 +2835,7 @@
}
cnt = NR_CURSEG_NODE_TYPE;
} else {
- if (type >= CURSEG_WARM_DATA) {
+ if (seg_type >= CURSEG_WARM_DATA) {
reversed = true;
i = CURSEG_COLD_DATA;
} else {
@@ -2629,9 +2845,9 @@
}
for (; cnt-- > 0; reversed ? i-- : i++) {
- if (i == type)
+ if (i == seg_type)
continue;
- if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
+ if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
@@ -2660,20 +2876,22 @@
if (force)
new_curseg(sbi, type, true);
else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
- type == CURSEG_WARM_NODE)
+ curseg->seg_type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
- else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+ else if (curseg->alloc_type == LFS &&
+ is_next_segment_free(sbi, curseg, type) &&
likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
new_curseg(sbi, type, false);
- else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
- change_curseg(sbi, type);
+ else if (f2fs_need_SSR(sbi) &&
+ get_ssr_segment(sbi, type, SSR, 0))
+ change_curseg(sbi, type, true);
else
new_curseg(sbi, type, false);
stat_inc_seg_type(sbi, curseg);
}
-void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2687,8 +2905,8 @@
if (segno < start || segno > end)
goto unlock;
- if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
- change_curseg(sbi, type);
+ if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
+ change_curseg(sbi, type, true);
else
new_curseg(sbi, type, true);
@@ -2706,22 +2924,51 @@
up_read(&SM_I(sbi)->curseg_lock);
}
+static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
+ bool new_sec)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int old_segno;
+
+ if (!curseg->inited)
+ goto alloc;
+
+ if (curseg->next_blkoff ||
+ get_valid_blocks(sbi, curseg->segno, new_sec))
+ goto alloc;
+
+ if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
+ return;
+alloc:
+ old_segno = curseg->segno;
+ SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
+ locate_dirty_segment(sbi, old_segno);
+}
+
+static void __allocate_new_section(struct f2fs_sb_info *sbi, int type)
+{
+ __allocate_new_segment(sbi, type, true);
+}
+
+void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type)
+{
+ down_read(&SM_I(sbi)->curseg_lock);
+ down_write(&SIT_I(sbi)->sentry_lock);
+ __allocate_new_section(sbi, type);
+ up_write(&SIT_I(sbi)->sentry_lock);
+ up_read(&SM_I(sbi)->curseg_lock);
+}
+
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
- struct curseg_info *curseg;
- unsigned int old_segno;
int i;
+ down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
-
- for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
- curseg = CURSEG_I(sbi, i);
- old_segno = curseg->segno;
- SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
- locate_dirty_segment(sbi, old_segno);
- }
-
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
+ __allocate_new_segment(sbi, i, false);
up_write(&SIT_I(sbi)->sentry_lock);
+ up_read(&SM_I(sbi)->curseg_lock);
}
static const struct segment_allocation default_salloc_ops = {
@@ -2765,7 +3012,7 @@
mutex_lock(&dcc->cmd_lock);
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root));
+ &dcc->root, false));
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
@@ -2800,7 +3047,7 @@
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
trimmed += __wait_all_discard_cmd(sbi, NULL);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
goto next;
}
skip:
@@ -2829,7 +3076,7 @@
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
- bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
+ bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
@@ -2859,9 +3106,9 @@
if (sbi->discard_blks == 0)
goto out;
- mutex_lock(&sbi->gc_mutex);
+ down_write(&sbi->gc_lock);
err = f2fs_write_checkpoint(sbi, &cpc);
- mutex_unlock(&sbi->gc_mutex);
+ up_write(&sbi->gc_lock);
if (err)
goto out;
@@ -2889,12 +3136,11 @@
return err;
}
-static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
+static bool __has_curseg_space(struct f2fs_sb_info *sbi,
+ struct curseg_info *curseg)
{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
- if (curseg->next_blkoff < sbi->blocks_per_seg)
- return true;
- return false;
+ return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi,
+ curseg->segno);
}
int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
@@ -3034,7 +3280,13 @@
if (fio->type == DATA) {
struct inode *inode = fio->page->mapping->host;
- if (is_cold_data(fio->page) || file_is_cold(inode))
+ if (is_cold_data(fio->page)) {
+ if (fio->sbi->am.atgc_enabled)
+ return CURSEG_ALL_DATA_ATGC;
+ else
+ return CURSEG_COLD_DATA;
+ }
+ if (file_is_cold(inode) || f2fs_compressed_file(inode))
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
@@ -3080,18 +3332,29 @@
void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
- struct f2fs_io_info *fio, bool add_list)
+ struct f2fs_io_info *fio)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned long long old_mtime;
+ bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
+ struct seg_entry *se = NULL;
down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&sit_i->sentry_lock);
+ if (from_gc) {
+ f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
+ se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
+ sanity_check_seg_type(sbi, se->type);
+ f2fs_bug_on(sbi, IS_NODESEG(se->type));
+ }
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+ f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
+
f2fs_wait_discard_bio(sbi, *new_blkaddr);
/*
@@ -3105,6 +3368,14 @@
stat_inc_block_count(sbi, curseg);
+ if (from_gc) {
+ old_mtime = get_segment_mtime(sbi, old_blkaddr);
+ } else {
+ update_segment_mtime(sbi, old_blkaddr, 0);
+ old_mtime = 0;
+ }
+ update_segment_mtime(sbi, *new_blkaddr, old_mtime);
+
/*
* SIT information should be updated before segment allocation,
* since SSR needs latest valid block information.
@@ -3113,9 +3384,13 @@
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
update_sit_entry(sbi, old_blkaddr, -1);
- if (!__has_curseg_space(sbi, type))
- sit_i->s_ops->allocate_segment(sbi, type, false);
-
+ if (!__has_curseg_space(sbi, curseg)) {
+ if (from_gc)
+ get_atssr_segment(sbi, type, se->type,
+ AT_SSR, se->mtime);
+ else
+ sit_i->s_ops->allocate_segment(sbi, type, false);
+ }
/*
* segment dirty status should be updated after segment allocation,
* so we just need to update status only one time after previous
@@ -3132,12 +3407,12 @@
f2fs_inode_chksum_set(sbi, page);
}
- if (F2FS_IO_ALIGNED(sbi))
- fio->retry = false;
-
- if (add_list) {
+ if (fio) {
struct f2fs_bio_info *io;
+ if (F2FS_IO_ALIGNED(sbi))
+ fio->retry = false;
+
INIT_LIST_HEAD(&fio->list);
fio->in_list = true;
io = sbi->write_io[fio->type] + fio->temp;
@@ -3175,13 +3450,13 @@
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
int type = __get_segment_type(fio);
- bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
+ bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
if (keep_order)
down_read(&fio->sbi->io_order_lock);
reallocate:
f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
- &fio->new_blkaddr, sum, type, fio, true);
+ &fio->new_blkaddr, sum, type, fio);
if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(fio->sbi),
fio->old_blkaddr, fio->old_blkaddr);
@@ -3271,7 +3546,7 @@
stat_inc_inplace_blocks(fio->sbi);
- if (fio->bio)
+ if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
err = f2fs_merge_page_bio(fio);
else
err = f2fs_submit_page_bio(fio);
@@ -3297,7 +3572,8 @@
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
- bool recover_curseg, bool recover_newaddr)
+ bool recover_curseg, bool recover_newaddr,
+ bool from_gc)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
@@ -3342,17 +3618,22 @@
/* change the current segment */
if (segno != curseg->segno) {
curseg->next_segno = segno;
- change_curseg(sbi, type);
+ change_curseg(sbi, type, true);
}
curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
- if (!recover_curseg || recover_newaddr)
+ if (!recover_curseg || recover_newaddr) {
+ if (!from_gc)
+ update_segment_mtime(sbi, new_blkaddr, 0);
update_sit_entry(sbi, new_blkaddr, 1);
+ }
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
+ if (!from_gc)
+ update_segment_mtime(sbi, old_blkaddr, 0);
update_sit_entry(sbi, old_blkaddr, -1);
}
@@ -3364,7 +3645,7 @@
if (recover_curseg) {
if (old_cursegno != curseg->segno) {
curseg->next_segno = old_cursegno;
- change_curseg(sbi, type);
+ change_curseg(sbi, type, true);
}
curseg->next_blkoff = old_blkoff;
}
@@ -3384,7 +3665,7 @@
set_summary(&sum, dn->nid, dn->ofs_in_node, version);
f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
- recover_curseg, recover_newaddr);
+ recover_curseg, recover_newaddr, false);
f2fs_update_data_blkaddr(dn, new_addr);
}
@@ -3395,7 +3676,10 @@
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+ /* submit cached LFS IO */
f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
+ /* sbumit cached IPU IO */
+ f2fs_submit_merged_ipu_write(sbi, NULL, page);
if (ordered) {
wait_on_page_writeback(page);
f2fs_bug_on(sbi, locked && PageWriteback(page));
@@ -3513,7 +3797,7 @@
blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
CURSEG_HOT_DATA]);
if (__exist_node_summaries(sbi))
- blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
+ blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
else
blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
} else {
@@ -3591,8 +3875,9 @@
}
if (__exist_node_summaries(sbi))
- f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
- NR_CURSEG_TYPE - type, META_CP, true);
+ f2fs_ra_meta_pages(sbi,
+ sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
+ NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
for (; type <= CURSEG_COLD_NODE; type++) {
err = read_normal_summaries(sbi, type);
@@ -4050,7 +4335,7 @@
sit_i->dirty_sentries = 0;
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
- sit_i->mounted_time = ktime_get_real_seconds();
+ sit_i->mounted_time = ktime_get_boottime_seconds();
init_rwsem(&sit_i->sentry_lock);
return 0;
}
@@ -4094,14 +4379,14 @@
struct curseg_info *array;
int i;
- array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
- GFP_KERNEL);
+ array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
+ sizeof(*array)), GFP_KERNEL);
if (!array)
return -ENOMEM;
SM_I(sbi)->curseg_array = array;
- for (i = 0; i < NR_CURSEG_TYPE; i++) {
+ for (i = 0; i < NO_CHECK_TYPE; i++) {
mutex_init(&array[i].curseg_mutex);
array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
if (!array[i].sum_blk)
@@ -4111,8 +4396,15 @@
sizeof(struct f2fs_journal), GFP_KERNEL);
if (!array[i].journal)
return -ENOMEM;
+ if (i < NR_PERSISTENT_LOG)
+ array[i].seg_type = CURSEG_HOT_DATA + i;
+ else if (i == CURSEG_COLD_DATA_PINNED)
+ array[i].seg_type = CURSEG_COLD_DATA;
+ else if (i == CURSEG_ALL_DATA_ATGC)
+ array[i].seg_type = CURSEG_COLD_DATA;
array[i].segno = NULL_SEGNO;
array[i].next_blkoff = 0;
+ array[i].inited = false;
}
return restore_curseg_summaries(sbi);
}
@@ -4233,9 +4525,12 @@
{
unsigned int start;
int type;
+ struct seg_entry *sentry;
for (start = 0; start < MAIN_SEGS(sbi); start++) {
- struct seg_entry *sentry = get_seg_entry(sbi, start);
+ if (f2fs_usable_blks_in_seg(sbi, start) == 0)
+ continue;
+ sentry = get_seg_entry(sbi, start);
if (!sentry->valid_blocks)
__set_free(sbi, start);
else
@@ -4254,8 +4549,9 @@
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct free_segmap_info *free_i = FREE_I(sbi);
- unsigned int segno = 0, offset = 0;
- unsigned short valid_blocks;
+ unsigned int segno = 0, offset = 0, secno;
+ block_t valid_blocks, usable_blks_in_seg;
+ block_t blks_per_sec = BLKS_PER_SEC(sbi);
while (1) {
/* find dirty segment based on free segmap */
@@ -4264,9 +4560,10 @@
break;
offset = segno + 1;
valid_blocks = get_valid_blocks(sbi, segno, false);
- if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
+ usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
+ if (valid_blocks == usable_blks_in_seg || !valid_blocks)
continue;
- if (valid_blocks > sbi->blocks_per_seg) {
+ if (valid_blocks > usable_blks_in_seg) {
f2fs_bug_on(sbi, 1);
continue;
}
@@ -4274,6 +4571,22 @@
__locate_dirty_segment(sbi, segno, DIRTY);
mutex_unlock(&dirty_i->seglist_lock);
}
+
+ if (!__is_large_section(sbi))
+ return;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+ valid_blocks = get_valid_blocks(sbi, segno, true);
+ secno = GET_SEC_FROM_SEG(sbi, segno);
+
+ if (!valid_blocks || valid_blocks == blks_per_sec)
+ continue;
+ if (IS_CURSEC(sbi, secno))
+ continue;
+ set_bit(secno, dirty_i->dirty_secmap);
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
}
static int init_victim_secmap(struct f2fs_sb_info *sbi)
@@ -4310,6 +4623,14 @@
return -ENOMEM;
}
+ if (__is_large_section(sbi)) {
+ bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
+ dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
+ bitmap_size, GFP_KERNEL);
+ if (!dirty_i->dirty_secmap)
+ return -ENOMEM;
+ }
+
init_dirty_segmap(sbi);
return init_victim_secmap(sbi);
}
@@ -4322,11 +4643,13 @@
* In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
* In LFS curseg, all blkaddr after .next_blkoff should be unused.
*/
- for (i = 0; i < NO_CHECK_TYPE; i++) {
+ for (i = 0; i < NR_PERSISTENT_LOG; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
unsigned int blkofs = curseg->next_blkoff;
+ sanity_check_seg_type(sbi, curseg->seg_type);
+
if (f2fs_test_bit(blkofs, se->cur_valid_map))
goto out;
@@ -4347,6 +4670,387 @@
return 0;
}
+#ifdef CONFIG_BLK_DEV_ZONED
+
+static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
+ struct f2fs_dev_info *fdev,
+ struct blk_zone *zone)
+{
+ unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
+ block_t zone_block, wp_block, last_valid_block;
+ unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
+ int i, s, b, ret;
+ struct seg_entry *se;
+
+ if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+ return 0;
+
+ wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
+ wp_segno = GET_SEGNO(sbi, wp_block);
+ wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
+ zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
+ zone_segno = GET_SEGNO(sbi, zone_block);
+ zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
+
+ if (zone_segno >= MAIN_SEGS(sbi))
+ return 0;
+
+ /*
+ * Skip check of zones cursegs point to, since
+ * fix_curseg_write_pointer() checks them.
+ */
+ for (i = 0; i < NO_CHECK_TYPE; i++)
+ if (zone_secno == GET_SEC_FROM_SEG(sbi,
+ CURSEG_I(sbi, i)->segno))
+ return 0;
+
+ /*
+ * Get last valid block of the zone.
+ */
+ last_valid_block = zone_block - 1;
+ for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
+ segno = zone_segno + s;
+ se = get_seg_entry(sbi, segno);
+ for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
+ if (f2fs_test_bit(b, se->cur_valid_map)) {
+ last_valid_block = START_BLOCK(sbi, segno) + b;
+ break;
+ }
+ if (last_valid_block >= zone_block)
+ break;
+ }
+
+ /*
+ * If last valid block is beyond the write pointer, report the
+ * inconsistency. This inconsistency does not cause write error
+ * because the zone will not be selected for write operation until
+ * it get discarded. Just report it.
+ */
+ if (last_valid_block >= wp_block) {
+ f2fs_notice(sbi, "Valid block beyond write pointer: "
+ "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
+ GET_SEGNO(sbi, last_valid_block),
+ GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
+ wp_segno, wp_blkoff);
+ return 0;
+ }
+
+ /*
+ * If there is no valid block in the zone and if write pointer is
+ * not at zone start, reset the write pointer.
+ */
+ if (last_valid_block + 1 == zone_block && zone->wp != zone->start) {
+ f2fs_notice(sbi,
+ "Zone without valid block has non-zero write "
+ "pointer. Reset the write pointer: wp[0x%x,0x%x]",
+ wp_segno, wp_blkoff);
+ ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
+ zone->len >> log_sectors_per_block);
+ if (ret) {
+ f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
+ fdev->path, ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
+ block_t zone_blkaddr)
+{
+ int i;
+
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (!bdev_is_zoned(FDEV(i).bdev))
+ continue;
+ if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
+ zone_blkaddr <= FDEV(i).end_blk))
+ return &FDEV(i);
+ }
+
+ return NULL;
+}
+
+static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
+ void *data) {
+ memcpy(data, zone, sizeof(struct blk_zone));
+ return 0;
+}
+
+static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *cs = CURSEG_I(sbi, type);
+ struct f2fs_dev_info *zbd;
+ struct blk_zone zone;
+ unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
+ block_t cs_zone_block, wp_block;
+ unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
+ sector_t zone_sector;
+ int err;
+
+ cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
+ cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
+
+ zbd = get_target_zoned_dev(sbi, cs_zone_block);
+ if (!zbd)
+ return 0;
+
+ /* report zone for the sector the curseg points to */
+ zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
+ << log_sectors_per_block;
+ err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
+ report_one_zone_cb, &zone);
+ if (err != 1) {
+ f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
+ zbd->path, err);
+ return err;
+ }
+
+ if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+ return 0;
+
+ wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
+ wp_segno = GET_SEGNO(sbi, wp_block);
+ wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
+ wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
+
+ if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
+ wp_sector_off == 0)
+ return 0;
+
+ f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
+ "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
+ type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
+
+ f2fs_notice(sbi, "Assign new section to curseg[%d]: "
+ "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
+ allocate_segment_by_default(sbi, type, true);
+
+ /* check consistency of the zone curseg pointed to */
+ if (check_zone_write_pointer(sbi, zbd, &zone))
+ return -EIO;
+
+ /* check newly assigned zone */
+ cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
+ cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
+
+ zbd = get_target_zoned_dev(sbi, cs_zone_block);
+ if (!zbd)
+ return 0;
+
+ zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
+ << log_sectors_per_block;
+ err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
+ report_one_zone_cb, &zone);
+ if (err != 1) {
+ f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
+ zbd->path, err);
+ return err;
+ }
+
+ if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+ return 0;
+
+ if (zone.wp != zone.start) {
+ f2fs_notice(sbi,
+ "New zone for curseg[%d] is not yet discarded. "
+ "Reset the zone: curseg[0x%x,0x%x]",
+ type, cs->segno, cs->next_blkoff);
+ err = __f2fs_issue_discard_zone(sbi, zbd->bdev,
+ zone_sector >> log_sectors_per_block,
+ zone.len >> log_sectors_per_block);
+ if (err) {
+ f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
+ zbd->path, err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
+{
+ int i, ret;
+
+ for (i = 0; i < NR_PERSISTENT_LOG; i++) {
+ ret = fix_curseg_write_pointer(sbi, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+struct check_zone_write_pointer_args {
+ struct f2fs_sb_info *sbi;
+ struct f2fs_dev_info *fdev;
+};
+
+static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
+ void *data) {
+ struct check_zone_write_pointer_args *args;
+ args = (struct check_zone_write_pointer_args *)data;
+
+ return check_zone_write_pointer(args->sbi, args->fdev, zone);
+}
+
+int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
+{
+ int i, ret;
+ struct check_zone_write_pointer_args args;
+
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (!bdev_is_zoned(FDEV(i).bdev))
+ continue;
+
+ args.sbi = sbi;
+ args.fdev = &FDEV(i);
+ ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
+ check_zone_write_pointer_cb, &args);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
+ unsigned int dev_idx)
+{
+ if (!bdev_is_zoned(FDEV(dev_idx).bdev))
+ return true;
+ return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
+}
+
+/* Return the zone index in the given device */
+static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
+ int dev_idx)
+{
+ block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
+
+ return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
+ sbi->log_blocks_per_blkz;
+}
+
+/*
+ * Return the usable segments in a section based on the zone's
+ * corresponding zone capacity. Zone is equal to a section.
+ */
+static inline unsigned int f2fs_usable_zone_segs_in_sec(
+ struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ unsigned int dev_idx, zone_idx, unusable_segs_in_sec;
+
+ dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
+ zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
+
+ /* Conventional zone's capacity is always equal to zone size */
+ if (is_conv_zone(sbi, zone_idx, dev_idx))
+ return sbi->segs_per_sec;
+
+ /*
+ * If the zone_capacity_blocks array is NULL, then zone capacity
+ * is equal to the zone size for all zones
+ */
+ if (!FDEV(dev_idx).zone_capacity_blocks)
+ return sbi->segs_per_sec;
+
+ /* Get the segment count beyond zone capacity block */
+ unusable_segs_in_sec = (sbi->blocks_per_blkz -
+ FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >>
+ sbi->log_blocks_per_seg;
+ return sbi->segs_per_sec - unusable_segs_in_sec;
+}
+
+/*
+ * Return the number of usable blocks in a segment. The number of blocks
+ * returned is always equal to the number of blocks in a segment for
+ * segments fully contained within a sequential zone capacity or a
+ * conventional zone. For segments partially contained in a sequential
+ * zone capacity, the number of usable blocks up to the zone capacity
+ * is returned. 0 is returned in all other cases.
+ */
+static inline unsigned int f2fs_usable_zone_blks_in_seg(
+ struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
+ unsigned int zone_idx, dev_idx, secno;
+
+ secno = GET_SEC_FROM_SEG(sbi, segno);
+ seg_start = START_BLOCK(sbi, segno);
+ dev_idx = f2fs_target_device_index(sbi, seg_start);
+ zone_idx = get_zone_idx(sbi, secno, dev_idx);
+
+ /*
+ * Conventional zone's capacity is always equal to zone size,
+ * so, blocks per segment is unchanged.
+ */
+ if (is_conv_zone(sbi, zone_idx, dev_idx))
+ return sbi->blocks_per_seg;
+
+ if (!FDEV(dev_idx).zone_capacity_blocks)
+ return sbi->blocks_per_seg;
+
+ sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
+ sec_cap_blkaddr = sec_start_blkaddr +
+ FDEV(dev_idx).zone_capacity_blocks[zone_idx];
+
+ /*
+ * If segment starts before zone capacity and spans beyond
+ * zone capacity, then usable blocks are from seg start to
+ * zone capacity. If the segment starts after the zone capacity,
+ * then there are no usable blocks.
+ */
+ if (seg_start >= sec_cap_blkaddr)
+ return 0;
+ if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
+ return sec_cap_blkaddr - seg_start;
+
+ return sbi->blocks_per_seg;
+}
+#else
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
+{
+ return 0;
+}
+
+int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
+{
+ return 0;
+}
+
+static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return 0;
+}
+
+static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return 0;
+}
+#endif
+unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return f2fs_usable_zone_blks_in_seg(sbi, segno);
+
+ return sbi->blocks_per_seg;
+}
+
+unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return f2fs_usable_zone_segs_in_sec(sbi, segno);
+
+ return sbi->segs_per_sec;
+}
+
/*
* Update min, max modified time for cost-benefit GC algorithm
*/
@@ -4372,6 +5076,7 @@
sit_i->min_mtime = mtime;
}
sit_i->max_mtime = get_mtime(sbi, false);
+ sit_i->dirty_max_mtime = 0;
up_write(&sit_i->sentry_lock);
}
@@ -4400,7 +5105,7 @@
if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
- if (!test_opt(sbi, LFS))
+ if (!f2fs_lfs_mode(sbi))
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
@@ -4479,9 +5184,15 @@
for (i = 0; i < NR_DIRTY_TYPE; i++)
discard_dirty_segmap(sbi, i);
+ if (__is_large_section(sbi)) {
+ mutex_lock(&dirty_i->seglist_lock);
+ kvfree(dirty_i->dirty_secmap);
+ mutex_unlock(&dirty_i->seglist_lock);
+ }
+
destroy_victim_secmap(sbi);
SM_I(sbi)->dirty_info = NULL;
- kvfree(dirty_i);
+ kfree(dirty_i);
}
static void destroy_curseg(struct f2fs_sb_info *sbi)
@@ -4493,10 +5204,10 @@
return;
SM_I(sbi)->curseg_array = NULL;
for (i = 0; i < NR_CURSEG_TYPE; i++) {
- kvfree(array[i].sum_blk);
- kvfree(array[i].journal);
+ kfree(array[i].sum_blk);
+ kfree(array[i].journal);
}
- kvfree(array);
+ kfree(array);
}
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
@@ -4507,7 +5218,7 @@
SM_I(sbi)->free_info = NULL;
kvfree(free_i->free_segmap);
kvfree(free_i->free_secmap);
- kvfree(free_i);
+ kfree(free_i);
}
static void destroy_sit_info(struct f2fs_sb_info *sbi)
@@ -4519,7 +5230,7 @@
if (sit_i->sentries)
kvfree(sit_i->bitmap);
- kvfree(sit_i->tmp_map);
+ kfree(sit_i->tmp_map);
kvfree(sit_i->sentries);
kvfree(sit_i->sec_entries);
@@ -4531,7 +5242,7 @@
kvfree(sit_i->sit_bitmap_mir);
kvfree(sit_i->invalid_segmap);
#endif
- kvfree(sit_i);
+ kfree(sit_i);
}
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
@@ -4547,27 +5258,27 @@
destroy_free_segmap(sbi);
destroy_sit_info(sbi);
sbi->sm_info = NULL;
- kvfree(sm_info);
+ kfree(sm_info);
}
int __init f2fs_create_segment_manager_caches(void)
{
- discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
+ discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
sizeof(struct discard_entry));
if (!discard_entry_slab)
goto fail;
- discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
+ discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
sizeof(struct discard_cmd));
if (!discard_cmd_slab)
goto destroy_discard_entry;
- sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
+ sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
sizeof(struct sit_entry_set));
if (!sit_entry_set_slab)
goto destroy_discard_cmd;
- inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
+ inmem_entry_slab = f2fs_kmem_cache_create("f2fs_inmem_page_entry",
sizeof(struct inmem_pages));
if (!inmem_entry_slab)
goto destroy_sit_entry_set;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 2034b9a..beef833 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/segment.h
*
@@ -16,13 +16,20 @@
#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
+#define F2FS_MIN_META_SEGMENTS 8 /* SB + 2 (CP + SIT + NAT) + SSA */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno)
#define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno)
#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
-#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE)
+#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
+
+static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
+ unsigned short seg_type)
+{
+ f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG);
+}
#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
@@ -34,7 +41,9 @@
((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
+ ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \
+ ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \
+ ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno))
#define IS_CURSEC(sbi, secno) \
(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
@@ -48,7 +57,11 @@
((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
- (sbi)->segs_per_sec)) \
+ (sbi)->segs_per_sec) || \
+ ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \
+ (sbi)->segs_per_sec) || \
+ ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \
+ (sbi)->segs_per_sec))
#define MAIN_BLKADDR(sbi) \
(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
@@ -132,20 +145,25 @@
* In the victim_sel_policy->alloc_mode, there are two block allocation modes.
* LFS writes data sequentially with cleaning operations.
* SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
+ * AT_SSR (Age Threshold based Slack Space Recycle) merges fragments into
+ * fragmented segment which has similar aging degree.
*/
enum {
LFS = 0,
- SSR
+ SSR,
+ AT_SSR,
};
/*
* In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes.
* GC_CB is based on cost-benefit algorithm.
* GC_GREEDY is based on greedy algorithm.
+ * GC_AT is based on age-threshold algorithm.
*/
enum {
GC_CB = 0,
GC_GREEDY,
+ GC_AT,
ALLOC_NEXT,
FLUSH_DEVICE,
MAX_GC_POLICY,
@@ -166,12 +184,18 @@
struct victim_sel_policy {
int alloc_mode; /* LFS or SSR */
int gc_mode; /* GC_CB or GC_GREEDY */
- unsigned long *dirty_segmap; /* dirty segment bitmap */
- unsigned int max_search; /* maximum # of segments to search */
+ unsigned long *dirty_bitmap; /* dirty segment/section bitmap */
+ unsigned int max_search; /*
+ * maximum # of segments/sections
+ * to search
+ */
unsigned int offset; /* last scanned bitmap offset */
unsigned int ofs_unit; /* bitmap search unit */
unsigned int min_cost; /* minimum cost */
+ unsigned long long oldest_age; /* oldest age of segments having the same min cost */
unsigned int min_segno; /* segment # having min. cost */
+ unsigned long long age; /* mtime of GCed section*/
+ unsigned long long age_threshold;/* age threshold */
};
struct seg_entry {
@@ -184,7 +208,7 @@
unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */
#endif
/*
- * # of valid blocks and the validity bitmap stored in the the last
+ * # of valid blocks and the validity bitmap stored in the last
* checkpoint pack. This information is used by the SSR mode.
*/
unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */
@@ -200,18 +224,6 @@
void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
};
-/*
- * this value is set in page as a private data which indicate that
- * the page is atomically written, and it is in inmem_pages list.
- */
-#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
-#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
-
-#define IS_ATOMIC_WRITTEN_PAGE(page) \
- (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
-#define IS_DUMMY_WRITTEN_PAGE(page) \
- (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
-
#define MAX_SKIP_GC_COUNT 16
struct inmem_pages {
@@ -249,6 +261,8 @@
unsigned long long mounted_time; /* mount time */
unsigned long long min_mtime; /* min. modification time */
unsigned long long max_mtime; /* max. modification time */
+ unsigned long long dirty_min_mtime; /* rerange candidates in GC_AT */
+ unsigned long long dirty_max_mtime; /* rerange candidates in GC_AT */
unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
};
@@ -278,6 +292,7 @@
struct dirty_seglist_info {
const struct victim_selection *v_ops; /* victim selction operation */
unsigned long *dirty_segmap[NR_DIRTY_TYPE];
+ unsigned long *dirty_secmap;
struct mutex seglist_lock; /* lock for segment bitmaps */
int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */
unsigned long *victim_secmap; /* background GC victims */
@@ -286,7 +301,7 @@
/* victim selection function for cleaning and SSR */
struct victim_selection {
int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
- int, int, char);
+ int, int, char, unsigned long long);
};
/* for active log information */
@@ -296,10 +311,12 @@
struct rw_semaphore journal_rwsem; /* protect journal area */
struct f2fs_journal *journal; /* cached journal info */
unsigned char alloc_type; /* current allocation type */
+ unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */
unsigned int segno; /* current segment number */
unsigned short next_blkoff; /* next block offset to write */
unsigned int zone; /* current zone number */
unsigned int next_segno; /* preallocated segment */
+ bool inited; /* indicate inmem log is inited */
};
struct sit_entry_set {
@@ -344,8 +361,20 @@
}
static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
- unsigned int segno)
+ unsigned int segno, bool use_section)
{
+ if (use_section && __is_large_section(sbi)) {
+ unsigned int start_segno = START_SEGNO(segno);
+ unsigned int blocks = 0;
+ int i;
+
+ for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) {
+ struct seg_entry *se = get_seg_entry(sbi, start_segno);
+
+ blocks += se->ckpt_valid_blocks;
+ }
+ return blocks;
+ }
return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
}
@@ -417,6 +446,7 @@
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
+ unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
spin_lock(&free_i->segmap_lock);
clear_bit(segno, free_i->free_segmap);
@@ -424,7 +454,7 @@
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
- if (next >= start_segno + sbi->segs_per_sec) {
+ if (next >= start_segno + usable_segs) {
clear_bit(secno, free_i->free_secmap);
free_i->free_sections++;
}
@@ -444,22 +474,23 @@
}
static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
- unsigned int segno)
+ unsigned int segno, bool inmem)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
+ unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
spin_lock(&free_i->segmap_lock);
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
- if (IS_CURSEC(sbi, secno))
+ if (!inmem && IS_CURSEC(sbi, secno))
goto skip_free;
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
- if (next >= start_segno + sbi->segs_per_sec) {
+ if (next >= start_segno + usable_segs) {
if (test_and_clear_bit(secno, free_i->free_secmap))
free_i->free_sections++;
}
@@ -506,9 +537,10 @@
return FREE_I(sbi)->free_segments;
}
-static inline int reserved_segments(struct f2fs_sb_info *sbi)
+static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
{
- return SM_I(sbi)->reserved_segments;
+ return SM_I(sbi)->reserved_segments +
+ SM_I(sbi)->additional_reserved_segments;
}
static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
@@ -538,7 +570,7 @@
static inline int reserved_sections(struct f2fs_sb_info *sbi)
{
- return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
+ return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
}
static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
@@ -552,8 +584,8 @@
/* check current node segment */
for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
segno = CURSEG_I(sbi, i)->segno;
- left_blocks = sbi->blocks_per_seg -
- get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+ left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
+ get_seg_entry(sbi, segno)->ckpt_valid_blocks;
if (node_blocks > left_blocks)
return false;
@@ -561,7 +593,7 @@
/* check current data segment */
segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
- left_blocks = sbi->blocks_per_seg -
+ left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
get_seg_entry(sbi, segno)->ckpt_valid_blocks;
if (dent_blocks > left_blocks)
return false;
@@ -617,8 +649,10 @@
* threashold,
* F2FS_IPU_FSYNC - activated in fsync path only for high performance flash
* storages. IPU will be triggered only if the # of dirty
- * pages over min_fsync_blocks.
- * F2FS_IPUT_DISABLE - disable IPU. (=default option)
+ * pages over min_fsync_blocks. (=default option)
+ * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests.
+ * F2FS_IPU_NOCACHE - disable IPU bio cache.
+ * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode)
*/
#define DEF_MIN_IPU_UTIL 70
#define DEF_MIN_FSYNC_BLOCKS 8
@@ -633,6 +667,7 @@
F2FS_IPU_SSR_UTIL,
F2FS_IPU_FSYNC,
F2FS_IPU_ASYNC,
+ F2FS_IPU_NOCACHE,
};
static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
@@ -680,21 +715,22 @@
bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
int cur_pos = 0, next_pos;
+ unsigned int usable_blks_per_seg = f2fs_usable_blks_in_seg(sbi, segno);
/* check bitmap with valid block count */
do {
if (is_valid) {
next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
- sbi->blocks_per_seg,
+ usable_blks_per_seg,
cur_pos);
valid_blocks += next_pos - cur_pos;
} else
next_pos = find_next_bit_le(&raw_sit->valid_map,
- sbi->blocks_per_seg,
+ usable_blks_per_seg,
cur_pos);
cur_pos = next_pos;
is_valid = !is_valid;
- } while (cur_pos < sbi->blocks_per_seg);
+ } while (cur_pos < usable_blks_per_seg);
if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
@@ -703,8 +739,13 @@
return -EFSCORRUPTED;
}
+ if (usable_blks_per_seg < sbi->blocks_per_seg)
+ f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map,
+ sbi->blocks_per_seg,
+ usable_blks_per_seg) != sbi->blocks_per_seg);
+
/* check segment usage, and check boundary of a given segment number */
- if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
+ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
GET_SIT_VBLOCKS(raw_sit), segno);
@@ -763,7 +804,7 @@
bool base_time)
{
struct sit_info *sit_i = SIT_I(sbi);
- time64_t diff, now = ktime_get_real_seconds();
+ time64_t diff, now = ktime_get_boottime_seconds();
if (now >= sit_i->mounted_time)
return sit_i->elapsed_time + now - sit_i->mounted_time;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 3ceebaa..dd3c3c7 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -56,7 +56,7 @@
/* count extent cache entries */
count += __count_extent_cache(sbi);
- /* shrink clean nat cache entries */
+ /* count clean nat cache entries */
count += __count_nat_entries(sbi);
/* count free nids cache entries */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 41bf656..af98abb 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -24,6 +24,7 @@
#include <linux/sysfs.h>
#include <linux/quota.h>
#include <linux/unicode.h>
+#include <linux/part_stat.h>
#include "f2fs.h"
#include "node.h"
@@ -137,10 +138,15 @@
Opt_alloc,
Opt_fsync,
Opt_test_dummy_encryption,
+ Opt_inlinecrypt,
Opt_checkpoint_disable,
Opt_checkpoint_disable_cap,
Opt_checkpoint_disable_cap_perc,
Opt_checkpoint_enable,
+ Opt_compress_algorithm,
+ Opt_compress_log_size,
+ Opt_compress_extension,
+ Opt_atgc,
Opt_err,
};
@@ -198,11 +204,17 @@
{Opt_whint, "whint_mode=%s"},
{Opt_alloc, "alloc_mode=%s"},
{Opt_fsync, "fsync_mode=%s"},
+ {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
+ {Opt_inlinecrypt, "inlinecrypt"},
{Opt_checkpoint_disable, "checkpoint=disable"},
{Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
{Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
{Opt_checkpoint_enable, "checkpoint=enable"},
+ {Opt_compress_algorithm, "compress_algorithm=%s"},
+ {Opt_compress_log_size, "compress_log_size=%u"},
+ {Opt_compress_extension, "compress_extension=%s"},
+ {Opt_atgc, "atgc"},
{Opt_err, NULL},
};
@@ -277,6 +289,46 @@
F2FS_OPTION(sbi).s_resgid));
}
+static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
+{
+ unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
+ unsigned int avg_vblocks;
+ unsigned int wanted_reserved_segments;
+ block_t avail_user_block_count;
+
+ if (!F2FS_IO_ALIGNED(sbi))
+ return 0;
+
+ /* average valid block count in section in worst case */
+ avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
+
+ /*
+ * we need enough free space when migrating one section in worst case
+ */
+ wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
+ reserved_segments(sbi);
+ wanted_reserved_segments -= reserved_segments(sbi);
+
+ avail_user_block_count = sbi->user_block_count -
+ sbi->current_reserved_blocks -
+ F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (wanted_reserved_segments * sbi->blocks_per_seg >
+ avail_user_block_count) {
+ f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
+ wanted_reserved_segments,
+ avail_user_block_count >> sbi->log_blocks_per_seg);
+ return -ENOSPC;
+ }
+
+ SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
+
+ f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
+ wanted_reserved_segments);
+
+ return 0;
+}
+
static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
{
if (!F2FS_OPTION(sbi).unusable_cap_perc)
@@ -340,7 +392,7 @@
set_opt(sbi, QUOTA);
return 0;
errout:
- kvfree(qname);
+ kfree(qname);
return ret;
}
@@ -352,7 +404,7 @@
f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
return -EINVAL;
}
- kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
+ kfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
return 0;
}
@@ -403,17 +455,64 @@
}
#endif
-static int parse_options(struct super_block *sb, char *options)
+static int f2fs_set_test_dummy_encryption(struct super_block *sb,
+ const char *opt,
+ const substring_t *arg,
+ bool is_remount)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+#ifdef CONFIG_FS_ENCRYPTION
+ int err;
+
+ if (!f2fs_sb_has_encrypt(sbi)) {
+ f2fs_err(sbi, "Encrypt feature is off");
+ return -EINVAL;
+ }
+
+ /*
+ * This mount option is just for testing, and it's not worthwhile to
+ * implement the extra complexity (e.g. RCU protection) that would be
+ * needed to allow it to be set or changed during remount. We do allow
+ * it to be specified during remount, but only if there is no change.
+ */
+ if (is_remount && !F2FS_OPTION(sbi).dummy_enc_policy.policy) {
+ f2fs_warn(sbi, "Can't set test_dummy_encryption on remount");
+ return -EINVAL;
+ }
+ err = fscrypt_set_test_dummy_encryption(
+ sb, arg->from, &F2FS_OPTION(sbi).dummy_enc_policy);
+ if (err) {
+ if (err == -EEXIST)
+ f2fs_warn(sbi,
+ "Can't change test_dummy_encryption on remount");
+ else if (err == -EINVAL)
+ f2fs_warn(sbi, "Value of option \"%s\" is unrecognized",
+ opt);
+ else
+ f2fs_warn(sbi, "Error processing option \"%s\" [%d]",
+ opt, err);
+ return -EINVAL;
+ }
+ f2fs_warn(sbi, "Test dummy encryption mode enabled");
+#else
+ f2fs_warn(sbi, "Test dummy encryption mount option ignored");
+#endif
+ return 0;
+}
+
+static int parse_options(struct super_block *sb, char *options, bool is_remount)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
substring_t args[MAX_OPT_ARGS];
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ unsigned char (*ext)[F2FS_EXTENSION_LEN];
+ int ext_cnt;
+#endif
char *p, *name;
int arg = 0;
kuid_t uid;
kgid_t gid;
-#ifdef CONFIG_QUOTA
int ret;
-#endif
if (!options)
return 0;
@@ -435,20 +534,17 @@
if (!name)
return -ENOMEM;
- if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
- set_opt(sbi, BG_GC);
- clear_opt(sbi, FORCE_FG_GC);
- } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
- clear_opt(sbi, BG_GC);
- clear_opt(sbi, FORCE_FG_GC);
- } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
- set_opt(sbi, BG_GC);
- set_opt(sbi, FORCE_FG_GC);
+ if (!strcmp(name, "on")) {
+ F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
+ } else if (!strcmp(name, "off")) {
+ F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF;
+ } else if (!strcmp(name, "sync")) {
+ F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC;
} else {
- kvfree(name);
+ kfree(name);
return -EINVAL;
}
- kvfree(name);
+ kfree(name);
break;
case Opt_disable_roll_forward:
set_opt(sbi, DISABLE_ROLL_FORWARD);
@@ -526,7 +622,8 @@
case Opt_active_logs:
if (args->from && match_int(args, &arg))
return -EINVAL;
- if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
+ if (arg != 2 && arg != 4 &&
+ arg != NR_CURSEG_PERSIST_TYPE)
return -EINVAL;
F2FS_OPTION(sbi).active_logs = arg;
break;
@@ -602,22 +699,20 @@
if (!name)
return -ENOMEM;
- if (strlen(name) == 8 &&
- !strncmp(name, "adaptive", 8)) {
+ if (!strcmp(name, "adaptive")) {
if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature");
- kvfree(name);
+ kfree(name);
return -EINVAL;
}
- set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
- } else if (strlen(name) == 3 &&
- !strncmp(name, "lfs", 3)) {
- set_opt_mode(sbi, F2FS_MOUNT_LFS);
+ F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
+ } else if (!strcmp(name, "lfs")) {
+ F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
} else {
- kvfree(name);
+ kfree(name);
return -EINVAL;
}
- kvfree(name);
+ kfree(name);
break;
case Opt_io_size_bits:
if (args->from && match_int(args, &arg))
@@ -736,69 +831,61 @@
name = match_strdup(&args[0]);
if (!name)
return -ENOMEM;
- if (strlen(name) == 10 &&
- !strncmp(name, "user-based", 10)) {
+ if (!strcmp(name, "user-based")) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER;
- } else if (strlen(name) == 3 &&
- !strncmp(name, "off", 3)) {
+ } else if (!strcmp(name, "off")) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
- } else if (strlen(name) == 8 &&
- !strncmp(name, "fs-based", 8)) {
+ } else if (!strcmp(name, "fs-based")) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
} else {
- kvfree(name);
+ kfree(name);
return -EINVAL;
}
- kvfree(name);
+ kfree(name);
break;
case Opt_alloc:
name = match_strdup(&args[0]);
if (!name)
return -ENOMEM;
- if (strlen(name) == 7 &&
- !strncmp(name, "default", 7)) {
+ if (!strcmp(name, "default")) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
- } else if (strlen(name) == 5 &&
- !strncmp(name, "reuse", 5)) {
+ } else if (!strcmp(name, "reuse")) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
} else {
- kvfree(name);
+ kfree(name);
return -EINVAL;
}
- kvfree(name);
+ kfree(name);
break;
case Opt_fsync:
name = match_strdup(&args[0]);
if (!name)
return -ENOMEM;
- if (strlen(name) == 5 &&
- !strncmp(name, "posix", 5)) {
+ if (!strcmp(name, "posix")) {
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
- } else if (strlen(name) == 6 &&
- !strncmp(name, "strict", 6)) {
+ } else if (!strcmp(name, "strict")) {
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT;
- } else if (strlen(name) == 9 &&
- !strncmp(name, "nobarrier", 9)) {
+ } else if (!strcmp(name, "nobarrier")) {
F2FS_OPTION(sbi).fsync_mode =
FSYNC_MODE_NOBARRIER;
} else {
- kvfree(name);
+ kfree(name);
return -EINVAL;
}
- kvfree(name);
+ kfree(name);
break;
case Opt_test_dummy_encryption:
-#ifdef CONFIG_FS_ENCRYPTION
- if (!f2fs_sb_has_encrypt(sbi)) {
- f2fs_err(sbi, "Encrypt feature is off");
- return -EINVAL;
- }
-
- F2FS_OPTION(sbi).test_dummy_encryption = true;
- f2fs_info(sbi, "Test dummy encryption mode enabled");
+ ret = f2fs_set_test_dummy_encryption(sb, p, &args[0],
+ is_remount);
+ if (ret)
+ return ret;
+ break;
+ case Opt_inlinecrypt:
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+ sb->s_flags |= SB_INLINECRYPT;
#else
- f2fs_info(sbi, "Test dummy encryption mount option ignored");
+ f2fs_info(sbi, "inline encryption not supported");
#endif
break;
case Opt_checkpoint_disable_cap_perc:
@@ -821,6 +908,82 @@
case Opt_checkpoint_enable:
clear_opt(sbi, DISABLE_CHECKPOINT);
break;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ case Opt_compress_algorithm:
+ if (!f2fs_sb_has_compression(sbi)) {
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
+ }
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+ if (!strcmp(name, "lzo")) {
+ F2FS_OPTION(sbi).compress_algorithm =
+ COMPRESS_LZO;
+ } else if (!strcmp(name, "lz4")) {
+ F2FS_OPTION(sbi).compress_algorithm =
+ COMPRESS_LZ4;
+ } else if (!strcmp(name, "zstd")) {
+ F2FS_OPTION(sbi).compress_algorithm =
+ COMPRESS_ZSTD;
+ } else if (!strcmp(name, "lzo-rle")) {
+ F2FS_OPTION(sbi).compress_algorithm =
+ COMPRESS_LZORLE;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
+ case Opt_compress_log_size:
+ if (!f2fs_sb_has_compression(sbi)) {
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
+ }
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ if (arg < MIN_COMPRESS_LOG_SIZE ||
+ arg > MAX_COMPRESS_LOG_SIZE) {
+ f2fs_err(sbi,
+ "Compress cluster log size is out of range");
+ return -EINVAL;
+ }
+ F2FS_OPTION(sbi).compress_log_size = arg;
+ break;
+ case Opt_compress_extension:
+ if (!f2fs_sb_has_compression(sbi)) {
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
+ }
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+
+ ext = F2FS_OPTION(sbi).extensions;
+ ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
+
+ if (strlen(name) >= F2FS_EXTENSION_LEN ||
+ ext_cnt >= COMPRESS_EXT_NUM) {
+ f2fs_err(sbi,
+ "invalid extension length/number");
+ kfree(name);
+ return -EINVAL;
+ }
+
+ strcpy(ext[ext_cnt], name);
+ F2FS_OPTION(sbi).compress_ext_cnt++;
+ kfree(name);
+ break;
+#else
+ case Opt_compress_algorithm:
+ case Opt_compress_log_size:
+ case Opt_compress_extension:
+ f2fs_info(sbi, "compression options not supported");
+ break;
+#endif
+ case Opt_atgc:
+ set_opt(sbi, ATGC);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
@@ -847,8 +1010,19 @@
return -EINVAL;
}
#endif
+ /*
+ * The BLKZONED feature indicates that the drive was formatted with
+ * zone alignment optimization. This is optional for host-aware
+ * devices, but mandatory for host-managed zoned block devices.
+ */
+#ifndef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Zoned block device support is not enabled");
+ return -EINVAL;
+ }
+#endif
- if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
+ if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) {
f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
F2FS_IO_SIZE_KB(sbi));
return -EINVAL;
@@ -878,15 +1052,15 @@
}
}
- if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+ if (test_opt(sbi, DISABLE_CHECKPOINT) && f2fs_lfs_mode(sbi)) {
f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n");
return -EINVAL;
}
/* Not pass down write hints if the number of active logs is lesser
- * than NR_CURSEG_TYPE.
+ * than NR_CURSEG_PERSIST_TYPE.
*/
- if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
+ if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_PERSIST_TYPE)
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
return 0;
}
@@ -903,7 +1077,9 @@
/* Initialize f2fs-specific inode info */
atomic_set(&fi->dirty_pages, 0);
+ atomic_set(&fi->i_compr_blocks, 0);
init_rwsem(&fi->i_sem);
+ spin_lock_init(&fi->i_size_lock);
INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->gdirty_list);
INIT_LIST_HEAD(&fi->inmem_ilist);
@@ -917,6 +1093,8 @@
/* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level;
+ fi->ra_offset = -1;
+
return &fi->vfs_inode;
}
@@ -1064,6 +1242,7 @@
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
+ kfree(FDEV(i).zone_capacity_blocks);
#endif
}
kvfree(sbi->devs);
@@ -1139,26 +1318,31 @@
f2fs_destroy_node_manager(sbi);
f2fs_destroy_segment_manager(sbi);
+ f2fs_destroy_post_read_wq(sbi);
+
kvfree(sbi->ckpt);
sb->s_fs_info = NULL;
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
- kvfree(sbi->raw_super);
+ kfree(sbi->raw_super);
destroy_device_list(sbi);
+ f2fs_destroy_page_array_cache(sbi);
+ f2fs_destroy_xattr_caches(sbi);
mempool_destroy(sbi->write_io_dummy);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
+ fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
destroy_percpu_info(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
- utf8_unload(sbi->s_encoding);
+ utf8_unload(sb->s_encoding);
#endif
- kvfree(sbi);
+ kfree(sbi);
}
int f2fs_sync_fs(struct super_block *sb, int sync)
@@ -1181,9 +1365,9 @@
cpc.reason = __get_cp_reason(sbi);
- mutex_lock(&sbi->gc_mutex);
+ down_write(&sbi->gc_lock);
err = f2fs_write_checkpoint(sbi, &cpc);
- mutex_unlock(&sbi->gc_mutex);
+ up_write(&sbi->gc_lock);
}
f2fs_trace_ios(NULL, 1);
@@ -1298,8 +1482,7 @@
}
buf->f_namelen = F2FS_NAME_LEN;
- buf->f_fsid.val[0] = (u32)id;
- buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_fsid = u64_to_fsid(id);
#ifdef CONFIG_QUOTA
if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) &&
@@ -1347,18 +1530,52 @@
#endif
}
+static inline void f2fs_show_compress_options(struct seq_file *seq,
+ struct super_block *sb)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ char *algtype = "";
+ int i;
+
+ if (!f2fs_sb_has_compression(sbi))
+ return;
+
+ switch (F2FS_OPTION(sbi).compress_algorithm) {
+ case COMPRESS_LZO:
+ algtype = "lzo";
+ break;
+ case COMPRESS_LZ4:
+ algtype = "lz4";
+ break;
+ case COMPRESS_ZSTD:
+ algtype = "zstd";
+ break;
+ case COMPRESS_LZORLE:
+ algtype = "lzo-rle";
+ break;
+ }
+ seq_printf(seq, ",compress_algorithm=%s", algtype);
+
+ seq_printf(seq, ",compress_log_size=%u",
+ F2FS_OPTION(sbi).compress_log_size);
+
+ for (i = 0; i < F2FS_OPTION(sbi).compress_ext_cnt; i++) {
+ seq_printf(seq, ",compress_extension=%s",
+ F2FS_OPTION(sbi).extensions[i]);
+ }
+}
+
static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
{
struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
- if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
- if (test_opt(sbi, FORCE_FG_GC))
- seq_printf(seq, ",background_gc=%s", "sync");
- else
- seq_printf(seq, ",background_gc=%s", "on");
- } else {
+ if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC)
+ seq_printf(seq, ",background_gc=%s", "sync");
+ else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_ON)
+ seq_printf(seq, ",background_gc=%s", "on");
+ else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF)
seq_printf(seq, ",background_gc=%s", "off");
- }
+
if (test_opt(sbi, DISABLE_ROLL_FORWARD))
seq_puts(seq, ",disable_roll_forward");
if (test_opt(sbi, NORECOVERY))
@@ -1414,9 +1631,9 @@
seq_puts(seq, ",data_flush");
seq_puts(seq, ",mode=");
- if (test_opt(sbi, ADAPTIVE))
+ if (F2FS_OPTION(sbi).fs_mode == FS_MODE_ADAPTIVE)
seq_puts(seq, "adaptive");
- else if (test_opt(sbi, LFS))
+ else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS)
seq_puts(seq, "lfs");
seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
if (test_opt(sbi, RESERVE_ROOT))
@@ -1452,10 +1669,11 @@
seq_printf(seq, ",whint_mode=%s", "user-based");
else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS)
seq_printf(seq, ",whint_mode=%s", "fs-based");
-#ifdef CONFIG_FS_ENCRYPTION
- if (F2FS_OPTION(sbi).test_dummy_encryption)
- seq_puts(seq, ",test_dummy_encryption");
-#endif
+
+ fscrypt_show_test_dummy_encryption(seq, ',', sbi->sb);
+
+ if (sbi->sb->s_flags & SB_INLINECRYPT)
+ seq_puts(seq, ",inlinecrypt");
if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT)
seq_printf(seq, ",alloc_mode=%s", "default");
@@ -1471,22 +1689,33 @@
seq_printf(seq, ",fsync_mode=%s", "strict");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER)
seq_printf(seq, ",fsync_mode=%s", "nobarrier");
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ f2fs_show_compress_options(seq, sbi->sb);
+#endif
+
+ if (test_opt(sbi, ATGC))
+ seq_puts(seq, ",atgc");
return 0;
}
static void default_options(struct f2fs_sb_info *sbi)
{
/* init some FS parameters */
- F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
+ F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
- F2FS_OPTION(sbi).test_dummy_encryption = false;
F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
+ F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4;
+ F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE;
+ F2FS_OPTION(sbi).compress_ext_cnt = 0;
+ F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
- set_opt(sbi, BG_GC);
+ sbi->sb->s_flags &= ~SB_INLINECRYPT;
+
set_opt(sbi, INLINE_XATTR);
set_opt(sbi, INLINE_DATA);
set_opt(sbi, INLINE_DENTRY);
@@ -1498,9 +1727,9 @@
set_opt(sbi, FLUSH_MERGE);
set_opt(sbi, DISCARD);
if (f2fs_sb_has_blkzoned(sbi))
- set_opt_mode(sbi, F2FS_MOUNT_LFS);
+ F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
else
- set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
+ F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -1533,8 +1762,8 @@
f2fs_update_time(sbi, DISABLE_TIME);
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
- mutex_lock(&sbi->gc_mutex);
- err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+ down_write(&sbi->gc_lock);
+ err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
if (err == -ENODATA) {
err = 0;
break;
@@ -1555,7 +1784,7 @@
goto restore_flag;
}
- mutex_lock(&sbi->gc_mutex);
+ down_write(&sbi->gc_lock);
cpc.reason = CP_PAUSE;
set_sbi_flag(sbi, SBI_CP_DISABLED);
err = f2fs_write_checkpoint(sbi, &cpc);
@@ -1567,20 +1796,32 @@
spin_unlock(&sbi->stat_lock);
out_unlock:
- mutex_unlock(&sbi->gc_mutex);
+ up_write(&sbi->gc_lock);
restore_flag:
- sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+ sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
return err;
}
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
{
- mutex_lock(&sbi->gc_mutex);
+ int retry = DEFAULT_RETRY_IO_COUNT;
+
+ /* we should flush all the data to keep data consistency */
+ do {
+ sync_inodes_sb(sbi->sb);
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ } while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);
+
+ if (unlikely(retry < 0))
+ f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
+
+ down_write(&sbi->gc_lock);
f2fs_dirty_to_prefree(sbi);
clear_sbi_flag(sbi, SBI_CP_DISABLED);
set_sbi_flag(sbi, SBI_IS_DIRTY);
- mutex_unlock(&sbi->gc_mutex);
+ up_write(&sbi->gc_lock);
f2fs_sync_fs(sbi->sb, 1);
}
@@ -1596,6 +1837,7 @@
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
bool no_io_align = !F2FS_IO_ALIGNED(sbi);
+ bool no_atgc = !test_opt(sbi, ATGC);
bool checkpoint_changed;
#ifdef CONFIG_QUOTA
int i, j;
@@ -1617,7 +1859,7 @@
GFP_KERNEL);
if (!org_mount_opt.s_qf_names[i]) {
for (j = 0; j < i; j++)
- kvfree(org_mount_opt.s_qf_names[j]);
+ kfree(org_mount_opt.s_qf_names[j]);
return -ENOMEM;
}
} else {
@@ -1638,7 +1880,7 @@
default_options(sbi);
/* parse mount options */
- err = parse_options(sb, data);
+ err = parse_options(sb, data, true);
if (err)
goto restore_opts;
checkpoint_changed =
@@ -1668,6 +1910,13 @@
}
}
#endif
+ /* disallow enable atgc dynamically */
+ if (no_atgc == !!test_opt(sbi, ATGC)) {
+ err = -EINVAL;
+ f2fs_warn(sbi, "switch atgc option is not allowed");
+ goto restore_opts;
+ }
+
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
@@ -1692,7 +1941,8 @@
* or if background_gc = off is passed in mount
* option. Also sync the filesystem.
*/
- if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
+ if ((*flags & SB_RDONLY) ||
+ F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) {
if (sbi->gc_thread) {
f2fs_stop_gc_thread(sbi);
need_restart_gc = true;
@@ -1741,7 +1991,7 @@
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
- kvfree(org_mount_opt.s_qf_names[i]);
+ kfree(org_mount_opt.s_qf_names[i]);
#endif
/* Update the POSIXACL Flag */
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
@@ -1762,7 +2012,7 @@
#ifdef CONFIG_QUOTA
F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++) {
- kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kfree(F2FS_OPTION(sbi).s_qf_names[i]);
F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i];
}
#endif
@@ -1798,7 +2048,8 @@
page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS);
if (IS_ERR(page)) {
if (PTR_ERR(page) == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC,
+ DEFAULT_IO_TIMEOUT);
goto repeat;
}
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
@@ -1853,7 +2104,8 @@
&page, &fsdata);
if (unlikely(err)) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ congestion_wait(BLK_RW_ASYNC,
+ DEFAULT_IO_TIMEOUT);
goto retry;
}
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
@@ -1951,7 +2203,7 @@
/* Don't account quota for quota files to avoid recursion */
qf_inode->i_flags |= S_NOQUOTA;
- err = dquot_enable(qf_inode, type, format_id, flags);
+ err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
iput(qf_inode);
return err;
}
@@ -2181,7 +2433,7 @@
struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
int ret;
- down_read(&sbi->quota_sem);
+ down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING);
ret = dquot_commit(dquot);
if (ret < 0)
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
@@ -2205,13 +2457,10 @@
static int f2fs_dquot_release(struct dquot *dquot)
{
struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
- int ret;
+ int ret = dquot_release(dquot);
- down_read(&sbi->quota_sem);
- ret = dquot_release(dquot);
if (ret < 0)
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
- up_read(&sbi->quota_sem);
return ret;
}
@@ -2219,29 +2468,22 @@
{
struct super_block *sb = dquot->dq_sb;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
- int ret;
-
- down_read(&sbi->quota_sem);
- ret = dquot_mark_dquot_dirty(dquot);
+ int ret = dquot_mark_dquot_dirty(dquot);
/* if we are using journalled quota */
if (is_journalled_quota(sbi))
set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
- up_read(&sbi->quota_sem);
return ret;
}
static int f2fs_dquot_commit_info(struct super_block *sb, int type)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
- int ret;
+ int ret = dquot_commit_info(sb, type);
- down_read(&sbi->quota_sem);
- ret = dquot_commit_info(sb, type);
if (ret < 0)
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
- up_read(&sbi->quota_sem);
return ret;
}
@@ -2334,18 +2576,53 @@
ctx, len, fs_data, XATTR_CREATE);
}
-static bool f2fs_dummy_context(struct inode *inode)
+static const union fscrypt_policy *f2fs_get_dummy_policy(struct super_block *sb)
{
- return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode));
+ return F2FS_OPTION(F2FS_SB(sb)).dummy_enc_policy.policy;
+}
+
+static bool f2fs_has_stable_inodes(struct super_block *sb)
+{
+ return true;
+}
+
+static void f2fs_get_ino_and_lblk_bits(struct super_block *sb,
+ int *ino_bits_ret, int *lblk_bits_ret)
+{
+ *ino_bits_ret = 8 * sizeof(nid_t);
+ *lblk_bits_ret = 8 * sizeof(block_t);
+}
+
+static int f2fs_get_num_devices(struct super_block *sb)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ if (f2fs_is_multi_device(sbi))
+ return sbi->s_ndevs;
+ return 1;
+}
+
+static void f2fs_get_devices(struct super_block *sb,
+ struct request_queue **devs)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int i;
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ devs[i] = bdev_get_queue(FDEV(i).bdev);
}
static const struct fscrypt_operations f2fs_cryptops = {
- .key_prefix = "f2fs:",
- .get_context = f2fs_get_context,
- .set_context = f2fs_set_context,
- .dummy_context = f2fs_dummy_context,
- .empty_dir = f2fs_empty_dir,
- .max_namelen = F2FS_NAME_LEN,
+ .key_prefix = "f2fs:",
+ .get_context = f2fs_get_context,
+ .set_context = f2fs_set_context,
+ .get_dummy_policy = f2fs_get_dummy_policy,
+ .empty_dir = f2fs_empty_dir,
+ .max_namelen = F2FS_NAME_LEN,
+ .has_stable_inodes = f2fs_has_stable_inodes,
+ .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits,
+ .get_num_devices = f2fs_get_num_devices,
+ .get_devices = f2fs_get_devices,
};
#endif
@@ -2496,10 +2773,8 @@
}
if (main_end_blkaddr > seg_end_blkaddr) {
- f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
- main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
+ f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%llu) block(%u)",
+ main_blkaddr, seg_end_blkaddr,
segment_count_main << log_blocks_per_seg);
return true;
} else if (main_end_blkaddr < seg_end_blkaddr) {
@@ -2517,10 +2792,8 @@
err = __f2fs_commit_super(bh, NULL);
res = err ? "failed" : "done";
}
- f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)",
- res, main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
+ f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)",
+ res, main_blkaddr, seg_end_blkaddr,
segment_count_main << log_blocks_per_seg);
if (err)
return true;
@@ -2531,7 +2804,7 @@
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
struct buffer_head *bh)
{
- block_t segment_count, segs_per_sec, secs_per_zone;
+ block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main;
block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
@@ -2601,6 +2874,7 @@
}
segment_count = le32_to_cpu(raw_super->segment_count);
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
total_sections = le32_to_cpu(raw_super->section_count);
@@ -2614,14 +2888,19 @@
return -EFSCORRUPTED;
}
- if (total_sections > segment_count ||
- total_sections < F2FS_MIN_SEGMENTS ||
+ if (total_sections > segment_count_main || total_sections < 1 ||
segs_per_sec > segment_count || !segs_per_sec) {
f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)",
segment_count, total_sections, segs_per_sec);
return -EFSCORRUPTED;
}
+ if (segment_count_main != total_sections * segs_per_sec) {
+ f2fs_info(sbi, "Invalid segment/section count (%u != %u * %u)",
+ segment_count_main, total_sections, segs_per_sec);
+ return -EFSCORRUPTED;
+ }
+
if ((segment_count / segs_per_sec) < total_sections) {
f2fs_info(sbi, "Small segment_count (%u < %u * %u)",
segment_count, segs_per_sec, total_sections);
@@ -2634,6 +2913,27 @@
return -EFSCORRUPTED;
}
+ if (RDEV(0).path[0]) {
+ block_t dev_seg_count = le32_to_cpu(RDEV(0).total_segments);
+ int i = 1;
+
+ while (i < MAX_DEVICES && RDEV(i).path[0]) {
+ dev_seg_count += le32_to_cpu(RDEV(i).total_segments);
+ i++;
+ }
+ if (segment_count != dev_seg_count) {
+ f2fs_info(sbi, "Segment count (%u) mismatch with total segments from devices (%u)",
+ segment_count, dev_seg_count);
+ return -EFSCORRUPTED;
+ }
+ } else {
+ if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_BLKZONED) &&
+ !bdev_is_zoned(sbi->sb->s_bdev)) {
+ f2fs_info(sbi, "Zoned block device path is missing");
+ return -EFSCORRUPTED;
+ }
+ }
+
if (secs_per_zone > total_sections || !secs_per_zone) {
f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)",
secs_per_zone, total_sections);
@@ -2650,11 +2950,13 @@
return -EFSCORRUPTED;
}
- if (le32_to_cpu(raw_super->cp_payload) >
- (blocks_per_seg - F2FS_CP_PACKS)) {
- f2fs_info(sbi, "Insane cp_payload (%u > %u)",
+ if (le32_to_cpu(raw_super->cp_payload) >=
+ (blocks_per_seg - F2FS_CP_PACKS -
+ NR_CURSEG_PERSIST_TYPE)) {
+ f2fs_info(sbi, "Insane cp_payload (%u >= %u)",
le32_to_cpu(raw_super->cp_payload),
- blocks_per_seg - F2FS_CP_PACKS);
+ blocks_per_seg - F2FS_CP_PACKS -
+ NR_CURSEG_PERSIST_TYPE);
return -EFSCORRUPTED;
}
@@ -2690,6 +2992,7 @@
unsigned int cp_pack_start_sum, cp_payload;
block_t user_block_count, valid_user_blocks;
block_t avail_node_count, valid_node_count;
+ unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks;
int i, j;
total = le32_to_cpu(raw_super->segment_count);
@@ -2707,7 +3010,7 @@
ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
- if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
+ if (unlikely(fsmeta < F2FS_MIN_META_SEGMENTS ||
ovp_segments == 0 || reserved_segments == 0)) {
f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version");
return 1;
@@ -2795,7 +3098,7 @@
cp_payload = __cp_payload(sbi);
if (cp_pack_start_sum < cp_payload + 1 ||
cp_pack_start_sum > blocks_per_seg - 1 -
- NR_CURSEG_TYPE) {
+ NR_CURSEG_PERSIST_TYPE) {
f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
cp_pack_start_sum);
return 1;
@@ -2810,6 +3113,17 @@
return 1;
}
+ nat_blocks = nat_segs << log_blocks_per_seg;
+ nat_bits_bytes = nat_blocks / BITS_PER_BYTE;
+ nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
+ if (__is_set_ckpt_flags(ckpt, CP_NAT_BITS_FLAG) &&
+ (cp_payload + F2FS_CP_PACKS +
+ NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) {
+ f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)",
+ cp_payload, nat_bits_blocks);
+ return 1;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_err(sbi, "A bug case: need to run fsck");
return 1;
@@ -2868,6 +3182,7 @@
spin_lock_init(&sbi->dev_lock);
init_rwsem(&sbi->sb_lock);
+ init_rwsem(&sbi->pin_sem);
}
static int init_percpu_info(struct f2fs_sb_info *sbi)
@@ -2887,15 +3202,35 @@
}
#ifdef CONFIG_BLK_DEV_ZONED
+
+struct f2fs_report_zones_args {
+ struct f2fs_dev_info *dev;
+ bool zone_cap_mismatch;
+};
+
+static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ struct f2fs_report_zones_args *rz_args = data;
+
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return 0;
+
+ set_bit(idx, rz_args->dev->blkz_seq);
+ rz_args->dev->zone_capacity_blocks[idx] = zone->capacity >>
+ F2FS_LOG_SECTORS_PER_BLOCK;
+ if (zone->len != zone->capacity && !rz_args->zone_cap_mismatch)
+ rz_args->zone_cap_mismatch = true;
+
+ return 0;
+}
+
static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
struct block_device *bdev = FDEV(devi).bdev;
sector_t nr_sectors = bdev->bd_part->nr_sects;
- sector_t sector = 0;
- struct blk_zone *zones;
- unsigned int i, nr_zones;
- unsigned int n = 0;
- int err = -EIO;
+ struct f2fs_report_zones_args rep_zone_arg;
+ int ret;
if (!f2fs_sb_has_blkzoned(sbi))
return 0;
@@ -2920,38 +3255,27 @@
if (!FDEV(devi).blkz_seq)
return -ENOMEM;
-#define F2FS_REPORT_NR_ZONES 4096
-
- zones = f2fs_kzalloc(sbi,
- array_size(F2FS_REPORT_NR_ZONES,
- sizeof(struct blk_zone)),
- GFP_KERNEL);
- if (!zones)
+ /* Get block zones type and zone-capacity */
+ FDEV(devi).zone_capacity_blocks = f2fs_kzalloc(sbi,
+ FDEV(devi).nr_blkz * sizeof(block_t),
+ GFP_KERNEL);
+ if (!FDEV(devi).zone_capacity_blocks)
return -ENOMEM;
- /* Get block zones type */
- while (zones && sector < nr_sectors) {
+ rep_zone_arg.dev = &FDEV(devi);
+ rep_zone_arg.zone_cap_mismatch = false;
- nr_zones = F2FS_REPORT_NR_ZONES;
- err = blkdev_report_zones(bdev, sector, zones, &nr_zones);
- if (err)
- break;
- if (!nr_zones) {
- err = -EIO;
- break;
- }
+ ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb,
+ &rep_zone_arg);
+ if (ret < 0)
+ return ret;
- for (i = 0; i < nr_zones; i++) {
- if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
- set_bit(n, FDEV(devi).blkz_seq);
- sector += zones[i].len;
- n++;
- }
+ if (!rep_zone_arg.zone_cap_mismatch) {
+ kfree(FDEV(devi).zone_capacity_blocks);
+ FDEV(devi).zone_capacity_blocks = NULL;
}
- kvfree(zones);
-
- return err;
+ return 0;
}
#endif
@@ -2981,6 +3305,7 @@
f2fs_err(sbi, "Unable to read %dth superblock",
block + 1);
err = -EIO;
+ *recovery = 1;
continue;
}
@@ -2990,6 +3315,7 @@
f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
block + 1);
brelse(bh);
+ *recovery = 1;
continue;
}
@@ -3002,13 +3328,9 @@
brelse(bh);
}
- /* Fail to read any one of the superblocks*/
- if (err < 0)
- *recovery = 1;
-
/* No valid superblock */
if (!*raw_super)
- kvfree(super);
+ kfree(super);
else
err = 0;
@@ -3149,7 +3471,7 @@
static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_UNICODE
- if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) {
+ if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) {
const struct f2fs_sb_encodings *encoding_info;
struct unicode_map *encoding;
__u16 encoding_flags;
@@ -3180,8 +3502,8 @@
"%s-%s with flags 0x%hx", encoding_info->name,
encoding_info->version?:"\b", encoding_flags);
- sbi->s_encoding = encoding;
- sbi->s_encoding_flags = encoding_flags;
+ sbi->sb->s_encoding = encoding;
+ sbi->sb->s_encoding_flags = encoding_flags;
sbi->sb->s_d_op = &f2fs_dentry_ops;
}
#else
@@ -3260,18 +3582,6 @@
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
sizeof(raw_super->uuid));
- /*
- * The BLKZONED feature indicates that the drive was formatted with
- * zone alignment optimization. This is optional for host-aware
- * devices, but mandatory for host-managed zoned block devices.
- */
-#ifndef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_err(sbi, "Zoned block device support is not enabled");
- err = -EOPNOTSUPP;
- goto free_sb_buf;
- }
-#endif
default_options(sbi);
/* parse mount options */
options = kstrdup((const char *)data, GFP_KERNEL);
@@ -3280,7 +3590,7 @@
goto free_sb_buf;
}
- err = parse_options(sb, options);
+ err = parse_options(sb, options, false);
if (err)
goto free_options;
@@ -3324,10 +3634,9 @@
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
- mutex_init(&sbi->gc_mutex);
+ init_rwsem(&sbi->gc_lock);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
- mutex_init(&sbi->resize_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
@@ -3338,6 +3647,7 @@
/* init iostat info */
spin_lock_init(&sbi->iostat_lock);
sbi->iostat_enable = false;
+ sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
for (i = 0; i < NR_PAGE_TYPE; i++) {
int n = (i == META) ? 1: NR_TEMP_TYPE;
@@ -3359,6 +3669,8 @@
sbi->write_io[i][j].bio = NULL;
spin_lock_init(&sbi->write_io[i][j].io_lock);
INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
+ INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
+ init_rwsem(&sbi->write_io[i][j].bio_list_lock);
}
}
@@ -3380,12 +3692,20 @@
}
}
+ /* init per sbi slab cache */
+ err = f2fs_init_xattr_caches(sbi);
+ if (err)
+ goto free_io_dummy;
+ err = f2fs_init_page_array_cache(sbi);
+ if (err)
+ goto free_xattr_cache;
+
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
f2fs_err(sbi, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
- goto free_io_dummy;
+ goto free_page_array_cache;
}
err = f2fs_get_valid_checkpoint(sbi);
@@ -3411,6 +3731,12 @@
goto free_devices;
}
+ err = f2fs_init_post_read_wq(sbi);
+ if (err) {
+ f2fs_err(sbi, "Failed to initialize post read workqueue");
+ goto free_devices;
+ }
+
sbi->total_valid_node_count =
le32_to_cpu(sbi->ckpt->valid_node_count);
percpu_counter_set(&sbi->total_valid_inode_count,
@@ -3450,6 +3776,10 @@
goto free_nm;
}
+ err = adjust_reserved_segment(sbi);
+ if (err)
+ goto free_nm;
+
/* For write statistics */
if (sb->s_bdev->bd_part)
sbi->sectors_written_start =
@@ -3508,7 +3838,7 @@
f2fs_err(sbi, "Cannot turn on quotas: error %d", err);
}
#endif
- /* if there are nt orphan nodes free them */
+ /* if there are any orphan inodes, free them */
err = f2fs_recover_orphan_inodes(sbi);
if (err)
goto free_meta;
@@ -3557,7 +3887,20 @@
goto free_meta;
}
}
+
+ /*
+ * If the f2fs is not readonly and fsync data recovery succeeds,
+ * check zoned block devices' write pointer consistency.
+ */
+ if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) {
+ err = f2fs_check_write_pointer(sbi);
+ if (err)
+ goto free_meta;
+ }
+
reset_checkpoint:
+ f2fs_init_inmem_curseg(sbi);
+
/* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -3573,7 +3916,7 @@
* If filesystem is not mounted as read-only then
* do start the gc_thread.
*/
- if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
+ if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) {
/* After POR, we can run background GC thread.*/
err = f2fs_start_gc_thread(sbi);
if (err)
@@ -3634,6 +3977,7 @@
f2fs_destroy_node_manager(sbi);
free_sm:
f2fs_destroy_segment_manager(sbi);
+ f2fs_destroy_post_read_wq(sbi);
free_devices:
destroy_device_list(sbi);
kvfree(sbi->ckpt);
@@ -3641,6 +3985,10 @@
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
sbi->meta_inode = NULL;
+free_page_array_cache:
+ f2fs_destroy_page_array_cache(sbi);
+free_xattr_cache:
+ f2fs_destroy_xattr_caches(sbi);
free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
free_percpu:
@@ -3650,20 +3998,22 @@
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
- utf8_unload(sbi->s_encoding);
+ utf8_unload(sb->s_encoding);
+ sb->s_encoding = NULL;
#endif
free_options:
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
- kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
+ kfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
+ fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
kvfree(options);
free_sb_buf:
- kvfree(raw_super);
+ kfree(raw_super);
free_sbi:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
- kvfree(sbi);
+ kfree(sbi);
/* give only one another chance */
if (retry_cnt > 0 && skip_recovery) {
@@ -3756,12 +4106,18 @@
err = f2fs_create_checkpoint_caches();
if (err)
goto free_segment_manager_caches;
- err = f2fs_create_extent_cache();
+ err = f2fs_create_recovery_cache();
if (err)
goto free_checkpoint_caches;
- err = f2fs_init_sysfs();
+ err = f2fs_create_extent_cache();
+ if (err)
+ goto free_recovery_cache;
+ err = f2fs_create_garbage_collection_cache();
if (err)
goto free_extent_cache;
+ err = f2fs_init_sysfs();
+ if (err)
+ goto free_garbage_collection_cache;
err = register_shrinker(&f2fs_shrinker_info);
if (err)
goto free_sysfs;
@@ -3772,8 +4128,27 @@
err = f2fs_init_post_read_processing();
if (err)
goto free_root_stats;
+ err = f2fs_init_bio_entry_cache();
+ if (err)
+ goto free_post_read;
+ err = f2fs_init_bioset();
+ if (err)
+ goto free_bio_enrty_cache;
+ err = f2fs_init_compress_mempool();
+ if (err)
+ goto free_bioset;
+ err = f2fs_init_compress_cache();
+ if (err)
+ goto free_compress_mempool;
return 0;
-
+free_compress_mempool:
+ f2fs_destroy_compress_mempool();
+free_bioset:
+ f2fs_destroy_bioset();
+free_bio_enrty_cache:
+ f2fs_destroy_bio_entry_cache();
+free_post_read:
+ f2fs_destroy_post_read_processing();
free_root_stats:
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
@@ -3781,8 +4156,12 @@
unregister_shrinker(&f2fs_shrinker_info);
free_sysfs:
f2fs_exit_sysfs();
+free_garbage_collection_cache:
+ f2fs_destroy_garbage_collection_cache();
free_extent_cache:
f2fs_destroy_extent_cache();
+free_recovery_cache:
+ f2fs_destroy_recovery_cache();
free_checkpoint_caches:
f2fs_destroy_checkpoint_caches();
free_segment_manager_caches:
@@ -3797,12 +4176,18 @@
static void __exit exit_f2fs_fs(void)
{
+ f2fs_destroy_compress_cache();
+ f2fs_destroy_compress_mempool();
+ f2fs_destroy_bioset();
+ f2fs_destroy_bio_entry_cache();
f2fs_destroy_post_read_processing();
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
unregister_shrinker(&f2fs_shrinker_info);
f2fs_exit_sysfs();
+ f2fs_destroy_garbage_collection_cache();
f2fs_destroy_extent_cache();
+ f2fs_destroy_recovery_cache();
f2fs_destroy_checkpoint_caches();
f2fs_destroy_segment_manager_caches();
f2fs_destroy_node_manager_caches();
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 3a5360e..7ffd4bb 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -15,6 +15,7 @@
#include "f2fs.h"
#include "segment.h"
#include "gc.h"
+#include <trace/events/f2fs.h>
static struct proc_dir_entry *f2fs_proc_root;
@@ -25,6 +26,9 @@
DCC_INFO, /* struct discard_cmd_control */
NM_INFO, /* struct f2fs_nm_info */
F2FS_SBI, /* struct f2fs_sb_info */
+#ifdef CONFIG_F2FS_STAT_FS
+ STAT_INFO, /* struct f2fs_stat_info */
+#endif
#ifdef CONFIG_F2FS_FAULT_INJECTION
FAULT_INFO_RATE, /* struct f2fs_fault_info */
FAULT_INFO_TYPE, /* struct f2fs_fault_info */
@@ -42,6 +46,9 @@
int id;
};
+static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf);
+
static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
{
if (struct_type == GC_THREAD)
@@ -59,14 +66,98 @@
struct_type == FAULT_INFO_TYPE)
return (unsigned char *)&F2FS_OPTION(sbi).fault_info;
#endif
+#ifdef CONFIG_F2FS_STAT_FS
+ else if (struct_type == STAT_INFO)
+ return (unsigned char *)F2FS_STAT(sbi);
+#endif
return NULL;
}
static ssize_t dirty_segments_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)(dirty_segments(sbi)));
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)(dirty_segments(sbi)));
+}
+
+static ssize_t free_segments_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)(free_segments(sbi)));
+}
+
+static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ struct super_block *sb = sbi->sb;
+
+ if (!sb->s_bdev->bd_part)
+ return sprintf(buf, "0\n");
+
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)(sbi->kbytes_written +
+ BD_PART_WRITTEN(sbi)));
+}
+
+static ssize_t features_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ struct super_block *sb = sbi->sb;
+ int len = 0;
+
+ if (!sb->s_bdev->bd_part)
+ return sprintf(buf, "0\n");
+
+ if (f2fs_sb_has_encrypt(sbi))
+ len += scnprintf(buf, PAGE_SIZE - len, "%s",
+ "encryption");
+ if (f2fs_sb_has_blkzoned(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "blkzoned");
+ if (f2fs_sb_has_extra_attr(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "extra_attr");
+ if (f2fs_sb_has_project_quota(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "projquota");
+ if (f2fs_sb_has_inode_chksum(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "inode_checksum");
+ if (f2fs_sb_has_flexible_inline_xattr(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "flexible_inline_xattr");
+ if (f2fs_sb_has_quota_ino(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "quota_ino");
+ if (f2fs_sb_has_inode_crtime(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "inode_crtime");
+ if (f2fs_sb_has_lost_found(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "lost_found");
+ if (f2fs_sb_has_verity(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "verity");
+ if (f2fs_sb_has_sb_chksum(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "sb_checksum");
+ if (f2fs_sb_has_casefold(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "casefold");
+ if (f2fs_sb_has_compression(sbi))
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "compression");
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "pin_file");
+ len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
+ return len;
+}
+
+static ssize_t current_reserved_blocks_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ return sprintf(buf, "%u\n", sbi->current_reserved_blocks);
}
static ssize_t unusable_show(struct f2fs_attr *a,
@@ -78,90 +169,67 @@
unusable = sbi->unusable_block_count;
else
unusable = f2fs_get_unusable_blocks(sbi);
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)unusable);
+ return sprintf(buf, "%llu\n", (unsigned long long)unusable);
}
static ssize_t encoding_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
#ifdef CONFIG_UNICODE
+ struct super_block *sb = sbi->sb;
+
if (f2fs_sb_has_casefold(sbi))
return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
- sbi->s_encoding->charset,
- (sbi->s_encoding->version >> 16) & 0xff,
- (sbi->s_encoding->version >> 8) & 0xff,
- sbi->s_encoding->version & 0xff);
+ sb->s_encoding->charset,
+ (sb->s_encoding->version >> 16) & 0xff,
+ (sb->s_encoding->version >> 8) & 0xff,
+ sb->s_encoding->version & 0xff);
#endif
- return snprintf(buf, PAGE_SIZE, "(none)");
+ return sprintf(buf, "(none)");
}
-static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
+static ssize_t mounted_time_sec_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
- struct super_block *sb = sbi->sb;
+ return sprintf(buf, "%llu", SIT_I(sbi)->mounted_time);
+}
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
+#ifdef CONFIG_F2FS_STAT_FS
+static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)(si->tot_blks -
+ (si->bg_data_blks + si->bg_node_blks)));
+}
+
+static ssize_t moved_blocks_background_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
+
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)(si->bg_data_blks + si->bg_node_blks));
+}
+
+static ssize_t avg_vblocks_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
+
+ si->dirty_count = dirty_segments(sbi);
+ f2fs_update_sit_info(sbi);
+ return sprintf(buf, "%llu\n", (unsigned long long)(si->avg_vblocks));
+}
+#endif
+
+static ssize_t main_blkaddr_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)(sbi->kbytes_written +
- BD_PART_WRITTEN(sbi)));
-}
-
-static ssize_t features_show(struct f2fs_attr *a,
- struct f2fs_sb_info *sbi, char *buf)
-{
- struct super_block *sb = sbi->sb;
- int len = 0;
-
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
-
- if (f2fs_sb_has_encrypt(sbi))
- len += snprintf(buf, PAGE_SIZE - len, "%s",
- "encryption");
- if (f2fs_sb_has_blkzoned(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "blkzoned");
- if (f2fs_sb_has_extra_attr(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "extra_attr");
- if (f2fs_sb_has_project_quota(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "projquota");
- if (f2fs_sb_has_inode_chksum(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "inode_checksum");
- if (f2fs_sb_has_flexible_inline_xattr(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "flexible_inline_xattr");
- if (f2fs_sb_has_quota_ino(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "quota_ino");
- if (f2fs_sb_has_inode_crtime(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "inode_crtime");
- if (f2fs_sb_has_lost_found(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "lost_found");
- if (f2fs_sb_has_verity(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "verity");
- if (f2fs_sb_has_sb_chksum(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "sb_checksum");
- if (f2fs_sb_has_casefold(sbi))
- len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
- len ? ", " : "", "casefold");
- len += snprintf(buf + len, PAGE_SIZE - len, "\n");
- return len;
-}
-
-static ssize_t current_reserved_blocks_show(struct f2fs_attr *a,
- struct f2fs_sb_info *sbi, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks);
+ (unsigned long long)MAIN_BLKADDR(sbi));
}
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
@@ -181,23 +249,23 @@
int hot_count = sbi->raw_super->hot_ext_count;
int len = 0, i;
- len += snprintf(buf + len, PAGE_SIZE - len,
+ len += scnprintf(buf + len, PAGE_SIZE - len,
"cold file extension:\n");
for (i = 0; i < cold_count; i++)
- len += snprintf(buf + len, PAGE_SIZE - len, "%s\n",
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n",
extlist[i]);
- len += snprintf(buf + len, PAGE_SIZE - len,
+ len += scnprintf(buf + len, PAGE_SIZE - len,
"hot file extension:\n");
for (i = cold_count; i < cold_count + hot_count; i++)
- len += snprintf(buf + len, PAGE_SIZE - len, "%s\n",
+ len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n",
extlist[i]);
return len;
}
ui = (unsigned int *)(ptr + a->offset);
- return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
+ return sprintf(buf, "%u\n", *ui);
}
static ssize_t __sbi_store(struct f2fs_attr *a,
@@ -262,7 +330,9 @@
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
if (t > (unsigned long)(sbi->user_block_count -
- F2FS_OPTION(sbi).root_reserved_blocks)) {
+ F2FS_OPTION(sbi).root_reserved_blocks -
+ sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments)) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
@@ -291,29 +361,37 @@
return -EINVAL;
if (!strcmp(a->attr.name, "gc_urgent")) {
- if (t >= 1) {
- sbi->gc_mode = GC_URGENT;
+ if (t == 0) {
+ sbi->gc_mode = GC_NORMAL;
+ } else if (t == 1) {
+ sbi->gc_mode = GC_URGENT_HIGH;
if (sbi->gc_thread) {
sbi->gc_thread->gc_wake = 1;
wake_up_interruptible_all(
&sbi->gc_thread->gc_wait_queue_head);
wake_up_discard_thread(sbi, true);
}
+ } else if (t == 2) {
+ sbi->gc_mode = GC_URGENT_LOW;
+ } else {
+ return -EINVAL;
+ }
+ return count;
+ }
+ if (!strcmp(a->attr.name, "gc_idle")) {
+ if (t == GC_IDLE_CB) {
+ sbi->gc_mode = GC_IDLE_CB;
+ } else if (t == GC_IDLE_GREEDY) {
+ sbi->gc_mode = GC_IDLE_GREEDY;
+ } else if (t == GC_IDLE_AT) {
+ if (!sbi->am.atgc_enabled)
+ return -EINVAL;
+ sbi->gc_mode = GC_AT;
} else {
sbi->gc_mode = GC_NORMAL;
}
return count;
}
- if (!strcmp(a->attr.name, "gc_idle")) {
- if (t == GC_IDLE_CB)
- sbi->gc_mode = GC_IDLE_CB;
- else if (t == GC_IDLE_GREEDY)
- sbi->gc_mode = GC_IDLE_GREEDY;
- else
- sbi->gc_mode = GC_NORMAL;
- return count;
- }
-
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
@@ -322,6 +400,15 @@
return count;
}
+ if (!strcmp(a->attr.name, "iostat_period_ms")) {
+ if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS)
+ return -EINVAL;
+ spin_lock(&sbi->iostat_lock);
+ sbi->iostat_period_ms = (unsigned int)t;
+ spin_unlock(&sbi->iostat_lock);
+ return count;
+ }
+
*ui = (unsigned int)t;
return count;
@@ -387,6 +474,8 @@
FEAT_VERITY,
FEAT_SB_CHECKSUM,
FEAT_CASEFOLD,
+ FEAT_COMPRESSION,
+ FEAT_TEST_DUMMY_ENCRYPTION_V2,
};
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -406,7 +495,9 @@
case FEAT_VERITY:
case FEAT_SB_CHECKSUM:
case FEAT_CASEFOLD:
- return snprintf(buf, PAGE_SIZE, "supported\n");
+ case FEAT_COMPRESSION:
+ case FEAT_TEST_DUMMY_ENCRYPTION_V2:
+ return sprintf(buf, "supported\n");
}
return 0;
}
@@ -435,6 +526,14 @@
.id = _id, \
}
+#define F2FS_STAT_ATTR(_struct_type, _struct_name, _name, _elname) \
+static struct f2fs_attr f2fs_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = 0444 }, \
+ .show = f2fs_sbi_show, \
+ .struct_type = _struct_type, \
+ .offset = offsetof(struct _struct_name, _elname), \
+}
+
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time,
urgent_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
@@ -467,6 +566,7 @@
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
@@ -474,15 +574,30 @@
F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
#endif
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
F2FS_GENERAL_RO_ATTR(dirty_segments);
+F2FS_GENERAL_RO_ATTR(free_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
F2FS_GENERAL_RO_ATTR(unusable);
F2FS_GENERAL_RO_ATTR(encoding);
+F2FS_GENERAL_RO_ATTR(mounted_time_sec);
+F2FS_GENERAL_RO_ATTR(main_blkaddr);
+#ifdef CONFIG_F2FS_STAT_FS
+F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count);
+F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count);
+F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_foreground_calls, call_count);
+F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_background_calls, bg_gc);
+F2FS_GENERAL_RO_ATTR(moved_blocks_background);
+F2FS_GENERAL_RO_ATTR(moved_blocks_foreground);
+F2FS_GENERAL_RO_ATTR(avg_vblocks);
+#endif
#ifdef CONFIG_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
+F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2, FEAT_TEST_DUMMY_ENCRYPTION_V2);
#endif
#ifdef CONFIG_BLK_DEV_ZONED
F2FS_FEATURE_RO_ATTR(block_zoned, FEAT_BLKZONED);
@@ -502,6 +617,9 @@
#ifdef CONFIG_UNICODE
F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD);
#endif
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION);
+#endif
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -512,6 +630,7 @@
ATTR_LIST(gc_idle),
ATTR_LIST(gc_urgent),
ATTR_LIST(reclaim_segments),
+ ATTR_LIST(main_blkaddr),
ATTR_LIST(max_small_discards),
ATTR_LIST(discard_granularity),
ATTR_LIST(batched_trim_sections),
@@ -533,6 +652,7 @@
ATTR_LIST(gc_idle_interval),
ATTR_LIST(umount_discard_timeout),
ATTR_LIST(iostat_enable),
+ ATTR_LIST(iostat_period_ms),
ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh),
ATTR_LIST(extension_list),
@@ -540,13 +660,26 @@
ATTR_LIST(inject_rate),
ATTR_LIST(inject_type),
#endif
+ ATTR_LIST(data_io_flag),
+ ATTR_LIST(node_io_flag),
ATTR_LIST(dirty_segments),
+ ATTR_LIST(free_segments),
ATTR_LIST(unusable),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(features),
ATTR_LIST(reserved_blocks),
ATTR_LIST(current_reserved_blocks),
ATTR_LIST(encoding),
+ ATTR_LIST(mounted_time_sec),
+#ifdef CONFIG_F2FS_STAT_FS
+ ATTR_LIST(cp_foreground_calls),
+ ATTR_LIST(cp_background_calls),
+ ATTR_LIST(gc_foreground_calls),
+ ATTR_LIST(gc_background_calls),
+ ATTR_LIST(moved_blocks_foreground),
+ ATTR_LIST(moved_blocks_background),
+ ATTR_LIST(avg_vblocks),
+#endif
NULL,
};
ATTRIBUTE_GROUPS(f2fs);
@@ -554,6 +687,7 @@
static struct attribute *f2fs_feat_attrs[] = {
#ifdef CONFIG_FS_ENCRYPTION
ATTR_LIST(encryption),
+ ATTR_LIST(test_dummy_encryption_v2),
#endif
#ifdef CONFIG_BLK_DEV_ZONED
ATTR_LIST(block_zoned),
@@ -573,6 +707,9 @@
#ifdef CONFIG_UNICODE
ATTR_LIST(casefold),
#endif
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ ATTR_LIST(compression),
+#endif
NULL,
};
ATTRIBUTE_GROUPS(f2fs_feat);
@@ -593,7 +730,7 @@
};
static struct kset f2fs_kset = {
- .kobj = {.ktype = &f2fs_ktype},
+ .kobj = {.ktype = &f2fs_ktype},
};
static struct kobj_type f2fs_feat_ktype = {
@@ -656,6 +793,33 @@
return 0;
}
+void f2fs_record_iostat(struct f2fs_sb_info *sbi)
+{
+ unsigned long long iostat_diff[NR_IO_TYPE];
+ int i;
+
+ if (time_is_after_jiffies(sbi->iostat_next_period))
+ return;
+
+ /* Need double check under the lock */
+ spin_lock(&sbi->iostat_lock);
+ if (time_is_after_jiffies(sbi->iostat_next_period)) {
+ spin_unlock(&sbi->iostat_lock);
+ return;
+ }
+ sbi->iostat_next_period = jiffies +
+ msecs_to_jiffies(sbi->iostat_period_ms);
+
+ for (i = 0; i < NR_IO_TYPE; i++) {
+ iostat_diff[i] = sbi->rw_iostat[i] -
+ sbi->prev_rw_iostat[i];
+ sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
+ }
+ spin_unlock(&sbi->iostat_lock);
+
+ trace_f2fs_iostat(sbi, iostat_diff);
+}
+
static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
void *offset)
{
@@ -668,33 +832,58 @@
seq_printf(seq, "time: %-16llu\n", now);
- /* print app IOs */
+ /* print app write IOs */
+ seq_puts(seq, "[WRITE]\n");
seq_printf(seq, "app buffered: %-16llu\n",
- sbi->write_iostat[APP_BUFFERED_IO]);
+ sbi->rw_iostat[APP_BUFFERED_IO]);
seq_printf(seq, "app direct: %-16llu\n",
- sbi->write_iostat[APP_DIRECT_IO]);
+ sbi->rw_iostat[APP_DIRECT_IO]);
seq_printf(seq, "app mapped: %-16llu\n",
- sbi->write_iostat[APP_MAPPED_IO]);
+ sbi->rw_iostat[APP_MAPPED_IO]);
- /* print fs IOs */
+ /* print fs write IOs */
seq_printf(seq, "fs data: %-16llu\n",
- sbi->write_iostat[FS_DATA_IO]);
+ sbi->rw_iostat[FS_DATA_IO]);
seq_printf(seq, "fs node: %-16llu\n",
- sbi->write_iostat[FS_NODE_IO]);
+ sbi->rw_iostat[FS_NODE_IO]);
seq_printf(seq, "fs meta: %-16llu\n",
- sbi->write_iostat[FS_META_IO]);
+ sbi->rw_iostat[FS_META_IO]);
seq_printf(seq, "fs gc data: %-16llu\n",
- sbi->write_iostat[FS_GC_DATA_IO]);
+ sbi->rw_iostat[FS_GC_DATA_IO]);
seq_printf(seq, "fs gc node: %-16llu\n",
- sbi->write_iostat[FS_GC_NODE_IO]);
+ sbi->rw_iostat[FS_GC_NODE_IO]);
seq_printf(seq, "fs cp data: %-16llu\n",
- sbi->write_iostat[FS_CP_DATA_IO]);
+ sbi->rw_iostat[FS_CP_DATA_IO]);
seq_printf(seq, "fs cp node: %-16llu\n",
- sbi->write_iostat[FS_CP_NODE_IO]);
+ sbi->rw_iostat[FS_CP_NODE_IO]);
seq_printf(seq, "fs cp meta: %-16llu\n",
- sbi->write_iostat[FS_CP_META_IO]);
+ sbi->rw_iostat[FS_CP_META_IO]);
+
+ /* print app read IOs */
+ seq_puts(seq, "[READ]\n");
+ seq_printf(seq, "app buffered: %-16llu\n",
+ sbi->rw_iostat[APP_BUFFERED_READ_IO]);
+ seq_printf(seq, "app direct: %-16llu\n",
+ sbi->rw_iostat[APP_DIRECT_READ_IO]);
+ seq_printf(seq, "app mapped: %-16llu\n",
+ sbi->rw_iostat[APP_MAPPED_READ_IO]);
+
+ /* print fs read IOs */
+ seq_printf(seq, "fs data: %-16llu\n",
+ sbi->rw_iostat[FS_DATA_READ_IO]);
+ seq_printf(seq, "fs gc data: %-16llu\n",
+ sbi->rw_iostat[FS_GDATA_READ_IO]);
+ seq_printf(seq, "fs compr_data: %-16llu\n",
+ sbi->rw_iostat[FS_CDATA_READ_IO]);
+ seq_printf(seq, "fs node: %-16llu\n",
+ sbi->rw_iostat[FS_NODE_READ_IO]);
+ seq_printf(seq, "fs meta: %-16llu\n",
+ sbi->rw_iostat[FS_META_READ_IO]);
+
+ /* print other IOs */
+ seq_puts(seq, "[OTHER]\n");
seq_printf(seq, "fs discard: %-16llu\n",
- sbi->write_iostat[FS_DISCARD]);
+ sbi->rw_iostat[FS_DISCARD]);
return 0;
}
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
index e8075fc..789f6aa 100644
--- a/fs/f2fs/trace.h
+++ b/fs/f2fs/trace.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* f2fs IO tracer
*
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 7944a08..15ba369 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -29,6 +29,8 @@
#include "f2fs.h"
#include "xattr.h"
+#define F2FS_VERIFY_VER (1)
+
static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode)
{
return round_up(inode->i_size, 65536);
@@ -153,7 +155,7 @@
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size;
struct fsverity_descriptor_location dloc = {
- .version = cpu_to_le32(1),
+ .version = cpu_to_le32(F2FS_VERIFY_VER),
.size = cpu_to_le32(desc_size),
.pos = cpu_to_le64(desc_pos),
};
@@ -232,7 +234,7 @@
F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL);
if (res < 0 && res != -ERANGE)
return res;
- if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) {
+ if (res != sizeof(dloc) || dloc.version != cpu_to_le32(F2FS_VERIFY_VER)) {
f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format");
return -EINVAL;
}
@@ -256,11 +258,23 @@
}
static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
- pgoff_t index)
+ pgoff_t index,
+ unsigned long num_ra_pages)
{
+ DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index);
+ struct page *page;
+
index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
- return read_mapping_page(inode->i_mapping, index, NULL);
+ page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
+ if (!page || !PageUptodate(page)) {
+ if (page)
+ put_page(page);
+ else if (num_ra_pages > 1)
+ page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
+ page = read_mapping_page(inode->i_mapping, index, NULL);
+ }
+ return page;
}
static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 296b318..f44c601 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -23,6 +23,25 @@
#include "xattr.h"
#include "segment.h"
+static void *xattr_alloc(struct f2fs_sb_info *sbi, int size, bool *is_inline)
+{
+ if (likely(size == sbi->inline_xattr_slab_size)) {
+ *is_inline = true;
+ return kmem_cache_zalloc(sbi->inline_xattr_slab, GFP_NOFS);
+ }
+ *is_inline = false;
+ return f2fs_kzalloc(sbi, size, GFP_NOFS);
+}
+
+static void xattr_free(struct f2fs_sb_info *sbi, void *xattr_addr,
+ bool is_inline)
+{
+ if (is_inline)
+ kmem_cache_free(sbi->inline_xattr_slab, xattr_addr);
+ else
+ kfree(xattr_addr);
+}
+
static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, void *buffer, size_t size)
@@ -156,8 +175,8 @@
const struct xattr_handler f2fs_xattr_advise_handler = {
.name = F2FS_SYSTEM_ADVISE_NAME,
.flags = F2FS_XATTR_INDEX_ADVISE,
- .get = f2fs_xattr_advise_get,
- .set = f2fs_xattr_advise_set,
+ .get = f2fs_xattr_advise_get,
+ .set = f2fs_xattr_advise_set,
};
const struct xattr_handler f2fs_xattr_security_handler = {
@@ -301,7 +320,8 @@
static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
unsigned int index, unsigned int len,
const char *name, struct f2fs_xattr_entry **xe,
- void **base_addr, int *base_size)
+ void **base_addr, int *base_size,
+ bool *is_inline)
{
void *cur_addr, *txattr_addr, *last_txattr_addr;
void *last_addr = NULL;
@@ -312,12 +332,12 @@
if (!xnid && !inline_size)
return -ENODATA;
- *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE;
- txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS);
+ *base_size = XATTR_SIZE(inode) + XATTR_PADDING_SIZE;
+ txattr_addr = xattr_alloc(F2FS_I_SB(inode), *base_size, is_inline);
if (!txattr_addr)
return -ENOMEM;
- last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode);
+ last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(inode);
/* read from inline xattr */
if (inline_size) {
@@ -362,7 +382,7 @@
*base_addr = txattr_addr;
return 0;
out:
- kvfree(txattr_addr);
+ xattr_free(F2FS_I_SB(inode), txattr_addr, *is_inline);
return err;
}
@@ -405,7 +425,7 @@
*base_addr = txattr_addr;
return 0;
fail:
- kvfree(txattr_addr);
+ kfree(txattr_addr);
return err;
}
@@ -499,6 +519,7 @@
unsigned int size, len;
void *base_addr = NULL;
int base_size;
+ bool is_inline;
if (name == NULL)
return -EINVAL;
@@ -509,7 +530,7 @@
down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
- &entry, &base_addr, &base_size);
+ &entry, &base_addr, &base_size, &is_inline);
up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
@@ -532,14 +553,13 @@
}
error = size;
out:
- kvfree(base_addr);
+ xattr_free(F2FS_I_SB(inode), base_addr, is_inline);
return error;
}
ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
{
struct inode *inode = d_inode(dentry);
- nid_t xnid = F2FS_I(inode)->i_xattr_nid;
struct f2fs_xattr_entry *entry;
void *base_addr, *last_base_addr;
int error = 0;
@@ -551,7 +571,7 @@
if (error)
return error;
- last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode);
+ last_base_addr = (void *)base_addr + XATTR_SIZE(inode);
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
@@ -590,7 +610,7 @@
}
error = buffer_size - rest;
cleanup:
- kvfree(base_addr);
+ kfree(base_addr);
return error;
}
@@ -609,7 +629,6 @@
{
struct f2fs_xattr_entry *here, *last;
void *base_addr, *last_base_addr;
- nid_t xnid = F2FS_I(inode)->i_xattr_nid;
int found, newsize;
size_t len;
__u32 new_hsize;
@@ -633,7 +652,7 @@
if (error)
return error;
- last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode);
+ last_base_addr = (void *)base_addr + XATTR_SIZE(inode);
/* find entry with wanted name. */
here = __find_xattr(base_addr, last_base_addr, index, len, name);
@@ -661,8 +680,17 @@
}
last = here;
- while (!IS_XATTR_LAST_ENTRY(last))
+ while (!IS_XATTR_LAST_ENTRY(last)) {
+ if ((void *)(last) + sizeof(__u32) > last_base_addr ||
+ (void *)XATTR_NEXT_ENTRY(last) > last_base_addr) {
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has invalid last xattr entry, entry_size: %zu",
+ inode->i_ino, ENTRY_SIZE(last));
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ error = -EFSCORRUPTED;
+ goto exit;
+ }
last = XATTR_NEXT_ENTRY(last);
+ }
newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
@@ -731,7 +759,7 @@
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
- kvfree(base_addr);
+ kfree(base_addr);
return error;
}
@@ -758,14 +786,34 @@
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
- /* protect xattr_ver */
- down_write(&F2FS_I(inode)->i_sem);
down_write(&F2FS_I(inode)->i_xattr_sem);
err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
up_write(&F2FS_I(inode)->i_xattr_sem);
- up_write(&F2FS_I(inode)->i_sem);
f2fs_unlock_op(sbi);
f2fs_update_time(sbi, REQ_TIME);
return err;
}
+
+int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ char slab_name[32];
+
+ sprintf(slab_name, "f2fs_xattr_entry-%u:%u", MAJOR(dev), MINOR(dev));
+
+ sbi->inline_xattr_slab_size = F2FS_OPTION(sbi).inline_xattr_size *
+ sizeof(__le32) + XATTR_PADDING_SIZE;
+
+ sbi->inline_xattr_slab = f2fs_kmem_cache_create(slab_name,
+ sbi->inline_xattr_slab_size);
+ if (!sbi->inline_xattr_slab)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi)
+{
+ kmem_cache_destroy(sbi->inline_xattr_slab);
+}
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index de0c600..416d652 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/xattr.h
*
@@ -49,7 +49,7 @@
__u8 e_name_index;
__u8 e_name_len;
__le16 e_value_size; /* size of attribute value */
- char e_name[0]; /* attribute name */
+ char e_name[]; /* attribute name */
};
#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr))
@@ -73,7 +73,8 @@
entry = XATTR_NEXT_ENTRY(entry))
#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define XATTR_PADDING_SIZE (sizeof(__u32))
-#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \
+#define XATTR_SIZE(i) ((F2FS_I(i)->i_xattr_nid ? \
+ VALID_XATTR_BLOCK_SIZE : 0) + \
(inline_xattr_size(i)))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
VALID_XATTR_BLOCK_SIZE)
@@ -130,9 +131,12 @@
extern int f2fs_getxattr(struct inode *, int, const char *, void *,
size_t, struct page *);
extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
+extern int f2fs_init_xattr_caches(struct f2fs_sb_info *);
+extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *);
#else
#define f2fs_xattr_handlers NULL
+#define f2fs_listxattr NULL
static inline int f2fs_setxattr(struct inode *inode, int index,
const char *name, const void *value, size_t size,
struct page *page, int flags)
@@ -145,11 +149,8 @@
{
return -EOPNOTSUPP;
}
-static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
- size_t buffer_size)
-{
- return -EOPNOTSUPP;
-}
+static inline int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; }
+static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { }
#endif
#ifdef CONFIG_F2FS_FS_SECURITY