Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 30779aa..8087095 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/segment.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
@@ -179,6 +176,8 @@
return false;
if (sbi->gc_mode == GC_URGENT)
return true;
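+ /*
+ * With checkpointing disabled, prefree segments are never reclaimed,
+ * so always prefer SSR to conserve free segments.
+ */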
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return true;
return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -186,14 +185,11 @@
void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *new;
f2fs_trace_pid(page);
- set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
@@ -202,21 +198,18 @@
INIT_LIST_HEAD(&new->list);
/* increase reference count with clean state */
- mutex_lock(&fi->inmem_lock);
get_page(page);
- list_add_tail(&new->list, &fi->inmem_pages);
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (list_empty(&fi->inmem_ilist))
- list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ mutex_lock(&F2FS_I(inode)->inmem_lock);
+ list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
- mutex_unlock(&fi->inmem_lock);
+ mutex_unlock(&F2FS_I(inode)->inmem_lock);
trace_f2fs_register_inmem_page(page, INMEM);
}
static int __revoke_inmem_pages(struct inode *inode,
- struct list_head *head, bool drop, bool recover)
+ struct list_head *head, bool drop, bool recover,
+ bool trylock)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inmem_pages *cur, *tmp;
@@ -228,9 +221,18 @@
if (drop)
trace_f2fs_commit_inmem_page(page, INMEM_DROP);
- lock_page(page);
+ if (trylock) {
+ /*
+ * to avoid a deadlock between the page lock
+ * and inmem_lock.
+ */
+ if (!trylock_page(page))
+ continue;
+ } else {
+ lock_page(page);
+ }
- f2fs_wait_on_page_writeback(page, DATA, true);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
if (recover) {
struct dnode_of_data dn;
@@ -267,10 +269,11 @@
}
next:
/* we don't need to invalidate this in the successful status */
- if (drop || recover)
+ if (drop || recover) {
ClearPageUptodate(page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ clear_cold_data(page);
+ }
+ f2fs_clear_page_private(page);
f2fs_put_page(page, 1);
list_del(&cur->list);
@@ -317,17 +320,21 @@
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- mutex_lock(&fi->inmem_lock);
- __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (!list_empty(&fi->inmem_ilist))
- list_del_init(&fi->inmem_ilist);
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
- mutex_unlock(&fi->inmem_lock);
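+ /*
+ * Loop until the list drains: __revoke_inmem_pages() uses
+ * trylock_page() here and skips pages it cannot lock, which
+ * avoids an inmem_lock vs. page-lock inversion.
+ */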
+ while (!list_empty(&fi->inmem_pages)) {
+ mutex_lock(&fi->inmem_lock);
+ __revoke_inmem_pages(inode, &fi->inmem_pages,
+ true, false, true);
+ mutex_unlock(&fi->inmem_lock);
+ }
clear_inode_flag(inode, FI_ATOMIC_FILE);
fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
stat_dec_atomic_write(inode);
+
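+ /* detach from sbi's atomic-file list; no longer nested under inmem_lock */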
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (!list_empty(&fi->inmem_ilist))
+ list_del_init(&fi->inmem_ilist);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
}
void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
@@ -353,8 +360,7 @@
kmem_cache_free(inmem_entry_slab, cur);
ClearPageUptodate(page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
f2fs_put_page(page, 0);
trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
@@ -374,7 +380,7 @@
.io_type = FS_DATA_IO,
};
struct list_head revoke_list;
- pgoff_t last_idx = ULONG_MAX;
+ bool submit_bio = false;
int err = 0;
INIT_LIST_HEAD(&revoke_list);
@@ -386,8 +392,9 @@
if (page->mapping == inode->i_mapping) {
trace_f2fs_commit_inmem_page(page, INMEM);
+ f2fs_wait_on_page_writeback(page, DATA, true, true);
+
set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
f2fs_remove_dirty_inode(inode);
@@ -409,14 +416,14 @@
}
/* record old blkaddr for revoking */
cur->old_addr = fio.old_blkaddr;
- last_idx = page->index;
+ submit_bio = true;
}
unlock_page(page);
list_move_tail(&cur->list, &revoke_list);
}
- if (last_idx != ULONG_MAX)
- f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
+ if (submit_bio)
+ f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
if (err) {
/*
@@ -427,12 +434,15 @@
* recovery or rewrite & commit the last transaction. For other
* error numbers, revoking was done by the filesystem itself.
*/
- err = __revoke_inmem_pages(inode, &revoke_list, false, true);
+ err = __revoke_inmem_pages(inode, &revoke_list,
+ false, true, false);
/* drop all uncommitted pages */
- __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+ __revoke_inmem_pages(inode, &fi->inmem_pages,
+ true, false, false);
} else {
- __revoke_inmem_pages(inode, &revoke_list, false, false);
+ __revoke_inmem_pages(inode, &revoke_list,
+ false, false, false);
}
return err;
@@ -453,11 +463,6 @@
mutex_lock(&fi->inmem_lock);
err = __f2fs_commit_inmem_pages(inode);
-
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (!list_empty(&fi->inmem_ilist))
- list_del_init(&fi->inmem_ilist);
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
mutex_unlock(&fi->inmem_lock);
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -483,6 +488,9 @@
if (need && excess_cached_nats(sbi))
f2fs_balance_fs_bg(sbi);
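+ /* GC below cannot reclaim space while a checkpoint cannot be taken */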
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return;
+
/*
* We should do GC or end up with a checkpoint if there are too many
* dirty dir/node pages without enough free segments.
@@ -511,7 +519,7 @@
else
f2fs_build_free_nids(sbi, false, false);
- if (!is_idle(sbi) &&
+ if (!is_idle(sbi, REQ_TIME) &&
(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
@@ -525,9 +533,13 @@
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
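+ /* flush_lock serializes concurrent DATA_FLUSH syncs */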
+ mutex_lock(&sbi->flush_lock);
+
blk_start_plug(&plug);
f2fs_sync_dirty_inodes(sbi, FILE_INODE);
blk_finish_plug(&plug);
+
+ mutex_unlock(&sbi->flush_lock);
}
f2fs_sync_fs(sbi->sb, true);
stat_inc_bg_cp_count(sbi->stat_info);
@@ -537,9 +549,13 @@
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
struct block_device *bdev)
{
- struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
+ struct bio *bio;
int ret;
+ bio = f2fs_bio_alloc(sbi, 0, false);
+ if (!bio)
+ return -ENOMEM;
+
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
bio_set_dev(bio, bdev);
ret = submit_bio_wait(bio);
@@ -555,7 +571,7 @@
int ret = 0;
int i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return __submit_flush_wait(sbi, sbi->sb->s_bdev);
for (i = 0; i < sbi->s_ndevs; i++) {
@@ -616,14 +632,17 @@
return 0;
if (!test_opt(sbi, FLUSH_MERGE)) {
+ atomic_inc(&fcc->queued_flush);
ret = submit_flush_wait(sbi, ino);
+ atomic_dec(&fcc->queued_flush);
atomic_inc(&fcc->issued_flush);
return ret;
}
- if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
+ if (atomic_inc_return(&fcc->queued_flush) == 1 ||
+ f2fs_is_multi_device(sbi)) {
ret = submit_flush_wait(sbi, ino);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
atomic_inc(&fcc->issued_flush);
return ret;
@@ -642,14 +661,14 @@
if (fcc->f2fs_issue_flush) {
wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
} else {
struct llist_node *list;
list = llist_del_all(&fcc->issue_list);
if (!list) {
wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
} else {
struct flush_cmd *tmp, *next;
@@ -658,7 +677,7 @@
llist_for_each_entry_safe(tmp, next, list, llnode) {
if (tmp == &cmd) {
cmd.ret = ret;
- atomic_dec(&fcc->issing_flush);
+ atomic_dec(&fcc->queued_flush);
continue;
}
tmp->ret = ret;
@@ -687,7 +706,7 @@
if (!fcc)
return -ENOMEM;
atomic_set(&fcc->issued_flush, 0);
- atomic_set(&fcc->issing_flush, 0);
+ atomic_set(&fcc->queued_flush, 0);
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->fcc_info = fcc;
@@ -699,7 +718,7 @@
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
- kfree(fcc);
+ kvfree(fcc);
SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -718,7 +737,7 @@
kthread_stop(flush_thread);
}
if (free) {
- kfree(fcc);
+ kvfree(fcc);
SM_I(sbi)->fcc_info = NULL;
}
}
@@ -727,7 +746,7 @@
{
int ret = 0, i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return 0;
for (i = 1; i < sbi->s_ndevs; i++) {
@@ -785,9 +804,13 @@
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
- if (get_valid_blocks(sbi, segno, true) == 0)
+ if (get_valid_blocks(sbi, segno, true) == 0) {
clear_bit(GET_SEC_FROM_SEG(sbi, segno),
dirty_i->victim_secmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+ clear_bit(segno, SIT_I(sbi)->invalid_segmap);
+#endif
+ }
}
}
@@ -799,7 +822,7 @@
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned short valid_blocks;
+ unsigned short valid_blocks, ckpt_valid_blocks;
if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
return;
@@ -807,8 +830,10 @@
mutex_lock(&dirty_i->seglist_lock);
valid_blocks = get_valid_blocks(sbi, segno, false);
+ ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
- if (valid_blocks == 0) {
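+ /*
+ * With checkpointing disabled, only move a segment to PRE when all of
+ * its blocks were valid at the last checkpoint; otherwise keep it
+ * DIRTY so its checkpoint-invalid blocks stay visible to SSR and
+ * get_free_segment().
+ */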
+ if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
+ ckpt_valid_blocks == sbi->blocks_per_seg)) {
__locate_dirty_segment(sbi, segno, PRE);
__remove_dirty_segment(sbi, segno, DIRTY);
} else if (valid_blocks < sbi->blocks_per_seg) {
@@ -821,6 +846,82 @@
mutex_unlock(&dirty_i->seglist_lock);
}
+/* Move currently empty dirty segments to prefree; takes seglist_lock itself. */
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned int segno;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+ if (get_valid_blocks(sbi, segno, false))
+ continue;
+ if (IS_CURSEG(sbi, segno))
+ continue;
+ __locate_dirty_segment(sbi, segno, PRE);
+ __remove_dirty_segment(sbi, segno, DIRTY);
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+}
+
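+/*
+ * Count the checkpoint-invalid "hole" blocks in dirty segments that exceed
+ * the overprovision holes. While checkpointing is disabled such blocks can
+ * only be reused via SSR, so anything beyond the overprovision area is
+ * effectively unusable space.
+ */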
+block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
+{
+ int ovp_hole_segs =
+ (overprovision_segments(sbi) - reserved_segments(sbi));
+ block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ block_t holes[2] = {0, 0}; /* DATA and NODE */
+ block_t unusable;
+ struct seg_entry *se;
+ unsigned int segno;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+ se = get_seg_entry(sbi, segno);
+ if (IS_NODESEG(se->type))
+ holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+ else
+ holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+
+ unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
+ if (unusable > ovp_holes)
+ return unusable - ovp_holes;
+ return 0;
+}
+
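+/*
+ * Decide whether checkpoint=disable may continue: return -EAGAIN once the
+ * unusable block count exceeds the configured cap, or, in quick-disable
+ * mode, once dirty segments outnumber the overprovision segments.
+ */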
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
+{
+ int ovp_hole_segs =
+ (overprovision_segments(sbi) - reserved_segments(sbi));
+ if (unusable > F2FS_OPTION(sbi).unusable_cap)
+ return -EAGAIN;
+ if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
+ dirty_segments(sbi) > ovp_hole_segs)
+ return -EAGAIN;
+ return 0;
+}
+
+/* This is only used by SBI_CP_DISABLED */
+static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned int segno = 0;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+ if (get_valid_blocks(sbi, segno, false))
+ continue;
+ if (get_ckpt_valid_blocks(sbi, segno))
+ continue;
+ mutex_unlock(&dirty_i->seglist_lock);
+ return segno;
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+ return NULL_SEGNO;
+}
+
static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len)
@@ -841,7 +942,7 @@
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
- dc->issuing = 0;
+ dc->queued = 0;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
@@ -856,7 +957,8 @@
static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len,
- struct rb_node *parent, struct rb_node **p)
+ struct rb_node *parent, struct rb_node **p,
+ bool leftmost)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_cmd *dc;
@@ -864,7 +966,7 @@
dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
rb_link_node(&dc->rb_node, parent, p);
- rb_insert_color(&dc->rb_node, &dcc->root);
+ rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
return dc;
}
@@ -873,10 +975,10 @@
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
- atomic_sub(dc->issuing, &dcc->issing_discard);
+ atomic_sub(dc->queued, &dcc->queued_discard);
list_del(&dc->list);
- rb_erase(&dc->rb_node, &dcc->root);
+ rb_erase_cached(&dc->rb_node, &dcc->root);
dcc->undiscard_blks -= dc->len;
kmem_cache_free(discard_cmd_slab, dc);
@@ -905,9 +1007,9 @@
dc->error = 0;
if (dc->error)
- f2fs_msg(sbi->sb, KERN_INFO,
- "Issue discard(%u, %u, %u) failed, ret: %d",
- dc->lstart, dc->start, dc->len, dc->error);
+ printk_ratelimited(
+ "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
+ KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
__detach_discard_cmd(dcc, dc);
}
@@ -967,6 +1069,7 @@
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->timeout = 0;
if (discard_type == DPOLICY_BG) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -989,6 +1092,8 @@
} else if (discard_type == DPOLICY_UMOUNT) {
dpolicy->max_requests = UINT_MAX;
dpolicy->io_aware = false;
+ /* we need to issue all to keep CP_TRIMMED_FLAG */
+ dpolicy->granularity = 1;
}
}
@@ -1076,12 +1181,12 @@
dc->bio_ref++;
spin_unlock_irqrestore(&dc->lock, flags);
- atomic_inc(&dcc->issing_discard);
- dc->issuing++;
+ atomic_inc(&dcc->queued_discard);
+ dc->queued++;
list_move_tail(&dc->list, wait_list);
/* sanity check on discard range */
- __check_sit_bitmap(sbi, start, start + len);
+ __check_sit_bitmap(sbi, lstart, lstart + len);
bio->bi_private = dc;
bio->bi_end_io = f2fs_submit_discard_endio;
@@ -1113,6 +1218,7 @@
struct rb_node **p;
struct rb_node *parent = NULL;
struct discard_cmd *dc = NULL;
+ bool leftmost = true;
if (insert_p && insert_parent) {
parent = insert_parent;
@@ -1120,9 +1226,11 @@
goto do_insert;
}
- p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
+ p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
+ lstart, &leftmost);
do_insert:
- dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
+ dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
+ p, leftmost);
if (!dc)
return NULL;
@@ -1190,7 +1298,7 @@
NULL, lstart,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
- &insert_p, &insert_parent, true);
+ &insert_p, &insert_parent, true, NULL);
if (dc)
prev_dc = dc;
@@ -1268,9 +1376,12 @@
{
block_t lblkstart = blkstart;
+ if (!f2fs_bdev_support_discard(bdev))
+ return 0;
+
trace_f2fs_queue_discard(bdev, blkstart, blklen);
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
int devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
@@ -1298,7 +1409,7 @@
NULL, pos,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
- &insert_p, &insert_parent, true);
+ &insert_p, &insert_parent, true, NULL);
if (!dc)
dc = next_dc;
@@ -1311,7 +1422,7 @@
if (dc->state != D_PREP)
goto next;
- if (dpolicy->io_aware && !is_idle(sbi)) {
+ if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
io_interrupted = true;
break;
}
@@ -1351,7 +1462,14 @@
int i, issued = 0;
bool io_interrupted = false;
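+ /* record a start time so f2fs_time_over() can enforce dpolicy->timeout */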
+ if (dpolicy->timeout != 0)
+ f2fs_update_time(sbi, dpolicy->timeout);
+
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+ if (dpolicy->timeout != 0 &&
+ f2fs_time_over(sbi, dpolicy->timeout))
+ break;
+
if (i + 1 < dpolicy->granularity)
break;
@@ -1370,8 +1488,12 @@
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
+ if (dpolicy->timeout != 0 &&
+ f2fs_time_over(sbi, dpolicy->timeout))
+ break;
+
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
- !is_idle(sbi)) {
+ !is_idle(sbi, DISCARD_TIME)) {
io_interrupted = true;
break;
}
@@ -1538,7 +1660,7 @@
}
/* This comes from f2fs_put_super */
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_policy dpolicy;
@@ -1546,6 +1668,7 @@
__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
dcc->discard_granularity);
+ dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
__issue_discard_cmd(sbi, &dpolicy);
dropped = __drop_discard_cmd(sbi);
@@ -1579,6 +1702,10 @@
if (dcc->discard_wake)
dcc->discard_wake = 0;
+ /* clean up pending candidates before going to sleep */
+ if (atomic_read(&dcc->queued_discard))
+ __wait_all_discard_cmd(sbi, NULL);
+
if (try_to_freeze())
continue;
if (f2fs_readonly(sbi->sb))
@@ -1600,7 +1727,9 @@
__wait_all_discard_cmd(sbi, &dpolicy);
wait_ms = dpolicy.min_interval;
} else if (issued == -1) {
- wait_ms = dpolicy.mid_interval;
+ wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
+ if (!wait_ms)
+ wait_ms = dpolicy.mid_interval;
} else {
wait_ms = dpolicy.max_interval;
}
@@ -1619,42 +1748,34 @@
block_t lblkstart = blkstart;
int devi = 0;
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
devi = f2fs_target_device_index(sbi, blkstart);
+ if (blkstart < FDEV(devi).start_blk ||
+ blkstart > FDEV(devi).end_blk) {
+ f2fs_err(sbi, "Invalid block %x", blkstart);
+ return -EIO;
+ }
blkstart -= FDEV(devi).start_blk;
}
- /*
- * We need to know the type of the zone: for conventional zones,
- * use regular discard if the drive supports it. For sequential
- * zones, reset the zone write pointer.
- */
- switch (get_blkz_type(sbi, bdev, blkstart)) {
-
- case BLK_ZONE_TYPE_CONVENTIONAL:
- if (!blk_queue_discard(bdev_get_queue(bdev)))
- return 0;
- return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
- case BLK_ZONE_TYPE_SEQWRITE_REQ:
- case BLK_ZONE_TYPE_SEQWRITE_PREF:
+ /* For sequential zones, reset the zone write pointer */
+ if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
sector = SECTOR_FROM_BLOCK(blkstart);
nr_sects = SECTOR_FROM_BLOCK(blklen);
if (sector & (bdev_zone_sectors(bdev) - 1) ||
nr_sects != bdev_zone_sectors(bdev)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "(%d) %s: Unaligned discard attempted (block %x + %x)",
- devi, sbi->s_ndevs ? FDEV(devi).path: "",
- blkstart, blklen);
+ f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
+ devi, sbi->s_ndevs ? FDEV(devi).path : "",
+ blkstart, blklen);
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_reset_zones(bdev, sector,
- nr_sects, GFP_NOFS);
- default:
- /* Unknown zone type: broken device ? */
- return -EIO;
+ return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
}
+
+ /* For conventional zones, use regular discard if supported */
+ return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
}
#endif
@@ -1662,8 +1783,7 @@
struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi->sb) &&
- bdev_zoned_model(bdev) != BLK_ZONED_NONE)
+ if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
@@ -1725,11 +1845,11 @@
struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
int i;
- if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
+ if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
return false;
if (!force) {
- if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
+ if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
SM_I(sbi)->dcc_info->nr_discards >=
SM_I(sbi)->dcc_info->max_discards)
return false;
@@ -1810,7 +1930,7 @@
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
- bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
+ bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
mutex_lock(&dirty_i->seglist_lock);
@@ -1835,14 +1955,14 @@
dirty_i->nr_dirty[PRE]--;
}
- if (!test_opt(sbi, DISCARD))
+ if (!f2fs_realtime_discard_enable(sbi))
continue;
if (force && start >= cpc->trim_start &&
(end - 1) <= cpc->trim_end)
continue;
- if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
+ if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
continue;
@@ -1874,7 +1994,7 @@
sbi->blocks_per_seg, cur_pos);
len = next_pos - cur_pos;
- if (f2fs_sb_has_blkzoned(sbi->sb) ||
+ if (f2fs_sb_has_blkzoned(sbi) ||
(force && len < cpc->trim_minlen))
goto skip;
@@ -1922,13 +2042,13 @@
INIT_LIST_HEAD(&dcc->fstrim_list);
mutex_init(&dcc->cmd_lock);
atomic_set(&dcc->issued_discard, 0);
- atomic_set(&dcc->issing_discard, 0);
+ atomic_set(&dcc->queued_discard, 0);
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
dcc->undiscard_blks = 0;
dcc->next_pos = 0;
- dcc->root = RB_ROOT;
+ dcc->root = RB_ROOT_CACHED;
dcc->rbtree_check = false;
init_waitqueue_head(&dcc->discard_wait_queue);
@@ -1938,7 +2058,7 @@
"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(dcc->f2fs_issue_discard)) {
err = PTR_ERR(dcc->f2fs_issue_discard);
- kfree(dcc);
+ kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
return err;
}
@@ -1955,7 +2075,14 @@
f2fs_stop_discard_thread(sbi);
- kfree(dcc);
+ /*
+ * Recovery can cache discard commands, so the error path of
+ * fill_super() must give them a chance to be issued.
+ */
+ if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
+ f2fs_issue_discard_timeout(sbi);
+
+ kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
@@ -2011,26 +2138,27 @@
mir_exist = f2fs_test_and_set_bit(offset,
se->cur_valid_map_mir);
if (unlikely(exist != mir_exist)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "when setting bitmap, blk:%u, old bit:%d",
- blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
+ blkaddr, exist);
f2fs_bug_on(sbi, 1);
}
#endif
if (unlikely(exist)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Bitmap was wrongly set, blk:%u", blkaddr);
+ f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
+ blkaddr);
f2fs_bug_on(sbi, 1);
se->valid_blocks--;
del = 0;
}
- if (f2fs_discard_en(sbi) &&
- !f2fs_test_and_set_bit(offset, se->discard_map))
+ if (!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
- /* don't overwrite by SSR to keep node chain */
- if (IS_NODESEG(se->type)) {
+ /*
+ * SSR should never reuse a block which is checkpointed
+ * or newly invalidated.
+ */
+ if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
se->ckpt_valid_blocks++;
}
@@ -2040,22 +2168,32 @@
mir_exist = f2fs_test_and_clear_bit(offset,
se->cur_valid_map_mir);
if (unlikely(exist != mir_exist)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "when clearing bitmap, blk:%u, old bit:%d",
- blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
+ blkaddr, exist);
f2fs_bug_on(sbi, 1);
}
#endif
if (unlikely(!exist)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Bitmap was wrongly cleared, blk:%u", blkaddr);
+ f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
+ blkaddr);
f2fs_bug_on(sbi, 1);
se->valid_blocks++;
del = 0;
+ } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ /*
+ * If checkpoints are off, we must not reuse data that
+ * was used in the previous checkpoint. If it was used
+ * before, we must track that to know how much space we
+ * really have.
+ */
+ if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
+ spin_lock(&sbi->stat_lock);
+ sbi->unusable_block_count++;
+ spin_unlock(&sbi->stat_lock);
+ }
}
- if (f2fs_discard_en(sbi) &&
- f2fs_test_and_clear_bit(offset, se->discard_map))
+ if (f2fs_test_and_clear_bit(offset, se->discard_map))
sbi->discard_blks++;
}
if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -2066,7 +2204,7 @@
/* update total number of valid blocks to be written in ckpt area */
SIT_I(sbi)->written_valid_blocks += del;
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
get_sec_entry(sbi, segno)->valid_blocks += del;
}
@@ -2099,7 +2237,7 @@
struct seg_entry *se;
bool is_cp = false;
- if (!is_valid_data_blkaddr(sbi, blkaddr))
+ if (!__is_valid_data_blkaddr(blkaddr))
return true;
down_read(&sit_i->sentry_lock);
@@ -2332,9 +2470,12 @@
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
/* if segs_per_sec is larger than 1, we need to keep the original policy. */
- if (sbi->segs_per_sec != 1)
+ if (__is_large_section(sbi))
return CURSEG_I(sbi, type)->segno;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ return 0;
+
if (test_opt(sbi, NOHEAP) &&
(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
return 0;
@@ -2432,6 +2573,7 @@
__next_free_blkoff(sbi, curseg, 0);
sum_page = f2fs_get_sum_page(sbi, new_segno);
+ f2fs_bug_on(sbi, IS_ERR(sum_page));
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
f2fs_put_page(sum_page, 1);
@@ -2478,6 +2620,15 @@
return 1;
}
}
+
+ /* find valid_blocks=0 in dirty list */
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ segno = get_free_segment(sbi);
+ if (segno != NULL_SEGNO) {
+ curseg->next_segno = segno;
+ return 1;
+ }
+ }
return 0;
}
@@ -2495,7 +2646,8 @@
else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
- else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+ else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+ likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
new_curseg(sbi, type, false);
else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
change_curseg(sbi, type);
@@ -2505,6 +2657,39 @@
stat_inc_seg_type(sbi, curseg);
}
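+/*
+ * For resize: if the current segment of @type falls inside [start, end],
+ * relocate it to a segment outside that range.
+ */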
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+ unsigned int start, unsigned int end)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int segno;
+
+ down_read(&SM_I(sbi)->curseg_lock);
+ mutex_lock(&curseg->curseg_mutex);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ segno = CURSEG_I(sbi, type)->segno;
+ if (segno < start || segno > end)
+ goto unlock;
+
+ if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
+ change_curseg(sbi, type);
+ else
+ new_curseg(sbi, type, true);
+
+ stat_inc_seg_type(sbi, curseg);
+
+ locate_dirty_segment(sbi, segno);
+unlock:
+ up_write(&SIT_I(sbi)->sentry_lock);
+
+ if (segno != curseg->segno)
+ f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
+ type, segno, curseg->segno);
+
+ mutex_unlock(&curseg->curseg_mutex);
+ up_read(&SM_I(sbi)->curseg_lock);
+}
+
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg;
@@ -2570,7 +2755,7 @@
NULL, start,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
- &insert_p, &insert_parent, true);
+ &insert_p, &insert_parent, true, NULL);
if (!dc)
dc = next_dc;
@@ -2628,7 +2813,7 @@
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
- bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
+ bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
@@ -2637,9 +2822,8 @@
goto out;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Found FS corruption, run fsck to fix.");
- return -EIO;
+ f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
+ return -EFSCORRUPTED;
}
/* start/end segment number in main_area */
@@ -2671,7 +2855,7 @@
* discard option. The user configuration appears to rely on runtime
* discard or periodic fstrim instead.
*/
- if (test_opt(sbi, DISCARD))
+ if (f2fs_realtime_discard_enable(sbi))
goto out;
start_block = START_BLOCK(sbi, start_segno);
@@ -2932,12 +3116,14 @@
f2fs_inode_chksum_set(sbi, page);
}
+ if (F2FS_IO_ALIGNED(sbi))
+ fio->retry = false;
+
if (add_list) {
struct f2fs_bio_info *io;
INIT_LIST_HEAD(&fio->list);
fio->in_list = true;
- fio->retry = false;
io = sbi->write_io[fio->type] + fio->temp;
spin_lock(&io->io_lock);
list_add_tail(&fio->list, &io->io_list);
@@ -2954,7 +3140,7 @@
struct f2fs_sb_info *sbi = fio->sbi;
unsigned int devidx;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return;
devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
@@ -3020,6 +3206,7 @@
ClearPageError(page);
f2fs_submit_page_write(&fio);
+ stat_inc_meta_count(sbi, page->index);
f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
}
@@ -3051,21 +3238,31 @@
{
int err;
struct f2fs_sb_info *sbi = fio->sbi;
+ unsigned int segno;
fio->new_blkaddr = fio->old_blkaddr;
/* i/o temperature is needed for passing down write hints */
__get_segment_type(fio);
- f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
- GET_SEGNO(sbi, fio->new_blkaddr))->type));
+ segno = GET_SEGNO(sbi, fio->new_blkaddr);
+
+ if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
+ __func__, segno);
+ return -EFSCORRUPTED;
+ }
stat_inc_inplace_blocks(fio->sbi);
- err = f2fs_submit_page_bio(fio);
- if (!err)
+ if (fio->bio)
+ err = f2fs_merge_page_bio(fio);
+ else
+ err = f2fs_submit_page_bio(fio);
+ if (!err) {
update_device_state(fio);
-
- f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+ f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+ }
return err;
}
@@ -3177,34 +3374,48 @@
}
void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type, bool ordered)
+ enum page_type type, bool ordered, bool locked)
{
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
- f2fs_submit_merged_write_cond(sbi, page->mapping->host,
- 0, page->index, type);
- if (ordered)
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
+ if (ordered) {
wait_on_page_writeback(page);
- else
+ f2fs_bug_on(sbi, locked && PageWriteback(page));
+ } else {
wait_for_stable_page(page);
+ }
}
}
-void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
+void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *cpage;
- if (!is_valid_data_blkaddr(sbi, blkaddr))
+ if (!f2fs_post_read_required(inode))
+ return;
+
+ if (!__is_valid_data_blkaddr(blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
if (cpage) {
- f2fs_wait_on_page_writeback(cpage, DATA, true);
+ f2fs_wait_on_page_writeback(cpage, DATA, true, true);
f2fs_put_page(cpage, 1);
}
}
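+/* wait for writeback of every block in [blkaddr, blkaddr + len) */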
+void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
+ block_t len)
+{
+ block_t i;
+
+ for (i = 0; i < len; i++)
+ f2fs_wait_on_block_writeback(inode, blkaddr + i);
+}
+
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -3375,8 +3586,11 @@
/* sanity check for summary blocks */
if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
- sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
+ sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
+ f2fs_err(sbi, "invalid journal entries nats %u sits %u\n",
+ nats_in_cursum(nat_j), sits_in_cursum(sit_j));
return -EINVAL;
+ }
return 0;
}
@@ -3607,7 +3821,7 @@
struct f2fs_journal *journal = curseg->journal;
struct sit_entry_set *ses, *tmp;
struct list_head *head = &SM_I(sbi)->sit_entry_set;
- bool to_journal = true;
+ bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
struct seg_entry *se;
down_write(&sit_i->sentry_lock);
@@ -3626,7 +3840,8 @@
* entries, remove all entries from journal and add and account
* them in sit entry set.
*/
- if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
+ if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
+ !to_journal)
remove_sits_in_journal(sbi);
/*
@@ -3723,8 +3938,8 @@
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct sit_info *sit_i;
unsigned int sit_segs, start;
- char *src_bitmap;
- unsigned int bitmap_size;
+ char *src_bitmap, *bitmap;
+ unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
/* allocate memory for SIT information */
sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
@@ -3740,42 +3955,44 @@
if (!sit_i->sentries)
return -ENOMEM;
- bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
- sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
+ main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
+ sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
GFP_KERNEL);
if (!sit_i->dirty_sentries_bitmap)
return -ENOMEM;
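+ /*
+ * All per-segment bitmaps (cur_valid_map, ckpt_valid_map, discard_map
+ * and, under CONFIG_F2FS_CHECK_FS, cur_valid_map_mir) are carved out
+ * of one bulk allocation below.
+ */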
+#ifdef CONFIG_F2FS_CHECK_FS
+ bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
+#else
+ bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
+#endif
+ sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
+ if (!sit_i->bitmap)
+ return -ENOMEM;
+
+ bitmap = sit_i->bitmap;
+
for (start = 0; start < MAIN_SEGS(sbi); start++) {
- sit_i->sentries[start].cur_valid_map
- = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
- sit_i->sentries[start].ckpt_valid_map
- = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
- if (!sit_i->sentries[start].cur_valid_map ||
- !sit_i->sentries[start].ckpt_valid_map)
- return -ENOMEM;
+ sit_i->sentries[start].cur_valid_map = bitmap;
+ bitmap += SIT_VBLOCK_MAP_SIZE;
+
+ sit_i->sentries[start].ckpt_valid_map = bitmap;
+ bitmap += SIT_VBLOCK_MAP_SIZE;
#ifdef CONFIG_F2FS_CHECK_FS
- sit_i->sentries[start].cur_valid_map_mir
- = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
- if (!sit_i->sentries[start].cur_valid_map_mir)
- return -ENOMEM;
+ sit_i->sentries[start].cur_valid_map_mir = bitmap;
+ bitmap += SIT_VBLOCK_MAP_SIZE;
#endif
- if (f2fs_discard_en(sbi)) {
- sit_i->sentries[start].discard_map
- = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
- GFP_KERNEL);
- if (!sit_i->sentries[start].discard_map)
- return -ENOMEM;
- }
+ sit_i->sentries[start].discard_map = bitmap;
+ bitmap += SIT_VBLOCK_MAP_SIZE;
}
sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->tmp_map)
return -ENOMEM;
- if (sbi->segs_per_sec > 1) {
+ if (__is_large_section(sbi)) {
sit_i->sec_entries =
f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
MAIN_SECS(sbi)),
@@ -3788,17 +4005,23 @@
sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
/* setup SIT bitmap from checkpoint pack */
- bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
+ sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
- sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+ sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
if (!sit_i->sit_bitmap)
return -ENOMEM;
#ifdef CONFIG_F2FS_CHECK_FS
- sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+ sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
+ sit_bitmap_size, GFP_KERNEL);
if (!sit_i->sit_bitmap_mir)
return -ENOMEM;
+
+ sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
+ main_bitmap_size, GFP_KERNEL);
+ if (!sit_i->invalid_segmap)
+ return -ENOMEM;
#endif
/* init SIT information */
@@ -3807,7 +4030,7 @@
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
sit_i->written_valid_blocks = 0;
- sit_i->bitmap_size = bitmap_size;
+ sit_i->bitmap_size = sit_bitmap_size;
sit_i->dirty_sentries = 0;
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
@@ -3904,6 +4127,8 @@
se = &sit_i->sentries[start];
page = get_current_sit_page(sbi, start);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
sit_blk = (struct f2fs_sit_block *)page_address(page);
sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
f2fs_put_page(page, 1);
@@ -3916,21 +4141,19 @@
total_node_blocks += se->valid_blocks;
/* build discard map only one time */
- if (f2fs_discard_en(sbi)) {
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff,
- SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map,
- se->cur_valid_map,
- SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks +=
- sbi->blocks_per_seg -
- se->valid_blocks;
- }
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff,
+ SIT_VBLOCK_MAP_SIZE);
+ } else {
+ memcpy(se->discard_map,
+ se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks +=
+ sbi->blocks_per_seg -
+ se->valid_blocks;
}
- if (sbi->segs_per_sec > 1)
+ if (__is_large_section(sbi))
get_sec_entry(sbi, start)->valid_blocks +=
se->valid_blocks;
}
@@ -3943,11 +4166,9 @@
start = le32_to_cpu(segno_in_journal(journal, i));
if (start >= MAIN_SEGS(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong journal entry on segno %u",
- start);
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ f2fs_err(sbi, "Wrong journal entry on segno %u",
+ start);
+ err = -EFSCORRUPTED;
break;
}
@@ -3965,19 +4186,16 @@
if (IS_NODESEG(se->type))
total_node_blocks += se->valid_blocks;
- if (f2fs_discard_en(sbi)) {
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff,
- SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map, se->cur_valid_map,
- SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks += old_valid_blocks;
- sbi->discard_blks -= se->valid_blocks;
- }
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
+ } else {
+ memcpy(se->discard_map, se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks += old_valid_blocks;
+ sbi->discard_blks -= se->valid_blocks;
}
- if (sbi->segs_per_sec > 1) {
+ if (__is_large_section(sbi)) {
get_sec_entry(sbi, start)->valid_blocks +=
se->valid_blocks;
get_sec_entry(sbi, start)->valid_blocks -=
@@ -3987,11 +4205,9 @@
up_read(&curseg->journal_rwsem);
if (!err && total_node_blocks != valid_node_count(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "SIT is corrupted node# %u vs %u",
- total_node_blocks, valid_node_count(sbi));
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
+ total_node_blocks, valid_node_count(sbi));
+ err = -EFSCORRUPTED;
}
return err;
@@ -4082,6 +4298,39 @@
return init_victim_secmap(sbi);
}
+static int sanity_check_curseg(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ /*
+ * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
+ * In LFS curseg, all blkaddr after .next_blkoff should be unused.
+ */
+ for (i = 0; i < NO_CHECK_TYPE; i++) {
+ struct curseg_info *curseg = CURSEG_I(sbi, i);
+ struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
+ unsigned int blkofs = curseg->next_blkoff;
+
+ if (f2fs_test_bit(blkofs, se->cur_valid_map))
+ goto out;
+
+ if (curseg->alloc_type == SSR)
+ continue;
+
+ for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
+ if (!f2fs_test_bit(blkofs, se->cur_valid_map))
+ continue;
+out:
+ f2fs_err(sbi,
+ "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
+ i, curseg->segno, curseg->alloc_type,
+ curseg->next_blkoff, blkofs);
+ return -EFSCORRUPTED;
+ }
+ }
+ return 0;
+}
+
/*
* Update min, max modified time for cost-benefit GC algorithm
*/
@@ -4177,6 +4426,10 @@
if (err)
return err;
+ err = sanity_check_curseg(sbi);
+ if (err)
+ return err;
+
init_min_max_mtime(sbi);
return 0;
}
@@ -4212,7 +4465,7 @@
destroy_victim_secmap(sbi);
SM_I(sbi)->dirty_info = NULL;
- kfree(dirty_i);
+ kvfree(dirty_i);
}
static void destroy_curseg(struct f2fs_sb_info *sbi)
@@ -4224,10 +4477,10 @@
return;
SM_I(sbi)->curseg_array = NULL;
for (i = 0; i < NR_CURSEG_TYPE; i++) {
- kfree(array[i].sum_blk);
- kfree(array[i].journal);
+ kvfree(array[i].sum_blk);
+ kvfree(array[i].journal);
}
- kfree(array);
+ kvfree(array);
}
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
@@ -4238,39 +4491,31 @@
SM_I(sbi)->free_info = NULL;
kvfree(free_i->free_segmap);
kvfree(free_i->free_secmap);
- kfree(free_i);
+ kvfree(free_i);
}
static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
- unsigned int start;
if (!sit_i)
return;
- if (sit_i->sentries) {
- for (start = 0; start < MAIN_SEGS(sbi); start++) {
- kfree(sit_i->sentries[start].cur_valid_map);
-#ifdef CONFIG_F2FS_CHECK_FS
- kfree(sit_i->sentries[start].cur_valid_map_mir);
-#endif
- kfree(sit_i->sentries[start].ckpt_valid_map);
- kfree(sit_i->sentries[start].discard_map);
- }
- }
- kfree(sit_i->tmp_map);
+ if (sit_i->sentries)
+ kvfree(sit_i->bitmap);
+ kvfree(sit_i->tmp_map);
kvfree(sit_i->sentries);
kvfree(sit_i->sec_entries);
kvfree(sit_i->dirty_sentries_bitmap);
SM_I(sbi)->sit_info = NULL;
- kfree(sit_i->sit_bitmap);
+ kvfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
- kfree(sit_i->sit_bitmap_mir);
+ kvfree(sit_i->sit_bitmap_mir);
+ kvfree(sit_i->invalid_segmap);
#endif
- kfree(sit_i);
+ kvfree(sit_i);
}
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
@@ -4286,7 +4531,7 @@
destroy_free_segmap(sbi);
destroy_sit_info(sbi);
sbi->sm_info = NULL;
- kfree(sm_info);
+ kvfree(sm_info);
}
int __init f2fs_create_segment_manager_caches(void)