Blame - fs/ext4/super.c - hafnium/third_party/linux

blob: 9e210bc85c817e03aab29c88aee0968bc5779353 [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* linux/fs/ext4/super.c
				4	*
				5	* Copyright (C) 1992, 1993, 1994, 1995
				6	* Remy Card (card@masi.ibp.fr)
				7	* Laboratoire MASI - Institut Blaise Pascal
				8	* Universite Pierre et Marie Curie (Paris VI)
				9	*
				10	* from
				11	*
				12	* linux/fs/minix/inode.c
				13	*
				14	* Copyright (C) 1991, 1992 Linus Torvalds
				15	*
				16	* Big-endian to little-endian byte-swapping/bitmaps by
				17	* David S. Miller (davem@caip.rutgers.edu), 1995
				18	*/
				19
				20	#include <linux/module.h>
				21	#include <linux/string.h>
				22	#include <linux/fs.h>
				23	#include <linux/time.h>
				24	#include <linux/vmalloc.h>
				25	#include <linux/slab.h>
				26	#include <linux/init.h>
				27	#include <linux/blkdev.h>
				28	#include <linux/backing-dev.h>
				29	#include <linux/parser.h>
				30	#include <linux/buffer_head.h>
				31	#include <linux/exportfs.h>
				32	#include <linux/vfs.h>
				33	#include <linux/random.h>
				34	#include <linux/mount.h>
				35	#include <linux/namei.h>
				36	#include <linux/quotaops.h>
				37	#include <linux/seq_file.h>
				38	#include <linux/ctype.h>
				39	#include <linux/log2.h>
				40	#include <linux/crc16.h>
				41	#include <linux/dax.h>
				42	#include <linux/cleancache.h>
				43	#include <linux/uaccess.h>
				44	#include <linux/iversion.h>
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	45	#include <linux/unicode.h>
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	46	#include <linux/part_stat.h>
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	47	#include <linux/kthread.h>
				48	#include <linux/freezer.h>
				49
				50	#include "ext4.h"
				51	#include "ext4_extents.h" /* Needed for trace points definition */
				52	#include "ext4_jbd2.h"
				53	#include "xattr.h"
				54	#include "acl.h"
				55	#include "mballoc.h"
				56	#include "fsmap.h"
				57
				58	#define CREATE_TRACE_POINTS
				59	#include <trace/events/ext4.h>
				60
				61	static struct ext4_lazy_init *ext4_li_info;
				62	static struct mutex ext4_li_mtx;
				63	static struct ratelimit_state ext4_mount_msg_ratelimit;
				64
				65	static int ext4_load_journal(struct super_block , struct ext4_super_block ,
				66	unsigned long journal_devnum);
				67	static int ext4_show_options(struct seq_file seq, struct dentry root);
				68	static int ext4_commit_super(struct super_block *sb, int sync);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	69	static int ext4_mark_recovery_complete(struct super_block *sb,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	70	struct ext4_super_block *es);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	71	static int ext4_clear_journal_err(struct super_block *sb,
				72	struct ext4_super_block *es);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	73	static int ext4_sync_fs(struct super_block *sb, int wait);
				74	static int ext4_remount(struct super_block sb, int flags, char *data);
				75	static int ext4_statfs(struct dentry dentry, struct kstatfs buf);
				76	static int ext4_unfreeze(struct super_block *sb);
				77	static int ext4_freeze(struct super_block *sb);
				78	static struct dentry ext4_mount(struct file_system_type fs_type, int flags,
				79	const char dev_name, void data);
				80	static inline int ext2_feature_set_ok(struct super_block *sb);
				81	static inline int ext3_feature_set_ok(struct super_block *sb);
				82	static int ext4_feature_set_ok(struct super_block *sb, int readonly);
				83	static void ext4_destroy_lazyinit_thread(void);
				84	static void ext4_unregister_li_request(struct super_block *sb);
				85	static void ext4_clear_request_list(void);
				86	static struct inode ext4_get_journal_inode(struct super_block sb,
				87	unsigned int journal_inum);
				88
				89	/*
				90	* Lock ordering
				91	*
				92	* Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
				93	* i_mmap_rwsem (inode->i_mmap_rwsem)!
				94	*
				95	* page fault path:
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	96	* mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	97	* page lock -> i_data_sem (rw)
				98	*
				99	* buffered write path:
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	100	* sb_start_write -> i_mutex -> mmap_lock
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	101	* sb_start_write -> i_mutex -> transaction start -> page lock ->
				102	* i_data_sem (rw)
				103	*
				104	* truncate:
				105	* sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
				106	* sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
				107	* i_data_sem (rw)
				108	*
				109	* direct IO:
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	110	* sb_start_write -> i_mutex -> mmap_lock
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	111	* sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
				112	*
				113	* writepages:
				114	* transaction start -> page lock(s) -> i_data_sem (rw)
				115	*/
				116
				117	#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
				118	static struct file_system_type ext2_fs_type = {
				119	.owner = THIS_MODULE,
				120	.name = "ext2",
				121	.mount = ext4_mount,
				122	.kill_sb = kill_block_super,
				123	.fs_flags = FS_REQUIRES_DEV,
				124	};
				125	MODULE_ALIAS_FS("ext2");
				126	MODULE_ALIAS("ext2");
				127	#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
				128	#else
				129	#define IS_EXT2_SB(sb) (0)
				130	#endif
				131
				132
				133	static struct file_system_type ext3_fs_type = {
				134	.owner = THIS_MODULE,
				135	.name = "ext3",
				136	.mount = ext4_mount,
				137	.kill_sb = kill_block_super,
				138	.fs_flags = FS_REQUIRES_DEV,
				139	};
				140	MODULE_ALIAS_FS("ext3");
				141	MODULE_ALIAS("ext3");
				142	#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
				143
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	144
				145	static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags,
				146	bh_end_io_t *end_io)
				147	{
				148	/*
				149	* buffer's verified bit is no longer valid after reading from
				150	* disk again due to write out error, clear it to make sure we
				151	* recheck the buffer contents.
				152	*/
				153	clear_buffer_verified(bh);
				154
				155	bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
				156	get_bh(bh);
				157	submit_bh(REQ_OP_READ, op_flags, bh);
				158	}
				159
				160	void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags,
				161	bh_end_io_t *end_io)
				162	{
				163	BUG_ON(!buffer_locked(bh));
				164
				165	if (ext4_buffer_uptodate(bh)) {
				166	unlock_buffer(bh);
				167	return;
				168	}
				169	__ext4_read_bh(bh, op_flags, end_io);
				170	}
				171
				172	int ext4_read_bh(struct buffer_head bh, int op_flags, bh_end_io_t end_io)
				173	{
				174	BUG_ON(!buffer_locked(bh));
				175
				176	if (ext4_buffer_uptodate(bh)) {
				177	unlock_buffer(bh);
				178	return 0;
				179	}
				180
				181	__ext4_read_bh(bh, op_flags, end_io);
				182
				183	wait_on_buffer(bh);
				184	if (buffer_uptodate(bh))
				185	return 0;
				186	return -EIO;
				187	}
				188
				189	int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait)
				190	{
				191	if (trylock_buffer(bh)) {
				192	if (wait)
				193	return ext4_read_bh(bh, op_flags, NULL);
				194	ext4_read_bh_nowait(bh, op_flags, NULL);
				195	return 0;
				196	}
				197	if (wait) {
				198	wait_on_buffer(bh);
				199	if (buffer_uptodate(bh))
				200	return 0;
				201	return -EIO;
				202	}
				203	return 0;
				204	}
				205
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	206	/*
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	207	* This works like __bread_gfp() except it uses ERR_PTR for error
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	208	* returns. Currently with sb_bread it's impossible to distinguish
				209	* between ENOMEM and EIO situations (since both result in a NULL
				210	* return.
				211	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	212	static struct buffer_head __ext4_sb_bread_gfp(struct super_block sb,
				213	sector_t block, int op_flags,
				214	gfp_t gfp)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	215	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	216	struct buffer_head *bh;
				217	int ret;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	218
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	219	bh = sb_getblk_gfp(sb, block, gfp);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	220	if (bh == NULL)
				221	return ERR_PTR(-ENOMEM);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	222	if (ext4_buffer_uptodate(bh))
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	223	return bh;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	224
				225	ret = ext4_read_bh_lock(bh, REQ_META \| op_flags, true);
				226	if (ret) {
				227	put_bh(bh);
				228	return ERR_PTR(ret);
				229	}
				230	return bh;
				231	}
				232
				233	struct buffer_head ext4_sb_bread(struct super_block sb, sector_t block,
				234	int op_flags)
				235	{
				236	return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE);
				237	}
				238
				239	struct buffer_head ext4_sb_bread_unmovable(struct super_block sb,
				240	sector_t block)
				241	{
				242	return __ext4_sb_bread_gfp(sb, block, 0, 0);
				243	}
				244
				245	void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
				246	{
				247	struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);
				248
				249	if (likely(bh)) {
				250	ext4_read_bh_lock(bh, REQ_RAHEAD, false);
				251	brelse(bh);
				252	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	253	}
				254
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	255	static int ext4_verify_csum_type(struct super_block *sb,
				256	struct ext4_super_block *es)
				257	{
				258	if (!ext4_has_feature_metadata_csum(sb))
				259	return 1;
				260
				261	return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
				262	}
				263
				264	static __le32 ext4_superblock_csum(struct super_block *sb,
				265	struct ext4_super_block *es)
				266	{
				267	struct ext4_sb_info *sbi = EXT4_SB(sb);
				268	int offset = offsetof(struct ext4_super_block, s_checksum);
				269	__u32 csum;
				270
				271	csum = ext4_chksum(sbi, ~0, (char *)es, offset);
				272
				273	return cpu_to_le32(csum);
				274	}
				275
				276	static int ext4_superblock_csum_verify(struct super_block *sb,
				277	struct ext4_super_block *es)
				278	{
				279	if (!ext4_has_metadata_csum(sb))
				280	return 1;
				281
				282	return es->s_checksum == ext4_superblock_csum(sb, es);
				283	}
				284
				285	void ext4_superblock_csum_set(struct super_block *sb)
				286	{
				287	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				288
				289	if (!ext4_has_metadata_csum(sb))
				290	return;
				291
				292	es->s_checksum = ext4_superblock_csum(sb, es);
				293	}
				294
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	295	ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
				296	struct ext4_group_desc *bg)
				297	{
				298	return le32_to_cpu(bg->bg_block_bitmap_lo) \|
				299	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				300	(ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
				301	}
				302
				303	ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
				304	struct ext4_group_desc *bg)
				305	{
				306	return le32_to_cpu(bg->bg_inode_bitmap_lo) \|
				307	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				308	(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
				309	}
				310
				311	ext4_fsblk_t ext4_inode_table(struct super_block *sb,
				312	struct ext4_group_desc *bg)
				313	{
				314	return le32_to_cpu(bg->bg_inode_table_lo) \|
				315	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				316	(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
				317	}
				318
				319	__u32 ext4_free_group_clusters(struct super_block *sb,
				320	struct ext4_group_desc *bg)
				321	{
				322	return le16_to_cpu(bg->bg_free_blocks_count_lo) \|
				323	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				324	(__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
				325	}
				326
				327	__u32 ext4_free_inodes_count(struct super_block *sb,
				328	struct ext4_group_desc *bg)
				329	{
				330	return le16_to_cpu(bg->bg_free_inodes_count_lo) \|
				331	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				332	(__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
				333	}
				334
				335	__u32 ext4_used_dirs_count(struct super_block *sb,
				336	struct ext4_group_desc *bg)
				337	{
				338	return le16_to_cpu(bg->bg_used_dirs_count_lo) \|
				339	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				340	(__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
				341	}
				342
				343	__u32 ext4_itable_unused_count(struct super_block *sb,
				344	struct ext4_group_desc *bg)
				345	{
				346	return le16_to_cpu(bg->bg_itable_unused_lo) \|
				347	(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
				348	(__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
				349	}
				350
				351	void ext4_block_bitmap_set(struct super_block *sb,
				352	struct ext4_group_desc *bg, ext4_fsblk_t blk)
				353	{
				354	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
				355	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				356	bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
				357	}
				358
				359	void ext4_inode_bitmap_set(struct super_block *sb,
				360	struct ext4_group_desc *bg, ext4_fsblk_t blk)
				361	{
				362	bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
				363	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				364	bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
				365	}
				366
				367	void ext4_inode_table_set(struct super_block *sb,
				368	struct ext4_group_desc *bg, ext4_fsblk_t blk)
				369	{
				370	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
				371	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				372	bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
				373	}
				374
				375	void ext4_free_group_clusters_set(struct super_block *sb,
				376	struct ext4_group_desc *bg, __u32 count)
				377	{
				378	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
				379	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				380	bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
				381	}
				382
				383	void ext4_free_inodes_set(struct super_block *sb,
				384	struct ext4_group_desc *bg, __u32 count)
				385	{
				386	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
				387	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				388	bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
				389	}
				390
				391	void ext4_used_dirs_set(struct super_block *sb,
				392	struct ext4_group_desc *bg, __u32 count)
				393	{
				394	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
				395	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				396	bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
				397	}
				398
				399	void ext4_itable_unused_set(struct super_block *sb,
				400	struct ext4_group_desc *bg, __u32 count)
				401	{
				402	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
				403	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
				404	bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
				405	}
				406
				407	static void __ext4_update_tstamp(__le32 lo, __u8 hi)
				408	{
				409	time64_t now = ktime_get_real_seconds();
				410
				411	now = clamp_val(now, 0, (1ull << 40) - 1);
				412
				413	*lo = cpu_to_le32(lower_32_bits(now));
				414	*hi = upper_32_bits(now);
				415	}
				416
				417	static time64_t __ext4_get_tstamp(__le32 lo, __u8 hi)
				418	{
				419	return ((time64_t)(hi) << 32) + le32_to_cpu(lo);
				420	}
				421	#define ext4_update_tstamp(es, tstamp) \
				422	__ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
				423	#define ext4_get_tstamp(es, tstamp) \
				424	__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
				425
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	426	static void __save_error_info(struct super_block *sb, int error,
				427	__u32 ino, __u64 block,
				428	const char *func, unsigned int line)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	429	{
				430	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	431	int err;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	432
				433	EXT4_SB(sb)->s_mount_state \|= EXT4_ERROR_FS;
				434	if (bdev_read_only(sb->s_bdev))
				435	return;
				436	es->s_state \|= cpu_to_le16(EXT4_ERROR_FS);
				437	ext4_update_tstamp(es, s_last_error_time);
				438	strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
				439	es->s_last_error_line = cpu_to_le32(line);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	440	es->s_last_error_ino = cpu_to_le32(ino);
				441	es->s_last_error_block = cpu_to_le64(block);
				442	switch (error) {
				443	case EIO:
				444	err = EXT4_ERR_EIO;
				445	break;
				446	case ENOMEM:
				447	err = EXT4_ERR_ENOMEM;
				448	break;
				449	case EFSBADCRC:
				450	err = EXT4_ERR_EFSBADCRC;
				451	break;
				452	case 0:
				453	case EFSCORRUPTED:
				454	err = EXT4_ERR_EFSCORRUPTED;
				455	break;
				456	case ENOSPC:
				457	err = EXT4_ERR_ENOSPC;
				458	break;
				459	case ENOKEY:
				460	err = EXT4_ERR_ENOKEY;
				461	break;
				462	case EROFS:
				463	err = EXT4_ERR_EROFS;
				464	break;
				465	case EFBIG:
				466	err = EXT4_ERR_EFBIG;
				467	break;
				468	case EEXIST:
				469	err = EXT4_ERR_EEXIST;
				470	break;
				471	case ERANGE:
				472	err = EXT4_ERR_ERANGE;
				473	break;
				474	case EOVERFLOW:
				475	err = EXT4_ERR_EOVERFLOW;
				476	break;
				477	case EBUSY:
				478	err = EXT4_ERR_EBUSY;
				479	break;
				480	case ENOTDIR:
				481	err = EXT4_ERR_ENOTDIR;
				482	break;
				483	case ENOTEMPTY:
				484	err = EXT4_ERR_ENOTEMPTY;
				485	break;
				486	case ESHUTDOWN:
				487	err = EXT4_ERR_ESHUTDOWN;
				488	break;
				489	case EFAULT:
				490	err = EXT4_ERR_EFAULT;
				491	break;
				492	default:
				493	err = EXT4_ERR_UNKNOWN;
				494	}
				495	es->s_last_error_errcode = err;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	496	if (!es->s_first_error_time) {
				497	es->s_first_error_time = es->s_last_error_time;
				498	es->s_first_error_time_hi = es->s_last_error_time_hi;
				499	strncpy(es->s_first_error_func, func,
				500	sizeof(es->s_first_error_func));
				501	es->s_first_error_line = cpu_to_le32(line);
				502	es->s_first_error_ino = es->s_last_error_ino;
				503	es->s_first_error_block = es->s_last_error_block;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	504	es->s_first_error_errcode = es->s_last_error_errcode;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	505	}
				506	/*
				507	* Start the daily error reporting function if it hasn't been
				508	* started already
				509	*/
				510	if (!es->s_error_count)
				511	mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 246060*HZ);
				512	le32_add_cpu(&es->s_error_count, 1);
				513	}
				514
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	515	static void save_error_info(struct super_block *sb, int error,
				516	__u32 ino, __u64 block,
				517	const char *func, unsigned int line)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	518	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	519	__save_error_info(sb, error, ino, block, func, line);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	520	if (!bdev_read_only(sb->s_bdev))
				521	ext4_commit_super(sb, 1);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	522	}
				523
				524	/*
				525	* The del_gendisk() function uninitializes the disk-specific data
				526	* structures, including the bdi structure, without telling anyone
				527	* else. Once this happens, any attempt to call mark_buffer_dirty()
				528	* (for example, by ext4_commit_super), will cause a kernel OOPS.
				529	* This is a kludge to prevent these oops until we can put in a proper
				530	* hook in del_gendisk() to inform the VFS and file system layers.
				531	*/
				532	static int block_device_ejected(struct super_block *sb)
				533	{
				534	struct inode *bd_inode = sb->s_bdev->bd_inode;
				535	struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
				536
				537	return bdi->dev == NULL;
				538	}
				539
				540	static void ext4_journal_commit_callback(journal_t journal, transaction_t txn)
				541	{
				542	struct super_block *sb = journal->j_private;
				543	struct ext4_sb_info *sbi = EXT4_SB(sb);
				544	int error = is_journal_aborted(journal);
				545	struct ext4_journal_cb_entry *jce;
				546
				547	BUG_ON(txn->t_state == T_FINISHED);
				548
				549	ext4_process_freed_data(sb, txn->t_tid);
				550
				551	spin_lock(&sbi->s_md_lock);
				552	while (!list_empty(&txn->t_private_list)) {
				553	jce = list_entry(txn->t_private_list.next,
				554	struct ext4_journal_cb_entry, jce_list);
				555	list_del_init(&jce->jce_list);
				556	spin_unlock(&sbi->s_md_lock);
				557	jce->jce_func(sb, jce, error);
				558	spin_lock(&sbi->s_md_lock);
				559	}
				560	spin_unlock(&sbi->s_md_lock);
				561	}
				562
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	563	/*
				564	* This writepage callback for write_cache_pages()
				565	* takes care of a few cases after page cleaning.
				566	*
				567	* write_cache_pages() already checks for dirty pages
				568	* and calls clear_page_dirty_for_io(), which we want,
				569	* to write protect the pages.
				570	*
				571	* However, we may have to redirty a page (see below.)
				572	*/
				573	static int ext4_journalled_writepage_callback(struct page *page,
				574	struct writeback_control *wbc,
				575	void *data)
				576	{
				577	transaction_t transaction = (transaction_t ) data;
				578	struct buffer_head bh, head;
				579	struct journal_head *jh;
				580
				581	bh = head = page_buffers(page);
				582	do {
				583	/*
				584	* We have to redirty a page in these cases:
				585	* 1) If buffer is dirty, it means the page was dirty because it
				586	* contains a buffer that needs checkpointing. So the dirty bit
				587	* needs to be preserved so that checkpointing writes the buffer
				588	* properly.
				589	* 2) If buffer is not part of the committing transaction
				590	* (we may have just accidentally come across this buffer because
				591	* inode range tracking is not exact) or if the currently running
				592	* transaction already contains this buffer as well, dirty bit
				593	* needs to be preserved so that the buffer gets writeprotected
				594	* properly on running transaction's commit.
				595	*/
				596	jh = bh2jh(bh);
				597	if (buffer_dirty(bh) \|\|
				598	(jh && (jh->b_transaction != transaction \|\|
				599	jh->b_next_transaction))) {
				600	redirty_page_for_writepage(wbc, page);
				601	goto out;
				602	}
				603	} while ((bh = bh->b_this_page) != head);
				604
				605	out:
				606	return AOP_WRITEPAGE_ACTIVATE;
				607	}
				608
				609	static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
				610	{
				611	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
				612	struct writeback_control wbc = {
				613	.sync_mode = WB_SYNC_ALL,
				614	.nr_to_write = LONG_MAX,
				615	.range_start = jinode->i_dirty_start,
				616	.range_end = jinode->i_dirty_end,
				617	};
				618
				619	return write_cache_pages(mapping, &wbc,
				620	ext4_journalled_writepage_callback,
				621	jinode->i_transaction);
				622	}
				623
				624	static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
				625	{
				626	int ret;
				627
				628	if (ext4_should_journal_data(jinode->i_vfs_inode))
				629	ret = ext4_journalled_submit_inode_data_buffers(jinode);
				630	else
				631	ret = jbd2_journal_submit_inode_data_buffers(jinode);
				632
				633	return ret;
				634	}
				635
				636	static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
				637	{
				638	int ret = 0;
				639
				640	if (!ext4_should_journal_data(jinode->i_vfs_inode))
				641	ret = jbd2_journal_finish_inode_data_buffers(jinode);
				642
				643	return ret;
				644	}
				645
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	646	static bool system_going_down(void)
				647	{
				648	return system_state == SYSTEM_HALT \|\| system_state == SYSTEM_POWER_OFF
				649	\|\| system_state == SYSTEM_RESTART;
				650	}
				651
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	652	/* Deal with the reporting of failure conditions on a filesystem such as
				653	* inconsistencies detected or read IO failures.
				654	*
				655	* On ext2, we can store the error state of the filesystem in the
				656	* superblock. That is not possible on ext4, because we may have other
				657	* write ordering constraints on the superblock which prevent us from
				658	* writing it out straight away; and given that the journal is about to
				659	* be aborted, we can't rely on the current, or future, transactions to
				660	* write out the superblock safely.
				661	*
				662	* We'll just use the jbd2_journal_abort() error code to record an error in
				663	* the journal instead. On recovery, the journal will complain about
				664	* that error until we've noted it down and cleared it.
				665	*/
				666
				667	static void ext4_handle_error(struct super_block *sb)
				668	{
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	669	journal_t *journal = EXT4_SB(sb)->s_journal;
				670
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	671	if (test_opt(sb, WARN_ON_ERROR))
				672	WARN_ON_ONCE(1);
				673
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	674	if (sb_rdonly(sb) \|\| test_opt(sb, ERRORS_CONT))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	675	return;
				676
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	677	ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	678	if (journal)
				679	jbd2_journal_abort(journal, -EIO);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	680	/*
				681	* We force ERRORS_RO behavior when system is rebooting. Otherwise we
				682	* could panic during 'reboot -f' as the underlying device got already
				683	* disabled.
				684	*/
				685	if (test_opt(sb, ERRORS_RO) \|\| system_going_down()) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	686	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
				687	/*
				688	* Make sure updated value of ->s_mount_flags will be visible
				689	* before ->s_flags update
				690	*/
				691	smp_wmb();
				692	sb->s_flags \|= SB_RDONLY;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	693	} else if (test_opt(sb, ERRORS_PANIC)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	694	panic("EXT4-fs (device %s): panic forced after error\n",
				695	sb->s_id);
				696	}
				697	}
				698
				699	#define ext4_error_ratelimit(sb) \
				700	___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
				701	"EXT4-fs error")
				702
				703	void __ext4_error(struct super_block sb, const char function,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	704	unsigned int line, int error, __u64 block,
				705	const char *fmt, ...)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	706	{
				707	struct va_format vaf;
				708	va_list args;
				709
				710	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
				711	return;
				712
				713	trace_ext4_error(sb, function, line);
				714	if (ext4_error_ratelimit(sb)) {
				715	va_start(args, fmt);
				716	vaf.fmt = fmt;
				717	vaf.va = &args;
				718	printk(KERN_CRIT
				719	"EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
				720	sb->s_id, function, line, current->comm, &vaf);
				721	va_end(args);
				722	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	723	save_error_info(sb, error, 0, block, function, line);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	724	ext4_handle_error(sb);
				725	}
				726
				727	void __ext4_error_inode(struct inode inode, const char function,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	728	unsigned int line, ext4_fsblk_t block, int error,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	729	const char *fmt, ...)
				730	{
				731	va_list args;
				732	struct va_format vaf;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	733
				734	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
				735	return;
				736
				737	trace_ext4_error(inode->i_sb, function, line);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	738	if (ext4_error_ratelimit(inode->i_sb)) {
				739	va_start(args, fmt);
				740	vaf.fmt = fmt;
				741	vaf.va = &args;
				742	if (block)
				743	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
				744	"inode #%lu: block %llu: comm %s: %pV\n",
				745	inode->i_sb->s_id, function, line, inode->i_ino,
				746	block, current->comm, &vaf);
				747	else
				748	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
				749	"inode #%lu: comm %s: %pV\n",
				750	inode->i_sb->s_id, function, line, inode->i_ino,
				751	current->comm, &vaf);
				752	va_end(args);
				753	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	754	save_error_info(inode->i_sb, error, inode->i_ino, block,
				755	function, line);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	756	ext4_handle_error(inode->i_sb);
				757	}
				758
				759	void __ext4_error_file(struct file file, const char function,
				760	unsigned int line, ext4_fsblk_t block,
				761	const char *fmt, ...)
				762	{
				763	va_list args;
				764	struct va_format vaf;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	765	struct inode *inode = file_inode(file);
				766	char pathname[80], *path;
				767
				768	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
				769	return;
				770
				771	trace_ext4_error(inode->i_sb, function, line);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	772	if (ext4_error_ratelimit(inode->i_sb)) {
				773	path = file_path(file, pathname, sizeof(pathname));
				774	if (IS_ERR(path))
				775	path = "(unknown)";
				776	va_start(args, fmt);
				777	vaf.fmt = fmt;
				778	vaf.va = &args;
				779	if (block)
				780	printk(KERN_CRIT
				781	"EXT4-fs error (device %s): %s:%d: inode #%lu: "
				782	"block %llu: comm %s: path %s: %pV\n",
				783	inode->i_sb->s_id, function, line, inode->i_ino,
				784	block, current->comm, path, &vaf);
				785	else
				786	printk(KERN_CRIT
				787	"EXT4-fs error (device %s): %s:%d: inode #%lu: "
				788	"comm %s: path %s: %pV\n",
				789	inode->i_sb->s_id, function, line, inode->i_ino,
				790	current->comm, path, &vaf);
				791	va_end(args);
				792	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	793	save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
				794	function, line);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	795	ext4_handle_error(inode->i_sb);
				796	}
				797
				798	const char ext4_decode_error(struct super_block sb, int errno,
				799	char nbuf[16])
				800	{
				801	char *errstr = NULL;
				802
				803	switch (errno) {
				804	case -EFSCORRUPTED:
				805	errstr = "Corrupt filesystem";
				806	break;
				807	case -EFSBADCRC:
				808	errstr = "Filesystem failed CRC";
				809	break;
				810	case -EIO:
				811	errstr = "IO failure";
				812	break;
				813	case -ENOMEM:
				814	errstr = "Out of memory";
				815	break;
				816	case -EROFS:
				817	if (!sb \|\| (EXT4_SB(sb)->s_journal &&
				818	EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
				819	errstr = "Journal has aborted";
				820	else
				821	errstr = "Readonly filesystem";
				822	break;
				823	default:
				824	/* If the caller passed in an extra buffer for unknown
				825	* errors, textualise them now. Else we just return
				826	* NULL. */
				827	if (nbuf) {
				828	/* Check for truncated error codes... */
				829	if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				830	errstr = nbuf;
				831	}
				832	break;
				833	}
				834
				835	return errstr;
				836	}
				837
				838	/* __ext4_std_error decodes expected errors from journaling functions
				839	* automatically and invokes the appropriate error response. */
				840
				841	void __ext4_std_error(struct super_block sb, const char function,
				842	unsigned int line, int errno)
				843	{
				844	char nbuf[16];
				845	const char *errstr;
				846
				847	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
				848	return;
				849
				850	/* Special case: if the error is EROFS, and we're not already
				851	* inside a transaction, then there's really no point in logging
				852	* an error. */
				853	if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
				854	return;
				855
				856	if (ext4_error_ratelimit(sb)) {
				857	errstr = ext4_decode_error(sb, errno, nbuf);
				858	printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
				859	sb->s_id, function, line, errstr);
				860	}
				861
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	862	save_error_info(sb, -errno, 0, 0, function, line);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	863	ext4_handle_error(sb);
				864	}
				865
				866	/*
				867	* ext4_abort is a much stronger failure handler than ext4_error. The
				868	* abort function may be used to deal with unrecoverable failures such
				869	* as journal IO errors or ENOMEM at a critical moment in log management.
				870	*
				871	* We unconditionally force the filesystem into an ABORT\|READONLY state,
				872	* unless the error response on the fs has been set to panic in which
				873	* case we take the easy way out and panic immediately.
				874	*/
				875
				876	void __ext4_abort(struct super_block sb, const char function,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	877	unsigned int line, int error, const char *fmt, ...)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	878	{
				879	struct va_format vaf;
				880	va_list args;
				881
				882	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
				883	return;
				884
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	885	save_error_info(sb, error, 0, 0, function, line);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	886	va_start(args, fmt);
				887	vaf.fmt = fmt;
				888	vaf.va = &args;
				889	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
				890	sb->s_id, function, line, &vaf);
				891	va_end(args);
				892
				893	if (sb_rdonly(sb) == 0) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	894	ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
				895	if (EXT4_SB(sb)->s_journal)
				896	jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
				897
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	898	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	899	/*
				900	* Make sure updated value of ->s_mount_flags will be visible
				901	* before ->s_flags update
				902	*/
				903	smp_wmb();
				904	sb->s_flags \|= SB_RDONLY;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	905	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	906	if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	907	panic("EXT4-fs panic from previous error\n");
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	908	}
				909
				910	void __ext4_msg(struct super_block *sb,
				911	const char prefix, const char fmt, ...)
				912	{
				913	struct va_format vaf;
				914	va_list args;
				915
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	916	atomic_inc(&EXT4_SB(sb)->s_msg_count);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	917	if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
				918	return;
				919
				920	va_start(args, fmt);
				921	vaf.fmt = fmt;
				922	vaf.va = &args;
				923	printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
				924	va_end(args);
				925	}
				926
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	927	static int ext4_warning_ratelimit(struct super_block *sb)
				928	{
				929	atomic_inc(&EXT4_SB(sb)->s_warning_count);
				930	return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
				931	"EXT4-fs warning");
				932	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	933
				934	void __ext4_warning(struct super_block sb, const char function,
				935	unsigned int line, const char *fmt, ...)
				936	{
				937	struct va_format vaf;
				938	va_list args;
				939
				940	if (!ext4_warning_ratelimit(sb))
				941	return;
				942
				943	va_start(args, fmt);
				944	vaf.fmt = fmt;
				945	vaf.va = &args;
				946	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
				947	sb->s_id, function, line, &vaf);
				948	va_end(args);
				949	}
				950
				951	void __ext4_warning_inode(const struct inode inode, const char function,
				952	unsigned int line, const char *fmt, ...)
				953	{
				954	struct va_format vaf;
				955	va_list args;
				956
				957	if (!ext4_warning_ratelimit(inode->i_sb))
				958	return;
				959
				960	va_start(args, fmt);
				961	vaf.fmt = fmt;
				962	vaf.va = &args;
				963	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
				964	"inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
				965	function, line, inode->i_ino, current->comm, &vaf);
				966	va_end(args);
				967	}
				968
				969	void __ext4_grp_locked_error(const char *function, unsigned int line,
				970	struct super_block *sb, ext4_group_t grp,
				971	unsigned long ino, ext4_fsblk_t block,
				972	const char *fmt, ...)
				973	__releases(bitlock)
				974	__acquires(bitlock)
				975	{
				976	struct va_format vaf;
				977	va_list args;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	978
				979	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
				980	return;
				981
				982	trace_ext4_error(sb, function, line);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	983	__save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	984
				985	if (ext4_error_ratelimit(sb)) {
				986	va_start(args, fmt);
				987	vaf.fmt = fmt;
				988	vaf.va = &args;
				989	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
				990	sb->s_id, function, line, grp);
				991	if (ino)
				992	printk(KERN_CONT "inode %lu: ", ino);
				993	if (block)
				994	printk(KERN_CONT "block %llu:",
				995	(unsigned long long) block);
				996	printk(KERN_CONT "%pV\n", &vaf);
				997	va_end(args);
				998	}
				999
				1000	if (test_opt(sb, WARN_ON_ERROR))
				1001	WARN_ON_ONCE(1);
				1002
				1003	if (test_opt(sb, ERRORS_CONT)) {
				1004	ext4_commit_super(sb, 0);
				1005	return;
				1006	}
				1007
				1008	ext4_unlock_group(sb, grp);
				1009	ext4_commit_super(sb, 1);
				1010	ext4_handle_error(sb);
				1011	/*
				1012	* We only get here in the ERRORS_RO case; relocking the group
				1013	* may be dangerous, but nothing bad will happen since the
				1014	* filesystem will have already been marked read/only and the
				1015	* journal has been aborted. We return 1 as a hint to callers
				1016	* who might what to use the return value from
				1017	* ext4_grp_locked_error() to distinguish between the
				1018	* ERRORS_CONT and ERRORS_RO case, and perhaps return more
				1019	* aggressively from the ext4 function in question, with a
				1020	* more appropriate error code.
				1021	*/
				1022	ext4_lock_group(sb, grp);
				1023	return;
				1024	}
				1025
				1026	void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
				1027	ext4_group_t group,
				1028	unsigned int flags)
				1029	{
				1030	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1031	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
				1032	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
				1033	int ret;
				1034
				1035	if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
				1036	ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
				1037	&grp->bb_state);
				1038	if (!ret)
				1039	percpu_counter_sub(&sbi->s_freeclusters_counter,
				1040	grp->bb_free);
				1041	}
				1042
				1043	if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
				1044	ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
				1045	&grp->bb_state);
				1046	if (!ret && gdp) {
				1047	int count;
				1048
				1049	count = ext4_free_inodes_count(sb, gdp);
				1050	percpu_counter_sub(&sbi->s_freeinodes_counter,
				1051	count);
				1052	}
				1053	}
				1054	}
				1055
				1056	void ext4_update_dynamic_rev(struct super_block *sb)
				1057	{
				1058	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				1059
				1060	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
				1061	return;
				1062
				1063	ext4_warning(sb,
				1064	"updating to rev %d because of new feature flag, "
				1065	"running e2fsck is recommended",
				1066	EXT4_DYNAMIC_REV);
				1067
				1068	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
				1069	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
				1070	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
				1071	/* leave es->s_feature_compat flags alone /
				1072	/* es->s_uuid will be set by e2fsck if empty */
				1073
				1074	/*
				1075	* The rest of the superblock fields should be zero, and if not it
				1076	* means they are likely already in use, so leave them alone. We
				1077	* can leave it up to e2fsck to clean up any inconsistencies there.
				1078	*/
				1079	}
				1080
				1081	/*
				1082	* Open the external journal device
				1083	*/
				1084	static struct block_device ext4_blkdev_get(dev_t dev, struct super_block sb)
				1085	{
				1086	struct block_device *bdev;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1087
				1088	bdev = blkdev_get_by_dev(dev, FMODE_READ\|FMODE_WRITE\|FMODE_EXCL, sb);
				1089	if (IS_ERR(bdev))
				1090	goto fail;
				1091	return bdev;
				1092
				1093	fail:
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1094	ext4_msg(sb, KERN_ERR,
				1095	"failed to open journal device unknown-block(%u,%u) %ld",
				1096	MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1097	return NULL;
				1098	}
				1099
				1100	/*
				1101	* Release the journal device
				1102	*/
				1103	static void ext4_blkdev_put(struct block_device *bdev)
				1104	{
				1105	blkdev_put(bdev, FMODE_READ\|FMODE_WRITE\|FMODE_EXCL);
				1106	}
				1107
				1108	static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
				1109	{
				1110	struct block_device *bdev;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1111	bdev = sbi->s_journal_bdev;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1112	if (bdev) {
				1113	ext4_blkdev_put(bdev);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1114	sbi->s_journal_bdev = NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1115	}
				1116	}
				1117
				1118	static inline struct inode orphan_list_entry(struct list_head l)
				1119	{
				1120	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
				1121	}
				1122
				1123	static void dump_orphan_list(struct super_block sb, struct ext4_sb_info sbi)
				1124	{
				1125	struct list_head *l;
				1126
				1127	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
				1128	le32_to_cpu(sbi->s_es->s_last_orphan));
				1129
				1130	printk(KERN_ERR "sb_info orphan list:\n");
				1131	list_for_each(l, &sbi->s_orphan) {
				1132	struct inode *inode = orphan_list_entry(l);
				1133	printk(KERN_ERR " "
				1134	"inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
				1135	inode->i_sb->s_id, inode->i_ino, inode,
				1136	inode->i_mode, inode->i_nlink,
				1137	NEXT_ORPHAN(inode));
				1138	}
				1139	}
				1140
				#ifdef CONFIG_QUOTA
				static int ext4_quota_off(struct super_block *sb, int type);

				/*
				 * Turn quotas off for every quota type at unmount time, going through
				 * ext4's own quota_off so that inode flags etc. get cleared.
				 */
				static inline void ext4_quota_off_umount(struct super_block *sb)
				{
					int qtype = 0;

					while (qtype < EXT4_MAXQUOTAS) {
						ext4_quota_off(sb, qtype);
						qtype++;
					}
				}

				/*
				 * Fetch the quota file name for @type.  Intended for the mount/remount
				 * paths, which hold s_umount; that lock is what the RCU-protected
				 * dereference is checked against.
				 */
				static inline char *get_qf_name(struct super_block *sb,
						struct ext4_sb_info *sbi,
						int type)
				{
					char *qf_name;

					qf_name = rcu_dereference_protected(sbi->s_qf_names[type],
							lockdep_is_held(&sb->s_umount));
					return qf_name;
				}
				#else
				/* Without quota support there is nothing to switch off at unmount. */
				static inline void ext4_quota_off_umount(struct super_block *sb)
				{
				}
				#endif
				1169
				/*
				 * Tear down the ext4 state attached to @sb.
				 *
				 * In order, as the code below shows: the lazy-init request and quotas are
				 * shut off, the reserved-conversion workqueue is destroyed, sysfs is
				 * unregistered, the journal (if any) is destroyed, the extent-status
				 * shrinker, error-report timer, system zone, mballoc and extent state are
				 * released, the superblock is committed when the filesystem is writable,
				 * group descriptor buffers, flex-group arrays, per-cpu counters and quota
				 * file names are freed, the block devices are synced and invalidated, the
				 * xattr caches are destroyed, mmpd is stopped, and finally the kobject and
				 * the ext4_sb_info itself are released.
				 */
				1170	static void ext4_put_super(struct super_block *sb)
				1171	{
				1172	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1173	struct ext4_super_block *es = sbi->s_es;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	1174	struct buffer_head **group_desc;
				1175	struct flex_groups **flex_groups;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1176	int aborted = 0;
				1177	int i, err;
				1178
				1179	ext4_unregister_li_request(sb);
				1180	ext4_quota_off_umount(sb);
				1181
				1182	destroy_workqueue(sbi->rsv_conversion_wq);
				1183
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1184	/*
				1185	* Unregister sysfs before destroying jbd2 journal.
				1186	* Since we could still access attr_journal_task attribute via sysfs
				1187	* path which could have sbi->s_journal->j_task as NULL
				1188	*/
				1189	ext4_unregister_sysfs(sb);
				1190
				/*
				 * Remember whether the journal was already aborted before it is
				 * destroyed: a destroy failure is only escalated through ext4_abort()
				 * when it was not, and the flag also gates the superblock state
				 * update further down.
				 */
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1191	if (sbi->s_journal) {
				1192	aborted = is_journal_aborted(sbi->s_journal);
				1193	err = jbd2_journal_destroy(sbi->s_journal);
				1194	sbi->s_journal = NULL;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1195	if ((err < 0) && !aborted) {
				1196	ext4_abort(sb, -err, "Couldn't clean up the journal");
				1197	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1198	}
				1199
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1200	ext4_es_unregister_shrinker(sbi);
				1201	del_timer_sync(&sbi->s_err_report);
				1202	ext4_release_system_zone(sb);
				1203	ext4_mb_release(sb);
				1204	ext4_ext_release(sb);
				1205
				/*
				 * On a writable filesystem the superblock is committed one last
				 * time; the needs_recovery feature is cleared and s_state restored
				 * from s_mount_state only when the journal was not aborted.
				 */
				1206	if (!sb_rdonly(sb) && !aborted) {
				1207	ext4_clear_feature_journal_needs_recovery(sb);
				1208	es->s_state = cpu_to_le16(sbi->s_mount_state);
				1209	}
				1210	if (!sb_rdonly(sb))
				1211	ext4_commit_super(sb, 1);
				1212
				/*
				 * s_group_desc and s_flex_groups are read through rcu_dereference(),
				 * hence the RCU read-side section around releasing them.
				 */
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	1213	rcu_read_lock();
				1214	group_desc = rcu_dereference(sbi->s_group_desc);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1215	for (i = 0; i < sbi->s_gdb_count; i++)
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	1216	brelse(group_desc[i]);
				1217	kvfree(group_desc);
				1218	flex_groups = rcu_dereference(sbi->s_flex_groups);
				1219	if (flex_groups) {
				1220	for (i = 0; i < sbi->s_flex_groups_allocated; i++)
				1221	kvfree(flex_groups[i]);
				1222	kvfree(flex_groups);
				1223	}
				1224	rcu_read_unlock();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1225	percpu_counter_destroy(&sbi->s_freeclusters_counter);
				1226	percpu_counter_destroy(&sbi->s_freeinodes_counter);
				1227	percpu_counter_destroy(&sbi->s_dirs_counter);
				1228	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	1229	percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
				1230	percpu_free_rwsem(&sbi->s_writepages_rwsem);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1231	#ifdef CONFIG_QUOTA
				1232	for (i = 0; i < EXT4_MAXQUOTAS; i++)
				1233	kfree(get_qf_name(sb, sbi, i));
				1234	#endif
				1235
				1236	/* Debugging code just in case the in-memory inode orphan list
				1237	* isn't empty. The on-disk one can be non-empty if we've
				1238	* detected an error and taken the fs readonly, but the
				1239	* in-memory list had better be clean by this point. */
				1240	if (!list_empty(&sbi->s_orphan))
				1241	dump_orphan_list(sb, sbi);
				1242	J_ASSERT(list_empty(&sbi->s_orphan));
				1243
				1244	sync_blockdev(sb->s_bdev);
				1245	invalidate_bdev(sb->s_bdev);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1246	if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1247	/*
				1248	* Invalidate the journal device's buffers. We don't want them
				1249	* floating about in memory - the physical journal device may be
				1250	* hotswapped, and it breaks the `ro-after' testing code.
				1251	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1252	sync_blockdev(sbi->s_journal_bdev);
				1253	invalidate_bdev(sbi->s_journal_bdev);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1254	ext4_blkdev_remove(sbi);
				1255	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1256
				1257	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
				1258	sbi->s_ea_inode_cache = NULL;
				1259
				1260	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
				1261	sbi->s_ea_block_cache = NULL;
				1262
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1263	ext4_stop_mmpd(sbi);
				1264
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1265	brelse(sbi->s_sbh);
				1266	sb->s_fs_info = NULL;
				1267	/*
				1268	* Now that we are completely done shutting down the
				1269	* superblock, we need to actually destroy the kobject.
				1270	*/
				1271	kobject_put(&sbi->s_kobj);
				1272	wait_for_completion(&sbi->s_kobj_unregister);
				1273	if (sbi->s_chksum_driver)
				1274	crypto_free_shash(sbi->s_chksum_driver);
				1275	kfree(sbi->s_blockgroup_lock);
				1276	fs_put_dax(sbi->s_daxdev);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1277	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1278	#ifdef CONFIG_UNICODE
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1279	utf8_unload(sb->s_encoding);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1280	#endif
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1281	kfree(sbi);
				1282	}
				1283
				1284	static struct kmem_cache *ext4_inode_cachep;
				1285
				1286	/*
				1287	* Called inside transaction, so use GFP_NOFS
				1288	*/
				1289	static struct inode ext4_alloc_inode(struct super_block sb)
				1290	{
				1291	struct ext4_inode_info *ei;
				1292
				1293	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
				1294	if (!ei)
				1295	return NULL;
				1296
				1297	inode_set_iversion(&ei->vfs_inode, 1);
				1298	spin_lock_init(&ei->i_raw_lock);
				1299	INIT_LIST_HEAD(&ei->i_prealloc_list);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1300	atomic_set(&ei->i_prealloc_active, 0);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1301	spin_lock_init(&ei->i_prealloc_lock);
				1302	ext4_es_init_tree(&ei->i_es_tree);
				1303	rwlock_init(&ei->i_es_lock);
				1304	INIT_LIST_HEAD(&ei->i_es_list);
				1305	ei->i_es_all_nr = 0;
				1306	ei->i_es_shk_nr = 0;
				1307	ei->i_es_shrink_lblk = 0;
				1308	ei->i_reserved_data_blocks = 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1309	spin_lock_init(&(ei->i_block_reservation_lock));
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1310	ext4_init_pending_tree(&ei->i_pending_tree);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1311	#ifdef CONFIG_QUOTA
				1312	ei->i_reserved_quota = 0;
				1313	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
				1314	#endif
				1315	ei->jinode = NULL;
				1316	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
				1317	spin_lock_init(&ei->i_completed_io_lock);
				1318	ei->i_sync_tid = 0;
				1319	ei->i_datasync_tid = 0;
				1320	atomic_set(&ei->i_unwritten, 0);
				1321	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1322	ext4_fc_init_inode(&ei->vfs_inode);
				1323	mutex_init(&ei->i_fc_lock);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1324	return &ei->vfs_inode;
				1325	}
				1326
				/*
				 * Decide whether @inode should be dropped: ask the generic helper first
				 * and fall back to the fscrypt decision only when it says "keep".  The
				 * outcome is traced and returned.
				 */
				static int ext4_drop_inode(struct inode *inode)
				{
					int drop;

					drop = generic_drop_inode(inode);
					if (drop == 0)
						drop = fscrypt_drop_inode(inode);

					trace_ext4_drop_inode(inode, drop);
					return drop;
				}
				1337
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1338	static void ext4_free_in_core_inode(struct inode *inode)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1339	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1340	fscrypt_free_inode(inode);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1341	if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
				1342	pr_warn("%s: inode %ld still in fc list",
				1343	__func__, inode->i_ino);
				1344	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1345	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
				1346	}
				1347
				1348	static void ext4_destroy_inode(struct inode *inode)
				1349	{
				1350	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
				1351	ext4_msg(inode->i_sb, KERN_ERR,
				1352	"Inode %lu (%p): orphan list check failed!",
				1353	inode->i_ino, EXT4_I(inode));
				1354	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
				1355	EXT4_I(inode), sizeof(struct ext4_inode_info),
				1356	true);
				1357	dump_stack();
				1358	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1359
				1360	if (EXT4_I(inode)->i_reserved_data_blocks)
				1361	ext4_msg(inode->i_sb, KERN_ERR,
				1362	"Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
				1363	inode->i_ino, EXT4_I(inode),
				1364	EXT4_I(inode)->i_reserved_data_blocks);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1365	}
				1366
				1367	static void init_once(void *foo)
				1368	{
				1369	struct ext4_inode_info ei = (struct ext4_inode_info ) foo;
				1370
				1371	INIT_LIST_HEAD(&ei->i_orphan);
				1372	init_rwsem(&ei->xattr_sem);
				1373	init_rwsem(&ei->i_data_sem);
				1374	init_rwsem(&ei->i_mmap_sem);
				1375	inode_init_once(&ei->vfs_inode);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1376	ext4_fc_init_inode(&ei->vfs_inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1377	}
				1378
				1379	static int __init init_inodecache(void)
				1380	{
				1381	ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
				1382	sizeof(struct ext4_inode_info), 0,
				1383	(SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD\|
				1384	SLAB_ACCOUNT),
				1385	offsetof(struct ext4_inode_info, i_data),
				1386	sizeof_field(struct ext4_inode_info, i_data),
				1387	init_once);
				1388	if (ext4_inode_cachep == NULL)
				1389	return -ENOMEM;
				1390	return 0;
				1391	}
				1392
				1393	static void destroy_inodecache(void)
				1394	{
				1395	/*
				1396	* Make sure all delayed rcu free inodes are flushed before we
				1397	* destroy cache.
				1398	*/
				1399	rcu_barrier();
				1400	kmem_cache_destroy(ext4_inode_cachep);
				1401	}
				1402
				1403	void ext4_clear_inode(struct inode *inode)
				1404	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1405	ext4_fc_del(inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1406	invalidate_inode_buffers(inode);
				1407	clear_inode(inode);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1408	ext4_discard_preallocations(inode, 0);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1409	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	1410	dquot_drop(inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1411	if (EXT4_I(inode)->jinode) {
				1412	jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
				1413	EXT4_I(inode)->jinode);
				1414	jbd2_free_inode(EXT4_I(inode)->jinode);
				1415	EXT4_I(inode)->jinode = NULL;
				1416	}
				1417	fscrypt_put_encryption_info(inode);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1418	fsverity_cleanup_inode(inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1419	}
				1420
				1421	static struct inode ext4_nfs_get_inode(struct super_block sb,
				1422	u64 ino, u32 generation)
				1423	{
				1424	struct inode *inode;
				1425
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1426	/*
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1427	* Currently we don't know the generation for parent directory, so
				1428	* a generation of 0 means "accept any"
				1429	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1430	inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1431	if (IS_ERR(inode))
				1432	return ERR_CAST(inode);
				1433	if (generation && inode->i_generation != generation) {
				1434	iput(inode);
				1435	return ERR_PTR(-ESTALE);
				1436	}
				1437
				1438	return inode;
				1439	}
				1440
				1441	static struct dentry ext4_fh_to_dentry(struct super_block sb, struct fid *fid,
				1442	int fh_len, int fh_type)
				1443	{
				1444	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				1445	ext4_nfs_get_inode);
				1446	}
				1447
				1448	static struct dentry ext4_fh_to_parent(struct super_block sb, struct fid *fid,
				1449	int fh_len, int fh_type)
				1450	{
				1451	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				1452	ext4_nfs_get_inode);
				1453	}
				1454
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1455	static int ext4_nfs_commit_metadata(struct inode *inode)
				1456	{
				1457	struct writeback_control wbc = {
				1458	.sync_mode = WB_SYNC_ALL
				1459	};
				1460
				1461	trace_ext4_nfs_commit_metadata(inode);
				1462	return ext4_write_inode(inode, &wbc);
				1463	}
				1464
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1465	/*
				1466	* Try to release metadata pages (indirect blocks, directories) which are
				1467	* mapped via the block device. Since these pages could have journal heads
				1468	* which would prevent try_to_free_buffers() from freeing them, we must use
				1469	* jbd2 layer's try_to_free_buffers() function to release them.
				1470	*/
				1471	static int bdev_try_to_free_page(struct super_block sb, struct page page,
				1472	gfp_t wait)
				1473	{
				1474	journal_t *journal = EXT4_SB(sb)->s_journal;
				1475
				1476	WARN_ON(PageChecked(page));
				1477	if (!page_has_buffers(page))
				1478	return 0;
				1479	if (journal)
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1480	return jbd2_journal_try_to_free_buffers(journal, page);
				1481
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1482	return try_to_free_buffers(page);
				1483	}
				1484
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1485	#ifdef CONFIG_FS_ENCRYPTION
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1486	static int ext4_get_context(struct inode inode, void ctx, size_t len)
				1487	{
				1488	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
				1489	EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
				1490	}
				1491
				1492	static int ext4_set_context(struct inode inode, const void ctx, size_t len,
				1493	void *fs_data)
				1494	{
				1495	handle_t *handle = fs_data;
				1496	int res, res2, credits, retries = 0;
				1497
				1498	/*
				1499	* Encrypting the root directory is not allowed because e2fsck expects
				1500	* lost+found to exist and be unencrypted, and encrypting the root
				1501	* directory would imply encrypting the lost+found directory as well as
				1502	* the filename "lost+found" itself.
				1503	*/
				1504	if (inode->i_ino == EXT4_ROOT_INO)
				1505	return -EPERM;
				1506
				1507	if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
				1508	return -EINVAL;
				1509
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1510	if (ext4_test_inode_flag(inode, EXT4_INODE_DAX))
				1511	return -EOPNOTSUPP;
				1512
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1513	res = ext4_convert_inline_data(inode);
				1514	if (res)
				1515	return res;
				1516
				1517	/*
				1518	* If a journal handle was specified, then the encryption context is
				1519	* being set on a new inode via inheritance and is part of a larger
				1520	* transaction to create the inode. Otherwise the encryption context is
				1521	* being set on an existing inode in its own transaction. Only in the
				1522	* latter case should the "retry on ENOSPC" logic be used.
				1523	*/
				1524
				1525	if (handle) {
				1526	res = ext4_xattr_set_handle(handle, inode,
				1527	EXT4_XATTR_INDEX_ENCRYPTION,
				1528	EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
				1529	ctx, len, 0);
				1530	if (!res) {
				1531	ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
				1532	ext4_clear_inode_state(inode,
				1533	EXT4_STATE_MAY_INLINE_DATA);
				1534	/*
				1535	* Update inode->i_flags - S_ENCRYPTED will be enabled,
				1536	* S_DAX may be disabled
				1537	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1538	ext4_set_inode_flags(inode, false);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1539	}
				1540	return res;
				1541	}
				1542
				1543	res = dquot_initialize(inode);
				1544	if (res)
				1545	return res;
				1546	retry:
				1547	res = ext4_xattr_set_credits(inode, len, false /* is_create */,
				1548	&credits);
				1549	if (res)
				1550	return res;
				1551
				1552	handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
				1553	if (IS_ERR(handle))
				1554	return PTR_ERR(handle);
				1555
				1556	res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
				1557	EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
				1558	ctx, len, 0);
				1559	if (!res) {
				1560	ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
				1561	/*
				1562	* Update inode->i_flags - S_ENCRYPTED will be enabled,
				1563	* S_DAX may be disabled
				1564	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1565	ext4_set_inode_flags(inode, false);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1566	res = ext4_mark_inode_dirty(handle, inode);
				1567	if (res)
				1568	EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
				1569	}
				1570	res2 = ext4_journal_stop(handle);
				1571
				1572	if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
				1573	goto retry;
				1574	if (!res)
				1575	res = res2;
				1576	return res;
				1577	}
				1578
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1579	static const union fscrypt_policy ext4_get_dummy_policy(struct super_block sb)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1580	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1581	return EXT4_SB(sb)->s_dummy_enc_policy.policy;
				1582	}
				1583
				1584	static bool ext4_has_stable_inodes(struct super_block *sb)
				1585	{
				1586	return ext4_has_feature_stable_inodes(sb);
				1587	}
				1588
				1589	static void ext4_get_ino_and_lblk_bits(struct super_block *sb,
				1590	int ino_bits_ret, int lblk_bits_ret)
				1591	{
				1592	ino_bits_ret = 8 sizeof(EXT4_SB(sb)->s_es->s_inodes_count);
				1593	lblk_bits_ret = 8 sizeof(ext4_lblk_t);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1594	}
				1595
				1596	static const struct fscrypt_operations ext4_cryptops = {
				1597	.key_prefix = "ext4:",
				1598	.get_context = ext4_get_context,
				1599	.set_context = ext4_set_context,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1600	.get_dummy_policy = ext4_get_dummy_policy,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1601	.empty_dir = ext4_empty_dir,
				1602	.max_namelen = EXT4_NAME_LEN,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1603	.has_stable_inodes = ext4_has_stable_inodes,
				1604	.get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1605	};
				1606	#endif
				1607
				1608	#ifdef CONFIG_QUOTA
				1609	static const char * const quotatypes[] = INITQFNAMES;
				1610	#define QTYPE2NAME(t) (quotatypes[t])
				1611
				1612	static int ext4_write_dquot(struct dquot *dquot);
				1613	static int ext4_acquire_dquot(struct dquot *dquot);
				1614	static int ext4_release_dquot(struct dquot *dquot);
				1615	static int ext4_mark_dquot_dirty(struct dquot *dquot);
				1616	static int ext4_write_info(struct super_block *sb, int type);
				1617	static int ext4_quota_on(struct super_block *sb, int type, int format_id,
				1618	const struct path *path);
				1619	static int ext4_quota_on_mount(struct super_block *sb, int type);
				1620	static ssize_t ext4_quota_read(struct super_block sb, int type, char data,
				1621	size_t len, loff_t off);
				1622	static ssize_t ext4_quota_write(struct super_block *sb, int type,
				1623	const char *data, size_t len, loff_t off);
				1624	static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
				1625	unsigned int flags);
				1626	static int ext4_enable_quotas(struct super_block *sb);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1627
				1628	static struct dquot *ext4_get_dquots(struct inode inode)
				1629	{
				1630	return EXT4_I(inode)->i_dquot;
				1631	}
				1632
				1633	static const struct dquot_operations ext4_quota_operations = {
				1634	.get_reserved_space = ext4_get_reserved_space,
				1635	.write_dquot = ext4_write_dquot,
				1636	.acquire_dquot = ext4_acquire_dquot,
				1637	.release_dquot = ext4_release_dquot,
				1638	.mark_dirty = ext4_mark_dquot_dirty,
				1639	.write_info = ext4_write_info,
				1640	.alloc_dquot = dquot_alloc,
				1641	.destroy_dquot = dquot_destroy,
				1642	.get_projid = ext4_get_projid,
				1643	.get_inode_usage = ext4_get_inode_usage,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1644	.get_next_id = dquot_get_next_id,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1645	};
				1646
				1647	static const struct quotactl_ops ext4_qctl_operations = {
				1648	.quota_on = ext4_quota_on,
				1649	.quota_off = ext4_quota_off,
				1650	.quota_sync = dquot_quota_sync,
				1651	.get_state = dquot_get_state,
				1652	.set_info = dquot_set_dqinfo,
				1653	.get_dqblk = dquot_get_dqblk,
				1654	.set_dqblk = dquot_set_dqblk,
				1655	.get_nextdqblk = dquot_get_next_dqblk,
				1656	};
				1657	#endif
				1658
				1659	static const struct super_operations ext4_sops = {
				1660	.alloc_inode = ext4_alloc_inode,
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1661	.free_inode = ext4_free_in_core_inode,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1662	.destroy_inode = ext4_destroy_inode,
				1663	.write_inode = ext4_write_inode,
				1664	.dirty_inode = ext4_dirty_inode,
				1665	.drop_inode = ext4_drop_inode,
				1666	.evict_inode = ext4_evict_inode,
				1667	.put_super = ext4_put_super,
				1668	.sync_fs = ext4_sync_fs,
				1669	.freeze_fs = ext4_freeze,
				1670	.unfreeze_fs = ext4_unfreeze,
				1671	.statfs = ext4_statfs,
				1672	.remount_fs = ext4_remount,
				1673	.show_options = ext4_show_options,
				1674	#ifdef CONFIG_QUOTA
				1675	.quota_read = ext4_quota_read,
				1676	.quota_write = ext4_quota_write,
				1677	.get_dquots = ext4_get_dquots,
				1678	#endif
				1679	.bdev_try_to_free_page = bdev_try_to_free_page,
				1680	};
				1681
				1682	static const struct export_operations ext4_export_ops = {
				1683	.fh_to_dentry = ext4_fh_to_dentry,
				1684	.fh_to_parent = ext4_fh_to_parent,
				1685	.get_parent = ext4_get_parent,
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1686	.commit_metadata = ext4_nfs_commit_metadata,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1687	};
				1688
				1689	enum {
				1690	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
				1691	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
				1692	Opt_nouid32, Opt_debug, Opt_removed,
				1693	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
				1694	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
				1695	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
				1696	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
				1697	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
				1698	Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1699	Opt_inlinecrypt,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1700	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
				1701	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
				1702	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1703	Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
				1704	Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1705	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
				1706	Opt_nowarn_on_error, Opt_mblk_io_submit,
				1707	Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
				1708	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
				1709	Opt_inode_readahead_blks, Opt_journal_ioprio,
				1710	Opt_dioread_nolock, Opt_dioread_lock,
				1711	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
				1712	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1713	Opt_prefetch_block_bitmaps,
				1714	#ifdef CONFIG_EXT4_DEBUG
				1715	Opt_fc_debug_max_replay, Opt_fc_debug_force
				1716	#endif
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1717	};
				1718
				1719	static const match_table_t tokens = {
				1720	{Opt_bsd_df, "bsddf"},
				1721	{Opt_minix_df, "minixdf"},
				1722	{Opt_grpid, "grpid"},
				1723	{Opt_grpid, "bsdgroups"},
				1724	{Opt_nogrpid, "nogrpid"},
				1725	{Opt_nogrpid, "sysvgroups"},
				1726	{Opt_resgid, "resgid=%u"},
				1727	{Opt_resuid, "resuid=%u"},
				1728	{Opt_sb, "sb=%u"},
				1729	{Opt_err_cont, "errors=continue"},
				1730	{Opt_err_panic, "errors=panic"},
				1731	{Opt_err_ro, "errors=remount-ro"},
				1732	{Opt_nouid32, "nouid32"},
				1733	{Opt_debug, "debug"},
				1734	{Opt_removed, "oldalloc"},
				1735	{Opt_removed, "orlov"},
				1736	{Opt_user_xattr, "user_xattr"},
				1737	{Opt_nouser_xattr, "nouser_xattr"},
				1738	{Opt_acl, "acl"},
				1739	{Opt_noacl, "noacl"},
				1740	{Opt_noload, "norecovery"},
				1741	{Opt_noload, "noload"},
				1742	{Opt_removed, "nobh"},
				1743	{Opt_removed, "bh"},
				1744	{Opt_commit, "commit=%u"},
				1745	{Opt_min_batch_time, "min_batch_time=%u"},
				1746	{Opt_max_batch_time, "max_batch_time=%u"},
				1747	{Opt_journal_dev, "journal_dev=%u"},
				1748	{Opt_journal_path, "journal_path=%s"},
				1749	{Opt_journal_checksum, "journal_checksum"},
				1750	{Opt_nojournal_checksum, "nojournal_checksum"},
				1751	{Opt_journal_async_commit, "journal_async_commit"},
				1752	{Opt_abort, "abort"},
				1753	{Opt_data_journal, "data=journal"},
				1754	{Opt_data_ordered, "data=ordered"},
				1755	{Opt_data_writeback, "data=writeback"},
				1756	{Opt_data_err_abort, "data_err=abort"},
				1757	{Opt_data_err_ignore, "data_err=ignore"},
				1758	{Opt_offusrjquota, "usrjquota="},
				1759	{Opt_usrjquota, "usrjquota=%s"},
				1760	{Opt_offgrpjquota, "grpjquota="},
				1761	{Opt_grpjquota, "grpjquota=%s"},
				1762	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
				1763	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
				1764	{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
				1765	{Opt_grpquota, "grpquota"},
				1766	{Opt_noquota, "noquota"},
				1767	{Opt_quota, "quota"},
				1768	{Opt_usrquota, "usrquota"},
				1769	{Opt_prjquota, "prjquota"},
				1770	{Opt_barrier, "barrier=%u"},
				1771	{Opt_barrier, "barrier"},
				1772	{Opt_nobarrier, "nobarrier"},
				1773	{Opt_i_version, "i_version"},
				1774	{Opt_dax, "dax"},
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1775	{Opt_dax_always, "dax=always"},
				1776	{Opt_dax_inode, "dax=inode"},
				1777	{Opt_dax_never, "dax=never"},
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1778	{Opt_stripe, "stripe=%u"},
				1779	{Opt_delalloc, "delalloc"},
				1780	{Opt_warn_on_error, "warn_on_error"},
				1781	{Opt_nowarn_on_error, "nowarn_on_error"},
				1782	{Opt_lazytime, "lazytime"},
				1783	{Opt_nolazytime, "nolazytime"},
				1784	{Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
				1785	{Opt_nodelalloc, "nodelalloc"},
				1786	{Opt_removed, "mblk_io_submit"},
				1787	{Opt_removed, "nomblk_io_submit"},
				1788	{Opt_block_validity, "block_validity"},
				1789	{Opt_noblock_validity, "noblock_validity"},
				1790	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
				1791	{Opt_journal_ioprio, "journal_ioprio=%u"},
				1792	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
				1793	{Opt_auto_da_alloc, "auto_da_alloc"},
				1794	{Opt_noauto_da_alloc, "noauto_da_alloc"},
				1795	{Opt_dioread_nolock, "dioread_nolock"},
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1796	{Opt_dioread_lock, "nodioread_nolock"},
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1797	{Opt_dioread_lock, "dioread_lock"},
				1798	{Opt_discard, "discard"},
				1799	{Opt_nodiscard, "nodiscard"},
				1800	{Opt_init_itable, "init_itable=%u"},
				1801	{Opt_init_itable, "init_itable"},
				1802	{Opt_noinit_itable, "noinit_itable"},
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1803	#ifdef CONFIG_EXT4_DEBUG
				1804	{Opt_fc_debug_force, "fc_debug_force"},
				1805	{Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"},
				1806	#endif
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1807	{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1808	{Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1809	{Opt_test_dummy_encryption, "test_dummy_encryption"},
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1810	{Opt_inlinecrypt, "inlinecrypt"},
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1811	{Opt_nombcache, "nombcache"},
				1812	{Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1813	{Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1814	{Opt_removed, "check=none"}, /* mount option from ext2/3 */
				1815	{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
				1816	{Opt_removed, "reservation"}, /* mount option from ext2/3 */
				1817	{Opt_removed, "noreservation"}, /* mount option from ext2/3 */
				1818	{Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
				1819	{Opt_err, NULL},
				1820	};
				1821
				1822	static ext4_fsblk_t get_sb_block(void **data)
				1823	{
				1824	ext4_fsblk_t sb_block;
				1825	char options = (char ) *data;
				1826
				1827	if (!options \|\| strncmp(options, "sb=", 3) != 0)
				1828	return 1; /* Default location */
				1829
				1830	options += 3;
				1831	/* TODO: use simple_strtoll with >32bit ext4 */
				1832	sb_block = simple_strtoul(options, &options, 0);
				1833	if (options && options != ',') {
				1834	printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
				1835	(char ) data);
				1836	return 1;
				1837	}
				1838	if (*options == ',')
				1839	options++;
				1840	data = (void ) options;
				1841
				1842	return sb_block;
				1843	}
				1844
				1845	#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
				1846	static const char deprecated_msg[] =
				1847	"Mount option \"%s\" will be removed by %s\n"
				1848	"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
				1849
				1850	#ifdef CONFIG_QUOTA
				1851	static int set_qf_name(struct super_block sb, int qtype, substring_t args)
				1852	{
				1853	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1854	char qname, old_qname = get_qf_name(sb, sbi, qtype);
				1855	int ret = -1;
				1856
				1857	if (sb_any_quota_loaded(sb) && !old_qname) {
				1858	ext4_msg(sb, KERN_ERR,
				1859	"Cannot change journaled "
				1860	"quota options when quota turned on");
				1861	return -1;
				1862	}
				1863	if (ext4_has_feature_quota(sb)) {
				1864	ext4_msg(sb, KERN_INFO, "Journaled quota options "
				1865	"ignored when QUOTA feature is enabled");
				1866	return 1;
				1867	}
				1868	qname = match_strdup(args);
				1869	if (!qname) {
				1870	ext4_msg(sb, KERN_ERR,
				1871	"Not enough memory for storing quotafile name");
				1872	return -1;
				1873	}
				1874	if (old_qname) {
				1875	if (strcmp(old_qname, qname) == 0)
				1876	ret = 1;
				1877	else
				1878	ext4_msg(sb, KERN_ERR,
				1879	"%s quota file already specified",
				1880	QTYPE2NAME(qtype));
				1881	goto errout;
				1882	}
				1883	if (strchr(qname, '/')) {
				1884	ext4_msg(sb, KERN_ERR,
				1885	"quotafile must be on filesystem root");
				1886	goto errout;
				1887	}
				1888	rcu_assign_pointer(sbi->s_qf_names[qtype], qname);
				1889	set_opt(sb, QUOTA);
				1890	return 1;
				1891	errout:
				1892	kfree(qname);
				1893	return ret;
				1894	}
				1895
				1896	static int clear_qf_name(struct super_block *sb, int qtype)
				1897	{
				1898
				1899	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1900	char *old_qname = get_qf_name(sb, sbi, qtype);
				1901
				1902	if (sb_any_quota_loaded(sb) && old_qname) {
				1903	ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
				1904	" when quota turned on");
				1905	return -1;
				1906	}
				1907	rcu_assign_pointer(sbi->s_qf_names[qtype], NULL);
				1908	synchronize_rcu();
				1909	kfree(old_qname);
				1910	return 1;
				1911	}
				1912	#endif
				1913
				1914	#define MOPT_SET 0x0001
				1915	#define MOPT_CLEAR 0x0002
				1916	#define MOPT_NOSUPPORT 0x0004
				1917	#define MOPT_EXPLICIT 0x0008
				1918	#define MOPT_CLEAR_ERR 0x0010
				1919	#define MOPT_GTE0 0x0020
				1920	#ifdef CONFIG_QUOTA
				1921	#define MOPT_Q 0
				1922	#define MOPT_QFMT 0x0040
				1923	#else
				1924	#define MOPT_Q MOPT_NOSUPPORT
				1925	#define MOPT_QFMT MOPT_NOSUPPORT
				1926	#endif
				1927	#define MOPT_DATAJ 0x0080
				1928	#define MOPT_NO_EXT2 0x0100
				1929	#define MOPT_NO_EXT3 0x0200
				1930	#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 \| MOPT_NO_EXT3)
				1931	#define MOPT_STRING 0x0400
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1932	#define MOPT_SKIP 0x0800
				1933	#define MOPT_2 0x1000
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1934
				1935	static const struct mount_opts {
				1936	int token;
				1937	int mount_opt;
				1938	int flags;
				1939	} ext4_mount_opts[] = {
				1940	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
				1941	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
				1942	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
				1943	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
				1944	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
				1945	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
				1946	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
				1947	MOPT_EXT4_ONLY \| MOPT_SET},
				1948	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
				1949	MOPT_EXT4_ONLY \| MOPT_CLEAR},
				1950	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
				1951	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
				1952	{Opt_delalloc, EXT4_MOUNT_DELALLOC,
				1953	MOPT_EXT4_ONLY \| MOPT_SET \| MOPT_EXPLICIT},
				1954	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
				1955	MOPT_EXT4_ONLY \| MOPT_CLEAR},
				1956	{Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
				1957	{Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
				1958	{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
				1959	MOPT_EXT4_ONLY \| MOPT_CLEAR},
				1960	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
				1961	MOPT_EXT4_ONLY \| MOPT_SET \| MOPT_EXPLICIT},
				1962	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT \|
				1963	EXT4_MOUNT_JOURNAL_CHECKSUM),
				1964	MOPT_EXT4_ONLY \| MOPT_SET \| MOPT_EXPLICIT},
				1965	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 \| MOPT_SET},
				1966	{Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET \| MOPT_CLEAR_ERR},
				1967	{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET \| MOPT_CLEAR_ERR},
				1968	{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET \| MOPT_CLEAR_ERR},
				1969	{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
				1970	MOPT_NO_EXT2},
				1971	{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
				1972	MOPT_NO_EXT2},
				1973	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
				1974	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
				1975	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
				1976	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
				1977	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
				1978	{Opt_commit, 0, MOPT_GTE0},
				1979	{Opt_max_batch_time, 0, MOPT_GTE0},
				1980	{Opt_min_batch_time, 0, MOPT_GTE0},
				1981	{Opt_inode_readahead_blks, 0, MOPT_GTE0},
				1982	{Opt_init_itable, 0, MOPT_GTE0},
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1983	{Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET \| MOPT_SKIP},
				1984	{Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
				1985	MOPT_EXT4_ONLY \| MOPT_SET \| MOPT_SKIP},
				1986	{Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
				1987	MOPT_EXT4_ONLY \| MOPT_SET \| MOPT_SKIP},
				1988	{Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
				1989	MOPT_EXT4_ONLY \| MOPT_SET \| MOPT_SKIP},
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1990	{Opt_stripe, 0, MOPT_GTE0},
				1991	{Opt_resuid, 0, MOPT_GTE0},
				1992	{Opt_resgid, 0, MOPT_GTE0},
				1993	{Opt_journal_dev, 0, MOPT_NO_EXT2 \| MOPT_GTE0},
				1994	{Opt_journal_path, 0, MOPT_NO_EXT2 \| MOPT_STRING},
				1995	{Opt_journal_ioprio, 0, MOPT_NO_EXT2 \| MOPT_GTE0},
				1996	{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 \| MOPT_DATAJ},
				1997	{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 \| MOPT_DATAJ},
				1998	{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
				1999	MOPT_NO_EXT2 \| MOPT_DATAJ},
				2000	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
				2001	{Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
				2002	#ifdef CONFIG_EXT4_FS_POSIX_ACL
				2003	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
				2004	{Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
				2005	#else
				2006	{Opt_acl, 0, MOPT_NOSUPPORT},
				2007	{Opt_noacl, 0, MOPT_NOSUPPORT},
				2008	#endif
				2009	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
				2010	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
				2011	{Opt_debug_want_extra_isize, 0, MOPT_GTE0},
				2012	{Opt_quota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_USRQUOTA, MOPT_SET \| MOPT_Q},
				2013	{Opt_usrquota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_USRQUOTA,
				2014	MOPT_SET \| MOPT_Q},
				2015	{Opt_grpquota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_GRPQUOTA,
				2016	MOPT_SET \| MOPT_Q},
				2017	{Opt_prjquota, EXT4_MOUNT_QUOTA \| EXT4_MOUNT_PRJQUOTA,
				2018	MOPT_SET \| MOPT_Q},
				2019	{Opt_noquota, (EXT4_MOUNT_QUOTA \| EXT4_MOUNT_USRQUOTA \|
				2020	EXT4_MOUNT_GRPQUOTA \| EXT4_MOUNT_PRJQUOTA),
				2021	MOPT_CLEAR \| MOPT_Q},
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2022	{Opt_usrjquota, 0, MOPT_Q \| MOPT_STRING},
				2023	{Opt_grpjquota, 0, MOPT_Q \| MOPT_STRING},
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2024	{Opt_offusrjquota, 0, MOPT_Q},
				2025	{Opt_offgrpjquota, 0, MOPT_Q},
				2026	{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
				2027	{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
				2028	{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
				2029	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2030	{Opt_test_dummy_encryption, 0, MOPT_STRING},
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2031	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2032	{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
				2033	MOPT_SET},
				2034	#ifdef CONFIG_EXT4_DEBUG
				2035	{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
				2036	MOPT_SET \| MOPT_2 \| MOPT_EXT4_ONLY},
				2037	{Opt_fc_debug_max_replay, 0, MOPT_GTE0},
				2038	#endif
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2039	{Opt_err, 0, 0}
				2040	};
				2041
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2042	#ifdef CONFIG_UNICODE
				2043	static const struct ext4_sb_encodings {
				2044	__u16 magic;
				2045	char *name;
				2046	char *version;
				2047	} ext4_sb_encoding_map[] = {
				2048	{EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
				2049	};
				2050
				2051	static int ext4_sb_read_encoding(const struct ext4_super_block *es,
				2052	const struct ext4_sb_encodings **encoding,
				2053	__u16 *flags)
				2054	{
				2055	__u16 magic = le16_to_cpu(es->s_encoding);
				2056	int i;
				2057
				2058	for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
				2059	if (magic == ext4_sb_encoding_map[i].magic)
				2060	break;
				2061
				2062	if (i >= ARRAY_SIZE(ext4_sb_encoding_map))
				2063	return -EINVAL;
				2064
				2065	*encoding = &ext4_sb_encoding_map[i];
				2066	*flags = le16_to_cpu(es->s_encoding_flags);
				2067
				2068	return 0;
				2069	}
				2070	#endif
				2071
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2072	static int ext4_set_test_dummy_encryption(struct super_block *sb,
				2073	const char *opt,
				2074	const substring_t *arg,
				2075	bool is_remount)
				2076	{
				2077	#ifdef CONFIG_FS_ENCRYPTION
				2078	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2079	int err;
				2080
				2081	/*
				2082	* This mount option is just for testing, and it's not worthwhile to
				2083	* implement the extra complexity (e.g. RCU protection) that would be
				2084	* needed to allow it to be set or changed during remount. We do allow
				2085	* it to be specified during remount, but only if there is no change.
				2086	*/
				2087	if (is_remount && !sbi->s_dummy_enc_policy.policy) {
				2088	ext4_msg(sb, KERN_WARNING,
				2089	"Can't set test_dummy_encryption on remount");
				2090	return -1;
				2091	}
				2092	err = fscrypt_set_test_dummy_encryption(sb, arg->from,
				2093	&sbi->s_dummy_enc_policy);
				2094	if (err) {
				2095	if (err == -EEXIST)
				2096	ext4_msg(sb, KERN_WARNING,
				2097	"Can't change test_dummy_encryption on remount");
				2098	else if (err == -EINVAL)
				2099	ext4_msg(sb, KERN_WARNING,
				2100	"Value of option \"%s\" is unrecognized", opt);
				2101	else
				2102	ext4_msg(sb, KERN_WARNING,
				2103	"Error processing option \"%s\" [%d]",
				2104	opt, err);
				2105	return -1;
				2106	}
				2107	ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
				2108	#else
				2109	ext4_msg(sb, KERN_WARNING,
				2110	"Test dummy encryption mount option ignored");
				2111	#endif
				2112	return 1;
				2113	}
				2114
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2115	static int handle_mount_opt(struct super_block sb, char opt, int token,
				2116	substring_t args, unsigned long journal_devnum,
				2117	unsigned int *journal_ioprio, int is_remount)
				2118	{
				2119	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2120	const struct mount_opts *m;
				2121	kuid_t uid;
				2122	kgid_t gid;
				2123	int arg = 0;
				2124
				2125	#ifdef CONFIG_QUOTA
				2126	if (token == Opt_usrjquota)
				2127	return set_qf_name(sb, USRQUOTA, &args[0]);
				2128	else if (token == Opt_grpjquota)
				2129	return set_qf_name(sb, GRPQUOTA, &args[0]);
				2130	else if (token == Opt_offusrjquota)
				2131	return clear_qf_name(sb, USRQUOTA);
				2132	else if (token == Opt_offgrpjquota)
				2133	return clear_qf_name(sb, GRPQUOTA);
				2134	#endif
				2135	switch (token) {
				2136	case Opt_noacl:
				2137	case Opt_nouser_xattr:
				2138	ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
				2139	break;
				2140	case Opt_sb:
				2141	return 1; /* handled by get_sb_block() */
				2142	case Opt_removed:
				2143	ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
				2144	return 1;
				2145	case Opt_abort:
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2146	ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2147	return 1;
				2148	case Opt_i_version:
				2149	sb->s_flags \|= SB_I_VERSION;
				2150	return 1;
				2151	case Opt_lazytime:
				2152	sb->s_flags \|= SB_LAZYTIME;
				2153	return 1;
				2154	case Opt_nolazytime:
				2155	sb->s_flags &= ~SB_LAZYTIME;
				2156	return 1;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2157	case Opt_inlinecrypt:
				2158	#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
				2159	sb->s_flags \|= SB_INLINECRYPT;
				2160	#else
				2161	ext4_msg(sb, KERN_ERR, "inline encryption not supported");
				2162	#endif
				2163	return 1;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2164	}
				2165
				2166	for (m = ext4_mount_opts; m->token != Opt_err; m++)
				2167	if (token == m->token)
				2168	break;
				2169
				2170	if (m->token == Opt_err) {
				2171	ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
				2172	"or missing value", opt);
				2173	return -1;
				2174	}
				2175
				2176	if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
				2177	ext4_msg(sb, KERN_ERR,
				2178	"Mount option \"%s\" incompatible with ext2", opt);
				2179	return -1;
				2180	}
				2181	if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
				2182	ext4_msg(sb, KERN_ERR,
				2183	"Mount option \"%s\" incompatible with ext3", opt);
				2184	return -1;
				2185	}
				2186
				2187	if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
				2188	return -1;
				2189	if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
				2190	return -1;
				2191	if (m->flags & MOPT_EXPLICIT) {
				2192	if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
				2193	set_opt2(sb, EXPLICIT_DELALLOC);
				2194	} else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
				2195	set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM);
				2196	} else
				2197	return -1;
				2198	}
				2199	if (m->flags & MOPT_CLEAR_ERR)
				2200	clear_opt(sb, ERRORS_MASK);
				2201	if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
				2202	ext4_msg(sb, KERN_ERR, "Cannot change quota "
				2203	"options when quota turned on");
				2204	return -1;
				2205	}
				2206
				2207	if (m->flags & MOPT_NOSUPPORT) {
				2208	ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
				2209	} else if (token == Opt_commit) {
				2210	if (arg == 0)
				2211	arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2212	else if (arg > INT_MAX / HZ) {
				2213	ext4_msg(sb, KERN_ERR,
				2214	"Invalid commit interval %d, "
				2215	"must be smaller than %d",
				2216	arg, INT_MAX / HZ);
				2217	return -1;
				2218	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2219	sbi->s_commit_interval = HZ * arg;
				2220	} else if (token == Opt_debug_want_extra_isize) {
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2221	if ((arg & 1) \|\|
				2222	(arg < 4) \|\|
				2223	(arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) {
				2224	ext4_msg(sb, KERN_ERR,
				2225	"Invalid want_extra_isize %d", arg);
				2226	return -1;
				2227	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2228	sbi->s_want_extra_isize = arg;
				2229	} else if (token == Opt_max_batch_time) {
				2230	sbi->s_max_batch_time = arg;
				2231	} else if (token == Opt_min_batch_time) {
				2232	sbi->s_min_batch_time = arg;
				2233	} else if (token == Opt_inode_readahead_blks) {
				2234	if (arg && (arg > (1 << 30) \|\| !is_power_of_2(arg))) {
				2235	ext4_msg(sb, KERN_ERR,
				2236	"EXT4-fs: inode_readahead_blks must be "
				2237	"0 or a power of 2 smaller than 2^31");
				2238	return -1;
				2239	}
				2240	sbi->s_inode_readahead_blks = arg;
				2241	} else if (token == Opt_init_itable) {
				2242	set_opt(sb, INIT_INODE_TABLE);
				2243	if (!args->from)
				2244	arg = EXT4_DEF_LI_WAIT_MULT;
				2245	sbi->s_li_wait_mult = arg;
				2246	} else if (token == Opt_max_dir_size_kb) {
				2247	sbi->s_max_dir_size_kb = arg;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2248	#ifdef CONFIG_EXT4_DEBUG
				2249	} else if (token == Opt_fc_debug_max_replay) {
				2250	sbi->s_fc_debug_max_replay = arg;
				2251	#endif
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2252	} else if (token == Opt_stripe) {
				2253	sbi->s_stripe = arg;
				2254	} else if (token == Opt_resuid) {
				2255	uid = make_kuid(current_user_ns(), arg);
				2256	if (!uid_valid(uid)) {
				2257	ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
				2258	return -1;
				2259	}
				2260	sbi->s_resuid = uid;
				2261	} else if (token == Opt_resgid) {
				2262	gid = make_kgid(current_user_ns(), arg);
				2263	if (!gid_valid(gid)) {
				2264	ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
				2265	return -1;
				2266	}
				2267	sbi->s_resgid = gid;
				2268	} else if (token == Opt_journal_dev) {
				2269	if (is_remount) {
				2270	ext4_msg(sb, KERN_ERR,
				2271	"Cannot specify journal on remount");
				2272	return -1;
				2273	}
				2274	*journal_devnum = arg;
				2275	} else if (token == Opt_journal_path) {
				2276	char *journal_path;
				2277	struct inode *journal_inode;
				2278	struct path path;
				2279	int error;
				2280
				2281	if (is_remount) {
				2282	ext4_msg(sb, KERN_ERR,
				2283	"Cannot specify journal on remount");
				2284	return -1;
				2285	}
				2286	journal_path = match_strdup(&args[0]);
				2287	if (!journal_path) {
				2288	ext4_msg(sb, KERN_ERR, "error: could not dup "
				2289	"journal device string");
				2290	return -1;
				2291	}
				2292
				2293	error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
				2294	if (error) {
				2295	ext4_msg(sb, KERN_ERR, "error: could not find "
				2296	"journal device path: error %d", error);
				2297	kfree(journal_path);
				2298	return -1;
				2299	}
				2300
				2301	journal_inode = d_inode(path.dentry);
				2302	if (!S_ISBLK(journal_inode->i_mode)) {
				2303	ext4_msg(sb, KERN_ERR, "error: journal path %s "
				2304	"is not a block device", journal_path);
				2305	path_put(&path);
				2306	kfree(journal_path);
				2307	return -1;
				2308	}
				2309
				2310	*journal_devnum = new_encode_dev(journal_inode->i_rdev);
				2311	path_put(&path);
				2312	kfree(journal_path);
				2313	} else if (token == Opt_journal_ioprio) {
				2314	if (arg > 7) {
				2315	ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
				2316	" (must be 0-7)");
				2317	return -1;
				2318	}
				2319	*journal_ioprio =
				2320	IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
				2321	} else if (token == Opt_test_dummy_encryption) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2322	return ext4_set_test_dummy_encryption(sb, opt, &args[0],
				2323	is_remount);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2324	} else if (m->flags & MOPT_DATAJ) {
				2325	if (is_remount) {
				2326	if (!sbi->s_journal)
				2327	ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
				2328	else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
				2329	ext4_msg(sb, KERN_ERR,
				2330	"Cannot change data mode on remount");
				2331	return -1;
				2332	}
				2333	} else {
				2334	clear_opt(sb, DATA_FLAGS);
				2335	sbi->s_mount_opt \|= m->mount_opt;
				2336	}
				2337	#ifdef CONFIG_QUOTA
				2338	} else if (m->flags & MOPT_QFMT) {
				2339	if (sb_any_quota_loaded(sb) &&
				2340	sbi->s_jquota_fmt != m->mount_opt) {
				2341	ext4_msg(sb, KERN_ERR, "Cannot change journaled "
				2342	"quota options when quota turned on");
				2343	return -1;
				2344	}
				2345	if (ext4_has_feature_quota(sb)) {
				2346	ext4_msg(sb, KERN_INFO,
				2347	"Quota format mount options ignored "
				2348	"when QUOTA feature is enabled");
				2349	return 1;
				2350	}
				2351	sbi->s_jquota_fmt = m->mount_opt;
				2352	#endif
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2353	} else if (token == Opt_dax \|\| token == Opt_dax_always \|\|
				2354	token == Opt_dax_inode \|\| token == Opt_dax_never) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2355	#ifdef CONFIG_FS_DAX
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2356	switch (token) {
				2357	case Opt_dax:
				2358	case Opt_dax_always:
				2359	if (is_remount &&
				2360	(!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) \|\|
				2361	(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
				2362	fail_dax_change_remount:
				2363	ext4_msg(sb, KERN_ERR, "can't change "
				2364	"dax mount option while remounting");
				2365	return -1;
				2366	}
				2367	if (is_remount &&
				2368	(test_opt(sb, DATA_FLAGS) ==
				2369	EXT4_MOUNT_JOURNAL_DATA)) {
				2370	ext4_msg(sb, KERN_ERR, "can't mount with "
				2371	"both data=journal and dax");
				2372	return -1;
				2373	}
				2374	ext4_msg(sb, KERN_WARNING,
				2375	"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
				2376	sbi->s_mount_opt \|= EXT4_MOUNT_DAX_ALWAYS;
				2377	sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
				2378	break;
				2379	case Opt_dax_never:
				2380	if (is_remount &&
				2381	(!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) \|\|
				2382	(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS)))
				2383	goto fail_dax_change_remount;
				2384	sbi->s_mount_opt2 \|= EXT4_MOUNT2_DAX_NEVER;
				2385	sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
				2386	break;
				2387	case Opt_dax_inode:
				2388	if (is_remount &&
				2389	((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) \|\|
				2390	(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) \|\|
				2391	!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE)))
				2392	goto fail_dax_change_remount;
				2393	sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
				2394	sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
				2395	/* Strictly for printing options */
				2396	sbi->s_mount_opt2 \|= EXT4_MOUNT2_DAX_INODE;
				2397	break;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2398	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2399	#else
				2400	ext4_msg(sb, KERN_INFO, "dax option not supported");
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2401	sbi->s_mount_opt2 \|= EXT4_MOUNT2_DAX_NEVER;
				2402	sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2403	return -1;
				2404	#endif
				2405	} else if (token == Opt_data_err_abort) {
				2406	sbi->s_mount_opt \|= m->mount_opt;
				2407	} else if (token == Opt_data_err_ignore) {
				2408	sbi->s_mount_opt &= ~m->mount_opt;
				2409	} else {
				2410	if (!args->from)
				2411	arg = 1;
				2412	if (m->flags & MOPT_CLEAR)
				2413	arg = !arg;
				2414	else if (unlikely(!(m->flags & MOPT_SET))) {
				2415	ext4_msg(sb, KERN_WARNING,
				2416	"buggy handling of option %s", opt);
				2417	WARN_ON(1);
				2418	return -1;
				2419	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2420	if (m->flags & MOPT_2) {
				2421	if (arg != 0)
				2422	sbi->s_mount_opt2 \|= m->mount_opt;
				2423	else
				2424	sbi->s_mount_opt2 &= ~m->mount_opt;
				2425	} else {
				2426	if (arg != 0)
				2427	sbi->s_mount_opt \|= m->mount_opt;
				2428	else
				2429	sbi->s_mount_opt &= ~m->mount_opt;
				2430	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2431	}
				2432	return 1;
				2433	}
				2434
				2435	static int parse_options(char options, struct super_block sb,
				2436	unsigned long *journal_devnum,
				2437	unsigned int *journal_ioprio,
				2438	int is_remount)
				2439	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2440	struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2441	char p, __maybe_unused usr_qf_name, __maybe_unused *grp_qf_name;
				2442	substring_t args[MAX_OPT_ARGS];
				2443	int token;
				2444
				2445	if (!options)
				2446	return 1;
				2447
				2448	while ((p = strsep(&options, ",")) != NULL) {
				2449	if (!*p)
				2450	continue;
				2451	/*
				2452	* Initialize args struct so we know whether arg was
				2453	* found; some options take optional arguments.
				2454	*/
				2455	args[0].to = args[0].from = NULL;
				2456	token = match_token(p, tokens, args);
				2457	if (handle_mount_opt(sb, p, token, args, journal_devnum,
				2458	journal_ioprio, is_remount) < 0)
				2459	return 0;
				2460	}
				2461	#ifdef CONFIG_QUOTA
				2462	/*
				2463	* We do the test below only for project quotas. 'usrquota' and
				2464	* 'grpquota' mount options are allowed even without quota feature
				2465	* to support legacy quotas in quota files.
				2466	*/
				2467	if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
				2468	ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
				2469	"Cannot enable project quota enforcement.");
				2470	return 0;
				2471	}
				2472	usr_qf_name = get_qf_name(sb, sbi, USRQUOTA);
				2473	grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA);
				2474	if (usr_qf_name \|\| grp_qf_name) {
				2475	if (test_opt(sb, USRQUOTA) && usr_qf_name)
				2476	clear_opt(sb, USRQUOTA);
				2477
				2478	if (test_opt(sb, GRPQUOTA) && grp_qf_name)
				2479	clear_opt(sb, GRPQUOTA);
				2480
				2481	if (test_opt(sb, GRPQUOTA) \|\| test_opt(sb, USRQUOTA)) {
				2482	ext4_msg(sb, KERN_ERR, "old and new quota "
				2483	"format mixing");
				2484	return 0;
				2485	}
				2486
				2487	if (!sbi->s_jquota_fmt) {
				2488	ext4_msg(sb, KERN_ERR, "journaled quota format "
				2489	"not specified");
				2490	return 0;
				2491	}
				2492	}
				2493	#endif
				2494	if (test_opt(sb, DIOREAD_NOLOCK)) {
				2495	int blocksize =
				2496	BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2497	if (blocksize < PAGE_SIZE)
				2498	ext4_msg(sb, KERN_WARNING, "Warning: mounting with an "
				2499	"experimental mount option 'dioread_nolock' "
				2500	"for blocksize < PAGE_SIZE");
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2501	}
				2502	return 1;
				2503	}
				2504
				2505	static inline void ext4_show_quota_options(struct seq_file *seq,
				2506	struct super_block *sb)
				2507	{
				2508	#if defined(CONFIG_QUOTA)
				2509	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2510	char usr_qf_name, grp_qf_name;
				2511
				2512	if (sbi->s_jquota_fmt) {
				2513	char *fmtname = "";
				2514
				2515	switch (sbi->s_jquota_fmt) {
				2516	case QFMT_VFS_OLD:
				2517	fmtname = "vfsold";
				2518	break;
				2519	case QFMT_VFS_V0:
				2520	fmtname = "vfsv0";
				2521	break;
				2522	case QFMT_VFS_V1:
				2523	fmtname = "vfsv1";
				2524	break;
				2525	}
				2526	seq_printf(seq, ",jqfmt=%s", fmtname);
				2527	}
				2528
				2529	rcu_read_lock();
				2530	usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
				2531	grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
				2532	if (usr_qf_name)
				2533	seq_show_option(seq, "usrjquota", usr_qf_name);
				2534	if (grp_qf_name)
				2535	seq_show_option(seq, "grpjquota", grp_qf_name);
				2536	rcu_read_unlock();
				2537	#endif
				2538	}
				2539
				2540	static const char *token2str(int token)
				2541	{
				2542	const struct match_token *t;
				2543
				2544	for (t = tokens; t->token != Opt_err; t++)
				2545	if (t->token == token && !strchr(t->pattern, '='))
				2546	break;
				2547	return t->pattern;
				2548	}
				2549
				2550	/*
				2551	* Show an option if
				2552	* - it's set to a non-default value OR
				2553	* - if the per-sb default is different from the global default
				2554	*/
				2555	static int _ext4_show_options(struct seq_file seq, struct super_block sb,
				2556	int nodefs)
				2557	{
				2558	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2559	struct ext4_super_block *es = sbi->s_es;
				2560	int def_errors, def_mount_opt = sbi->s_def_mount_opt;
				2561	const struct mount_opts *m;
				2562	char sep = nodefs ? '\n' : ',';
				2563
				2564	#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
				2565	#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
				2566
				2567	if (sbi->s_sb_block != 1)
				2568	SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
				2569
				2570	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
				2571	int want_set = m->flags & MOPT_SET;
				2572	if (((m->flags & (MOPT_SET\|MOPT_CLEAR)) == 0) \|\|
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2573	(m->flags & MOPT_CLEAR_ERR) \|\| m->flags & MOPT_SKIP)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2574	continue;
				2575	if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
				2576	continue; /* skip if same as the default */
				2577	if ((want_set &&
				2578	(sbi->s_mount_opt & m->mount_opt) != m->mount_opt) \|\|
				2579	(!want_set && (sbi->s_mount_opt & m->mount_opt)))
				2580	continue; /* select Opt_noFoo vs Opt_Foo */
				2581	SEQ_OPTS_PRINT("%s", token2str(m->token));
				2582	}
				2583
				2584	if (nodefs \|\| !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) \|\|
				2585	le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
				2586	SEQ_OPTS_PRINT("resuid=%u",
				2587	from_kuid_munged(&init_user_ns, sbi->s_resuid));
				2588	if (nodefs \|\| !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) \|\|
				2589	le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
				2590	SEQ_OPTS_PRINT("resgid=%u",
				2591	from_kgid_munged(&init_user_ns, sbi->s_resgid));
				2592	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
				2593	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
				2594	SEQ_OPTS_PUTS("errors=remount-ro");
				2595	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
				2596	SEQ_OPTS_PUTS("errors=continue");
				2597	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
				2598	SEQ_OPTS_PUTS("errors=panic");
				2599	if (nodefs \|\| sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
				2600	SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
				2601	if (nodefs \|\| sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
				2602	SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
				2603	if (nodefs \|\| sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
				2604	SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
				2605	if (sb->s_flags & SB_I_VERSION)
				2606	SEQ_OPTS_PUTS("i_version");
				2607	if (nodefs \|\| sbi->s_stripe)
				2608	SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
				2609	if (nodefs \|\| EXT4_MOUNT_DATA_FLAGS &
				2610	(sbi->s_mount_opt ^ def_mount_opt)) {
				2611	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
				2612	SEQ_OPTS_PUTS("data=journal");
				2613	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
				2614	SEQ_OPTS_PUTS("data=ordered");
				2615	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
				2616	SEQ_OPTS_PUTS("data=writeback");
				2617	}
				2618	if (nodefs \|\|
				2619	sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
				2620	SEQ_OPTS_PRINT("inode_readahead_blks=%u",
				2621	sbi->s_inode_readahead_blks);
				2622
				2623	if (test_opt(sb, INIT_INODE_TABLE) && (nodefs \|\|
				2624	(sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
				2625	SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
				2626	if (nodefs \|\| sbi->s_max_dir_size_kb)
				2627	SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
				2628	if (test_opt(sb, DATA_ERR_ABORT))
				2629	SEQ_OPTS_PUTS("data_err=abort");
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2630
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2631	fscrypt_show_test_dummy_encryption(seq, sep, sb);
				2632
				2633	if (sb->s_flags & SB_INLINECRYPT)
				2634	SEQ_OPTS_PUTS("inlinecrypt");
				2635
				2636	if (test_opt(sb, DAX_ALWAYS)) {
				2637	if (IS_EXT2_SB(sb))
				2638	SEQ_OPTS_PUTS("dax");
				2639	else
				2640	SEQ_OPTS_PUTS("dax=always");
				2641	} else if (test_opt2(sb, DAX_NEVER)) {
				2642	SEQ_OPTS_PUTS("dax=never");
				2643	} else if (test_opt2(sb, DAX_INODE)) {
				2644	SEQ_OPTS_PUTS("dax=inode");
				2645	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2646	ext4_show_quota_options(seq, sb);
				2647	return 0;
				2648	}
				2649
				2650	static int ext4_show_options(struct seq_file seq, struct dentry root)
				2651	{
				2652	return _ext4_show_options(seq, root->d_sb, 0);
				2653	}
				2654
				2655	int ext4_seq_options_show(struct seq_file seq, void offset)
				2656	{
				2657	struct super_block *sb = seq->private;
				2658	int rc;
				2659
				2660	seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
				2661	rc = _ext4_show_options(seq, sb, 1);
				2662	seq_puts(seq, "\n");
				2663	return rc;
				2664	}
				2665
				2666	static int ext4_setup_super(struct super_block sb, struct ext4_super_block es,
				2667	int read_only)
				2668	{
				2669	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2670	int err = 0;
				2671
				2672	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
				2673	ext4_msg(sb, KERN_ERR, "revision level too high, "
				2674	"forcing read-only mode");
				2675	err = -EROFS;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2676	goto done;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2677	}
				2678	if (read_only)
				2679	goto done;
				2680	if (!(sbi->s_mount_state & EXT4_VALID_FS))
				2681	ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
				2682	"running e2fsck is recommended");
				2683	else if (sbi->s_mount_state & EXT4_ERROR_FS)
				2684	ext4_msg(sb, KERN_WARNING,
				2685	"warning: mounting fs with errors, "
				2686	"running e2fsck is recommended");
				2687	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
				2688	le16_to_cpu(es->s_mnt_count) >=
				2689	(unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
				2690	ext4_msg(sb, KERN_WARNING,
				2691	"warning: maximal mount count reached, "
				2692	"running e2fsck is recommended");
				2693	else if (le32_to_cpu(es->s_checkinterval) &&
				2694	(ext4_get_tstamp(es, s_lastcheck) +
				2695	le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
				2696	ext4_msg(sb, KERN_WARNING,
				2697	"warning: checktime reached, "
				2698	"running e2fsck is recommended");
				2699	if (!sbi->s_journal)
				2700	es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
				2701	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
				2702	es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
				2703	le16_add_cpu(&es->s_mnt_count, 1);
				2704	ext4_update_tstamp(es, s_mtime);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2705	if (sbi->s_journal)
				2706	ext4_set_feature_journal_needs_recovery(sb);
				2707
				2708	err = ext4_commit_super(sb, 1);
				2709	done:
				2710	if (test_opt(sb, DEBUG))
				2711	printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
				2712	"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
				2713	sb->s_blocksize,
				2714	sbi->s_groups_count,
				2715	EXT4_BLOCKS_PER_GROUP(sb),
				2716	EXT4_INODES_PER_GROUP(sb),
				2717	sbi->s_mount_opt, sbi->s_mount_opt2);
				2718
				2719	cleancache_init_fs(sb);
				2720	return err;
				2721	}
				2722
				2723	int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
				2724	{
				2725	struct ext4_sb_info *sbi = EXT4_SB(sb);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2726	struct flex_groups old_groups, new_groups;
				2727	int size, i, j;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2728
				2729	if (!sbi->s_log_groups_per_flex)
				2730	return 0;
				2731
				2732	size = ext4_flex_group(sbi, ngroup - 1) + 1;
				2733	if (size <= sbi->s_flex_groups_allocated)
				2734	return 0;
				2735
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2736	new_groups = kvzalloc(roundup_pow_of_two(size *
				2737	sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2738	if (!new_groups) {
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2739	ext4_msg(sb, KERN_ERR,
				2740	"not enough memory for %d flex group pointers", size);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2741	return -ENOMEM;
				2742	}
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2743	for (i = sbi->s_flex_groups_allocated; i < size; i++) {
				2744	new_groups[i] = kvzalloc(roundup_pow_of_two(
				2745	sizeof(struct flex_groups)),
				2746	GFP_KERNEL);
				2747	if (!new_groups[i]) {
				2748	for (j = sbi->s_flex_groups_allocated; j < i; j++)
				2749	kvfree(new_groups[j]);
				2750	kvfree(new_groups);
				2751	ext4_msg(sb, KERN_ERR,
				2752	"not enough memory for %d flex groups", size);
				2753	return -ENOMEM;
				2754	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2755	}
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2756	rcu_read_lock();
				2757	old_groups = rcu_dereference(sbi->s_flex_groups);
				2758	if (old_groups)
				2759	memcpy(new_groups, old_groups,
				2760	(sbi->s_flex_groups_allocated *
				2761	sizeof(struct flex_groups *)));
				2762	rcu_read_unlock();
				2763	rcu_assign_pointer(sbi->s_flex_groups, new_groups);
				2764	sbi->s_flex_groups_allocated = size;
				2765	if (old_groups)
				2766	ext4_kvfree_array_rcu(old_groups);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2767	return 0;
				2768	}
				2769
				2770	static int ext4_fill_flex_info(struct super_block *sb)
				2771	{
				2772	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2773	struct ext4_group_desc *gdp = NULL;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2774	struct flex_groups *fg;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2775	ext4_group_t flex_group;
				2776	int i, err;
				2777
				2778	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
				2779	if (sbi->s_log_groups_per_flex < 1 \|\| sbi->s_log_groups_per_flex > 31) {
				2780	sbi->s_log_groups_per_flex = 0;
				2781	return 1;
				2782	}
				2783
				2784	err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
				2785	if (err)
				2786	goto failed;
				2787
				2788	for (i = 0; i < sbi->s_groups_count; i++) {
				2789	gdp = ext4_get_group_desc(sb, i, NULL);
				2790
				2791	flex_group = ext4_flex_group(sbi, i);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2792	fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
				2793	atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2794	atomic64_add(ext4_free_group_clusters(sb, gdp),
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	2795	&fg->free_clusters);
				2796	atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2797	}
				2798
				2799	return 1;
				2800	failed:
				2801	return 0;
				2802	}
				2803
				2804	static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
				2805	struct ext4_group_desc *gdp)
				2806	{
				2807	int offset = offsetof(struct ext4_group_desc, bg_checksum);
				2808	__u16 crc = 0;
				2809	__le32 le_group = cpu_to_le32(block_group);
				2810	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2811
				2812	if (ext4_has_metadata_csum(sbi->s_sb)) {
				2813	/* Use new metadata_csum algorithm */
				2814	__u32 csum32;
				2815	__u16 dummy_csum = 0;
				2816
				2817	csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
				2818	sizeof(le_group));
				2819	csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
				2820	csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
				2821	sizeof(dummy_csum));
				2822	offset += sizeof(dummy_csum);
				2823	if (offset < sbi->s_desc_size)
				2824	csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
				2825	sbi->s_desc_size - offset);
				2826
				2827	crc = csum32 & 0xFFFF;
				2828	goto out;
				2829	}
				2830
				2831	/* old crc16 code */
				2832	if (!ext4_has_feature_gdt_csum(sb))
				2833	return 0;
				2834
				2835	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
				2836	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
				2837	crc = crc16(crc, (__u8 *)gdp, offset);
				2838	offset += sizeof(gdp->bg_checksum); /* skip checksum */
				2839	/* for checksum of struct ext4_group_desc do the rest...*/
				2840	if (ext4_has_feature_64bit(sb) &&
				2841	offset < le16_to_cpu(sbi->s_es->s_desc_size))
				2842	crc = crc16(crc, (__u8 *)gdp + offset,
				2843	le16_to_cpu(sbi->s_es->s_desc_size) -
				2844	offset);
				2845
				2846	out:
				2847	return cpu_to_le16(crc);
				2848	}
				2849
				2850	int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
				2851	struct ext4_group_desc *gdp)
				2852	{
				2853	if (ext4_has_group_desc_csum(sb) &&
				2854	(gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
				2855	return 0;
				2856
				2857	return 1;
				2858	}
				2859
				2860	void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
				2861	struct ext4_group_desc *gdp)
				2862	{
				2863	if (!ext4_has_group_desc_csum(sb))
				2864	return;
				2865	gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
				2866	}
				2867
				2868	/* Called at mount-time, super-block is locked */
				2869	static int ext4_check_descriptors(struct super_block *sb,
				2870	ext4_fsblk_t sb_block,
				2871	ext4_group_t *first_not_zeroed)
				2872	{
				2873	struct ext4_sb_info *sbi = EXT4_SB(sb);
				2874	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
				2875	ext4_fsblk_t last_block;
				2876	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
				2877	ext4_fsblk_t block_bitmap;
				2878	ext4_fsblk_t inode_bitmap;
				2879	ext4_fsblk_t inode_table;
				2880	int flexbg_flag = 0;
				2881	ext4_group_t i, grp = sbi->s_groups_count;
				2882
				2883	if (ext4_has_feature_flex_bg(sb))
				2884	flexbg_flag = 1;
				2885
				2886	ext4_debug("Checking group descriptors");
				2887
				2888	for (i = 0; i < sbi->s_groups_count; i++) {
				2889	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
				2890
				2891	if (i == sbi->s_groups_count - 1 \|\| flexbg_flag)
				2892	last_block = ext4_blocks_count(sbi->s_es) - 1;
				2893	else
				2894	last_block = first_block +
				2895	(EXT4_BLOCKS_PER_GROUP(sb) - 1);
				2896
				2897	if ((grp == sbi->s_groups_count) &&
				2898	!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
				2899	grp = i;
				2900
				2901	block_bitmap = ext4_block_bitmap(sb, gdp);
				2902	if (block_bitmap == sb_block) {
				2903	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2904	"Block bitmap for group %u overlaps "
				2905	"superblock", i);
				2906	if (!sb_rdonly(sb))
				2907	return 0;
				2908	}
				2909	if (block_bitmap >= sb_block + 1 &&
				2910	block_bitmap <= last_bg_block) {
				2911	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2912	"Block bitmap for group %u overlaps "
				2913	"block group descriptors", i);
				2914	if (!sb_rdonly(sb))
				2915	return 0;
				2916	}
				2917	if (block_bitmap < first_block \|\| block_bitmap > last_block) {
				2918	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2919	"Block bitmap for group %u not in group "
				2920	"(block %llu)!", i, block_bitmap);
				2921	return 0;
				2922	}
				2923	inode_bitmap = ext4_inode_bitmap(sb, gdp);
				2924	if (inode_bitmap == sb_block) {
				2925	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2926	"Inode bitmap for group %u overlaps "
				2927	"superblock", i);
				2928	if (!sb_rdonly(sb))
				2929	return 0;
				2930	}
				2931	if (inode_bitmap >= sb_block + 1 &&
				2932	inode_bitmap <= last_bg_block) {
				2933	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2934	"Inode bitmap for group %u overlaps "
				2935	"block group descriptors", i);
				2936	if (!sb_rdonly(sb))
				2937	return 0;
				2938	}
				2939	if (inode_bitmap < first_block \|\| inode_bitmap > last_block) {
				2940	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2941	"Inode bitmap for group %u not in group "
				2942	"(block %llu)!", i, inode_bitmap);
				2943	return 0;
				2944	}
				2945	inode_table = ext4_inode_table(sb, gdp);
				2946	if (inode_table == sb_block) {
				2947	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2948	"Inode table for group %u overlaps "
				2949	"superblock", i);
				2950	if (!sb_rdonly(sb))
				2951	return 0;
				2952	}
				2953	if (inode_table >= sb_block + 1 &&
				2954	inode_table <= last_bg_block) {
				2955	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2956	"Inode table for group %u overlaps "
				2957	"block group descriptors", i);
				2958	if (!sb_rdonly(sb))
				2959	return 0;
				2960	}
				2961	if (inode_table < first_block \|\|
				2962	inode_table + sbi->s_itb_per_group - 1 > last_block) {
				2963	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2964	"Inode table for group %u not in group "
				2965	"(block %llu)!", i, inode_table);
				2966	return 0;
				2967	}
				2968	ext4_lock_group(sb, i);
				2969	if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
				2970	ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				2971	"Checksum for group %u failed (%u!=%u)",
				2972	i, le16_to_cpu(ext4_group_desc_csum(sb, i,
				2973	gdp)), le16_to_cpu(gdp->bg_checksum));
				2974	if (!sb_rdonly(sb)) {
				2975	ext4_unlock_group(sb, i);
				2976	return 0;
				2977	}
				2978	}
				2979	ext4_unlock_group(sb, i);
				2980	if (!flexbg_flag)
				2981	first_block += EXT4_BLOCKS_PER_GROUP(sb);
				2982	}
				2983	if (NULL != first_not_zeroed)
				2984	*first_not_zeroed = grp;
				2985	return 1;
				2986	}
				2987
				2988	/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
				2989	* the superblock) which were deleted from all directories, but held open by
				2990	* a process at the time of a crash. We walk the list and try to delete these
				2991	* inodes at recovery time (only with a read-write filesystem).
				2992	*
				2993	* In order to keep the orphan inode chain consistent during traversal (in
				2994	* case of crash during recovery), we link each inode into the superblock
				2995	* orphan list_head and handle it the same way as an inode deletion during
				2996	* normal operation (which journals the operations for us).
				2997	*
				2998	* We only do an iget() and an iput() on each inode, which is very safe if we
				2999	* accidentally point at an in-use or already deleted inode. The worst that
				3000	* can happen in this case is that we get a "bit already cleared" message from
				3001	* ext4_free_inode(). The only reason we would point at a wrong inode is if
				3002	* e2fsck was run on this filesystem, and it must have already done the orphan
				3003	* inode cleanup for us, so we can safely abort without any further action.
				3004	*/
				3005	static void ext4_orphan_cleanup(struct super_block *sb,
				3006	struct ext4_super_block *es)
				3007	{
				3008	unsigned int s_flags = sb->s_flags;
				3009	int ret, nr_orphans = 0, nr_truncates = 0;
				3010	#ifdef CONFIG_QUOTA
				3011	int quota_update = 0;
				3012	int i;
				3013	#endif
				3014	if (!es->s_last_orphan) {
				3015	jbd_debug(4, "no orphan inodes to clean up\n");
				3016	return;
				3017	}
				3018
				3019	if (bdev_read_only(sb->s_bdev)) {
				3020	ext4_msg(sb, KERN_ERR, "write access "
				3021	"unavailable, skipping orphan cleanup");
				3022	return;
				3023	}
				3024
				3025	/* Check if feature set would not allow a r/w mount */
				3026	if (!ext4_feature_set_ok(sb, 0)) {
				3027	ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
				3028	"unknown ROCOMPAT features");
				3029	return;
				3030	}
				3031
				3032	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
				3033	/* don't clear list on RO mount w/ errors */
				3034	if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
				3035	ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
				3036	"clearing orphan list.\n");
				3037	es->s_last_orphan = 0;
				3038	}
				3039	jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
				3040	return;
				3041	}
				3042
				3043	if (s_flags & SB_RDONLY) {
				3044	ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
				3045	sb->s_flags &= ~SB_RDONLY;
				3046	}
				3047	#ifdef CONFIG_QUOTA
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3048	/*
				3049	* Turn on quotas which were not enabled for read-only mounts if
				3050	* filesystem has quota feature, so that they are updated correctly.
				3051	*/
				3052	if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
				3053	int ret = ext4_enable_quotas(sb);
				3054
				3055	if (!ret)
				3056	quota_update = 1;
				3057	else
				3058	ext4_msg(sb, KERN_ERR,
				3059	"Cannot turn on quotas: error %d", ret);
				3060	}
				3061
				3062	/* Turn on journaled quotas used for old sytle */
				3063	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
				3064	if (EXT4_SB(sb)->s_qf_names[i]) {
				3065	int ret = ext4_quota_on_mount(sb, i);
				3066
				3067	if (!ret)
				3068	quota_update = 1;
				3069	else
				3070	ext4_msg(sb, KERN_ERR,
				3071	"Cannot turn on journaled "
				3072	"quota: type %d: error %d", i, ret);
				3073	}
				3074	}
				3075	#endif
				3076
				3077	while (es->s_last_orphan) {
				3078	struct inode *inode;
				3079
				3080	/*
				3081	* We may have encountered an error during cleanup; if
				3082	* so, skip the rest.
				3083	*/
				3084	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
				3085	jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
				3086	es->s_last_orphan = 0;
				3087	break;
				3088	}
				3089
				3090	inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
				3091	if (IS_ERR(inode)) {
				3092	es->s_last_orphan = 0;
				3093	break;
				3094	}
				3095
				3096	list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
				3097	dquot_initialize(inode);
				3098	if (inode->i_nlink) {
				3099	if (test_opt(sb, DEBUG))
				3100	ext4_msg(sb, KERN_DEBUG,
				3101	"%s: truncating inode %lu to %lld bytes",
				3102	__func__, inode->i_ino, inode->i_size);
				3103	jbd_debug(2, "truncating inode %lu to %lld bytes\n",
				3104	inode->i_ino, inode->i_size);
				3105	inode_lock(inode);
				3106	truncate_inode_pages(inode->i_mapping, inode->i_size);
				3107	ret = ext4_truncate(inode);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	3108	if (ret) {
				3109	/*
				3110	* We need to clean up the in-core orphan list
				3111	* manually if ext4_truncate() failed to get a
				3112	* transaction handle.
				3113	*/
				3114	ext4_orphan_del(NULL, inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3115	ext4_std_error(inode->i_sb, ret);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	3116	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3117	inode_unlock(inode);
				3118	nr_truncates++;
				3119	} else {
				3120	if (test_opt(sb, DEBUG))
				3121	ext4_msg(sb, KERN_DEBUG,
				3122	"%s: deleting unreferenced inode %lu",
				3123	__func__, inode->i_ino);
				3124	jbd_debug(2, "deleting unreferenced inode %lu\n",
				3125	inode->i_ino);
				3126	nr_orphans++;
				3127	}
				3128	iput(inode); /* The delete magic happens here! */
				3129	}
				3130
				3131	#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
				3132
				3133	if (nr_orphans)
				3134	ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
				3135	PLURAL(nr_orphans));
				3136	if (nr_truncates)
				3137	ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
				3138	PLURAL(nr_truncates));
				3139	#ifdef CONFIG_QUOTA
				3140	/* Turn off quotas if they were enabled for orphan cleanup */
				3141	if (quota_update) {
				3142	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
				3143	if (sb_dqopt(sb)->files[i])
				3144	dquot_quota_off(sb, i);
				3145	}
				3146	}
				3147	#endif
				3148	sb->s_flags = s_flags; /* Restore SB_RDONLY status */
				3149	}
				3150
				3151	/*
				3152	* Maximal extent format file size.
				3153	* Resulting logical blkno at s_maxbytes must fit in our on-disk
				3154	* extent format containers, within a sector_t, and within i_blocks
				3155	* in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
				3156	* so that won't be a limiting factor.
				3157	*
				3158	* However there is other limiting factor. We do store extents in the form
				3159	* of starting block and length, hence the resulting length of the extent
				3160	* covering maximum file size must fit into on-disk format containers as
				3161	* well. Given that length is always by 1 unit bigger than max unit (because
				3162	* we count 0 as well) we have to lower the s_maxbytes by one fs block.
				3163	*
				3164	* Note, this does not consider any metadata overhead for vfs i_blocks.
				3165	*/
				3166	static loff_t ext4_max_size(int blkbits, int has_huge_files)
				3167	{
				3168	loff_t res;
				3169	loff_t upper_limit = MAX_LFS_FILESIZE;
				3170
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3171	BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
				3172
				3173	if (!has_huge_files) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3174	upper_limit = (1LL << 32) - 1;
				3175
				3176	/* total blocks in file system block size */
				3177	upper_limit >>= (blkbits - 9);
				3178	upper_limit <<= blkbits;
				3179	}
				3180
				3181	/*
				3182	* 32-bit extent-start container, ee_block. We lower the maxbytes
				3183	* by one fs block, so ee_len can cover the extent of maximum file
				3184	* size
				3185	*/
				3186	res = (1LL << 32) - 1;
				3187	res <<= blkbits;
				3188
				3189	/* Sanity check against vm- & vfs- imposed limits */
				3190	if (res > upper_limit)
				3191	res = upper_limit;
				3192
				3193	return res;
				3194	}
				3195
				3196	/*
				3197	* Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
				3198	* block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
				3199	* We need to be 1 filesystem block less than the 2^48 sector limit.
				3200	*/
				3201	static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
				3202	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3203	unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3204	int meta_blocks;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3205
				3206	/*
				3207	* This is calculated to be the largest file size for a dense, block
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3208	* mapped file such that the file's total number of 512-byte sectors,
				3209	* including data and all indirect blocks, does not exceed (2^48 - 1).
				3210	*
				3211	* __u32 i_blocks_lo and _u16 i_blocks_high represent the total
				3212	* number of 512-byte sectors of the file.
				3213	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3214	if (!has_huge_files) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3215	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3216	* !has_huge_files or implies that the inode i_block field
				3217	* represents total file blocks in 2^32 512-byte sectors ==
				3218	* size of vfs inode i_blocks * 8
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3219	*/
				3220	upper_limit = (1LL << 32) - 1;
				3221
				3222	/* total blocks in file system block size */
				3223	upper_limit >>= (bits - 9);
				3224
				3225	} else {
				3226	/*
				3227	* We use 48 bit ext4_inode i_blocks
				3228	* With EXT4_HUGE_FILE_FL set the i_blocks
				3229	* represent total number of blocks in
				3230	* file system block size
				3231	*/
				3232	upper_limit = (1LL << 48) - 1;
				3233
				3234	}
				3235
				3236	/* indirect blocks */
				3237	meta_blocks = 1;
				3238	/* double indirect blocks */
				3239	meta_blocks += 1 + (1LL << (bits-2));
				3240	/* tripple indirect blocks */
				3241	meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
				3242
				3243	upper_limit -= meta_blocks;
				3244	upper_limit <<= bits;
				3245
				3246	res += 1LL << (bits-2);
				3247	res += 1LL << (2*(bits-2));
				3248	res += 1LL << (3*(bits-2));
				3249	res <<= bits;
				3250	if (res > upper_limit)
				3251	res = upper_limit;
				3252
				3253	if (res > MAX_LFS_FILESIZE)
				3254	res = MAX_LFS_FILESIZE;
				3255
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3256	return (loff_t)res;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3257	}
				3258
				3259	static ext4_fsblk_t descriptor_loc(struct super_block *sb,
				3260	ext4_fsblk_t logical_sb_block, int nr)
				3261	{
				3262	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3263	ext4_group_t bg, first_meta_bg;
				3264	int has_super = 0;
				3265
				3266	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
				3267
				3268	if (!ext4_has_feature_meta_bg(sb) \|\| nr < first_meta_bg)
				3269	return logical_sb_block + nr + 1;
				3270	bg = sbi->s_desc_per_block * nr;
				3271	if (ext4_bg_has_super(sb, bg))
				3272	has_super = 1;
				3273
				3274	/*
				3275	* If we have a meta_bg fs with 1k blocks, group 0's GDT is at
				3276	* block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled
				3277	* on modern mke2fs or blksize > 1k on older mke2fs) then we must
				3278	* compensate.
				3279	*/
				3280	if (sb->s_blocksize == 1024 && nr == 0 &&
				3281	le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
				3282	has_super++;
				3283
				3284	return (has_super + ext4_group_first_block_no(sb, bg));
				3285	}
				3286
				3287	/**
				3288	* ext4_get_stripe_size: Get the stripe size.
				3289	* @sbi: In memory super block info
				3290	*
				3291	* If we have specified it via mount option, then
				3292	* use the mount option value. If the value specified at mount time is
				3293	* greater than the blocks per group use the super block value.
				3294	* If the super block value is greater than blocks per group return 0.
				3295	* Allocator needs it be less than blocks per group.
				3296	*
				3297	*/
				3298	static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
				3299	{
				3300	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
				3301	unsigned long stripe_width =
				3302	le32_to_cpu(sbi->s_es->s_raid_stripe_width);
				3303	int ret;
				3304
				3305	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
				3306	ret = sbi->s_stripe;
				3307	else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
				3308	ret = stripe_width;
				3309	else if (stride && stride <= sbi->s_blocks_per_group)
				3310	ret = stride;
				3311	else
				3312	ret = 0;
				3313
				3314	/*
				3315	* If the stripe width is 1, this makes no sense and
				3316	* we set it to 0 to turn off stripe handling code.
				3317	*/
				3318	if (ret <= 1)
				3319	ret = 0;
				3320
				3321	return ret;
				3322	}
				3323
				3324	/*
				3325	* Check whether this filesystem can be mounted based on
				3326	* the features present and the RDONLY/RDWR mount requested.
				3327	* Returns 1 if this filesystem can be mounted as requested,
				3328	* 0 if it cannot be.
				3329	*/
				3330	static int ext4_feature_set_ok(struct super_block *sb, int readonly)
				3331	{
				3332	if (ext4_has_unknown_ext4_incompat_features(sb)) {
				3333	ext4_msg(sb, KERN_ERR,
				3334	"Couldn't mount because of "
				3335	"unsupported optional features (%x)",
				3336	(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
				3337	~EXT4_FEATURE_INCOMPAT_SUPP));
				3338	return 0;
				3339	}
				3340
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3341	#ifndef CONFIG_UNICODE
				3342	if (ext4_has_feature_casefold(sb)) {
				3343	ext4_msg(sb, KERN_ERR,
				3344	"Filesystem with casefold feature cannot be "
				3345	"mounted without CONFIG_UNICODE");
				3346	return 0;
				3347	}
				3348	#endif
				3349
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3350	if (readonly)
				3351	return 1;
				3352
				3353	if (ext4_has_feature_readonly(sb)) {
				3354	ext4_msg(sb, KERN_INFO, "filesystem is read-only");
				3355	sb->s_flags \|= SB_RDONLY;
				3356	return 1;
				3357	}
				3358
				3359	/* Check that feature set is OK for a read-write mount */
				3360	if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
				3361	ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
				3362	"unsupported optional features (%x)",
				3363	(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
				3364	~EXT4_FEATURE_RO_COMPAT_SUPP));
				3365	return 0;
				3366	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3367	if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
				3368	ext4_msg(sb, KERN_ERR,
				3369	"Can't support bigalloc feature without "
				3370	"extents feature\n");
				3371	return 0;
				3372	}
				3373
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	3374	#if !IS_ENABLED(CONFIG_QUOTA) \|\| !IS_ENABLED(CONFIG_QFMT_V2)
				3375	if (!readonly && (ext4_has_feature_quota(sb) \|\|
				3376	ext4_has_feature_project(sb))) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3377	ext4_msg(sb, KERN_ERR,
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	3378	"The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3379	return 0;
				3380	}
				3381	#endif /* CONFIG_QUOTA */
				3382	return 1;
				3383	}
				3384
				3385	/*
				3386	* This function is called once a day if we have errors logged
				3387	* on the file system
				3388	*/
				3389	static void print_daily_error_info(struct timer_list *t)
				3390	{
				3391	struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
				3392	struct super_block *sb = sbi->s_sb;
				3393	struct ext4_super_block *es = sbi->s_es;
				3394
				3395	if (es->s_error_count)
				3396	/* fsck newer than v1.41.13 is needed to clean this condition. */
				3397	ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
				3398	le32_to_cpu(es->s_error_count));
				3399	if (es->s_first_error_time) {
				3400	printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
				3401	sb->s_id,
				3402	ext4_get_tstamp(es, s_first_error_time),
				3403	(int) sizeof(es->s_first_error_func),
				3404	es->s_first_error_func,
				3405	le32_to_cpu(es->s_first_error_line));
				3406	if (es->s_first_error_ino)
				3407	printk(KERN_CONT ": inode %u",
				3408	le32_to_cpu(es->s_first_error_ino));
				3409	if (es->s_first_error_block)
				3410	printk(KERN_CONT ": block %llu", (unsigned long long)
				3411	le64_to_cpu(es->s_first_error_block));
				3412	printk(KERN_CONT "\n");
				3413	}
				3414	if (es->s_last_error_time) {
				3415	printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
				3416	sb->s_id,
				3417	ext4_get_tstamp(es, s_last_error_time),
				3418	(int) sizeof(es->s_last_error_func),
				3419	es->s_last_error_func,
				3420	le32_to_cpu(es->s_last_error_line));
				3421	if (es->s_last_error_ino)
				3422	printk(KERN_CONT ": inode %u",
				3423	le32_to_cpu(es->s_last_error_ino));
				3424	if (es->s_last_error_block)
				3425	printk(KERN_CONT ": block %llu", (unsigned long long)
				3426	le64_to_cpu(es->s_last_error_block));
				3427	printk(KERN_CONT "\n");
				3428	}
				3429	mod_timer(&sbi->s_err_report, jiffies + 246060HZ); / Once a day */
				3430	}
				3431
				3432	/* Find next suitable group and run ext4_init_inode_table */
				3433	static int ext4_run_li_request(struct ext4_li_request *elr)
				3434	{
				3435	struct ext4_group_desc *gdp = NULL;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3436	struct super_block *sb = elr->lr_super;
				3437	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
				3438	ext4_group_t group = elr->lr_next_group;
				3439	unsigned int prefetch_ios = 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3440	int ret = 0;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3441	u64 start_time;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3442
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3443	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
				3444	elr->lr_next_group = ext4_mb_prefetch(sb, group,
				3445	EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
				3446	if (prefetch_ios)
				3447	ext4_mb_prefetch_fini(sb, elr->lr_next_group,
				3448	prefetch_ios);
				3449	trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
				3450	prefetch_ios);
				3451	if (group >= elr->lr_next_group) {
				3452	ret = 1;
				3453	if (elr->lr_first_not_zeroed != ngroups &&
				3454	!sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
				3455	elr->lr_next_group = elr->lr_first_not_zeroed;
				3456	elr->lr_mode = EXT4_LI_MODE_ITABLE;
				3457	ret = 0;
				3458	}
				3459	}
				3460	return ret;
				3461	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3462
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3463	for (; group < ngroups; group++) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3464	gdp = ext4_get_group_desc(sb, group, NULL);
				3465	if (!gdp) {
				3466	ret = 1;
				3467	break;
				3468	}
				3469
				3470	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
				3471	break;
				3472	}
				3473
				3474	if (group >= ngroups)
				3475	ret = 1;
				3476
				3477	if (!ret) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3478	start_time = ktime_get_real_ns();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3479	ret = ext4_init_inode_table(sb, group,
				3480	elr->lr_timeout ? 0 : 1);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3481	trace_ext4_lazy_itable_init(sb, group);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3482	if (elr->lr_timeout == 0) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3483	elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
				3484	EXT4_SB(elr->lr_super)->s_li_wait_mult);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3485	}
				3486	elr->lr_next_sched = jiffies + elr->lr_timeout;
				3487	elr->lr_next_group = group + 1;
				3488	}
				3489	return ret;
				3490	}
				3491
				3492	/*
				3493	* Remove lr_request from the list_request and free the
				3494	* request structure. Should be called with li_list_mtx held
				3495	*/
				3496	static void ext4_remove_li_request(struct ext4_li_request *elr)
				3497	{
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3498	if (!elr)
				3499	return;
				3500
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3501	list_del(&elr->lr_request);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3502	EXT4_SB(elr->lr_super)->s_li_request = NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3503	kfree(elr);
				3504	}
				3505
				3506	static void ext4_unregister_li_request(struct super_block *sb)
				3507	{
				3508	mutex_lock(&ext4_li_mtx);
				3509	if (!ext4_li_info) {
				3510	mutex_unlock(&ext4_li_mtx);
				3511	return;
				3512	}
				3513
				3514	mutex_lock(&ext4_li_info->li_list_mtx);
				3515	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
				3516	mutex_unlock(&ext4_li_info->li_list_mtx);
				3517	mutex_unlock(&ext4_li_mtx);
				3518	}
				3519
				3520	static struct task_struct *ext4_lazyinit_task;
				3521
				3522	/*
				3523	* This is the function where ext4lazyinit thread lives. It walks
				3524	* through the request list searching for next scheduled filesystem.
				3525	* When such a fs is found, run the lazy initialization request
				3526	* (ext4_rn_li_request) and keep track of the time spend in this
				3527	* function. Based on that time we compute next schedule time of
				3528	* the request. When walking through the list is complete, compute
				3529	* next waking time and put itself into sleep.
				3530	*/
				3531	static int ext4_lazyinit_thread(void *arg)
				3532	{
				3533	struct ext4_lazy_init eli = (struct ext4_lazy_init )arg;
				3534	struct list_head pos, n;
				3535	struct ext4_li_request *elr;
				3536	unsigned long next_wakeup, cur;
				3537
				3538	BUG_ON(NULL == eli);
				3539
				3540	cont_thread:
				3541	while (true) {
				3542	next_wakeup = MAX_JIFFY_OFFSET;
				3543
				3544	mutex_lock(&eli->li_list_mtx);
				3545	if (list_empty(&eli->li_request_list)) {
				3546	mutex_unlock(&eli->li_list_mtx);
				3547	goto exit_thread;
				3548	}
				3549	list_for_each_safe(pos, n, &eli->li_request_list) {
				3550	int err = 0;
				3551	int progress = 0;
				3552	elr = list_entry(pos, struct ext4_li_request,
				3553	lr_request);
				3554
				3555	if (time_before(jiffies, elr->lr_next_sched)) {
				3556	if (time_before(elr->lr_next_sched, next_wakeup))
				3557	next_wakeup = elr->lr_next_sched;
				3558	continue;
				3559	}
				3560	if (down_read_trylock(&elr->lr_super->s_umount)) {
				3561	if (sb_start_write_trylock(elr->lr_super)) {
				3562	progress = 1;
				3563	/*
				3564	* We hold sb->s_umount, sb can not
				3565	* be removed from the list, it is
				3566	* now safe to drop li_list_mtx
				3567	*/
				3568	mutex_unlock(&eli->li_list_mtx);
				3569	err = ext4_run_li_request(elr);
				3570	sb_end_write(elr->lr_super);
				3571	mutex_lock(&eli->li_list_mtx);
				3572	n = pos->next;
				3573	}
				3574	up_read((&elr->lr_super->s_umount));
				3575	}
				3576	/* error, remove the lazy_init job */
				3577	if (err) {
				3578	ext4_remove_li_request(elr);
				3579	continue;
				3580	}
				3581	if (!progress) {
				3582	elr->lr_next_sched = jiffies +
				3583	(prandom_u32()
				3584	% (EXT4_DEF_LI_MAX_START_DELAY * HZ));
				3585	}
				3586	if (time_before(elr->lr_next_sched, next_wakeup))
				3587	next_wakeup = elr->lr_next_sched;
				3588	}
				3589	mutex_unlock(&eli->li_list_mtx);
				3590
				3591	try_to_freeze();
				3592
				3593	cur = jiffies;
				3594	if ((time_after_eq(cur, next_wakeup)) \|\|
				3595	(MAX_JIFFY_OFFSET == next_wakeup)) {
				3596	cond_resched();
				3597	continue;
				3598	}
				3599
				3600	schedule_timeout_interruptible(next_wakeup - cur);
				3601
				3602	if (kthread_should_stop()) {
				3603	ext4_clear_request_list();
				3604	goto exit_thread;
				3605	}
				3606	}
				3607
				3608	exit_thread:
				3609	/*
				3610	* It looks like the request list is empty, but we need
				3611	* to check it under the li_list_mtx lock, to prevent any
				3612	* additions into it, and of course we should lock ext4_li_mtx
				3613	* to atomically free the list and ext4_li_info, because at
				3614	* this point another ext4 filesystem could be registering
				3615	* new one.
				3616	*/
				3617	mutex_lock(&ext4_li_mtx);
				3618	mutex_lock(&eli->li_list_mtx);
				3619	if (!list_empty(&eli->li_request_list)) {
				3620	mutex_unlock(&eli->li_list_mtx);
				3621	mutex_unlock(&ext4_li_mtx);
				3622	goto cont_thread;
				3623	}
				3624	mutex_unlock(&eli->li_list_mtx);
				3625	kfree(ext4_li_info);
				3626	ext4_li_info = NULL;
				3627	mutex_unlock(&ext4_li_mtx);
				3628
				3629	return 0;
				3630	}
				3631
				3632	static void ext4_clear_request_list(void)
				3633	{
				3634	struct list_head pos, n;
				3635	struct ext4_li_request *elr;
				3636
				3637	mutex_lock(&ext4_li_info->li_list_mtx);
				3638	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
				3639	elr = list_entry(pos, struct ext4_li_request,
				3640	lr_request);
				3641	ext4_remove_li_request(elr);
				3642	}
				3643	mutex_unlock(&ext4_li_info->li_list_mtx);
				3644	}
				3645
				3646	static int ext4_run_lazyinit_thread(void)
				3647	{
				3648	ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
				3649	ext4_li_info, "ext4lazyinit");
				3650	if (IS_ERR(ext4_lazyinit_task)) {
				3651	int err = PTR_ERR(ext4_lazyinit_task);
				3652	ext4_clear_request_list();
				3653	kfree(ext4_li_info);
				3654	ext4_li_info = NULL;
				3655	printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
				3656	"initialization thread\n",
				3657	err);
				3658	return err;
				3659	}
				3660	ext4_li_info->li_state \|= EXT4_LAZYINIT_RUNNING;
				3661	return 0;
				3662	}
				3663
				3664	/*
				3665	* Check whether it make sense to run itable init. thread or not.
				3666	* If there is at least one uninitialized inode table, return
				3667	* corresponding group number, else the loop goes through all
				3668	* groups and return total number of groups.
				3669	*/
				3670	static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
				3671	{
				3672	ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
				3673	struct ext4_group_desc *gdp = NULL;
				3674
				3675	if (!ext4_has_group_desc_csum(sb))
				3676	return ngroups;
				3677
				3678	for (group = 0; group < ngroups; group++) {
				3679	gdp = ext4_get_group_desc(sb, group, NULL);
				3680	if (!gdp)
				3681	continue;
				3682
				3683	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
				3684	break;
				3685	}
				3686
				3687	return group;
				3688	}
				3689
				3690	static int ext4_li_info_new(void)
				3691	{
				3692	struct ext4_lazy_init *eli = NULL;
				3693
				3694	eli = kzalloc(sizeof(*eli), GFP_KERNEL);
				3695	if (!eli)
				3696	return -ENOMEM;
				3697
				3698	INIT_LIST_HEAD(&eli->li_request_list);
				3699	mutex_init(&eli->li_list_mtx);
				3700
				3701	eli->li_state \|= EXT4_LAZYINIT_QUIT;
				3702
				3703	ext4_li_info = eli;
				3704
				3705	return 0;
				3706	}
				3707
				3708	static struct ext4_li_request ext4_li_request_new(struct super_block sb,
				3709	ext4_group_t start)
				3710	{
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3711	struct ext4_li_request *elr;
				3712
				3713	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
				3714	if (!elr)
				3715	return NULL;
				3716
				3717	elr->lr_super = sb;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3718	elr->lr_first_not_zeroed = start;
				3719	if (test_opt(sb, PREFETCH_BLOCK_BITMAPS))
				3720	elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
				3721	else {
				3722	elr->lr_mode = EXT4_LI_MODE_ITABLE;
				3723	elr->lr_next_group = start;
				3724	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3725
				3726	/*
				3727	* Randomize first schedule time of the request to
				3728	* spread the inode table initialization requests
				3729	* better.
				3730	*/
				3731	elr->lr_next_sched = jiffies + (prandom_u32() %
				3732	(EXT4_DEF_LI_MAX_START_DELAY * HZ));
				3733	return elr;
				3734	}
				3735
				3736	int ext4_register_li_request(struct super_block *sb,
				3737	ext4_group_t first_not_zeroed)
				3738	{
				3739	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3740	struct ext4_li_request *elr = NULL;
				3741	ext4_group_t ngroups = sbi->s_groups_count;
				3742	int ret = 0;
				3743
				3744	mutex_lock(&ext4_li_mtx);
				3745	if (sbi->s_li_request != NULL) {
				3746	/*
				3747	* Reset timeout so it can be computed again, because
				3748	* s_li_wait_mult might have changed.
				3749	*/
				3750	sbi->s_li_request->lr_timeout = 0;
				3751	goto out;
				3752	}
				3753
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3754	if (!test_opt(sb, PREFETCH_BLOCK_BITMAPS) &&
				3755	(first_not_zeroed == ngroups \|\| sb_rdonly(sb) \|\|
				3756	!test_opt(sb, INIT_INODE_TABLE)))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3757	goto out;
				3758
				3759	elr = ext4_li_request_new(sb, first_not_zeroed);
				3760	if (!elr) {
				3761	ret = -ENOMEM;
				3762	goto out;
				3763	}
				3764
				3765	if (NULL == ext4_li_info) {
				3766	ret = ext4_li_info_new();
				3767	if (ret)
				3768	goto out;
				3769	}
				3770
				3771	mutex_lock(&ext4_li_info->li_list_mtx);
				3772	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
				3773	mutex_unlock(&ext4_li_info->li_list_mtx);
				3774
				3775	sbi->s_li_request = elr;
				3776	/*
				3777	* set elr to NULL here since it has been inserted to
				3778	* the request_list and the removal and free of it is
				3779	* handled by ext4_clear_request_list from now on.
				3780	*/
				3781	elr = NULL;
				3782
				3783	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
				3784	ret = ext4_run_lazyinit_thread();
				3785	if (ret)
				3786	goto out;
				3787	}
				3788	out:
				3789	mutex_unlock(&ext4_li_mtx);
				3790	if (ret)
				3791	kfree(elr);
				3792	return ret;
				3793	}
				3794
				3795	/*
				3796	* We do not need to lock anything since this is called on
				3797	* module unload.
				3798	*/
				3799	static void ext4_destroy_lazyinit_thread(void)
				3800	{
				3801	/*
				3802	* If thread exited earlier
				3803	* there's nothing to be done.
				3804	*/
				3805	if (!ext4_li_info \|\| !ext4_lazyinit_task)
				3806	return;
				3807
				3808	kthread_stop(ext4_lazyinit_task);
				3809	}
				3810
				3811	static int set_journal_csum_feature_set(struct super_block *sb)
				3812	{
				3813	int ret = 1;
				3814	int compat, incompat;
				3815	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3816
				3817	if (ext4_has_metadata_csum(sb)) {
				3818	/* journal checksum v3 */
				3819	compat = 0;
				3820	incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
				3821	} else {
				3822	/* journal checksum v1 */
				3823	compat = JBD2_FEATURE_COMPAT_CHECKSUM;
				3824	incompat = 0;
				3825	}
				3826
				3827	jbd2_journal_clear_features(sbi->s_journal,
				3828	JBD2_FEATURE_COMPAT_CHECKSUM, 0,
				3829	JBD2_FEATURE_INCOMPAT_CSUM_V3 \|
				3830	JBD2_FEATURE_INCOMPAT_CSUM_V2);
				3831	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
				3832	ret = jbd2_journal_set_features(sbi->s_journal,
				3833	compat, 0,
				3834	JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT \|
				3835	incompat);
				3836	} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
				3837	ret = jbd2_journal_set_features(sbi->s_journal,
				3838	compat, 0,
				3839	incompat);
				3840	jbd2_journal_clear_features(sbi->s_journal, 0, 0,
				3841	JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
				3842	} else {
				3843	jbd2_journal_clear_features(sbi->s_journal, 0, 0,
				3844	JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
				3845	}
				3846
				3847	return ret;
				3848	}
				3849
				3850	/*
				3851	* Note: calculating the overhead so we can be compatible with
				3852	* historical BSD practice is quite difficult in the face of
				3853	* clusters/bigalloc. This is because multiple metadata blocks from
				3854	* different block group can end up in the same allocation cluster.
				3855	* Calculating the exact overhead in the face of clustered allocation
				3856	* requires either O(all block bitmaps) in memory or O(number of block
				3857	* groups**2) in time. We will still calculate the superblock for
				3858	* older file systems --- and if we come across with a bigalloc file
				3859	* system with zero in s_overhead_clusters the estimate will be close to
				3860	* correct especially for very large cluster sizes --- but for newer
				3861	* file systems, it's better to calculate this figure once at mkfs
				3862	* time, and store it in the superblock. If the superblock value is
				3863	* present (even for non-bigalloc file systems), we will use it.
				3864	*/
				3865	static int count_overhead(struct super_block *sb, ext4_group_t grp,
				3866	char *buf)
				3867	{
				3868	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3869	struct ext4_group_desc *gdp;
				3870	ext4_fsblk_t first_block, last_block, b;
				3871	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
				3872	int s, j, count = 0;
				3873
				3874	if (!ext4_has_feature_bigalloc(sb))
				3875	return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
				3876	sbi->s_itb_per_group + 2);
				3877
				3878	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
				3879	(grp * EXT4_BLOCKS_PER_GROUP(sb));
				3880	last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
				3881	for (i = 0; i < ngroups; i++) {
				3882	gdp = ext4_get_group_desc(sb, i, NULL);
				3883	b = ext4_block_bitmap(sb, gdp);
				3884	if (b >= first_block && b <= last_block) {
				3885	ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
				3886	count++;
				3887	}
				3888	b = ext4_inode_bitmap(sb, gdp);
				3889	if (b >= first_block && b <= last_block) {
				3890	ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
				3891	count++;
				3892	}
				3893	b = ext4_inode_table(sb, gdp);
				3894	if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
				3895	for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
				3896	int c = EXT4_B2C(sbi, b - first_block);
				3897	ext4_set_bit(c, buf);
				3898	count++;
				3899	}
				3900	if (i != grp)
				3901	continue;
				3902	s = 0;
				3903	if (ext4_bg_has_super(sb, grp)) {
				3904	ext4_set_bit(s++, buf);
				3905	count++;
				3906	}
				3907	j = ext4_bg_num_gdb(sb, grp);
				3908	if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
				3909	ext4_error(sb, "Invalid number of block group "
				3910	"descriptor blocks: %d", j);
				3911	j = EXT4_BLOCKS_PER_GROUP(sb) - s;
				3912	}
				3913	count += j;
				3914	for (; j > 0; j--)
				3915	ext4_set_bit(EXT4_B2C(sbi, s++), buf);
				3916	}
				3917	if (!count)
				3918	return 0;
				3919	return EXT4_CLUSTERS_PER_GROUP(sb) -
				3920	ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
				3921	}
				3922
				3923	/*
				3924	* Compute the overhead and stash it in sbi->s_overhead
				3925	*/
				3926	int ext4_calculate_overhead(struct super_block *sb)
				3927	{
				3928	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3929	struct ext4_super_block *es = sbi->s_es;
				3930	struct inode *j_inode;
				3931	unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
				3932	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
				3933	ext4_fsblk_t overhead = 0;
				3934	char buf = (char ) get_zeroed_page(GFP_NOFS);
				3935
				3936	if (!buf)
				3937	return -ENOMEM;
				3938
				3939	/*
				3940	* Compute the overhead (FS structures). This is constant
				3941	* for a given filesystem unless the number of block groups
				3942	* changes so we cache the previous value until it does.
				3943	*/
				3944
				3945	/*
				3946	* All of the blocks before first_data_block are overhead
				3947	*/
				3948	overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
				3949
				3950	/*
				3951	* Add the overhead found in each block group
				3952	*/
				3953	for (i = 0; i < ngroups; i++) {
				3954	int blks;
				3955
				3956	blks = count_overhead(sb, i, buf);
				3957	overhead += blks;
				3958	if (blks)
				3959	memset(buf, 0, PAGE_SIZE);
				3960	cond_resched();
				3961	}
				3962
				3963	/*
				3964	* Add the internal journal blocks whether the journal has been
				3965	* loaded or not
				3966	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3967	if (sbi->s_journal && !sbi->s_journal_bdev)
				3968	overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	3969	else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
				3970	/* j_inum for internal journal is non-zero */
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3971	j_inode = ext4_get_journal_inode(sb, j_inum);
				3972	if (j_inode) {
				3973	j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
				3974	overhead += EXT4_NUM_B2C(sbi, j_blocks);
				3975	iput(j_inode);
				3976	} else {
				3977	ext4_msg(sb, KERN_ERR, "can't get journal size");
				3978	}
				3979	}
				3980	sbi->s_overhead = overhead;
				3981	smp_wmb();
				3982	free_page((unsigned long) buf);
				3983	return 0;
				3984	}
				3985
				3986	static void ext4_set_resv_clusters(struct super_block *sb)
				3987	{
				3988	ext4_fsblk_t resv_clusters;
				3989	struct ext4_sb_info *sbi = EXT4_SB(sb);
				3990
				3991	/*
				3992	* There's no need to reserve anything when we aren't using extents.
				3993	* The space estimates are exact, there are no unwritten extents,
				3994	* hole punching doesn't need new metadata... This is needed especially
				3995	* to keep ext2/3 backward compatibility.
				3996	*/
				3997	if (!ext4_has_feature_extents(sb))
				3998	return;
				3999	/*
				4000	* By default we reserve 2% or 4096 clusters, whichever is smaller.
				4001	* This should cover the situations where we can not afford to run
				4002	* out of space like for example punch hole, or converting
				4003	* unwritten extents in delalloc path. In most cases such
				4004	* allocation would require 1, or 2 blocks, higher numbers are
				4005	* very rare.
				4006	*/
				4007	resv_clusters = (ext4_blocks_count(sbi->s_es) >>
				4008	sbi->s_cluster_bits);
				4009
				4010	do_div(resv_clusters, 50);
				4011	resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
				4012
				4013	atomic64_set(&sbi->s_resv_clusters, resv_clusters);
				4014	}
				4015
				4016	static int ext4_fill_super(struct super_block sb, void data, int silent)
				4017	{
				4018	struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
				4019	char *orig_data = kstrdup(data, GFP_KERNEL);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4020	struct buffer_head bh, *group_desc;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4021	struct ext4_super_block *es = NULL;
				4022	struct ext4_sb_info sbi = kzalloc(sizeof(sbi), GFP_KERNEL);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4023	struct flex_groups **flex_groups;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4024	ext4_fsblk_t block;
				4025	ext4_fsblk_t sb_block = get_sb_block(&data);
				4026	ext4_fsblk_t logical_sb_block;
				4027	unsigned long offset = 0;
				4028	unsigned long journal_devnum = 0;
				4029	unsigned long def_mount_opts;
				4030	struct inode *root;
				4031	const char *descr;
				4032	int ret = -ENOMEM;
				4033	int blocksize, clustersize;
				4034	unsigned int db_count;
				4035	unsigned int i;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4036	int needs_recovery, has_huge_files;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4037	__u64 blocks_count;
				4038	int err = 0;
				4039	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
				4040	ext4_group_t first_not_zeroed;
				4041
				4042	if ((data && !orig_data) \|\| !sbi)
				4043	goto out_free_base;
				4044
				4045	sbi->s_daxdev = dax_dev;
				4046	sbi->s_blockgroup_lock =
				4047	kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
				4048	if (!sbi->s_blockgroup_lock)
				4049	goto out_free_base;
				4050
				4051	sb->s_fs_info = sbi;
				4052	sbi->s_sb = sb;
				4053	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
				4054	sbi->s_sb_block = sb_block;
				4055	if (sb->s_bdev->bd_part)
				4056	sbi->s_sectors_written_start =
				4057	part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]);
				4058
				4059	/* Cleanup superblock name */
				4060	strreplace(sb->s_id, '/', '!');
				4061
				4062	/* -EINVAL is default */
				4063	ret = -EINVAL;
				4064	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
				4065	if (!blocksize) {
				4066	ext4_msg(sb, KERN_ERR, "unable to set blocksize");
				4067	goto out_fail;
				4068	}
				4069
				4070	/*
				4071	* The ext4 superblock will not be buffer aligned for other than 1kB
				4072	* block sizes. We need to calculate the offset from buffer start.
				4073	*/
				4074	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
				4075	logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
				4076	offset = do_div(logical_sb_block, blocksize);
				4077	} else {
				4078	logical_sb_block = sb_block;
				4079	}
				4080
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4081	bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
				4082	if (IS_ERR(bh)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4083	ext4_msg(sb, KERN_ERR, "unable to read superblock");
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4084	ret = PTR_ERR(bh);
				4085	bh = NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4086	goto out_fail;
				4087	}
				4088	/*
				4089	* Note: s_es must be initialized as soon as possible because
				4090	* some ext4 macro-instructions depend on its value
				4091	*/
				4092	es = (struct ext4_super_block *) (bh->b_data + offset);
				4093	sbi->s_es = es;
				4094	sb->s_magic = le16_to_cpu(es->s_magic);
				4095	if (sb->s_magic != EXT4_SUPER_MAGIC)
				4096	goto cantfind_ext4;
				4097	sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
				4098
				4099	/* Warn if metadata_csum and gdt_csum are both set. */
				4100	if (ext4_has_feature_metadata_csum(sb) &&
				4101	ext4_has_feature_gdt_csum(sb))
				4102	ext4_warning(sb, "metadata_csum and uninit_bg are "
				4103	"redundant flags; please run fsck.");
				4104
				4105	/* Check for a known checksum algorithm */
				4106	if (!ext4_verify_csum_type(sb, es)) {
				4107	ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
				4108	"unknown checksum algorithm.");
				4109	silent = 1;
				4110	goto cantfind_ext4;
				4111	}
				4112
				4113	/* Load the checksum driver */
				4114	sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
				4115	if (IS_ERR(sbi->s_chksum_driver)) {
				4116	ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
				4117	ret = PTR_ERR(sbi->s_chksum_driver);
				4118	sbi->s_chksum_driver = NULL;
				4119	goto failed_mount;
				4120	}
				4121
				4122	/* Check superblock checksum */
				4123	if (!ext4_superblock_csum_verify(sb, es)) {
				4124	ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
				4125	"invalid superblock checksum. Run e2fsck?");
				4126	silent = 1;
				4127	ret = -EFSBADCRC;
				4128	goto cantfind_ext4;
				4129	}
				4130
				4131	/* Precompute checksum seed for all metadata */
				4132	if (ext4_has_feature_csum_seed(sb))
				4133	sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
				4134	else if (ext4_has_metadata_csum(sb) \|\| ext4_has_feature_ea_inode(sb))
				4135	sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
				4136	sizeof(es->s_uuid));
				4137
				4138	/* Set defaults before we parse the mount options */
				4139	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
				4140	set_opt(sb, INIT_INODE_TABLE);
				4141	if (def_mount_opts & EXT4_DEFM_DEBUG)
				4142	set_opt(sb, DEBUG);
				4143	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
				4144	set_opt(sb, GRPID);
				4145	if (def_mount_opts & EXT4_DEFM_UID16)
				4146	set_opt(sb, NO_UID32);
				4147	/* xattr user namespace & acls are now defaulted on */
				4148	set_opt(sb, XATTR_USER);
				4149	#ifdef CONFIG_EXT4_FS_POSIX_ACL
				4150	set_opt(sb, POSIX_ACL);
				4151	#endif
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4152	if (ext4_has_feature_fast_commit(sb))
				4153	set_opt2(sb, JOURNAL_FAST_COMMIT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4154	/* don't forget to enable journal_csum when metadata_csum is enabled. */
				4155	if (ext4_has_metadata_csum(sb))
				4156	set_opt(sb, JOURNAL_CHECKSUM);
				4157
				4158	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
				4159	set_opt(sb, JOURNAL_DATA);
				4160	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
				4161	set_opt(sb, ORDERED_DATA);
				4162	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
				4163	set_opt(sb, WRITEBACK_DATA);
				4164
				4165	if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
				4166	set_opt(sb, ERRORS_PANIC);
				4167	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
				4168	set_opt(sb, ERRORS_CONT);
				4169	else
				4170	set_opt(sb, ERRORS_RO);
				4171	/* block_validity enabled by default; disable with noblock_validity */
				4172	set_opt(sb, BLOCK_VALIDITY);
				4173	if (def_mount_opts & EXT4_DEFM_DISCARD)
				4174	set_opt(sb, DISCARD);
				4175
				4176	sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
				4177	sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
				4178	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
				4179	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
				4180	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
				4181
				4182	if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
				4183	set_opt(sb, BARRIER);
				4184
				4185	/*
				4186	* enable delayed allocation by default
				4187	* Use -o nodelalloc to turn it off
				4188	*/
				4189	if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
				4190	((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
				4191	set_opt(sb, DELALLOC);
				4192
				4193	/*
				4194	* set default s_li_wait_mult for lazyinit, for the case there is
				4195	* no mount option specified.
				4196	*/
				4197	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
				4198
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4199	if (le32_to_cpu(es->s_log_block_size) >
				4200	(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4201	ext4_msg(sb, KERN_ERR,
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4202	"Invalid log block size: %u",
				4203	le32_to_cpu(es->s_log_block_size));
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4204	goto failed_mount;
				4205	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4206	if (le32_to_cpu(es->s_log_cluster_size) >
				4207	(EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
				4208	ext4_msg(sb, KERN_ERR,
				4209	"Invalid log cluster size: %u",
				4210	le32_to_cpu(es->s_log_cluster_size));
				4211	goto failed_mount;
				4212	}
				4213
				4214	blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
				4215
				4216	if (blocksize == PAGE_SIZE)
				4217	set_opt(sb, DIOREAD_NOLOCK);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4218
				4219	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
				4220	sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
				4221	sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
				4222	} else {
				4223	sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
				4224	sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
				4225	if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
				4226	ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
				4227	sbi->s_first_ino);
				4228	goto failed_mount;
				4229	}
				4230	if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) \|\|
				4231	(!is_power_of_2(sbi->s_inode_size)) \|\|
				4232	(sbi->s_inode_size > blocksize)) {
				4233	ext4_msg(sb, KERN_ERR,
				4234	"unsupported inode size: %d",
				4235	sbi->s_inode_size);
				4236	ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
				4237	goto failed_mount;
				4238	}
				4239	/*
				4240	* i_atime_extra is the last extra field available for
				4241	* [acm]times in struct ext4_inode. Checking for that
				4242	* field should suffice to ensure we have extra space
				4243	* for all three.
				4244	*/
				4245	if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
				4246	sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
				4247	sb->s_time_gran = 1;
				4248	sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
				4249	} else {
				4250	sb->s_time_gran = NSEC_PER_SEC;
				4251	sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
				4252	}
				4253	sb->s_time_min = EXT4_TIMESTAMP_MIN;
				4254	}
				4255	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
				4256	sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
				4257	EXT4_GOOD_OLD_INODE_SIZE;
				4258	if (ext4_has_feature_extra_isize(sb)) {
				4259	unsigned v, max = (sbi->s_inode_size -
				4260	EXT4_GOOD_OLD_INODE_SIZE);
				4261
				4262	v = le16_to_cpu(es->s_want_extra_isize);
				4263	if (v > max) {
				4264	ext4_msg(sb, KERN_ERR,
				4265	"bad s_want_extra_isize: %d", v);
				4266	goto failed_mount;
				4267	}
				4268	if (sbi->s_want_extra_isize < v)
				4269	sbi->s_want_extra_isize = v;
				4270
				4271	v = le16_to_cpu(es->s_min_extra_isize);
				4272	if (v > max) {
				4273	ext4_msg(sb, KERN_ERR,
				4274	"bad s_min_extra_isize: %d", v);
				4275	goto failed_mount;
				4276	}
				4277	if (sbi->s_want_extra_isize < v)
				4278	sbi->s_want_extra_isize = v;
				4279	}
				4280	}
				4281
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4282	if (sbi->s_es->s_mount_opts[0]) {
				4283	char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
				4284	sizeof(sbi->s_es->s_mount_opts),
				4285	GFP_KERNEL);
				4286	if (!s_mount_opts)
				4287	goto failed_mount;
				4288	if (!parse_options(s_mount_opts, sb, &journal_devnum,
				4289	&journal_ioprio, 0)) {
				4290	ext4_msg(sb, KERN_WARNING,
				4291	"failed to parse options in superblock: %s",
				4292	s_mount_opts);
				4293	}
				4294	kfree(s_mount_opts);
				4295	}
				4296	sbi->s_def_mount_opt = sbi->s_mount_opt;
				4297	if (!parse_options((char *) data, sb, &journal_devnum,
				4298	&journal_ioprio, 0))
				4299	goto failed_mount;
				4300
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4301	#ifdef CONFIG_UNICODE
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4302	if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4303	const struct ext4_sb_encodings *encoding_info;
				4304	struct unicode_map *encoding;
				4305	__u16 encoding_flags;
				4306
				4307	if (ext4_has_feature_encrypt(sb)) {
				4308	ext4_msg(sb, KERN_ERR,
				4309	"Can't mount with encoding and encryption");
				4310	goto failed_mount;
				4311	}
				4312
				4313	if (ext4_sb_read_encoding(es, &encoding_info,
				4314	&encoding_flags)) {
				4315	ext4_msg(sb, KERN_ERR,
				4316	"Encoding requested by superblock is unknown");
				4317	goto failed_mount;
				4318	}
				4319
				4320	encoding = utf8_load(encoding_info->version);
				4321	if (IS_ERR(encoding)) {
				4322	ext4_msg(sb, KERN_ERR,
				4323	"can't mount with superblock charset: %s-%s "
				4324	"not supported by the kernel. flags: 0x%x.",
				4325	encoding_info->name, encoding_info->version,
				4326	encoding_flags);
				4327	goto failed_mount;
				4328	}
				4329	ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
				4330	"%s-%s with flags 0x%hx", encoding_info->name,
				4331	encoding_info->version?:"\b", encoding_flags);
				4332
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4333	sb->s_encoding = encoding;
				4334	sb->s_encoding_flags = encoding_flags;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4335	}
				4336	#endif
				4337
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4338	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4339	printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
				4340	/* can't mount with both data=journal and dioread_nolock. */
				4341	clear_opt(sb, DIOREAD_NOLOCK);
				4342	clear_opt2(sb, JOURNAL_FAST_COMMIT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4343	if (test_opt2(sb, EXPLICIT_DELALLOC)) {
				4344	ext4_msg(sb, KERN_ERR, "can't mount with "
				4345	"both data=journal and delalloc");
				4346	goto failed_mount;
				4347	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4348	if (test_opt(sb, DAX_ALWAYS)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4349	ext4_msg(sb, KERN_ERR, "can't mount with "
				4350	"both data=journal and dax");
				4351	goto failed_mount;
				4352	}
				4353	if (ext4_has_feature_encrypt(sb)) {
				4354	ext4_msg(sb, KERN_WARNING,
				4355	"encrypted files will use data=ordered "
				4356	"instead of data journaling mode");
				4357	}
				4358	if (test_opt(sb, DELALLOC))
				4359	clear_opt(sb, DELALLOC);
				4360	} else {
				4361	sb->s_iflags \|= SB_I_CGROUPWB;
				4362	}
				4363
				4364	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) \|
				4365	(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
				4366
				4367	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
				4368	(ext4_has_compat_features(sb) \|\|
				4369	ext4_has_ro_compat_features(sb) \|\|
				4370	ext4_has_incompat_features(sb)))
				4371	ext4_msg(sb, KERN_WARNING,
				4372	"feature flags set on rev 0 fs, "
				4373	"running e2fsck is recommended");
				4374
				4375	if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
				4376	set_opt2(sb, HURD_COMPAT);
				4377	if (ext4_has_feature_64bit(sb)) {
				4378	ext4_msg(sb, KERN_ERR,
				4379	"The Hurd can't support 64-bit file systems");
				4380	goto failed_mount;
				4381	}
				4382
				4383	/*
				4384	* ea_inode feature uses l_i_version field which is not
				4385	* available in HURD_COMPAT mode.
				4386	*/
				4387	if (ext4_has_feature_ea_inode(sb)) {
				4388	ext4_msg(sb, KERN_ERR,
				4389	"ea_inode feature is not supported for Hurd");
				4390	goto failed_mount;
				4391	}
				4392	}
				4393
				4394	if (IS_EXT2_SB(sb)) {
				4395	if (ext2_feature_set_ok(sb))
				4396	ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
				4397	"using the ext4 subsystem");
				4398	else {
				4399	/*
				4400	* If we're probing be silent, if this looks like
				4401	* it's actually an ext[34] filesystem.
				4402	*/
				4403	if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
				4404	goto failed_mount;
				4405	ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
				4406	"to feature incompatibilities");
				4407	goto failed_mount;
				4408	}
				4409	}
				4410
				4411	if (IS_EXT3_SB(sb)) {
				4412	if (ext3_feature_set_ok(sb))
				4413	ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
				4414	"using the ext4 subsystem");
				4415	else {
				4416	/*
				4417	* If we're probing be silent, if this looks like
				4418	* it's actually an ext4 filesystem.
				4419	*/
				4420	if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
				4421	goto failed_mount;
				4422	ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
				4423	"to feature incompatibilities");
				4424	goto failed_mount;
				4425	}
				4426	}
				4427
				4428	/*
				4429	* Check feature flags regardless of the revision level, since we
				4430	* previously didn't change the revision level when setting the flags,
				4431	* so there is a chance incompat flags are set on a rev 0 filesystem.
				4432	*/
				4433	if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
				4434	goto failed_mount;
				4435
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4436	if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
				4437	ext4_msg(sb, KERN_ERR,
				4438	"Number of reserved GDT blocks insanely large: %d",
				4439	le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
				4440	goto failed_mount;
				4441	}
				4442
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4443	if (bdev_dax_supported(sb->s_bdev, blocksize))
				4444	set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
				4445
				4446	if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4447	if (ext4_has_feature_inline_data(sb)) {
				4448	ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
				4449	" that may contain inline data");
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4450	goto failed_mount;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4451	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4452	if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4453	ext4_msg(sb, KERN_ERR,
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4454	"DAX unsupported by block device.");
				4455	goto failed_mount;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4456	}
				4457	}
				4458
				4459	if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
				4460	ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
				4461	es->s_encryption_level);
				4462	goto failed_mount;
				4463	}
				4464
				4465	if (sb->s_blocksize != blocksize) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4466	/*
				4467	* bh must be released before kill_bdev(), otherwise
				4468	* it won't be freed and its page also. kill_bdev()
				4469	* is called by sb_set_blocksize().
				4470	*/
				4471	brelse(bh);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4472	/* Validate the filesystem blocksize */
				4473	if (!sb_set_blocksize(sb, blocksize)) {
				4474	ext4_msg(sb, KERN_ERR, "bad block size %d",
				4475	blocksize);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4476	bh = NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4477	goto failed_mount;
				4478	}
				4479
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4480	logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
				4481	offset = do_div(logical_sb_block, blocksize);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4482	bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
				4483	if (IS_ERR(bh)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4484	ext4_msg(sb, KERN_ERR,
				4485	"Can't read superblock on 2nd try");
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4486	ret = PTR_ERR(bh);
				4487	bh = NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4488	goto failed_mount;
				4489	}
				4490	es = (struct ext4_super_block *)(bh->b_data + offset);
				4491	sbi->s_es = es;
				4492	if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
				4493	ext4_msg(sb, KERN_ERR,
				4494	"Magic mismatch, very weird!");
				4495	goto failed_mount;
				4496	}
				4497	}
				4498
				4499	has_huge_files = ext4_has_feature_huge_file(sb);
				4500	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
				4501	has_huge_files);
				4502	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
				4503
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4504	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
				4505	if (ext4_has_feature_64bit(sb)) {
				4506	if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT \|\|
				4507	sbi->s_desc_size > EXT4_MAX_DESC_SIZE \|\|
				4508	!is_power_of_2(sbi->s_desc_size)) {
				4509	ext4_msg(sb, KERN_ERR,
				4510	"unsupported descriptor size %lu",
				4511	sbi->s_desc_size);
				4512	goto failed_mount;
				4513	}
				4514	} else
				4515	sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
				4516
				4517	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
				4518	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
				4519
				4520	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
				4521	if (sbi->s_inodes_per_block == 0)
				4522	goto cantfind_ext4;
				4523	if (sbi->s_inodes_per_group < sbi->s_inodes_per_block \|\|
				4524	sbi->s_inodes_per_group > blocksize * 8) {
				4525	ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4526	sbi->s_inodes_per_group);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4527	goto failed_mount;
				4528	}
				4529	sbi->s_itb_per_group = sbi->s_inodes_per_group /
				4530	sbi->s_inodes_per_block;
				4531	sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
				4532	sbi->s_sbh = bh;
				4533	sbi->s_mount_state = le16_to_cpu(es->s_state);
				4534	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
				4535	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
				4536
				4537	for (i = 0; i < 4; i++)
				4538	sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
				4539	sbi->s_def_hash_version = es->s_def_hash_version;
				4540	if (ext4_has_feature_dir_index(sb)) {
				4541	i = le32_to_cpu(es->s_flags);
				4542	if (i & EXT2_FLAGS_UNSIGNED_HASH)
				4543	sbi->s_hash_unsigned = 3;
				4544	else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
				4545	#ifdef __CHAR_UNSIGNED__
				4546	if (!sb_rdonly(sb))
				4547	es->s_flags \|=
				4548	cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
				4549	sbi->s_hash_unsigned = 3;
				4550	#else
				4551	if (!sb_rdonly(sb))
				4552	es->s_flags \|=
				4553	cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
				4554	#endif
				4555	}
				4556	}
				4557
				4558	/* Handle clustersize */
				4559	clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4560	if (ext4_has_feature_bigalloc(sb)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4561	if (clustersize < blocksize) {
				4562	ext4_msg(sb, KERN_ERR,
				4563	"cluster size (%d) smaller than "
				4564	"block size (%d)", clustersize, blocksize);
				4565	goto failed_mount;
				4566	}
				4567	sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
				4568	le32_to_cpu(es->s_log_block_size);
				4569	sbi->s_clusters_per_group =
				4570	le32_to_cpu(es->s_clusters_per_group);
				4571	if (sbi->s_clusters_per_group > blocksize * 8) {
				4572	ext4_msg(sb, KERN_ERR,
				4573	"#clusters per group too big: %lu",
				4574	sbi->s_clusters_per_group);
				4575	goto failed_mount;
				4576	}
				4577	if (sbi->s_blocks_per_group !=
				4578	(sbi->s_clusters_per_group * (clustersize / blocksize))) {
				4579	ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
				4580	"clusters per group (%lu) inconsistent",
				4581	sbi->s_blocks_per_group,
				4582	sbi->s_clusters_per_group);
				4583	goto failed_mount;
				4584	}
				4585	} else {
				4586	if (clustersize != blocksize) {
				4587	ext4_msg(sb, KERN_ERR,
				4588	"fragment/cluster size (%d) != "
				4589	"block size (%d)", clustersize, blocksize);
				4590	goto failed_mount;
				4591	}
				4592	if (sbi->s_blocks_per_group > blocksize * 8) {
				4593	ext4_msg(sb, KERN_ERR,
				4594	"#blocks per group too big: %lu",
				4595	sbi->s_blocks_per_group);
				4596	goto failed_mount;
				4597	}
				4598	sbi->s_clusters_per_group = sbi->s_blocks_per_group;
				4599	sbi->s_cluster_bits = 0;
				4600	}
				4601	sbi->s_cluster_ratio = clustersize / blocksize;
				4602
				4603	/* Do we have standard group size of clustersize * 8 blocks ? */
				4604	if (sbi->s_blocks_per_group == clustersize << 3)
				4605	set_opt2(sb, STD_GROUP_SIZE);
				4606
				4607	/*
				4608	* Test whether we have more sectors than will fit in sector_t,
				4609	* and whether the max offset is addressable by the page cache.
				4610	*/
				4611	err = generic_check_addressable(sb->s_blocksize_bits,
				4612	ext4_blocks_count(es));
				4613	if (err) {
				4614	ext4_msg(sb, KERN_ERR, "filesystem"
				4615	" too large to mount safely on this system");
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4616	goto failed_mount;
				4617	}
				4618
				4619	if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
				4620	goto cantfind_ext4;
				4621
				4622	/* check blocks count against device size */
				4623	blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
				4624	if (blocks_count && ext4_blocks_count(es) > blocks_count) {
				4625	ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
				4626	"exceeds size of device (%llu blocks)",
				4627	ext4_blocks_count(es), blocks_count);
				4628	goto failed_mount;
				4629	}
				4630
				4631	/*
				4632	* It makes no sense for the first data block to be beyond the end
				4633	* of the filesystem.
				4634	*/
				4635	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
				4636	ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
				4637	"block %u is beyond end of filesystem (%llu)",
				4638	le32_to_cpu(es->s_first_data_block),
				4639	ext4_blocks_count(es));
				4640	goto failed_mount;
				4641	}
				4642	if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
				4643	(sbi->s_cluster_ratio == 1)) {
				4644	ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
				4645	"block is 0 with a 1k block and cluster size");
				4646	goto failed_mount;
				4647	}
				4648
				4649	blocks_count = (ext4_blocks_count(es) -
				4650	le32_to_cpu(es->s_first_data_block) +
				4651	EXT4_BLOCKS_PER_GROUP(sb) - 1);
				4652	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
				4653	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4654	ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4655	"(block count %llu, first data block %u, "
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4656	"blocks per group %lu)", blocks_count,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4657	ext4_blocks_count(es),
				4658	le32_to_cpu(es->s_first_data_block),
				4659	EXT4_BLOCKS_PER_GROUP(sb));
				4660	goto failed_mount;
				4661	}
				4662	sbi->s_groups_count = blocks_count;
				4663	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
				4664	(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
				4665	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
				4666	le32_to_cpu(es->s_inodes_count)) {
				4667	ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
				4668	le32_to_cpu(es->s_inodes_count),
				4669	((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
				4670	ret = -EINVAL;
				4671	goto failed_mount;
				4672	}
				4673	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
				4674	EXT4_DESC_PER_BLOCK(sb);
				4675	if (ext4_has_feature_meta_bg(sb)) {
				4676	if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
				4677	ext4_msg(sb, KERN_WARNING,
				4678	"first meta block group too large: %u "
				4679	"(group descriptor block count %u)",
				4680	le32_to_cpu(es->s_first_meta_bg), db_count);
				4681	goto failed_mount;
				4682	}
				4683	}
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4684	rcu_assign_pointer(sbi->s_group_desc,
				4685	kvmalloc_array(db_count,
				4686	sizeof(struct buffer_head *),
				4687	GFP_KERNEL));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4688	if (sbi->s_group_desc == NULL) {
				4689	ext4_msg(sb, KERN_ERR, "not enough memory");
				4690	ret = -ENOMEM;
				4691	goto failed_mount;
				4692	}
				4693
				4694	bgl_lock_init(sbi->s_blockgroup_lock);
				4695
				4696	/* Pre-read the descriptors into the buffer cache */
				4697	for (i = 0; i < db_count; i++) {
				4698	block = descriptor_loc(sb, logical_sb_block, i);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4699	ext4_sb_breadahead_unmovable(sb, block);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4700	}
				4701
				4702	for (i = 0; i < db_count; i++) {
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4703	struct buffer_head *bh;
				4704
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4705	block = descriptor_loc(sb, logical_sb_block, i);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4706	bh = ext4_sb_bread_unmovable(sb, block);
				4707	if (IS_ERR(bh)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4708	ext4_msg(sb, KERN_ERR,
				4709	"can't read group descriptor %d", i);
				4710	db_count = i;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4711	ret = PTR_ERR(bh);
				4712	bh = NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4713	goto failed_mount2;
				4714	}
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4715	rcu_read_lock();
				4716	rcu_dereference(sbi->s_group_desc)[i] = bh;
				4717	rcu_read_unlock();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4718	}
				4719	sbi->s_gdb_count = db_count;
				4720	if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
				4721	ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
				4722	ret = -EFSCORRUPTED;
				4723	goto failed_mount2;
				4724	}
				4725
				4726	timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
				4727
				4728	/* Register extent status tree shrinker */
				4729	if (ext4_es_register_shrinker(sbi))
				4730	goto failed_mount3;
				4731
				4732	sbi->s_stripe = ext4_get_stripe_size(sbi);
				4733	sbi->s_extent_max_zeroout_kb = 32;
				4734
				4735	/*
				4736	* set up enough so that it can read an inode
				4737	*/
				4738	sb->s_op = &ext4_sops;
				4739	sb->s_export_op = &ext4_export_ops;
				4740	sb->s_xattr = ext4_xattr_handlers;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4741	#ifdef CONFIG_FS_ENCRYPTION
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4742	sb->s_cop = &ext4_cryptops;
				4743	#endif
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4744	#ifdef CONFIG_FS_VERITY
				4745	sb->s_vop = &ext4_verityops;
				4746	#endif
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4747	#ifdef CONFIG_QUOTA
				4748	sb->dq_op = &ext4_quota_operations;
				4749	if (ext4_has_feature_quota(sb))
				4750	sb->s_qcop = &dquot_quotactl_sysfile_ops;
				4751	else
				4752	sb->s_qcop = &ext4_qctl_operations;
				4753	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
				4754	#endif
				4755	memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
				4756
				4757	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
				4758	mutex_init(&sbi->s_orphan_lock);
				4759
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4760	/* Initialize fast commit stuff */
				4761	atomic_set(&sbi->s_fc_subtid, 0);
				4762	atomic_set(&sbi->s_fc_ineligible_updates, 0);
				4763	INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
				4764	INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
				4765	INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
				4766	INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
				4767	sbi->s_fc_bytes = 0;
				4768	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
				4769	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
				4770	spin_lock_init(&sbi->s_fc_lock);
				4771	memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
				4772	sbi->s_fc_replay_state.fc_regions = NULL;
				4773	sbi->s_fc_replay_state.fc_regions_size = 0;
				4774	sbi->s_fc_replay_state.fc_regions_used = 0;
				4775	sbi->s_fc_replay_state.fc_regions_valid = 0;
				4776	sbi->s_fc_replay_state.fc_modified_inodes = NULL;
				4777	sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
				4778	sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
				4779
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4780	sb->s_root = NULL;
				4781
				4782	needs_recovery = (es->s_last_orphan != 0 ||
				4783	ext4_has_feature_journal_needs_recovery(sb));
				4784
				4785	if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
				4786	if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
				4787	goto failed_mount3a;
				4788
				4789	/*
				4790	* The first inode we look at is the journal inode. Don't try
				4791	* root first: it may be modified in the journal!
				4792	*/
				4793	if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
				4794	err = ext4_load_journal(sb, es, journal_devnum);
				4795	if (err)
				4796	goto failed_mount3a;
				4797	} else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
				4798	ext4_has_feature_journal_needs_recovery(sb)) {
				4799	ext4_msg(sb, KERN_ERR, "required journal recovery "
				4800	"suppressed and not mounted read-only");
				4801	goto failed_mount_wq;
				4802	} else {
				4803	/* Nojournal mode, all journal mount options are illegal */
				4804	if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
				4805	ext4_msg(sb, KERN_ERR, "can't mount with "
				4806	"journal_checksum, fs mounted w/o journal");
				4807	goto failed_mount_wq;
				4808	}
				4809	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
				4810	ext4_msg(sb, KERN_ERR, "can't mount with "
				4811	"journal_async_commit, fs mounted w/o journal");
				4812	goto failed_mount_wq;
				4813	}
				4814	if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
				4815	ext4_msg(sb, KERN_ERR, "can't mount with "
				4816	"commit=%lu, fs mounted w/o journal",
				4817	sbi->s_commit_interval / HZ);
				4818	goto failed_mount_wq;
				4819	}
				4820	if (EXT4_MOUNT_DATA_FLAGS &
				4821	(sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
				4822	ext4_msg(sb, KERN_ERR, "can't mount with "
				4823	"data=, fs mounted w/o journal");
				4824	goto failed_mount_wq;
				4825	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4826	sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4827	clear_opt(sb, JOURNAL_CHECKSUM);
				4828	clear_opt(sb, DATA_FLAGS);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4829	clear_opt2(sb, JOURNAL_FAST_COMMIT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4830	sbi->s_journal = NULL;
				4831	needs_recovery = 0;
				4832	goto no_journal;
				4833	}
				4834
				4835	if (ext4_has_feature_64bit(sb) &&
				4836	!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
				4837	JBD2_FEATURE_INCOMPAT_64BIT)) {
				4838	ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
				4839	goto failed_mount_wq;
				4840	}
				4841
				4842	if (!set_journal_csum_feature_set(sb)) {
				4843	ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
				4844	"feature set");
				4845	goto failed_mount_wq;
				4846	}
				4847
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4848	if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
				4849	!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
				4850	JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
				4851	ext4_msg(sb, KERN_ERR,
				4852	"Failed to set fast commit journal feature");
				4853	goto failed_mount_wq;
				4854	}
				4855
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4856	/* We have now updated the journal if required, so we can
				4857	* validate the data journaling mode. */
				4858	switch (test_opt(sb, DATA_FLAGS)) {
				4859	case 0:
				4860	/* No mode set, assume a default based on the journal
				4861	* capabilities: ORDERED_DATA if the journal can
				4862	* cope, else JOURNAL_DATA
				4863	*/
				4864	if (jbd2_journal_check_available_features
				4865	(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
				4866	set_opt(sb, ORDERED_DATA);
				4867	sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
				4868	} else {
				4869	set_opt(sb, JOURNAL_DATA);
				4870	sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
				4871	}
				4872	break;
				4873
				4874	case EXT4_MOUNT_ORDERED_DATA:
				4875	case EXT4_MOUNT_WRITEBACK_DATA:
				4876	if (!jbd2_journal_check_available_features
				4877	(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
				4878	ext4_msg(sb, KERN_ERR, "Journal does not support "
				4879	"requested data journaling mode");
				4880	goto failed_mount_wq;
				4881	}
				4882	default:
				4883	break;
				4884	}
				4885
				4886	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
				4887	test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
				4888	ext4_msg(sb, KERN_ERR, "can't mount with "
				4889	"journal_async_commit in data=ordered mode");
				4890	goto failed_mount_wq;
				4891	}
				4892
				4893	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
				4894
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4895	sbi->s_journal->j_submit_inode_data_buffers =
				4896	ext4_journal_submit_inode_data_buffers;
				4897	sbi->s_journal->j_finish_inode_data_buffers =
				4898	ext4_journal_finish_inode_data_buffers;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4899
				4900	no_journal:
				4901	if (!test_opt(sb, NO_MBCACHE)) {
				4902	sbi->s_ea_block_cache = ext4_xattr_create_cache();
				4903	if (!sbi->s_ea_block_cache) {
				4904	ext4_msg(sb, KERN_ERR,
				4905	"Failed to create ea_block_cache");
				4906	goto failed_mount_wq;
				4907	}
				4908
				4909	if (ext4_has_feature_ea_inode(sb)) {
				4910	sbi->s_ea_inode_cache = ext4_xattr_create_cache();
				4911	if (!sbi->s_ea_inode_cache) {
				4912	ext4_msg(sb, KERN_ERR,
				4913	"Failed to create ea_inode_cache");
				4914	goto failed_mount_wq;
				4915	}
				4916	}
				4917	}
				4918
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4919	if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
				4920	ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
				4921	goto failed_mount_wq;
				4922	}
				4923
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4924	if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
				4925	!ext4_has_feature_encrypt(sb)) {
				4926	ext4_set_feature_encrypt(sb);
				4927	ext4_commit_super(sb, 1);
				4928	}
				4929
				4930	/*
				4931	* Get the # of file system overhead blocks from the
				4932	* superblock if present.
				4933	*/
				4934	if (es->s_overhead_clusters)
				4935	sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
				4936	else {
				4937	err = ext4_calculate_overhead(sb);
				4938	if (err)
				4939	goto failed_mount_wq;
				4940	}
				4941
				4942	/*
				4943	* The maximum number of concurrent works can be high and
				4944	* concurrency isn't really necessary. Limit it to 1.
				4945	*/
				4946	EXT4_SB(sb)->rsv_conversion_wq =
				4947	alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
				4948	if (!EXT4_SB(sb)->rsv_conversion_wq) {
				4949	printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
				4950	ret = -ENOMEM;
				4951	goto failed_mount4;
				4952	}
				4953
				4954	/*
				4955	* The jbd2_journal_load will have done any necessary log recovery,
				4956	* so we can safely mount the rest of the filesystem now.
				4957	*/
				4958
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4959	root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4960	if (IS_ERR(root)) {
				4961	ext4_msg(sb, KERN_ERR, "get root inode failed");
				4962	ret = PTR_ERR(root);
				4963	root = NULL;
				4964	goto failed_mount4;
				4965	}
				4966	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
				4967	ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
				4968	iput(root);
				4969	goto failed_mount4;
				4970	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4971
				4972	#ifdef CONFIG_UNICODE
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4973	if (sb->s_encoding)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4974	sb->s_d_op = &ext4_dentry_ops;
				4975	#endif
				4976
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4977	sb->s_root = d_make_root(root);
				4978	if (!sb->s_root) {
				4979	ext4_msg(sb, KERN_ERR, "get root dentry failed");
				4980	ret = -ENOMEM;
				4981	goto failed_mount4;
				4982	}
				4983
				4984	ret = ext4_setup_super(sb, es, sb_rdonly(sb));
				4985	if (ret == -EROFS) {
				4986	sb->s_flags |= SB_RDONLY;
				4987	ret = 0;
				4988	} else if (ret)
				4989	goto failed_mount4a;
				4990
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4991	ext4_set_resv_clusters(sb);
				4992
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	4993	if (test_opt(sb, BLOCK_VALIDITY)) {
				4994	err = ext4_setup_system_zone(sb);
				4995	if (err) {
				4996	ext4_msg(sb, KERN_ERR, "failed to initialize system "
				4997	"zone (%d)", err);
				4998	goto failed_mount4a;
				4999	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5000	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5001	ext4_fc_replay_cleanup(sb);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5002
				5003	ext4_ext_init(sb);
				5004	err = ext4_mb_init(sb);
				5005	if (err) {
				5006	ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
				5007	err);
				5008	goto failed_mount5;
				5009	}
				5010
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5011	/*
				5012	* We can only set up the journal commit callback once
				5013	* mballoc is initialized
				5014	*/
				5015	if (sbi->s_journal)
				5016	sbi->s_journal->j_commit_callback =
				5017	ext4_journal_commit_callback;
				5018
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5019	block = ext4_count_free_clusters(sb);
				5020	ext4_free_blocks_count_set(sbi->s_es,
				5021	EXT4_C2B(sbi, block));
				5022	ext4_superblock_csum_set(sb);
				5023	err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
				5024	GFP_KERNEL);
				5025	if (!err) {
				5026	unsigned long freei = ext4_count_free_inodes(sb);
				5027	sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
				5028	ext4_superblock_csum_set(sb);
				5029	err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
				5030	GFP_KERNEL);
				5031	}
				5032	if (!err)
				5033	err = percpu_counter_init(&sbi->s_dirs_counter,
				5034	ext4_count_dirs(sb), GFP_KERNEL);
				5035	if (!err)
				5036	err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
				5037	GFP_KERNEL);
				5038	if (!err)
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5039	err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
				5040	GFP_KERNEL);
				5041	if (!err)
				5042	err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5043
				5044	if (err) {
				5045	ext4_msg(sb, KERN_ERR, "insufficient memory");
				5046	goto failed_mount6;
				5047	}
				5048
				5049	if (ext4_has_feature_flex_bg(sb))
				5050	if (!ext4_fill_flex_info(sb)) {
				5051	ext4_msg(sb, KERN_ERR,
				5052	"unable to initialize "
				5053	"flex_bg meta info!");
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5054	ret = -ENOMEM;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5055	goto failed_mount6;
				5056	}
				5057
				5058	err = ext4_register_li_request(sb, first_not_zeroed);
				5059	if (err)
				5060	goto failed_mount6;
				5061
				5062	err = ext4_register_sysfs(sb);
				5063	if (err)
				5064	goto failed_mount7;
				5065
				5066	#ifdef CONFIG_QUOTA
				5067	/* Enable quota usage during mount. */
				5068	if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
				5069	err = ext4_enable_quotas(sb);
				5070	if (err)
				5071	goto failed_mount8;
				5072	}
				5073	#endif /* CONFIG_QUOTA */
				5074
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5075	/*
				5076	* Save the original bdev mapping's wb_err value which could be
				5077	* used to detect the metadata async write error.
				5078	*/
				5079	spin_lock_init(&sbi->s_bdev_wb_lock);
				5080	errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
				5081	&sbi->s_bdev_wb_err);
				5082	sb->s_bdev->bd_super = sb;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5083	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
				5084	ext4_orphan_cleanup(sb, es);
				5085	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
				5086	if (needs_recovery) {
				5087	ext4_msg(sb, KERN_INFO, "recovery complete");
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5088	err = ext4_mark_recovery_complete(sb, es);
				5089	if (err)
				5090	goto failed_mount8;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5091	}
				5092	if (EXT4_SB(sb)->s_journal) {
				5093	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
				5094	descr = " journalled data mode";
				5095	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
				5096	descr = " ordered data mode";
				5097	else
				5098	descr = " writeback data mode";
				5099	} else
				5100	descr = "out journal";
				5101
				5102	if (test_opt(sb, DISCARD)) {
				5103	struct request_queue *q = bdev_get_queue(sb->s_bdev);
				5104	if (!blk_queue_discard(q))
				5105	ext4_msg(sb, KERN_WARNING,
				5106	"mounting with \"discard\" option, but "
				5107	"the device does not support discard");
				5108	}
				5109
				5110	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
				5111	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
				5112	"Opts: %.*s%s%s", descr,
				5113	(int) sizeof(sbi->s_es->s_mount_opts),
				5114	sbi->s_es->s_mount_opts,
				5115	*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
				5116
				5117	if (es->s_error_count)
				5118	mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
				5119
				5120	/* Enable message ratelimiting. Default is 10 messages per 5 secs. */
				5121	ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
				5122	ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
				5123	ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5124	atomic_set(&sbi->s_warning_count, 0);
				5125	atomic_set(&sbi->s_msg_count, 0);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5126
				5127	kfree(orig_data);
				5128	return 0;
				5129
				5130	cantfind_ext4:
				5131	if (!silent)
				5132	ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
				5133	goto failed_mount;
				5134
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5135	failed_mount8:
				5136	ext4_unregister_sysfs(sb);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5137	kobject_put(&sbi->s_kobj);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5138	failed_mount7:
				5139	ext4_unregister_li_request(sb);
				5140	failed_mount6:
				5141	ext4_mb_release(sb);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5142	rcu_read_lock();
				5143	flex_groups = rcu_dereference(sbi->s_flex_groups);
				5144	if (flex_groups) {
				5145	for (i = 0; i < sbi->s_flex_groups_allocated; i++)
				5146	kvfree(flex_groups[i]);
				5147	kvfree(flex_groups);
				5148	}
				5149	rcu_read_unlock();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5150	percpu_counter_destroy(&sbi->s_freeclusters_counter);
				5151	percpu_counter_destroy(&sbi->s_freeinodes_counter);
				5152	percpu_counter_destroy(&sbi->s_dirs_counter);
				5153	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5154	percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
				5155	percpu_free_rwsem(&sbi->s_writepages_rwsem);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5156	failed_mount5:
				5157	ext4_ext_release(sb);
				5158	ext4_release_system_zone(sb);
				5159	failed_mount4a:
				5160	dput(sb->s_root);
				5161	sb->s_root = NULL;
				5162	failed_mount4:
				5163	ext4_msg(sb, KERN_ERR, "mount failed");
				5164	if (EXT4_SB(sb)->rsv_conversion_wq)
				5165	destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
				5166	failed_mount_wq:
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	5167	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
				5168	sbi->s_ea_inode_cache = NULL;
				5169
				5170	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
				5171	sbi->s_ea_block_cache = NULL;
				5172
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5173	if (sbi->s_journal) {
				5174	jbd2_journal_destroy(sbi->s_journal);
				5175	sbi->s_journal = NULL;
				5176	}
				5177	failed_mount3a:
				5178	ext4_es_unregister_shrinker(sbi);
				5179	failed_mount3:
				5180	del_timer_sync(&sbi->s_err_report);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5181	ext4_stop_mmpd(sbi);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5182	failed_mount2:
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5183	rcu_read_lock();
				5184	group_desc = rcu_dereference(sbi->s_group_desc);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5185	for (i = 0; i < db_count; i++)
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5186	brelse(group_desc[i]);
				5187	kvfree(group_desc);
				5188	rcu_read_unlock();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5189	failed_mount:
				5190	if (sbi->s_chksum_driver)
				5191	crypto_free_shash(sbi->s_chksum_driver);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	5192
				5193	#ifdef CONFIG_UNICODE
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5194	utf8_unload(sb->s_encoding);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	5195	#endif
				5196
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5197	#ifdef CONFIG_QUOTA
				5198	for (i = 0; i < EXT4_MAXQUOTAS; i++)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	5199	kfree(get_qf_name(sb, sbi, i));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5200	#endif
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5201	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
				5202	/* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5203	brelse(bh);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5204	ext4_blkdev_remove(sbi);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5205	out_fail:
				5206	sb->s_fs_info = NULL;
				5207	kfree(sbi->s_blockgroup_lock);
				5208	out_free_base:
				5209	kfree(sbi);
				5210	kfree(orig_data);
				5211	fs_put_dax(dax_dev);
				5212	return err ? err : ret;
				5213	}
				5214
				5215	/*
				5216	* Setup any per-fs journal parameters now. We'll do this both on
				5217	* initial mount, once the journal has been initialised but before we've
				5218	* done any recovery; and again on any subsequent remount.
				5219	*/
				5220	static void ext4_init_journal_params(struct super_block sb, journal_t journal)
				5221	{
				5222	struct ext4_sb_info *sbi = EXT4_SB(sb);
				5223
				5224	journal->j_commit_interval = sbi->s_commit_interval;
				5225	journal->j_min_batch_time = sbi->s_min_batch_time;
				5226	journal->j_max_batch_time = sbi->s_max_batch_time;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5227	ext4_fc_init(sb, journal);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5228
				5229	write_lock(&journal->j_state_lock);
				5230	if (test_opt(sb, BARRIER))
				5231	journal->j_flags \|= JBD2_BARRIER;
				5232	else
				5233	journal->j_flags &= ~JBD2_BARRIER;
				5234	if (test_opt(sb, DATA_ERR_ABORT))
				5235	journal->j_flags \|= JBD2_ABORT_ON_SYNCDATA_ERR;
				5236	else
				5237	journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
				5238	write_unlock(&journal->j_state_lock);
				5239	}
				5240
				5241	static struct inode ext4_get_journal_inode(struct super_block sb,
				5242	unsigned int journal_inum)
				5243	{
				5244	struct inode *journal_inode;
				5245
				5246	/*
				5247	* Test for the existence of a valid inode on disk. Bad things
				5248	* happen if we iget() an unused inode, as the subsequent iput()
				5249	* will try to delete it.
				5250	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	5251	journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5252	if (IS_ERR(journal_inode)) {
				5253	ext4_msg(sb, KERN_ERR, "no journal found");
				5254	return NULL;
				5255	}
				5256	if (!journal_inode->i_nlink) {
				5257	make_bad_inode(journal_inode);
				5258	iput(journal_inode);
				5259	ext4_msg(sb, KERN_ERR, "journal inode is deleted");
				5260	return NULL;
				5261	}
				5262
				5263	jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
				5264	journal_inode, journal_inode->i_size);
				5265	if (!S_ISREG(journal_inode->i_mode)) {
				5266	ext4_msg(sb, KERN_ERR, "invalid journal inode");
				5267	iput(journal_inode);
				5268	return NULL;
				5269	}
				5270	return journal_inode;
				5271	}
				5272
				5273	static journal_t ext4_get_journal(struct super_block sb,
				5274	unsigned int journal_inum)
				5275	{
				5276	struct inode *journal_inode;
				5277	journal_t *journal;
				5278
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5279	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
				5280	return NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5281
				5282	journal_inode = ext4_get_journal_inode(sb, journal_inum);
				5283	if (!journal_inode)
				5284	return NULL;
				5285
				5286	journal = jbd2_journal_init_inode(journal_inode);
				5287	if (!journal) {
				5288	ext4_msg(sb, KERN_ERR, "Could not load journal inode");
				5289	iput(journal_inode);
				5290	return NULL;
				5291	}
				5292	journal->j_private = sb;
				5293	ext4_init_journal_params(sb, journal);
				5294	return journal;
				5295	}
				5296
				5297	static journal_t ext4_get_dev_journal(struct super_block sb,
				5298	dev_t j_dev)
				5299	{
				5300	struct buffer_head *bh;
				5301	journal_t *journal;
				5302	ext4_fsblk_t start;
				5303	ext4_fsblk_t len;
				5304	int hblock, blocksize;
				5305	ext4_fsblk_t sb_block;
				5306	unsigned long offset;
				5307	struct ext4_super_block *es;
				5308	struct block_device *bdev;
				5309
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5310	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
				5311	return NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5312
				5313	bdev = ext4_blkdev_get(j_dev, sb);
				5314	if (bdev == NULL)
				5315	return NULL;
				5316
				5317	blocksize = sb->s_blocksize;
				5318	hblock = bdev_logical_block_size(bdev);
				5319	if (blocksize < hblock) {
				5320	ext4_msg(sb, KERN_ERR,
				5321	"blocksize too small for journal device");
				5322	goto out_bdev;
				5323	}
				5324
				5325	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
				5326	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
				5327	set_blocksize(bdev, blocksize);
				5328	if (!(bh = __bread(bdev, sb_block, blocksize))) {
				5329	ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
				5330	"external journal");
				5331	goto out_bdev;
				5332	}
				5333
				5334	es = (struct ext4_super_block *) (bh->b_data + offset);
				5335	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) \|\|
				5336	!(le32_to_cpu(es->s_feature_incompat) &
				5337	EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
				5338	ext4_msg(sb, KERN_ERR, "external journal has "
				5339	"bad superblock");
				5340	brelse(bh);
				5341	goto out_bdev;
				5342	}
				5343
				5344	if ((le32_to_cpu(es->s_feature_ro_compat) &
				5345	EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
				5346	es->s_checksum != ext4_superblock_csum(sb, es)) {
				5347	ext4_msg(sb, KERN_ERR, "external journal has "
				5348	"corrupt superblock");
				5349	brelse(bh);
				5350	goto out_bdev;
				5351	}
				5352
				5353	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
				5354	ext4_msg(sb, KERN_ERR, "journal UUID does not match");
				5355	brelse(bh);
				5356	goto out_bdev;
				5357	}
				5358
				5359	len = ext4_blocks_count(es);
				5360	start = sb_block + 1;
				5361	brelse(bh); /* we're done with the superblock */
				5362
				5363	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
				5364	start, len, blocksize);
				5365	if (!journal) {
				5366	ext4_msg(sb, KERN_ERR, "failed to create device journal");
				5367	goto out_bdev;
				5368	}
				5369	journal->j_private = sb;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5370	if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META \| REQ_PRIO, true)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5371	ext4_msg(sb, KERN_ERR, "I/O error on journal device");
				5372	goto out_journal;
				5373	}
				5374	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
				5375	ext4_msg(sb, KERN_ERR, "External journal has more than one "
				5376	"user (unsupported) - %d",
				5377	be32_to_cpu(journal->j_superblock->s_nr_users));
				5378	goto out_journal;
				5379	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5380	EXT4_SB(sb)->s_journal_bdev = bdev;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5381	ext4_init_journal_params(sb, journal);
				5382	return journal;
				5383
				5384	out_journal:
				5385	jbd2_journal_destroy(journal);
				5386	out_bdev:
				5387	ext4_blkdev_put(bdev);
				5388	return NULL;
				5389	}
				5390
				5391	static int ext4_load_journal(struct super_block *sb,
				5392	struct ext4_super_block *es,
				5393	unsigned long journal_devnum)
				5394	{
				5395	journal_t *journal;
				5396	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
				5397	dev_t journal_dev;
				5398	int err = 0;
				5399	int really_read_only;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5400	int journal_dev_ro;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5401
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5402	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
				5403	return -EFSCORRUPTED;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5404
				5405	if (journal_devnum &&
				5406	journal_devnum != le32_to_cpu(es->s_journal_dev)) {
				5407	ext4_msg(sb, KERN_INFO, "external journal device major/minor "
				5408	"numbers have changed");
				5409	journal_dev = new_decode_dev(journal_devnum);
				5410	} else
				5411	journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
				5412
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5413	if (journal_inum && journal_dev) {
				5414	ext4_msg(sb, KERN_ERR,
				5415	"filesystem has both journal inode and journal device!");
				5416	return -EINVAL;
				5417	}
				5418
				5419	if (journal_inum) {
				5420	journal = ext4_get_journal(sb, journal_inum);
				5421	if (!journal)
				5422	return -EINVAL;
				5423	} else {
				5424	journal = ext4_get_dev_journal(sb, journal_dev);
				5425	if (!journal)
				5426	return -EINVAL;
				5427	}
				5428
				5429	journal_dev_ro = bdev_read_only(journal->j_dev);
				5430	really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
				5431
				5432	if (journal_dev_ro && !sb_rdonly(sb)) {
				5433	ext4_msg(sb, KERN_ERR,
				5434	"journal device read-only, try mounting with '-o ro'");
				5435	err = -EROFS;
				5436	goto err_out;
				5437	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5438
				5439	/*
				5440	* Are we loading a blank journal or performing recovery after a
				5441	* crash? For recovery, we need to check in advance whether we
				5442	* can get read-write access to the device.
				5443	*/
				5444	if (ext4_has_feature_journal_needs_recovery(sb)) {
				5445	if (sb_rdonly(sb)) {
				5446	ext4_msg(sb, KERN_INFO, "INFO: recovery "
				5447	"required on readonly filesystem");
				5448	if (really_read_only) {
				5449	ext4_msg(sb, KERN_ERR, "write access "
				5450	"unavailable, cannot proceed "
				5451	"(try mounting with noload)");
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5452	err = -EROFS;
				5453	goto err_out;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5454	}
				5455	ext4_msg(sb, KERN_INFO, "write access will "
				5456	"be enabled during recovery");
				5457	}
				5458	}
				5459
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5460	if (!(journal->j_flags & JBD2_BARRIER))
				5461	ext4_msg(sb, KERN_INFO, "barriers disabled");
				5462
				5463	if (!ext4_has_feature_journal_needs_recovery(sb))
				5464	err = jbd2_journal_wipe(journal, !really_read_only);
				5465	if (!err) {
				5466	char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
				5467	if (save)
				5468	memcpy(save, ((char *) es) +
				5469	EXT4_S_ERR_START, EXT4_S_ERR_LEN);
				5470	err = jbd2_journal_load(journal);
				5471	if (save)
				5472	memcpy(((char *) es) + EXT4_S_ERR_START,
				5473	save, EXT4_S_ERR_LEN);
				5474	kfree(save);
				5475	}
				5476
				5477	if (err) {
				5478	ext4_msg(sb, KERN_ERR, "error loading journal");
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5479	goto err_out;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5480	}
				5481
				5482	EXT4_SB(sb)->s_journal = journal;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5483	err = ext4_clear_journal_err(sb, es);
				5484	if (err) {
				5485	EXT4_SB(sb)->s_journal = NULL;
				5486	jbd2_journal_destroy(journal);
				5487	return err;
				5488	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5489
				5490	if (!really_read_only && journal_devnum &&
				5491	journal_devnum != le32_to_cpu(es->s_journal_dev)) {
				5492	es->s_journal_dev = cpu_to_le32(journal_devnum);
				5493
				5494	/* Make sure we flush the recovery flag to disk. */
				5495	ext4_commit_super(sb, 1);
				5496	}
				5497
				5498	return 0;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5499
				5500	err_out:
				5501	jbd2_journal_destroy(journal);
				5502	return err;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5503	}
				5504
				5505	static int ext4_commit_super(struct super_block *sb, int sync)
				5506	{
				5507	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
				5508	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
				5509	int error = 0;
				5510
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5511	if (!sbh)
				5512	return -EINVAL;
				5513	if (block_device_ejected(sb))
				5514	return -ENODEV;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5515
				5516	/*
				5517	* If the file system is mounted read-only, don't update the
				5518	* superblock write time. This avoids updating the superblock
				5519	* write time when we are mounting the root file system
				5520	* read/only but we need to replay the journal; at that point,
				5521	* for people who are east of GMT and who make their clock
				5522	* tick in localtime for Windows bug-for-bug compatibility,
				5523	* the clock is set in the future, and this will cause e2fsck
				5524	* to complain and force a full file system check.
				5525	*/
				5526	if (!(sb->s_flags & SB_RDONLY))
				5527	ext4_update_tstamp(es, s_wtime);
				5528	if (sb->s_bdev->bd_part)
				5529	es->s_kbytes_written =
				5530	cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
				5531	((part_stat_read(sb->s_bdev->bd_part,
				5532	sectors[STAT_WRITE]) -
				5533	EXT4_SB(sb)->s_sectors_written_start) >> 1));
				5534	else
				5535	es->s_kbytes_written =
				5536	cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
				5537	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
				5538	ext4_free_blocks_count_set(es,
				5539	EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
				5540	&EXT4_SB(sb)->s_freeclusters_counter)));
				5541	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
				5542	es->s_free_inodes_count =
				5543	cpu_to_le32(percpu_counter_sum_positive(
				5544	&EXT4_SB(sb)->s_freeinodes_counter));
				5545	BUFFER_TRACE(sbh, "marking dirty");
				5546	ext4_superblock_csum_set(sb);
				5547	if (sync)
				5548	lock_buffer(sbh);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	5549	if (buffer_write_io_error(sbh) \|\| !buffer_uptodate(sbh)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5550	/*
				5551	* Oh, dear. A previous attempt to write the
				5552	* superblock failed. This could happen because the
				5553	* USB device was yanked out. Or it could happen to
				5554	* be a transient write error and maybe the block will
				5555	* be remapped. Nothing we can do but to retry the
				5556	* write and hope for the best.
				5557	*/
				5558	ext4_msg(sb, KERN_ERR, "previous I/O error to "
				5559	"superblock detected");
				5560	clear_buffer_write_io_error(sbh);
				5561	set_buffer_uptodate(sbh);
				5562	}
				5563	mark_buffer_dirty(sbh);
				5564	if (sync) {
				5565	unlock_buffer(sbh);
				5566	error = __sync_dirty_buffer(sbh,
				5567	REQ_SYNC \| (test_opt(sb, BARRIER) ? REQ_FUA : 0));
				5568	if (buffer_write_io_error(sbh)) {
				5569	ext4_msg(sb, KERN_ERR, "I/O error while writing "
				5570	"superblock");
				5571	clear_buffer_write_io_error(sbh);
				5572	set_buffer_uptodate(sbh);
				5573	}
				5574	}
				5575	return error;
				5576	}
				5577
				5578	/*
				5579	* Have we just finished recovery? If so, and if we are mounting (or
				5580	* remounting) the filesystem readonly, then we will end up with a
				5581	* consistent fs on disk. Record that fact.
				5582	*/
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5583	static int ext4_mark_recovery_complete(struct super_block *sb,
				5584	struct ext4_super_block *es)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5585	{
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5586	int err;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5587	journal_t *journal = EXT4_SB(sb)->s_journal;
				5588
				5589	if (!ext4_has_feature_journal(sb)) {
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5590	if (journal != NULL) {
				5591	ext4_error(sb, "Journal got removed while the fs was "
				5592	"mounted!");
				5593	return -EFSCORRUPTED;
				5594	}
				5595	return 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5596	}
				5597	jbd2_journal_lock_updates(journal);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5598	err = jbd2_journal_flush(journal);
				5599	if (err < 0)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5600	goto out;
				5601
				5602	if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
				5603	ext4_clear_feature_journal_needs_recovery(sb);
				5604	ext4_commit_super(sb, 1);
				5605	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5606	out:
				5607	jbd2_journal_unlock_updates(journal);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5608	return err;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5609	}
				5610
				5611	/*
				5612	* If we are mounting (or read-write remounting) a filesystem whose journal
				5613	* has recorded an error from a previous lifetime, move that error to the
				5614	* main filesystem now.
				5615	*/
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5616	static int ext4_clear_journal_err(struct super_block *sb,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5617	struct ext4_super_block *es)
				5618	{
				5619	journal_t *journal;
				5620	int j_errno;
				5621	const char *errstr;
				5622
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5623	if (!ext4_has_feature_journal(sb)) {
				5624	ext4_error(sb, "Journal got removed while the fs was mounted!");
				5625	return -EFSCORRUPTED;
				5626	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5627
				5628	journal = EXT4_SB(sb)->s_journal;
				5629
				5630	/*
				5631	* Now check for any error status which may have been recorded in the
				5632	* journal by a prior ext4_error() or ext4_abort()
				5633	*/
				5634
				5635	j_errno = jbd2_journal_errno(journal);
				5636	if (j_errno) {
				5637	char nbuf[16];
				5638
				5639	errstr = ext4_decode_error(sb, j_errno, nbuf);
				5640	ext4_warning(sb, "Filesystem error recorded "
				5641	"from previous mount: %s", errstr);
				5642	ext4_warning(sb, "Marking fs in need of filesystem check.");
				5643
				5644	EXT4_SB(sb)->s_mount_state \|= EXT4_ERROR_FS;
				5645	es->s_state \|= cpu_to_le16(EXT4_ERROR_FS);
				5646	ext4_commit_super(sb, 1);
				5647
				5648	jbd2_journal_clear_err(journal);
				5649	jbd2_journal_update_sb_errno(journal);
				5650	}
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5651	return 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5652	}
				5653
				5654	/*
				5655	* Force the running and committing transactions to commit,
				5656	* and wait on the commit.
				5657	*/
				5658	int ext4_force_commit(struct super_block *sb)
				5659	{
				5660	journal_t *journal;
				5661
				5662	if (sb_rdonly(sb))
				5663	return 0;
				5664
				5665	journal = EXT4_SB(sb)->s_journal;
				5666	return ext4_journal_force_commit(journal);
				5667	}
				5668
				5669	static int ext4_sync_fs(struct super_block *sb, int wait)
				5670	{
				5671	int ret = 0;
				5672	tid_t target;
				5673	bool needs_barrier = false;
				5674	struct ext4_sb_info *sbi = EXT4_SB(sb);
				5675
				5676	if (unlikely(ext4_forced_shutdown(sbi)))
				5677	return 0;
				5678
				5679	trace_ext4_sync_fs(sb, wait);
				5680	flush_workqueue(sbi->rsv_conversion_wq);
				5681	/*
				5682	* Writeback quota in non-journalled quota case - journalled quota has
				5683	* no dirty dquots
				5684	*/
				5685	dquot_writeback_dquots(sb, -1);
				5686	/*
				5687	* Data writeback is possible w/o journal transaction, so barrier must
				5688	* being sent at the end of the function. But we can skip it if
				5689	* transaction_commit will do it for us.
				5690	*/
				5691	if (sbi->s_journal) {
				5692	target = jbd2_get_latest_transaction(sbi->s_journal);
				5693	if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
				5694	!jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
				5695	needs_barrier = true;
				5696
				5697	if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
				5698	if (wait)
				5699	ret = jbd2_log_wait_commit(sbi->s_journal,
				5700	target);
				5701	}
				5702	} else if (wait && test_opt(sb, BARRIER))
				5703	needs_barrier = true;
				5704	if (needs_barrier) {
				5705	int err;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5706	err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5707	if (!ret)
				5708	ret = err;
				5709	}
				5710
				5711	return ret;
				5712	}
				5713
				5714	/*
				5715	* LVM calls this function before a (read-only) snapshot is created. This
				5716	* gives us a chance to flush the journal completely and mark the fs clean.
				5717	*
				5718	* Note that only this function cannot bring a filesystem to be in a clean
				5719	* state independently. It relies on upper layer to stop all data & metadata
				5720	* modifications.
				5721	*/
				5722	static int ext4_freeze(struct super_block *sb)
				5723	{
				5724	int error = 0;
				5725	journal_t *journal;
				5726
				5727	if (sb_rdonly(sb))
				5728	return 0;
				5729
				5730	journal = EXT4_SB(sb)->s_journal;
				5731
				5732	if (journal) {
				5733	/* Now we set up the journal barrier. */
				5734	jbd2_journal_lock_updates(journal);
				5735
				5736	/*
				5737	* Don't clear the needs_recovery flag if we failed to
				5738	* flush the journal.
				5739	*/
				5740	error = jbd2_journal_flush(journal);
				5741	if (error < 0)
				5742	goto out;
				5743
				5744	/* Journal blocked and flushed, clear needs_recovery flag. */
				5745	ext4_clear_feature_journal_needs_recovery(sb);
				5746	}
				5747
				5748	error = ext4_commit_super(sb, 1);
				5749	out:
				5750	if (journal)
				5751	/* we rely on upper layer to stop further updates */
				5752	jbd2_journal_unlock_updates(journal);
				5753	return error;
				5754	}
				5755
				5756	/*
				5757	* Called by LVM after the snapshot is done. We need to reset the RECOVER
				5758	* flag here, even though the filesystem is not technically dirty yet.
				5759	*/
				5760	static int ext4_unfreeze(struct super_block *sb)
				5761	{
				5762	if (sb_rdonly(sb) \|\| ext4_forced_shutdown(EXT4_SB(sb)))
				5763	return 0;
				5764
				5765	if (EXT4_SB(sb)->s_journal) {
				5766	/* Reset the needs_recovery flag before the fs is unlocked. */
				5767	ext4_set_feature_journal_needs_recovery(sb);
				5768	}
				5769
				5770	ext4_commit_super(sb, 1);
				5771	return 0;
				5772	}
				5773
				5774	/*
				5775	* Structure to save mount options for ext4_remount's benefit
				5776	*/
				5777	struct ext4_mount_options {
				5778	unsigned long s_mount_opt;
				5779	unsigned long s_mount_opt2;
				5780	kuid_t s_resuid;
				5781	kgid_t s_resgid;
				5782	unsigned long s_commit_interval;
				5783	u32 s_min_batch_time, s_max_batch_time;
				5784	#ifdef CONFIG_QUOTA
				5785	int s_jquota_fmt;
				5786	char *s_qf_names[EXT4_MAXQUOTAS];
				5787	#endif
				5788	};
				5789
				5790	static int ext4_remount(struct super_block sb, int flags, char *data)
				5791	{
				5792	struct ext4_super_block *es;
				5793	struct ext4_sb_info *sbi = EXT4_SB(sb);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5794	unsigned long old_sb_flags, vfs_flags;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5795	struct ext4_mount_options old_opts;
				5796	int enable_quota = 0;
				5797	ext4_group_t g;
				5798	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
				5799	int err = 0;
				5800	#ifdef CONFIG_QUOTA
				5801	int i, j;
				5802	char *to_free[EXT4_MAXQUOTAS];
				5803	#endif
				5804	char *orig_data = kstrdup(data, GFP_KERNEL);
				5805
				5806	if (data && !orig_data)
				5807	return -ENOMEM;
				5808
				5809	/* Store the original options */
				5810	old_sb_flags = sb->s_flags;
				5811	old_opts.s_mount_opt = sbi->s_mount_opt;
				5812	old_opts.s_mount_opt2 = sbi->s_mount_opt2;
				5813	old_opts.s_resuid = sbi->s_resuid;
				5814	old_opts.s_resgid = sbi->s_resgid;
				5815	old_opts.s_commit_interval = sbi->s_commit_interval;
				5816	old_opts.s_min_batch_time = sbi->s_min_batch_time;
				5817	old_opts.s_max_batch_time = sbi->s_max_batch_time;
				5818	#ifdef CONFIG_QUOTA
				5819	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
				5820	for (i = 0; i < EXT4_MAXQUOTAS; i++)
				5821	if (sbi->s_qf_names[i]) {
				5822	char *qf_name = get_qf_name(sb, sbi, i);
				5823
				5824	old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
				5825	if (!old_opts.s_qf_names[i]) {
				5826	for (j = 0; j < i; j++)
				5827	kfree(old_opts.s_qf_names[j]);
				5828	kfree(orig_data);
				5829	return -ENOMEM;
				5830	}
				5831	} else
				5832	old_opts.s_qf_names[i] = NULL;
				5833	#endif
				5834	if (sbi->s_journal && sbi->s_journal->j_task->io_context)
				5835	journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
				5836
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5837	/*
				5838	* Some options can be enabled by ext4 and/or by VFS mount flag
				5839	* either way we need to make sure it matches in both *flags and
				5840	* s_flags. Copy those selected flags from *flags to s_flags
				5841	*/
				5842	vfs_flags = SB_LAZYTIME \| SB_I_VERSION;
				5843	sb->s_flags = (sb->s_flags & ~vfs_flags) \| (*flags & vfs_flags);
				5844
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5845	if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
				5846	err = -EINVAL;
				5847	goto restore_opts;
				5848	}
				5849
				5850	if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
				5851	test_opt(sb, JOURNAL_CHECKSUM)) {
				5852	ext4_msg(sb, KERN_ERR, "changing journal_checksum "
				5853	"during remount not supported; ignoring");
				5854	sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
				5855	}
				5856
				5857	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
				5858	if (test_opt2(sb, EXPLICIT_DELALLOC)) {
				5859	ext4_msg(sb, KERN_ERR, "can't mount with "
				5860	"both data=journal and delalloc");
				5861	err = -EINVAL;
				5862	goto restore_opts;
				5863	}
				5864	if (test_opt(sb, DIOREAD_NOLOCK)) {
				5865	ext4_msg(sb, KERN_ERR, "can't mount with "
				5866	"both data=journal and dioread_nolock");
				5867	err = -EINVAL;
				5868	goto restore_opts;
				5869	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5870	} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
				5871	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
				5872	ext4_msg(sb, KERN_ERR, "can't mount with "
				5873	"journal_async_commit in data=ordered mode");
				5874	err = -EINVAL;
				5875	goto restore_opts;
				5876	}
				5877	}
				5878
				5879	if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
				5880	ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
				5881	err = -EINVAL;
				5882	goto restore_opts;
				5883	}
				5884
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5885	if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
				5886	ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5887
				5888	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) \|
				5889	(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
				5890
				5891	es = sbi->s_es;
				5892
				5893	if (sbi->s_journal) {
				5894	ext4_init_journal_params(sb, sbi->s_journal);
				5895	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
				5896	}
				5897
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5898	if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	5899	if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5900	err = -EROFS;
				5901	goto restore_opts;
				5902	}
				5903
				5904	if (*flags & SB_RDONLY) {
				5905	err = sync_filesystem(sb);
				5906	if (err < 0)
				5907	goto restore_opts;
				5908	err = dquot_suspend(sb, -1);
				5909	if (err < 0)
				5910	goto restore_opts;
				5911
				5912	/*
				5913	* First of all, the unconditional stuff we have to do
				5914	* to disable replay of the journal when we next remount
				5915	*/
				5916	sb->s_flags \|= SB_RDONLY;
				5917
				5918	/*
				5919	* OK, test if we are remounting a valid rw partition
				5920	* readonly, and if so set the rdonly flag and then
				5921	* mark the partition as valid again.
				5922	*/
				5923	if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
				5924	(sbi->s_mount_state & EXT4_VALID_FS))
				5925	es->s_state = cpu_to_le16(sbi->s_mount_state);
				5926
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5927	if (sbi->s_journal) {
				5928	/*
				5929	* We let remount-ro finish even if marking fs
				5930	* as clean failed...
				5931	*/
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5932	ext4_mark_recovery_complete(sb, es);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5933	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5934	} else {
				5935	/* Make sure we can mount this feature set readwrite */
				5936	if (ext4_has_feature_readonly(sb) \|\|
				5937	!ext4_feature_set_ok(sb, 0)) {
				5938	err = -EROFS;
				5939	goto restore_opts;
				5940	}
				5941	/*
				5942	* Make sure the group descriptor checksums
				5943	* are sane. If they aren't, refuse to remount r/w.
				5944	*/
				5945	for (g = 0; g < sbi->s_groups_count; g++) {
				5946	struct ext4_group_desc *gdp =
				5947	ext4_get_group_desc(sb, g, NULL);
				5948
				5949	if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
				5950	ext4_msg(sb, KERN_ERR,
				5951	"ext4_remount: Checksum for group %u failed (%u!=%u)",
				5952	g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
				5953	le16_to_cpu(gdp->bg_checksum));
				5954	err = -EFSBADCRC;
				5955	goto restore_opts;
				5956	}
				5957	}
				5958
				5959	/*
				5960	* If we have an unprocessed orphan list hanging
				5961	* around from a previously readonly bdev mount,
				5962	* require a full umount/remount for now.
				5963	*/
				5964	if (es->s_last_orphan) {
				5965	ext4_msg(sb, KERN_WARNING, "Couldn't "
				5966	"remount RDWR because of unprocessed "
				5967	"orphan inode list. Please "
				5968	"umount/remount instead");
				5969	err = -EINVAL;
				5970	goto restore_opts;
				5971	}
				5972
				5973	/*
				5974	* Mounting a RDONLY partition read-write, so reread
				5975	* and store the current valid flag. (It may have
				5976	* been changed by e2fsck since we originally mounted
				5977	* the partition.)
				5978	*/
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	5979	if (sbi->s_journal) {
				5980	err = ext4_clear_journal_err(sb, es);
				5981	if (err)
				5982	goto restore_opts;
				5983	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5984	sbi->s_mount_state = le16_to_cpu(es->s_state);
				5985
				5986	err = ext4_setup_super(sb, es, 0);
				5987	if (err)
				5988	goto restore_opts;
				5989
				5990	sb->s_flags &= ~SB_RDONLY;
				5991	if (ext4_has_feature_mmp(sb))
				5992	if (ext4_multi_mount_protect(sb,
				5993	le64_to_cpu(es->s_mmp_block))) {
				5994	err = -EROFS;
				5995	goto restore_opts;
				5996	}
				5997	enable_quota = 1;
				5998	}
				5999	}
				6000
				6001	/*
				6002	* Reinitialize lazy itable initialization thread based on
				6003	* current settings
				6004	*/
				6005	if (sb_rdonly(sb) \|\| !test_opt(sb, INIT_INODE_TABLE))
				6006	ext4_unregister_li_request(sb);
				6007	else {
				6008	ext4_group_t first_not_zeroed;
				6009	first_not_zeroed = ext4_has_uninit_itable(sb);
				6010	ext4_register_li_request(sb, first_not_zeroed);
				6011	}
				6012
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	6013	/*
				6014	* Handle creation of system zone data early because it can fail.
				6015	* Releasing of existing data is done when we are sure remount will
				6016	* succeed.
				6017	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6018	if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	6019	err = ext4_setup_system_zone(sb);
				6020	if (err)
				6021	goto restore_opts;
				6022	}
				6023
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6024	if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
				6025	err = ext4_commit_super(sb, 1);
				6026	if (err)
				6027	goto restore_opts;
				6028	}
				6029
				6030	#ifdef CONFIG_QUOTA
				6031	/* Release old quota file names */
				6032	for (i = 0; i < EXT4_MAXQUOTAS; i++)
				6033	kfree(old_opts.s_qf_names[i]);
				6034	if (enable_quota) {
				6035	if (sb_any_quota_suspended(sb))
				6036	dquot_resume(sb, -1);
				6037	else if (ext4_has_feature_quota(sb)) {
				6038	err = ext4_enable_quotas(sb);
				6039	if (err)
				6040	goto restore_opts;
				6041	}
				6042	}
				6043	#endif
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6044	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	6045	ext4_release_system_zone(sb);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6046
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6047	if (!ext4_has_feature_mmp(sb) \|\| sb_rdonly(sb))
				6048	ext4_stop_mmpd(sbi);
				6049
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	6050	/*
				6051	* Some options can be enabled by ext4 and/or by VFS mount flag
				6052	* either way we need to make sure it matches in both *flags and
				6053	* s_flags. Copy those selected flags from s_flags to *flags
				6054	*/
				6055	flags = (flags & ~vfs_flags) \| (sb->s_flags & vfs_flags);
				6056
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6057	ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
				6058	kfree(orig_data);
				6059	return 0;
				6060
				6061	restore_opts:
				6062	sb->s_flags = old_sb_flags;
				6063	sbi->s_mount_opt = old_opts.s_mount_opt;
				6064	sbi->s_mount_opt2 = old_opts.s_mount_opt2;
				6065	sbi->s_resuid = old_opts.s_resuid;
				6066	sbi->s_resgid = old_opts.s_resgid;
				6067	sbi->s_commit_interval = old_opts.s_commit_interval;
				6068	sbi->s_min_batch_time = old_opts.s_min_batch_time;
				6069	sbi->s_max_batch_time = old_opts.s_max_batch_time;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6070	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	6071	ext4_release_system_zone(sb);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6072	#ifdef CONFIG_QUOTA
				6073	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
				6074	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
				6075	to_free[i] = get_qf_name(sb, sbi, i);
				6076	rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
				6077	}
				6078	synchronize_rcu();
				6079	for (i = 0; i < EXT4_MAXQUOTAS; i++)
				6080	kfree(to_free[i]);
				6081	#endif
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6082	if (!ext4_has_feature_mmp(sb) \|\| sb_rdonly(sb))
				6083	ext4_stop_mmpd(sbi);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6084	kfree(orig_data);
				6085	return err;
				6086	}
				6087
				6088	#ifdef CONFIG_QUOTA
				6089	static int ext4_statfs_project(struct super_block *sb,
				6090	kprojid_t projid, struct kstatfs *buf)
				6091	{
				6092	struct kqid qid;
				6093	struct dquot *dquot;
				6094	u64 limit;
				6095	u64 curblock;
				6096
				6097	qid = make_kqid_projid(projid);
				6098	dquot = dqget(sb, qid);
				6099	if (IS_ERR(dquot))
				6100	return PTR_ERR(dquot);
				6101	spin_lock(&dquot->dq_dqb_lock);
				6102
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6103	limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
				6104	dquot->dq_dqb.dqb_bhardlimit);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	6105	limit >>= sb->s_blocksize_bits;
				6106
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6107	if (limit && buf->f_blocks > limit) {
				6108	curblock = (dquot->dq_dqb.dqb_curspace +
				6109	dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
				6110	buf->f_blocks = limit;
				6111	buf->f_bfree = buf->f_bavail =
				6112	(buf->f_blocks > curblock) ?
				6113	(buf->f_blocks - curblock) : 0;
				6114	}
				6115
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6116	limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
				6117	dquot->dq_dqb.dqb_ihardlimit);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6118	if (limit && buf->f_files > limit) {
				6119	buf->f_files = limit;
				6120	buf->f_ffree =
				6121	(buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
				6122	(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
				6123	}
				6124
				6125	spin_unlock(&dquot->dq_dqb_lock);
				6126	dqput(dquot);
				6127	return 0;
				6128	}
				6129	#endif
				6130
				6131	static int ext4_statfs(struct dentry dentry, struct kstatfs buf)
				6132	{
				6133	struct super_block *sb = dentry->d_sb;
				6134	struct ext4_sb_info *sbi = EXT4_SB(sb);
				6135	struct ext4_super_block *es = sbi->s_es;
				6136	ext4_fsblk_t overhead = 0, resv_blocks;
				6137	u64 fsid;
				6138	s64 bfree;
				6139	resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
				6140
				6141	if (!test_opt(sb, MINIX_DF))
				6142	overhead = sbi->s_overhead;
				6143
				6144	buf->f_type = EXT4_SUPER_MAGIC;
				6145	buf->f_bsize = sb->s_blocksize;
				6146	buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
				6147	bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
				6148	percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
				6149	/* prevent underflow in case that few free space is available */
				6150	buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
				6151	buf->f_bavail = buf->f_bfree -
				6152	(ext4_r_blocks_count(es) + resv_blocks);
				6153	if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
				6154	buf->f_bavail = 0;
				6155	buf->f_files = le32_to_cpu(es->s_inodes_count);
				6156	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
				6157	buf->f_namelen = EXT4_NAME_LEN;
				6158	fsid = le64_to_cpup((void *)es->s_uuid) ^
				6159	le64_to_cpup((void *)es->s_uuid + sizeof(u64));
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6160	buf->f_fsid = u64_to_fsid(fsid);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6161
				6162	#ifdef CONFIG_QUOTA
				6163	if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
				6164	sb_has_quota_limits_enabled(sb, PRJQUOTA))
				6165	ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
				6166	#endif
				6167	return 0;
				6168	}
				6169
				6170
				6171	#ifdef CONFIG_QUOTA
				6172
				6173	/*
				6174	* Helper functions so that transaction is started before we acquire dqio_sem
				6175	* to keep correct lock ordering of transaction > dqio_sem
				6176	*/
				6177	static inline struct inode dquot_to_inode(struct dquot dquot)
				6178	{
				6179	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
				6180	}
				6181
				6182	static int ext4_write_dquot(struct dquot *dquot)
				6183	{
				6184	int ret, err;
				6185	handle_t *handle;
				6186	struct inode *inode;
				6187
				6188	inode = dquot_to_inode(dquot);
				6189	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
				6190	EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
				6191	if (IS_ERR(handle))
				6192	return PTR_ERR(handle);
				6193	ret = dquot_commit(dquot);
				6194	err = ext4_journal_stop(handle);
				6195	if (!ret)
				6196	ret = err;
				6197	return ret;
				6198	}
				6199
				6200	static int ext4_acquire_dquot(struct dquot *dquot)
				6201	{
				6202	int ret, err;
				6203	handle_t *handle;
				6204
				6205	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
				6206	EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
				6207	if (IS_ERR(handle))
				6208	return PTR_ERR(handle);
				6209	ret = dquot_acquire(dquot);
				6210	err = ext4_journal_stop(handle);
				6211	if (!ret)
				6212	ret = err;
				6213	return ret;
				6214	}
				6215
				6216	static int ext4_release_dquot(struct dquot *dquot)
				6217	{
				6218	int ret, err;
				6219	handle_t *handle;
				6220
				6221	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
				6222	EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
				6223	if (IS_ERR(handle)) {
				6224	/* Release dquot anyway to avoid endless cycle in dqput() */
				6225	dquot_release(dquot);
				6226	return PTR_ERR(handle);
				6227	}
				6228	ret = dquot_release(dquot);
				6229	err = ext4_journal_stop(handle);
				6230	if (!ret)
				6231	ret = err;
				6232	return ret;
				6233	}
				6234
				6235	static int ext4_mark_dquot_dirty(struct dquot *dquot)
				6236	{
				6237	struct super_block *sb = dquot->dq_sb;
				6238	struct ext4_sb_info *sbi = EXT4_SB(sb);
				6239
				6240	/* Are we journaling quotas? */
				6241	if (ext4_has_feature_quota(sb) \|\|
				6242	sbi->s_qf_names[USRQUOTA] \|\| sbi->s_qf_names[GRPQUOTA]) {
				6243	dquot_mark_dquot_dirty(dquot);
				6244	return ext4_write_dquot(dquot);
				6245	} else {
				6246	return dquot_mark_dquot_dirty(dquot);
				6247	}
				6248	}
				6249
				6250	static int ext4_write_info(struct super_block *sb, int type)
				6251	{
				6252	int ret, err;
				6253	handle_t *handle;
				6254
				6255	/* Data block + inode block */
				6256	handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
				6257	if (IS_ERR(handle))
				6258	return PTR_ERR(handle);
				6259	ret = dquot_commit_info(sb, type);
				6260	err = ext4_journal_stop(handle);
				6261	if (!ret)
				6262	ret = err;
				6263	return ret;
				6264	}
				6265
				6266	/*
				6267	* Turn on quotas during mount time - we need to find
				6268	* the quota file and such...
				6269	*/
				6270	static int ext4_quota_on_mount(struct super_block *sb, int type)
				6271	{
				6272	return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type),
				6273	EXT4_SB(sb)->s_jquota_fmt, type);
				6274	}
				6275
				6276	static void lockdep_set_quota_inode(struct inode *inode, int subclass)
				6277	{
				6278	struct ext4_inode_info *ei = EXT4_I(inode);
				6279
				6280	/* The first argument of lockdep_set_subclass has to be
				6281	* exactly the same as the argument to init_rwsem() --- in
				6282	* this case, in init_once() --- or lockdep gets unhappy
				6283	* because the name of the lock is set using the
				6284	* stringification of the argument to init_rwsem().
				6285	*/
				6286	(void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
				6287	lockdep_set_subclass(&ei->i_data_sem, subclass);
				6288	}
				6289
				6290	/*
				6291	* Standard function to be called on quota_on
				6292	*/
				6293	static int ext4_quota_on(struct super_block *sb, int type, int format_id,
				6294	const struct path *path)
				6295	{
				6296	int err;
				6297
				6298	if (!test_opt(sb, QUOTA))
				6299	return -EINVAL;
				6300
				6301	/* Quotafile not on the same filesystem? */
				6302	if (path->dentry->d_sb != sb)
				6303	return -EXDEV;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	6304
				6305	/* Quota already enabled for this file? */
				6306	if (IS_NOQUOTA(d_inode(path->dentry)))
				6307	return -EBUSY;
				6308
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6309	/* Journaling quota? */
				6310	if (EXT4_SB(sb)->s_qf_names[type]) {
				6311	/* Quotafile not in fs root? */
				6312	if (path->dentry->d_parent != sb->s_root)
				6313	ext4_msg(sb, KERN_WARNING,
				6314	"Quota file not on filesystem root. "
				6315	"Journaled quota will not work");
				6316	sb_dqopt(sb)->flags \|= DQUOT_NOLIST_DIRTY;
				6317	} else {
				6318	/*
				6319	* Clear the flag just in case mount options changed since
				6320	* last time.
				6321	*/
				6322	sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
				6323	}
				6324
				6325	/*
				6326	* When we journal data on quota file, we have to flush journal to see
				6327	* all updates to the file when we bypass pagecache...
				6328	*/
				6329	if (EXT4_SB(sb)->s_journal &&
				6330	ext4_should_journal_data(d_inode(path->dentry))) {
				6331	/*
				6332	* We don't need to lock updates but journal_flush() could
				6333	* otherwise be livelocked...
				6334	*/
				6335	jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
				6336	err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
				6337	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
				6338	if (err)
				6339	return err;
				6340	}
				6341
				6342	lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
				6343	err = dquot_quota_on(sb, type, format_id, path);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6344	if (!err) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6345	struct inode *inode = d_inode(path->dentry);
				6346	handle_t *handle;
				6347
				6348	/*
				6349	* Set inode flags to prevent userspace from messing with quota
				6350	* files. If this fails, we return success anyway since quotas
				6351	* are already enabled and this is not a hard failure.
				6352	*/
				6353	inode_lock(inode);
				6354	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
				6355	if (IS_ERR(handle))
				6356	goto unlock_inode;
				6357	EXT4_I(inode)->i_flags \|= EXT4_NOATIME_FL \| EXT4_IMMUTABLE_FL;
				6358	inode_set_flags(inode, S_NOATIME \| S_IMMUTABLE,
				6359	S_NOATIME \| S_IMMUTABLE);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6360	err = ext4_mark_inode_dirty(handle, inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6361	ext4_journal_stop(handle);
				6362	unlock_inode:
				6363	inode_unlock(inode);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6364	if (err)
				6365	dquot_quota_off(sb, type);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6366	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6367	if (err)
				6368	lockdep_set_quota_inode(path->dentry->d_inode,
				6369	I_DATA_SEM_NORMAL);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6370	return err;
				6371	}
				6372
				6373	static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
				6374	unsigned int flags)
				6375	{
				6376	int err;
				6377	struct inode *qf_inode;
				6378	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
				6379	le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
				6380	le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
				6381	le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
				6382	};
				6383
				6384	BUG_ON(!ext4_has_feature_quota(sb));
				6385
				6386	if (!qf_inums[type])
				6387	return -EPERM;
				6388
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	6389	qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6390	if (IS_ERR(qf_inode)) {
				6391	ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
				6392	return PTR_ERR(qf_inode);
				6393	}
				6394
				6395	/* Don't account quota for quota files to avoid recursion */
				6396	qf_inode->i_flags \|= S_NOQUOTA;
				6397	lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6398	err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6399	if (err)
				6400	lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	6401	iput(qf_inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6402
				6403	return err;
				6404	}
				6405
				6406	/* Enable usage tracking for all quota types. */
				6407	static int ext4_enable_quotas(struct super_block *sb)
				6408	{
				6409	int type, err = 0;
				6410	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
				6411	le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
				6412	le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
				6413	le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
				6414	};
				6415	bool quota_mopt[EXT4_MAXQUOTAS] = {
				6416	test_opt(sb, USRQUOTA),
				6417	test_opt(sb, GRPQUOTA),
				6418	test_opt(sb, PRJQUOTA),
				6419	};
				6420
				6421	sb_dqopt(sb)->flags \|= DQUOT_QUOTA_SYS_FILE \| DQUOT_NOLIST_DIRTY;
				6422	for (type = 0; type < EXT4_MAXQUOTAS; type++) {
				6423	if (qf_inums[type]) {
				6424	err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
				6425	DQUOT_USAGE_ENABLED \|
				6426	(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
				6427	if (err) {
				6428	ext4_warning(sb,
				6429	"Failed to enable quota tracking "
				6430	"(type=%d, err=%d). Please run "
				6431	"e2fsck to fix.", type, err);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6432	for (type--; type >= 0; type--) {
				6433	struct inode *inode;
				6434
				6435	inode = sb_dqopt(sb)->files[type];
				6436	if (inode)
				6437	inode = igrab(inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6438	dquot_quota_off(sb, type);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6439	if (inode) {
				6440	lockdep_set_quota_inode(inode,
				6441	I_DATA_SEM_NORMAL);
				6442	iput(inode);
				6443	}
				6444	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6445
				6446	return err;
				6447	}
				6448	}
				6449	}
				6450	return 0;
				6451	}
				6452
				6453	static int ext4_quota_off(struct super_block *sb, int type)
				6454	{
				6455	struct inode *inode = sb_dqopt(sb)->files[type];
				6456	handle_t *handle;
				6457	int err;
				6458
				6459	/* Force all delayed allocation blocks to be allocated.
				6460	* Caller already holds s_umount sem */
				6461	if (test_opt(sb, DELALLOC))
				6462	sync_filesystem(sb);
				6463
				6464	if (!inode \|\| !igrab(inode))
				6465	goto out;
				6466
				6467	err = dquot_quota_off(sb, type);
				6468	if (err \|\| ext4_has_feature_quota(sb))
				6469	goto out_put;
				6470
				6471	inode_lock(inode);
				6472	/*
				6473	* Update modification times of quota files when userspace can
				6474	* start looking at them. If we fail, we return success anyway since
				6475	* this is not a hard failure and quotas are already disabled.
				6476	*/
				6477	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6478	if (IS_ERR(handle)) {
				6479	err = PTR_ERR(handle);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6480	goto out_unlock;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6481	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6482	EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL \| EXT4_IMMUTABLE_FL);
				6483	inode_set_flags(inode, 0, S_NOATIME \| S_IMMUTABLE);
				6484	inode->i_mtime = inode->i_ctime = current_time(inode);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6485	err = ext4_mark_inode_dirty(handle, inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6486	ext4_journal_stop(handle);
				6487	out_unlock:
				6488	inode_unlock(inode);
				6489	out_put:
				6490	lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
				6491	iput(inode);
				6492	return err;
				6493	out:
				6494	return dquot_quota_off(sb, type);
				6495	}
				6496
				6497	/* Read data from quotafile - avoid pagecache and such because we cannot afford
				6498	* acquiring the locks... As quota files are never truncated and quota code
				6499	* itself serializes the operations (and no one else should touch the files)
				6500	* we don't have to be afraid of races */
				6501	static ssize_t ext4_quota_read(struct super_block sb, int type, char data,
				6502	size_t len, loff_t off)
				6503	{
				6504	struct inode *inode = sb_dqopt(sb)->files[type];
				6505	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
				6506	int offset = off & (sb->s_blocksize - 1);
				6507	int tocopy;
				6508	size_t toread;
				6509	struct buffer_head *bh;
				6510	loff_t i_size = i_size_read(inode);
				6511
				6512	if (off > i_size)
				6513	return 0;
				6514	if (off+len > i_size)
				6515	len = i_size-off;
				6516	toread = len;
				6517	while (toread > 0) {
				6518	tocopy = sb->s_blocksize - offset < toread ?
				6519	sb->s_blocksize - offset : toread;
				6520	bh = ext4_bread(NULL, inode, blk, 0);
				6521	if (IS_ERR(bh))
				6522	return PTR_ERR(bh);
				6523	if (!bh) /* A hole? */
				6524	memset(data, 0, tocopy);
				6525	else
				6526	memcpy(data, bh->b_data+offset, tocopy);
				6527	brelse(bh);
				6528	offset = 0;
				6529	toread -= tocopy;
				6530	data += tocopy;
				6531	blk++;
				6532	}
				6533	return len;
				6534	}
				6535
				6536	/* Write to quotafile (we know the transaction is already started and has
				6537	* enough credits) */
				6538	static ssize_t ext4_quota_write(struct super_block *sb, int type,
				6539	const char *data, size_t len, loff_t off)
				6540	{
				6541	struct inode *inode = sb_dqopt(sb)->files[type];
				6542	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6543	int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6544	int retries = 0;
				6545	struct buffer_head *bh;
				6546	handle_t *handle = journal_current_handle();
				6547
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6548	if (!handle) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6549	ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
				6550	" cancelled because transaction is not started",
				6551	(unsigned long long)off, (unsigned long long)len);
				6552	return -EIO;
				6553	}
				6554	/*
				6555	* Since we account only one data block in transaction credits,
				6556	* then it is impossible to cross a block boundary.
				6557	*/
				6558	if (sb->s_blocksize - offset < len) {
				6559	ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
				6560	" cancelled because not block aligned",
				6561	(unsigned long long)off, (unsigned long long)len);
				6562	return -EIO;
				6563	}
				6564
				6565	do {
				6566	bh = ext4_bread(handle, inode, blk,
				6567	EXT4_GET_BLOCKS_CREATE \|
				6568	EXT4_GET_BLOCKS_METADATA_NOFAIL);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6569	} while (PTR_ERR(bh) == -ENOSPC &&
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6570	ext4_should_retry_alloc(inode->i_sb, &retries));
				6571	if (IS_ERR(bh))
				6572	return PTR_ERR(bh);
				6573	if (!bh)
				6574	goto out;
				6575	BUFFER_TRACE(bh, "get write access");
				6576	err = ext4_journal_get_write_access(handle, bh);
				6577	if (err) {
				6578	brelse(bh);
				6579	return err;
				6580	}
				6581	lock_buffer(bh);
				6582	memcpy(bh->b_data+offset, data, len);
				6583	flush_dcache_page(bh->b_page);
				6584	unlock_buffer(bh);
				6585	err = ext4_handle_dirty_metadata(handle, NULL, bh);
				6586	brelse(bh);
				6587	out:
				6588	if (inode->i_size < off + len) {
				6589	i_size_write(inode, off + len);
				6590	EXT4_I(inode)->i_disksize = inode->i_size;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6591	err2 = ext4_mark_inode_dirty(handle, inode);
				6592	if (unlikely(err2 && !err))
				6593	err = err2;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6594	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6595	return err ? err : len;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6596	}
				6597	#endif
				6598
				6599	static struct dentry ext4_mount(struct file_system_type fs_type, int flags,
				6600	const char dev_name, void data)
				6601	{
				6602	return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
				6603	}
				6604
				6605	#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
				6606	static inline void register_as_ext2(void)
				6607	{
				6608	int err = register_filesystem(&ext2_fs_type);
				6609	if (err)
				6610	printk(KERN_WARNING
				6611	"EXT4-fs: Unable to register as ext2 (%d)\n", err);
				6612	}
				6613
				6614	static inline void unregister_as_ext2(void)
				6615	{
				6616	unregister_filesystem(&ext2_fs_type);
				6617	}
				6618
				6619	static inline int ext2_feature_set_ok(struct super_block *sb)
				6620	{
				6621	if (ext4_has_unknown_ext2_incompat_features(sb))
				6622	return 0;
				6623	if (sb_rdonly(sb))
				6624	return 1;
				6625	if (ext4_has_unknown_ext2_ro_compat_features(sb))
				6626	return 0;
				6627	return 1;
				6628	}
				6629	#else
				6630	static inline void register_as_ext2(void) { }
				6631	static inline void unregister_as_ext2(void) { }
				6632	static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
				6633	#endif
				6634
				6635	static inline void register_as_ext3(void)
				6636	{
				6637	int err = register_filesystem(&ext3_fs_type);
				6638	if (err)
				6639	printk(KERN_WARNING
				6640	"EXT4-fs: Unable to register as ext3 (%d)\n", err);
				6641	}
				6642
				6643	static inline void unregister_as_ext3(void)
				6644	{
				6645	unregister_filesystem(&ext3_fs_type);
				6646	}
				6647
				6648	static inline int ext3_feature_set_ok(struct super_block *sb)
				6649	{
				6650	if (ext4_has_unknown_ext3_incompat_features(sb))
				6651	return 0;
				6652	if (!ext4_has_feature_journal(sb))
				6653	return 0;
				6654	if (sb_rdonly(sb))
				6655	return 1;
				6656	if (ext4_has_unknown_ext3_ro_compat_features(sb))
				6657	return 0;
				6658	return 1;
				6659	}
				6660
				6661	static struct file_system_type ext4_fs_type = {
				6662	.owner = THIS_MODULE,
				6663	.name = "ext4",
				6664	.mount = ext4_mount,
				6665	.kill_sb = kill_block_super,
				6666	.fs_flags = FS_REQUIRES_DEV,
				6667	};
				6668	MODULE_ALIAS_FS("ext4");
				6669
				6670	/* Shared across all ext4 file systems */
				6671	wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
				6672
				6673	static int __init ext4_init_fs(void)
				6674	{
				6675	int i, err;
				6676
				6677	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
				6678	ext4_li_info = NULL;
				6679	mutex_init(&ext4_li_mtx);
				6680
				6681	/* Build-time check for flags consistency */
				6682	ext4_check_flag_values();
				6683
				6684	for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
				6685	init_waitqueue_head(&ext4__ioend_wq[i]);
				6686
				6687	err = ext4_init_es();
				6688	if (err)
				6689	return err;
				6690
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	6691	err = ext4_init_pending();
				6692	if (err)
				6693	goto out7;
				6694
				6695	err = ext4_init_post_read_processing();
				6696	if (err)
				6697	goto out6;
				6698
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6699	err = ext4_init_pageio();
				6700	if (err)
				6701	goto out5;
				6702
				6703	err = ext4_init_system_zone();
				6704	if (err)
				6705	goto out4;
				6706
				6707	err = ext4_init_sysfs();
				6708	if (err)
				6709	goto out3;
				6710
				6711	err = ext4_init_mballoc();
				6712	if (err)
				6713	goto out2;
				6714	err = init_inodecache();
				6715	if (err)
				6716	goto out1;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6717
				6718	err = ext4_fc_init_dentry_cache();
				6719	if (err)
				6720	goto out05;
				6721
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6722	register_as_ext3();
				6723	register_as_ext2();
				6724	err = register_filesystem(&ext4_fs_type);
				6725	if (err)
				6726	goto out;
				6727
				6728	return 0;
				6729	out:
				6730	unregister_as_ext2();
				6731	unregister_as_ext3();
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6732	ext4_fc_destroy_dentry_cache();
				6733	out05:
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6734	destroy_inodecache();
				6735	out1:
				6736	ext4_exit_mballoc();
				6737	out2:
				6738	ext4_exit_sysfs();
				6739	out3:
				6740	ext4_exit_system_zone();
				6741	out4:
				6742	ext4_exit_pageio();
				6743	out5:
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	6744	ext4_exit_post_read_processing();
				6745	out6:
				6746	ext4_exit_pending();
				6747	out7:
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6748	ext4_exit_es();
				6749
				6750	return err;
				6751	}
				6752
				6753	static void __exit ext4_exit_fs(void)
				6754	{
				6755	ext4_destroy_lazyinit_thread();
				6756	unregister_as_ext2();
				6757	unregister_as_ext3();
				6758	unregister_filesystem(&ext4_fs_type);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	6759	ext4_fc_destroy_dentry_cache();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6760	destroy_inodecache();
				6761	ext4_exit_mballoc();
				6762	ext4_exit_sysfs();
				6763	ext4_exit_system_zone();
				6764	ext4_exit_pageio();
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	6765	ext4_exit_post_read_processing();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6766	ext4_exit_es();
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	6767	ext4_exit_pending();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6768	}
				6769
				6770	MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
				6771	MODULE_DESCRIPTION("Fourth Extended Filesystem");
				6772	MODULE_LICENSE("GPL");
				6773	MODULE_SOFTDEP("pre: crc32c");
				6774	module_init(ext4_init_fs)
				6775	module_exit(ext4_exit_fs)