// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/ialloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 * BSD ufs-inspired inode and directory allocation by
 * Stephen Tweedie (sct@redhat.com), 1993
 * Big-endian to little-endian byte-swapping/bitmaps by
 * David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/cred.h>

#include <asm/byteorder.h>

#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"

#include <trace/events/ext4.h>

/*
 * ialloc.c contains the inode allocation and deallocation routines
 */

/*
 * The free inodes are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count of the group.
 */

/*
 * To avoid calling the atomic setbit hundreds or thousands of times, we only
 * need to use it within a single byte (to ensure we get endianness right).
 * We can use memset for the rest of the bitmap as there are no other users.
 */
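/*
 * Worked example (hypothetical numbers): with start_bit = 100 and
 * end_bit = 32768 (a 4K bitmap block), ext4_mark_bitmap_end() below
 * sets bits 100..103 atomically, one at a time, up to the byte
 * boundary at bit 104, then memsets bytes 13..4095 to 0xff.
 */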
void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
	int i;

	if (start_bit >= end_bit)
		return;

	ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
	for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
		ext4_set_bit(i, bitmap);
	if (i < end_bit)
		memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
}

void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
		set_bitmap_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}

static int ext4_validate_inode_bitmap(struct super_block *sb,
				      struct ext4_group_desc *desc,
				      ext4_group_t block_group,
				      struct buffer_head *bh)
{
	ext4_fsblk_t blk;
	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);

	if (buffer_verified(bh))
		return 0;
	if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
		return -EFSCORRUPTED;

	ext4_lock_group(sb, block_group);
	if (buffer_verified(bh))
		goto verified;
	blk = ext4_inode_bitmap(sb, desc);
	if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
					   EXT4_INODES_PER_GROUP(sb) / 8)) {
		ext4_unlock_group(sb, block_group);
		ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
			   "inode_bitmap = %llu", block_group, blk);
		ext4_mark_group_bitmap_corrupted(sb, block_group,
					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
		return -EFSBADCRC;
	}
	set_buffer_verified(bh);
verified:
	ext4_unlock_group(sb, block_group);
	return 0;
}

/*
 * Read the inode allocation bitmap for a given block_group, reading
 * into the specified slot in the superblock's bitmap cache.
 *
 * Return buffer_head of bitmap on success, or an ERR_PTR on error.
 */
static struct buffer_head *
ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
{
	struct ext4_group_desc *desc;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *bh = NULL;
	ext4_fsblk_t bitmap_blk;
	int err;

	desc = ext4_get_group_desc(sb, block_group, NULL);
	if (!desc)
		return ERR_PTR(-EFSCORRUPTED);

	bitmap_blk = ext4_inode_bitmap(sb, desc);
	if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
	    (bitmap_blk >= ext4_blocks_count(sbi->s_es))) {
		ext4_error(sb, "Invalid inode bitmap blk %llu in "
			   "block_group %u", bitmap_blk, block_group);
		ext4_mark_group_bitmap_corrupted(sb, block_group,
					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
		return ERR_PTR(-EFSCORRUPTED);
	}
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
		ext4_warning(sb, "Cannot read inode bitmap - "
			     "block_group = %u, inode_bitmap = %llu",
			     block_group, bitmap_blk);
		return ERR_PTR(-ENOMEM);
	}
	if (bitmap_uptodate(bh))
		goto verify;

	lock_buffer(bh);
	if (bitmap_uptodate(bh)) {
		unlock_buffer(bh);
		goto verify;
	}

	ext4_lock_group(sb, block_group);
	if (ext4_has_group_desc_csum(sb) &&
	    (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) {
		if (block_group == 0) {
			ext4_unlock_group(sb, block_group);
			unlock_buffer(bh);
			ext4_error(sb, "Inode bitmap for bg 0 marked "
				   "uninitialized");
			err = -EFSCORRUPTED;
			goto out;
		}
		memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
				     sb->s_blocksize * 8, bh->b_data);
		set_bitmap_uptodate(bh);
		set_buffer_uptodate(bh);
		set_buffer_verified(bh);
		ext4_unlock_group(sb, block_group);
		unlock_buffer(bh);
		return bh;
	}
	ext4_unlock_group(sb, block_group);

	if (buffer_uptodate(bh)) {
		/*
		 * If the group is not uninit and bh is uptodate,
		 * the bitmap is also uptodate.
		 */
		set_bitmap_uptodate(bh);
		unlock_buffer(bh);
		goto verify;
	}
	/*
	 * submit the buffer_head for reading
	 */
	trace_ext4_load_inode_bitmap(sb, block_group);
	bh->b_end_io = ext4_end_bitmap_read;
	get_bh(bh);
	submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh)) {
		put_bh(bh);
		ext4_error(sb, "Cannot read inode bitmap - "
			   "block_group = %u, inode_bitmap = %llu",
			   block_group, bitmap_blk);
		ext4_mark_group_bitmap_corrupted(sb, block_group,
				EXT4_GROUP_INFO_IBITMAP_CORRUPT);
		return ERR_PTR(-EIO);
	}

verify:
	err = ext4_validate_inode_bitmap(sb, desc, block_group, bh);
	if (err)
		goto out;
	return bh;
out:
	put_bh(bh);
	return ERR_PTR(err);
}

/*
 * NOTE! When we get the inode, we're the only people
 * that have access to it, and as such there are no
 * race conditions we have to worry about. The inode
 * is not on the hash-lists, and it cannot be reached
 * through the filesystem because the directory entry
 * has been deleted earlier.
 *
 * HOWEVER: we must make sure that we get no aliases,
 * which means that we have to call "clear_inode()"
 * _before_ we mark the inode not in use in the inode
 * bitmaps. Otherwise a newly created file might use
 * the same inode number (not actually the same pointer
 * though), and then we'd have two inodes sharing the
 * same inode number and space on the hard disk.
 */
void ext4_free_inode(handle_t *handle, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	int is_directory;
	unsigned long ino;
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *bh2;
	ext4_group_t block_group;
	unsigned long bit;
	struct ext4_group_desc *gdp;
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi;
	int fatal = 0, err, count, cleared;
	struct ext4_group_info *grp;

	if (!sb) {
		printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
		       "nonexistent device\n", __func__, __LINE__);
		return;
	}
	if (atomic_read(&inode->i_count) > 1) {
		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
			 __func__, __LINE__, inode->i_ino,
			 atomic_read(&inode->i_count));
		return;
	}
	if (inode->i_nlink) {
		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
			 __func__, __LINE__, inode->i_ino, inode->i_nlink);
		return;
	}
	sbi = EXT4_SB(sb);

	ino = inode->i_ino;
	ext4_debug("freeing inode %lu\n", ino);
	trace_ext4_free_inode(inode);

	/*
	 * Note: we must free any quota before locking the superblock,
	 * as writing the quota to disk may need the lock as well.
	 */
	dquot_initialize(inode);
	dquot_free_inode(inode);
	dquot_drop(inode);

	is_directory = S_ISDIR(inode->i_mode);

	/* Do this BEFORE marking the inode not in use or returning an error */
	ext4_clear_inode(inode);

	es = sbi->s_es;
	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
		ext4_error(sb, "reserved or nonexistent inode %lu", ino);
		goto error_return;
	}
	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
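	/*
	 * Example (hypothetical numbers): with 8192 inodes per group,
	 * inode 8193 lands in block_group 1, bit 0.  Inode numbers are
	 * 1-based, bitmap bits are 0-based.
	 */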
	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
	/* Don't bother if the inode bitmap is corrupt. */
	grp = ext4_get_group_info(sb, block_group);
	if (IS_ERR(bitmap_bh)) {
		fatal = PTR_ERR(bitmap_bh);
		bitmap_bh = NULL;
		goto error_return;
	}
	if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
		fatal = -EFSCORRUPTED;
		goto error_return;
	}

	BUFFER_TRACE(bitmap_bh, "get_write_access");
	fatal = ext4_journal_get_write_access(handle, bitmap_bh);
	if (fatal)
		goto error_return;

	fatal = -ESRCH;
	gdp = ext4_get_group_desc(sb, block_group, &bh2);
	if (gdp) {
		BUFFER_TRACE(bh2, "get_write_access");
		fatal = ext4_journal_get_write_access(handle, bh2);
	}
	ext4_lock_group(sb, block_group);
	cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
	if (fatal || !cleared) {
		ext4_unlock_group(sb, block_group);
		goto out;
	}

	count = ext4_free_inodes_count(sb, gdp) + 1;
	ext4_free_inodes_set(sb, gdp, count);
	if (is_directory) {
		count = ext4_used_dirs_count(sb, gdp) - 1;
		ext4_used_dirs_set(sb, gdp, count);
		percpu_counter_dec(&sbi->s_dirs_counter);
	}
	ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
				   EXT4_INODES_PER_GROUP(sb) / 8);
	ext4_group_desc_csum_set(sb, block_group, gdp);
	ext4_unlock_group(sb, block_group);

	percpu_counter_inc(&sbi->s_freeinodes_counter);
	if (sbi->s_log_groups_per_flex) {
		ext4_group_t f = ext4_flex_group(sbi, block_group);

		atomic_inc(&sbi->s_flex_groups[f].free_inodes);
		if (is_directory)
			atomic_dec(&sbi->s_flex_groups[f].used_dirs);
	}
	BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
	fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
out:
	if (cleared) {
		BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
		if (!fatal)
			fatal = err;
	} else {
		ext4_error(sb, "bit already cleared for inode %lu", ino);
		ext4_mark_group_bitmap_corrupted(sb, block_group,
					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
	}

error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, fatal);
}

struct orlov_stats {
	__u64 free_clusters;
	__u32 free_inodes;
	__u32 used_dirs;
};

/*
 * Helper function for Orlov's allocator; returns critical information
 * for a particular block group or flex_bg.  If flex_size is 1, then g
 * is a block group number; otherwise it is a flex_bg number.
 */
static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
			    int flex_size, struct orlov_stats *stats)
{
	struct ext4_group_desc *desc;
	struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;

	if (flex_size > 1) {
		stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
		stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
		stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
		return;
	}

	desc = ext4_get_group_desc(sb, g, NULL);
	if (desc) {
		stats->free_inodes = ext4_free_inodes_count(sb, desc);
		stats->free_clusters = ext4_free_group_clusters(sb, desc);
		stats->used_dirs = ext4_used_dirs_count(sb, desc);
	} else {
		stats->free_inodes = 0;
		stats->free_clusters = 0;
		stats->used_dirs = 0;
	}
}

/*
 * Orlov's allocator for directories.
 *
 * We always try to spread first-level directories.
 *
 * If there are block groups with both free inode and free block counts
 * not worse than average, we return the one with the smallest directory
 * count.  Otherwise we simply return a random group.
 *
 * For the remaining directories, the rules are:
 *
 * It's OK to put a directory into a group unless
 * it has too many directories already (max_dirs) or
 * it has too few free inodes left (min_inodes) or
 * it has too few free blocks left (min_blocks).
 * The parent's group is preferred; if it doesn't satisfy these
 * conditions we search cyclically through the rest.  If none
 * of the groups look good we just look for a group with more
 * free inodes than average (starting at the parent's group).
 */
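/*
 * Worked example (hypothetical numbers): on a non-flex filesystem
 * (flex_size == 1) with 8192 inodes per group and an average of 4000
 * free inodes per group, the thresholds computed below come out as
 * max_dirs = ndirs/ngroups + 8192/16 = ndirs/ngroups + 512 and
 * min_inodes = 4000 - 8192/4 = 1952 (clamped to at least 1).
 */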

static int find_group_orlov(struct super_block *sb, struct inode *parent,
			    ext4_group_t *group, umode_t mode,
			    const struct qstr *qstr)
{
	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_group_t real_ngroups = ext4_get_groups_count(sb);
	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
	unsigned int freei, avefreei, grp_free;
	ext4_fsblk_t freeb, avefreec;
	unsigned int ndirs;
	int max_dirs, min_inodes;
	ext4_grpblk_t min_clusters;
	ext4_group_t i, grp, g, ngroups;
	struct ext4_group_desc *desc;
	struct orlov_stats stats;
	int flex_size = ext4_flex_bg_size(sbi);
	struct dx_hash_info hinfo;

	ngroups = real_ngroups;
	if (flex_size > 1) {
		ngroups = (real_ngroups + flex_size - 1) >>
			sbi->s_log_groups_per_flex;
		parent_group >>= sbi->s_log_groups_per_flex;
	}

	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
	avefreei = freei / ngroups;
	freeb = EXT4_C2B(sbi,
		percpu_counter_read_positive(&sbi->s_freeclusters_counter));
	avefreec = freeb;
	do_div(avefreec, ngroups);
	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);

	if (S_ISDIR(mode) &&
	    ((parent == d_inode(sb->s_root)) ||
	     (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
		int best_ndir = inodes_per_group;
		int ret = -1;

		if (qstr) {
			hinfo.hash_version = DX_HASH_HALF_MD4;
			hinfo.seed = sbi->s_hash_seed;
			ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
			grp = hinfo.hash;
		} else
			grp = prandom_u32();
		parent_group = (unsigned)grp % ngroups;
		for (i = 0; i < ngroups; i++) {
			g = (parent_group + i) % ngroups;
			get_orlov_stats(sb, g, flex_size, &stats);
			if (!stats.free_inodes)
				continue;
			if (stats.used_dirs >= best_ndir)
				continue;
			if (stats.free_inodes < avefreei)
				continue;
			if (stats.free_clusters < avefreec)
				continue;
			grp = g;
			ret = 0;
			best_ndir = stats.used_dirs;
		}
		if (ret)
			goto fallback;
	found_flex_bg:
		if (flex_size == 1) {
			*group = grp;
			return 0;
		}

		/*
		 * We pack inodes at the beginning of the flexgroup's
		 * inode tables.  Block allocation decisions will do
		 * something similar, although regular files will
		 * start at 2nd block group of the flexgroup.  See
		 * ext4_ext_find_goal() and ext4_find_near().
		 */
		grp *= flex_size;
		for (i = 0; i < flex_size; i++) {
			if (grp+i >= real_ngroups)
				break;
			desc = ext4_get_group_desc(sb, grp+i, NULL);
			if (desc && ext4_free_inodes_count(sb, desc)) {
				*group = grp+i;
				return 0;
			}
		}
		goto fallback;
	}

	max_dirs = ndirs / ngroups + inodes_per_group / 16;
	min_inodes = avefreei - inodes_per_group*flex_size / 4;
	if (min_inodes < 1)
		min_inodes = 1;
	min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;

	/*
	 * Start looking in the flex group where we last allocated an
	 * inode for this parent directory
	 */
	if (EXT4_I(parent)->i_last_alloc_group != ~0) {
		parent_group = EXT4_I(parent)->i_last_alloc_group;
		if (flex_size > 1)
			parent_group >>= sbi->s_log_groups_per_flex;
	}

	for (i = 0; i < ngroups; i++) {
		grp = (parent_group + i) % ngroups;
		get_orlov_stats(sb, grp, flex_size, &stats);
		if (stats.used_dirs >= max_dirs)
			continue;
		if (stats.free_inodes < min_inodes)
			continue;
		if (stats.free_clusters < min_clusters)
			continue;
		goto found_flex_bg;
	}

fallback:
	ngroups = real_ngroups;
	avefreei = freei / ngroups;
fallback_retry:
	parent_group = EXT4_I(parent)->i_block_group;
	for (i = 0; i < ngroups; i++) {
		grp = (parent_group + i) % ngroups;
		desc = ext4_get_group_desc(sb, grp, NULL);
		if (desc) {
			grp_free = ext4_free_inodes_count(sb, desc);
			if (grp_free && grp_free >= avefreei) {
				*group = grp;
				return 0;
			}
		}
	}

	if (avefreei) {
		/*
		 * The free-inodes counter is approximate, and for really small
		 * filesystems the above test can fail to find any blockgroups
		 */
		avefreei = 0;
		goto fallback_retry;
	}

	return -1;
}

static int find_group_other(struct super_block *sb, struct inode *parent,
			    ext4_group_t *group, umode_t mode)
{
	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
	ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
	struct ext4_group_desc *desc;
	int flex_size = ext4_flex_bg_size(EXT4_SB(sb));

	/*
	 * Try to place the inode in the same flex group as its
	 * parent.  If we can't find space, use the Orlov algorithm to
	 * find another flex group, and store that information in the
	 * parent directory's inode information so that that flex
	 * group is used for future allocations.
	 */
	if (flex_size > 1) {
		int retry = 0;

	try_again:
		parent_group &= ~(flex_size-1);
		last = parent_group + flex_size;
		if (last > ngroups)
			last = ngroups;
		for (i = parent_group; i < last; i++) {
			desc = ext4_get_group_desc(sb, i, NULL);
			if (desc && ext4_free_inodes_count(sb, desc)) {
				*group = i;
				return 0;
			}
		}
		if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
			retry = 1;
			parent_group = EXT4_I(parent)->i_last_alloc_group;
			goto try_again;
		}
		/*
		 * If this didn't work, use the Orlov search algorithm
		 * to find a new flex group; we pass in the mode to
		 * avoid the topdir algorithms.
		 */
		*group = parent_group + flex_size;
		if (*group > ngroups)
			*group = 0;
		return find_group_orlov(sb, parent, group, mode, NULL);
	}

	/*
	 * Try to place the inode in its parent directory
	 */
	*group = parent_group;
	desc = ext4_get_group_desc(sb, *group, NULL);
	if (desc && ext4_free_inodes_count(sb, desc) &&
	    ext4_free_group_clusters(sb, desc))
		return 0;

	/*
	 * We're going to place this inode in a different blockgroup from its
	 * parent.  We want to cause files in a common directory to all land in
	 * the same blockgroup.  But we want files which are in a different
	 * directory which shares a blockgroup with our parent to land in a
	 * different blockgroup.
	 *
	 * So add our directory's i_ino into the starting point for the hash.
	 */
	*group = (*group + parent->i_ino) % ngroups;

	/*
	 * Use a quadratic hash to find a group with a free inode and some free
	 * blocks.
	 */
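	/*
	 * The loop below adds 1, 2, 4, 8, ... to the group number, so
	 * starting from group g it probes g+1, g+3, g+7, g+15, ...
	 * (mod ngroups).
	 */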
	for (i = 1; i < ngroups; i <<= 1) {
		*group += i;
		if (*group >= ngroups)
			*group -= ngroups;
		desc = ext4_get_group_desc(sb, *group, NULL);
		if (desc && ext4_free_inodes_count(sb, desc) &&
		    ext4_free_group_clusters(sb, desc))
			return 0;
	}

	/*
	 * That failed: try linear search for a free inode, even if that group
	 * has no free blocks.
	 */
	*group = parent_group;
	for (i = 0; i < ngroups; i++) {
		if (++*group >= ngroups)
			*group = 0;
		desc = ext4_get_group_desc(sb, *group, NULL);
		if (desc && ext4_free_inodes_count(sb, desc))
			return 0;
	}

	return -1;
}

/*
 * In no journal mode, if an inode has recently been deleted, we want
 * to avoid reusing it until we're reasonably sure the inode table
 * block has been written back to disk.  (Yes, these values are
 * somewhat arbitrary...)
 */
#define RECENTCY_MIN	5
#define RECENTCY_DIRTY	300
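/*
 * Example: with the values above, a freshly deleted inode is skipped
 * for at least 5 seconds after its i_dtime, or for 305 seconds if its
 * inode table block is still dirty in the buffer cache.
 */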

static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
{
	struct ext4_group_desc *gdp;
	struct ext4_inode *raw_inode;
	struct buffer_head *bh;
	int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
	int offset, ret = 0;
	int recentcy = RECENTCY_MIN;
	u32 dtime, now;

	gdp = ext4_get_group_desc(sb, group, NULL);
	if (unlikely(!gdp))
		return 0;

	bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) +
		       (ino / inodes_per_block));
	if (!bh || !buffer_uptodate(bh))
		/*
		 * If the block is not in the buffer cache, then it
		 * must have been written out.
		 */
		goto out;

	offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
	raw_inode = (struct ext4_inode *) (bh->b_data + offset);

	/* i_dtime is only 32 bits on disk, but we only care about relative
	 * times in the range of a few minutes (i.e. long enough to sync a
	 * recently-deleted inode to disk), so using the low 32 bits of the
	 * clock (a 68 year range) is enough, see time_before32() */
	dtime = le32_to_cpu(raw_inode->i_dtime);
	now = ktime_get_real_seconds();
	if (buffer_dirty(bh))
		recentcy += RECENTCY_DIRTY;

	if (dtime && time_before32(dtime, now) &&
	    time_before32(now, dtime + recentcy))
		ret = 1;
out:
	brelse(bh);
	return ret;
}

static int find_inode_bit(struct super_block *sb, ext4_group_t group,
			  struct buffer_head *bitmap, unsigned long *ino)
{
next:
	*ino = ext4_find_next_zero_bit((unsigned long *)
				       bitmap->b_data,
				       EXT4_INODES_PER_GROUP(sb), *ino);
	if (*ino >= EXT4_INODES_PER_GROUP(sb))
		return 0;

	if ((EXT4_SB(sb)->s_journal == NULL) &&
	    recently_deleted(sb, group, *ino)) {
		*ino = *ino + 1;
		if (*ino < EXT4_INODES_PER_GROUP(sb))
			goto next;
		return 0;
	}

	return 1;
}

/*
 * There are two policies for allocating an inode.  If the new inode is
 * a directory, then a forward search is made for a block group with both
 * free space and a low directory-to-inode ratio; if that fails, then
 * among the groups with above-average free space, the group with the
 * fewest directories is chosen.
 *
 * For other inodes, search forward from the parent directory's block
 * group to find a free inode.
 */
struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
			       umode_t mode, const struct qstr *qstr,
			       __u32 goal, uid_t *owner, __u32 i_flags,
			       int handle_type, unsigned int line_no,
			       int nblocks)
{
	struct super_block *sb;
	struct buffer_head *inode_bitmap_bh = NULL;
	struct buffer_head *group_desc_bh;
	ext4_group_t ngroups, group = 0;
	unsigned long ino = 0;
	struct inode *inode;
	struct ext4_group_desc *gdp = NULL;
	struct ext4_inode_info *ei;
	struct ext4_sb_info *sbi;
	int ret2, err;
	struct inode *ret;
	ext4_group_t i;
	ext4_group_t flex_group;
	struct ext4_group_info *grp;
	int encrypt = 0;

	/* Cannot create files in a deleted directory */
	if (!dir || !dir->i_nlink)
		return ERR_PTR(-EPERM);

	sb = dir->i_sb;
	sbi = EXT4_SB(sb);

	if (unlikely(ext4_forced_shutdown(sbi)))
		return ERR_PTR(-EIO);

	if ((ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
	    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) &&
	    !(i_flags & EXT4_EA_INODE_FL)) {
		err = fscrypt_get_encryption_info(dir);
		if (err)
			return ERR_PTR(err);
		if (!fscrypt_has_encryption_key(dir))
			return ERR_PTR(-ENOKEY);
		encrypt = 1;
	}

	if (!handle && sbi->s_journal && !(i_flags & EXT4_EA_INODE_FL)) {
#ifdef CONFIG_EXT4_FS_POSIX_ACL
		struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);

		if (IS_ERR(p))
			return ERR_CAST(p);
		if (p) {
			int acl_size = p->a_count * sizeof(ext4_acl_entry);

			nblocks += (S_ISDIR(mode) ? 2 : 1) *
				__ext4_xattr_set_credits(sb, NULL /* inode */,
					NULL /* block_bh */, acl_size,
					true /* is_create */);
			posix_acl_release(p);
		}
#endif

#ifdef CONFIG_SECURITY
		{
			int num_security_xattrs = 1;

#ifdef CONFIG_INTEGRITY
			num_security_xattrs++;
#endif
			/*
			 * We assume that security xattrs are never
			 * more than 1k.  In practice they are under
			 * 128 bytes.
			 */
			nblocks += num_security_xattrs *
				__ext4_xattr_set_credits(sb, NULL /* inode */,
					NULL /* block_bh */, 1024,
					true /* is_create */);
		}
#endif
		if (encrypt)
			nblocks += __ext4_xattr_set_credits(sb,
					NULL /* inode */, NULL /* block_bh */,
					FSCRYPT_SET_CONTEXT_MAX_SIZE,
					true /* is_create */);
	}

	ngroups = ext4_get_groups_count(sb);
	trace_ext4_request_inode(dir, mode);
	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	ei = EXT4_I(inode);

	/*
	 * Initialize owners and quota early so that we don't have to account
	 * for quota initialization worst case in the standard inode-creating
	 * transaction.
	 */
	if (owner) {
		inode->i_mode = mode;
		i_uid_write(inode, owner[0]);
		i_gid_write(inode, owner[1]);
	} else if (test_opt(sb, GRPID)) {
		inode->i_mode = mode;
		inode->i_uid = current_fsuid();
		inode->i_gid = dir->i_gid;
	} else
		inode_init_owner(inode, dir, mode);

	if (ext4_has_feature_project(sb) &&
	    ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
		ei->i_projid = EXT4_I(dir)->i_projid;
	else
		ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);

	err = dquot_initialize(inode);
	if (err)
		goto out;

	if (!goal)
		goal = sbi->s_inode_goal;

	if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
		group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
		ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
		ret2 = 0;
		goto got_group;
	}

	if (S_ISDIR(mode))
		ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
	else
		ret2 = find_group_other(sb, dir, &group, mode);

got_group:
	EXT4_I(dir)->i_last_alloc_group = group;
	err = -ENOSPC;
	if (ret2 == -1)
		goto out;

	/*
	 * Normally we will only go through one pass of this loop,
	 * unless we get unlucky and it turns out the group we selected
	 * had its last inode grabbed by someone else.
	 */
	for (i = 0; i < ngroups; i++, ino = 0) {
		err = -EIO;

		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
		if (!gdp)
			goto out;

		/*
		 * Check free inodes count before loading bitmap.
		 */
		if (ext4_free_inodes_count(sb, gdp) == 0)
			goto next_group;

		grp = ext4_get_group_info(sb, group);
		/* Skip groups with already-known suspicious inode tables */
		if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
			goto next_group;

		brelse(inode_bitmap_bh);
		inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
		/* Skip groups with suspicious inode tables */
		if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) ||
		    IS_ERR(inode_bitmap_bh)) {
			inode_bitmap_bh = NULL;
			goto next_group;
		}

repeat_in_this_group:
		ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
		if (!ret2)
			goto next_group;

		if (group == 0 && (ino + 1) < EXT4_FIRST_INO(sb)) {
			ext4_error(sb, "reserved inode found cleared - "
				   "inode=%lu", ino + 1);
			ext4_mark_group_bitmap_corrupted(sb, group,
					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
			goto next_group;
		}

		if (!handle) {
			BUG_ON(nblocks <= 0);
			handle = __ext4_journal_start_sb(dir->i_sb, line_no,
							 handle_type, nblocks,
							 0);
			if (IS_ERR(handle)) {
				err = PTR_ERR(handle);
				ext4_std_error(sb, err);
				goto out;
			}
		}
		BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
		if (err) {
			ext4_std_error(sb, err);
			goto out;
		}
		ext4_lock_group(sb, group);
		ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
		if (ret2) {
			/* Someone already took the bit.  Repeat the search
			 * with lock held.
			 */
			ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
			if (ret2) {
				ext4_set_bit(ino, inode_bitmap_bh->b_data);
				ret2 = 0;
			} else {
				ret2 = 1; /* we didn't grab the inode */
			}
		}
		ext4_unlock_group(sb, group);
		ino++;		/* the inode bitmap is zero-based */
		if (!ret2)
			goto got; /* we grabbed the inode! */

		if (ino < EXT4_INODES_PER_GROUP(sb))
			goto repeat_in_this_group;
next_group:
		if (++group == ngroups)
			group = 0;
	}
	err = -ENOSPC;
	goto out;

got:
	BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
	err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
	if (err) {
		ext4_std_error(sb, err);
		goto out;
	}

	BUFFER_TRACE(group_desc_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, group_desc_bh);
	if (err) {
		ext4_std_error(sb, err);
		goto out;
	}

	/* We may have to initialize the block bitmap if it isn't already */
	if (ext4_has_group_desc_csum(sb) &&
	    gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		struct buffer_head *block_bitmap_bh;

		block_bitmap_bh = ext4_read_block_bitmap(sb, group);
		if (IS_ERR(block_bitmap_bh)) {
			err = PTR_ERR(block_bitmap_bh);
			goto out;
		}
		BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
		err = ext4_journal_get_write_access(handle, block_bitmap_bh);
		if (err) {
			brelse(block_bitmap_bh);
			ext4_std_error(sb, err);
			goto out;
		}

		BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
		err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);

		/* recheck and clear flag under lock if we still need to */
		ext4_lock_group(sb, group);
		if (ext4_has_group_desc_csum(sb) &&
		    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
			ext4_free_group_clusters_set(sb, gdp,
				ext4_free_clusters_after_init(sb, group, gdp));
			ext4_block_bitmap_csum_set(sb, group, gdp,
						   block_bitmap_bh);
			ext4_group_desc_csum_set(sb, group, gdp);
		}
		ext4_unlock_group(sb, group);
		brelse(block_bitmap_bh);

		if (err) {
			ext4_std_error(sb, err);
			goto out;
		}
	}

	/* Update the relevant bg descriptor fields */
	if (ext4_has_group_desc_csum(sb)) {
		int free;
		struct ext4_group_info *grp = ext4_get_group_info(sb, group);

		down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
		ext4_lock_group(sb, group); /* while we modify the bg desc */
		free = EXT4_INODES_PER_GROUP(sb) -
			ext4_itable_unused_count(sb, gdp);
		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
			free = 0;
		}
		/*
		 * Check the relative inode number against the last used
		 * relative inode number in this group.  If it is greater
		 * we need to update the bg_itable_unused count.
		 */
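		/*
		 * Example (hypothetical numbers): with 8192 inodes per
		 * group, allocating relative inode 50 when only 40 were
		 * in use before sets bg_itable_unused to 8192 - 50.
		 */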
		if (ino > free)
			ext4_itable_unused_set(sb, gdp,
					(EXT4_INODES_PER_GROUP(sb) - ino));
		up_read(&grp->alloc_sem);
	} else {
		ext4_lock_group(sb, group);
	}

	ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
	if (S_ISDIR(mode)) {
		ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
		if (sbi->s_log_groups_per_flex) {
			ext4_group_t f = ext4_flex_group(sbi, group);

			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
		}
	}
	if (ext4_has_group_desc_csum(sb)) {
		ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh,
					   EXT4_INODES_PER_GROUP(sb) / 8);
		ext4_group_desc_csum_set(sb, group, gdp);
	}
	ext4_unlock_group(sb, group);

	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
	if (err) {
		ext4_std_error(sb, err);
		goto out;
	}

	percpu_counter_dec(&sbi->s_freeinodes_counter);
	if (S_ISDIR(mode))
		percpu_counter_inc(&sbi->s_dirs_counter);

	if (sbi->s_log_groups_per_flex) {
		flex_group = ext4_flex_group(sbi, group);
		atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
	}

	inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
	/* This is the optimal IO size (for stat), not the fs block size */
	inode->i_blocks = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
	ei->i_crtime = inode->i_mtime;

	memset(ei->i_data, 0, sizeof(ei->i_data));
	ei->i_dir_start_lookup = 0;
	ei->i_disksize = 0;

	/* Don't inherit extent flag from directory, amongst others. */
	ei->i_flags =
		ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
	ei->i_flags |= i_flags;
	ei->i_file_acl = 0;
	ei->i_dtime = 0;
	ei->i_block_group = group;
	ei->i_last_alloc_group = ~0;

	ext4_set_inode_flags(inode);
	if (IS_DIRSYNC(inode))
		ext4_handle_sync(handle);
	if (insert_inode_locked(inode) < 0) {
		/*
		 * Likely a bitmap corruption causing inode to be allocated
		 * twice.
		 */
		err = -EIO;
		ext4_error(sb, "failed to insert inode %lu: doubly allocated?",
			   inode->i_ino);
		ext4_mark_group_bitmap_corrupted(sb, group,
					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
		goto out;
	}
	inode->i_generation = prandom_u32();

	/* Precompute checksum seed for inode metadata */
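	/*
	 * The seed computed below is, in effect,
	 * crc32c(crc32c(fs_csum_seed, inode_number), generation), so
	 * per-inode checksums do not have to re-hash the inode number
	 * and generation on every computation.  (ext4_chksum() is
	 * crc32c when metadata_csum is enabled.)
	 */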
	if (ext4_has_metadata_csum(sb)) {
		__u32 csum;
		__le32 inum = cpu_to_le32(inode->i_ino);
		__le32 gen = cpu_to_le32(inode->i_generation);
		csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
				   sizeof(inum));
		ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
					      sizeof(gen));
	}

	ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
	ext4_set_inode_state(inode, EXT4_STATE_NEW);

	ei->i_extra_isize = sbi->s_want_extra_isize;
	ei->i_inline_off = 0;
	if (ext4_has_feature_inline_data(sb))
		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
	ret = inode;
	err = dquot_alloc_inode(inode);
	if (err)
		goto fail_drop;

	/*
	 * Since the encryption xattr will always be unique, create it first so
	 * that it's less likely to end up in an external xattr block and
	 * prevent its deduplication.
	 */
	if (encrypt) {
		err = fscrypt_inherit_context(dir, inode, handle, true);
		if (err)
			goto fail_free_drop;
	}

	if (!(ei->i_flags & EXT4_EA_INODE_FL)) {
		err = ext4_init_acl(handle, inode, dir);
		if (err)
			goto fail_free_drop;

		err = ext4_init_security(handle, inode, dir, qstr);
		if (err)
			goto fail_free_drop;
	}

	if (ext4_has_feature_extents(sb)) {
		/* set extent flag only for directory, file and normal symlink*/
		if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode);
		}
	}

	if (ext4_handle_valid(handle)) {
		ei->i_sync_tid = handle->h_transaction->t_tid;
		ei->i_datasync_tid = handle->h_transaction->t_tid;
	}

	err = ext4_mark_inode_dirty(handle, inode);
	if (err) {
		ext4_std_error(sb, err);
		goto fail_free_drop;
	}

	ext4_debug("allocating inode %lu\n", inode->i_ino);
	trace_ext4_allocate_inode(inode, dir, mode);
	brelse(inode_bitmap_bh);
	return ret;

fail_free_drop:
	dquot_free_inode(inode);
fail_drop:
	clear_nlink(inode);
	unlock_new_inode(inode);
out:
	dquot_drop(inode);
	inode->i_flags |= S_NOQUOTA;
	iput(inode);
	brelse(inode_bitmap_bh);
	return ERR_PTR(err);
}

/* Verify that we are loading a valid orphan from disk */
struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
{
	unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
	ext4_group_t block_group;
	int bit;
	struct buffer_head *bitmap_bh = NULL;
	struct inode *inode = NULL;
	int err = -EFSCORRUPTED;

	if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
		goto bad_orphan;

	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
	if (IS_ERR(bitmap_bh))
		return (struct inode *) bitmap_bh;

	/* Having the inode bit set should be a 100% indicator that this
	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
	 * inodes that were being truncated, so we can't check i_nlink==0.
	 */
	if (!ext4_test_bit(bit, bitmap_bh->b_data))
		goto bad_orphan;

	inode = ext4_iget(sb, ino);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
			   ino, err);
		return inode;
	}

	/*
	 * If the orphan has i_nlink > 0 then it should be able to
	 * be truncated, otherwise it won't be removed from the orphan
	 * list during processing and an infinite loop will result.
	 * Similarly, it must not be a bad inode.
	 */
	if ((inode->i_nlink && !ext4_can_truncate(inode)) ||
	    is_bad_inode(inode))
		goto bad_orphan;

	if (NEXT_ORPHAN(inode) > max_ino)
		goto bad_orphan;
	brelse(bitmap_bh);
	return inode;

bad_orphan:
	ext4_error(sb, "bad orphan inode %lu", ino);
	if (bitmap_bh)
		printk(KERN_ERR "ext4_test_bit(bit=%d, block=%llu) = %d\n",
		       bit, (unsigned long long)bitmap_bh->b_blocknr,
		       ext4_test_bit(bit, bitmap_bh->b_data));
	if (inode) {
		printk(KERN_ERR "is_bad_inode(inode)=%d\n",
		       is_bad_inode(inode));
		printk(KERN_ERR "NEXT_ORPHAN(inode)=%u\n",
		       NEXT_ORPHAN(inode));
		printk(KERN_ERR "max_ino=%lu\n", max_ino);
		printk(KERN_ERR "i_nlink=%u\n", inode->i_nlink);
		/* Avoid freeing blocks if we got a bad deleted inode */
		if (inode->i_nlink == 0)
			inode->i_blocks = 0;
		iput(inode);
	}
	brelse(bitmap_bh);
	return ERR_PTR(err);
}

unsigned long ext4_count_free_inodes(struct super_block *sb)
{
	unsigned long desc_count;
	struct ext4_group_desc *gdp;
	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
#ifdef EXT4FS_DEBUG
	struct ext4_super_block *es;
	unsigned long bitmap_count, x;
	struct buffer_head *bitmap_bh = NULL;

	es = EXT4_SB(sb)->s_es;
	desc_count = 0;
	bitmap_count = 0;
	gdp = NULL;
	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += ext4_free_inodes_count(sb, gdp);
		brelse(bitmap_bh);
		bitmap_bh = ext4_read_inode_bitmap(sb, i);
		if (IS_ERR(bitmap_bh)) {
			bitmap_bh = NULL;
			continue;
		}

		x = ext4_count_free(bitmap_bh->b_data,
				    EXT4_INODES_PER_GROUP(sb) / 8);
		printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
			(unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
		bitmap_count += x;
	}
	brelse(bitmap_bh);
	printk(KERN_DEBUG "ext4_count_free_inodes: "
	       "stored = %u, computed = %lu, %lu\n",
	       le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
	return desc_count;
#else
	desc_count = 0;
	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += ext4_free_inodes_count(sb, gdp);
		cond_resched();
	}
	return desc_count;
#endif
}

/* Called at mount-time, super-block is locked */
unsigned long ext4_count_dirs(struct super_block * sb)
{
	unsigned long count = 0;
	ext4_group_t i, ngroups = ext4_get_groups_count(sb);

	for (i = 0; i < ngroups; i++) {
		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		count += ext4_used_dirs_count(sb, gdp);
	}
	return count;
}

/*
 * Zero out the not-yet-zeroed inode table - just write zeroes through
 * the whole inode table.  Must be called without any spinlock held.
 * The only place it is called from on an active filesystem is the
 * ext4lazyinit thread, so we do not need any special locks.  However,
 * we have to prevent inode allocation from the current group, so we
 * take the alloc_sem lock to block ext4_new_inode() until we are
 * finished.
 */
int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
			  int barrier)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_desc *gdp = NULL;
	struct buffer_head *group_desc_bh;
	handle_t *handle;
	ext4_fsblk_t blk;
	int num, ret = 0, used_blks = 0;

	/* This should not happen, but just to be sure check this */
	if (sb_rdonly(sb)) {
		ret = 1;
		goto out;
	}

	gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
	if (!gdp)
		goto out;

	/*
	 * We do not need to lock this, because we are the only one
	 * handling this flag.
	 */
	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
		goto out;

	handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out;
	}

	down_write(&grp->alloc_sem);
	/*
	 * If inode bitmap was already initialized there may be some
	 * used inodes so we need to skip blocks with used inodes in
	 * inode table.
	 */
	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
		used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
			    ext4_itable_unused_count(sb, gdp)),
			    sbi->s_inodes_per_block);
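	/*
	 * Example (hypothetical numbers): with 8192 inodes per group,
	 * 256-byte inodes and 4K blocks (16 inodes per inode table
	 * block), an itable_unused count of 8000 means 192 inodes are
	 * in the used region, so used_blks = DIV_ROUND_UP(192, 16) = 12
	 * blocks are skipped.
	 */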
	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) ||
	    ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) -
			       ext4_itable_unused_count(sb, gdp)) <
			      EXT4_FIRST_INO(sb)))) {
		ext4_error(sb, "Something is wrong with group %u: "
			   "used itable blocks: %d; "
			   "itable unused count: %u",
			   group, used_blks,
			   ext4_itable_unused_count(sb, gdp));
		ret = 1;
		goto err_out;
	}

	blk = ext4_inode_table(sb, gdp) + used_blks;
	num = sbi->s_itb_per_group - used_blks;

	BUFFER_TRACE(group_desc_bh, "get_write_access");
	ret = ext4_journal_get_write_access(handle,
					    group_desc_bh);
	if (ret)
		goto err_out;

	/*
	 * Skip zeroout if the inode table is full.  But we set the ZEROED
	 * flag anyway, because obviously, when it is full it does not need
	 * further zeroing.
	 */
	if (unlikely(num == 0))
		goto skip_zeroout;

	ext4_debug("going to zero out inode table in group %d\n",
		   group);
	ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
	if (ret < 0)
		goto err_out;
	if (barrier)
		blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);

skip_zeroout:
	ext4_lock_group(sb, group);
	gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
	ext4_group_desc_csum_set(sb, group, gdp);
	ext4_unlock_group(sb, group);

	BUFFER_TRACE(group_desc_bh,
		     "call ext4_handle_dirty_metadata");
	ret = ext4_handle_dirty_metadata(handle, NULL,
					 group_desc_bh);

err_out:
	up_write(&grp->alloc_sem);
	ext4_journal_stop(handle);
out:
	return ret;
}