Blame - fs/xfs/xfs_mount.c - hafnium/third_party/linux

blob: 5a0ce0c2c4bbdc65318ab54622f1795860a33fe1 [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				4	* All Rights Reserved.
				5	*/
				6	#include "xfs.h"
				7	#include "xfs_fs.h"
				8	#include "xfs_shared.h"
				9	#include "xfs_format.h"
				10	#include "xfs_log_format.h"
				11	#include "xfs_trans_resv.h"
				12	#include "xfs_bit.h"
				13	#include "xfs_sb.h"
				14	#include "xfs_mount.h"
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	15	#include "xfs_inode.h"
				16	#include "xfs_dir2.h"
				17	#include "xfs_ialloc.h"
				18	#include "xfs_alloc.h"
				19	#include "xfs_rtalloc.h"
				20	#include "xfs_bmap.h"
				21	#include "xfs_trans.h"
				22	#include "xfs_trans_priv.h"
				23	#include "xfs_log.h"
				24	#include "xfs_error.h"
				25	#include "xfs_quota.h"
				26	#include "xfs_fsops.h"
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	27	#include "xfs_icache.h"
				28	#include "xfs_sysfs.h"
				29	#include "xfs_rmap_btree.h"
				30	#include "xfs_refcount_btree.h"
				31	#include "xfs_reflink.h"
				32	#include "xfs_extent_busy.h"
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	33	#include "xfs_health.h"
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	34
				35
/*
 * Held by xfs_uuid_mount() and xfs_uuid_unmount() while they search or
 * modify the UUID table below.
 */
static DEFINE_MUTEX(xfs_uuid_table_mutex);
/* Number of slots currently allocated in xfs_uuid_table. */
static int xfs_uuid_table_size;
/*
 * UUIDs registered by xfs_uuid_mount(). A slot holding a null UUID is a
 * hole that a later mount may reuse.
 */
static uuid_t *xfs_uuid_table;
				39
				40	void
				41	xfs_uuid_table_free(void)
				42	{
				43	if (xfs_uuid_table_size == 0)
				44	return;
				45	kmem_free(xfs_uuid_table);
				46	xfs_uuid_table = NULL;
				47	xfs_uuid_table_size = 0;
				48	}
				49
				50	/*
				51	* See if the UUID is unique among mounted XFS filesystems.
				52	* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
				53	*/
				54	STATIC int
				55	xfs_uuid_mount(
				56	struct xfs_mount *mp)
				57	{
				58	uuid_t *uuid = &mp->m_sb.sb_uuid;
				59	int hole, i;
				60
				61	/* Publish UUID in struct super_block */
				62	uuid_copy(&mp->m_super->s_uuid, uuid);
				63
				64	if (mp->m_flags & XFS_MOUNT_NOUUID)
				65	return 0;
				66
				67	if (uuid_is_null(uuid)) {
				68	xfs_warn(mp, "Filesystem has null UUID - can't mount");
				69	return -EINVAL;
				70	}
				71
				72	mutex_lock(&xfs_uuid_table_mutex);
				73	for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
				74	if (uuid_is_null(&xfs_uuid_table[i])) {
				75	hole = i;
				76	continue;
				77	}
				78	if (uuid_equal(uuid, &xfs_uuid_table[i]))
				79	goto out_duplicate;
				80	}
				81
				82	if (hole < 0) {
				83	xfs_uuid_table = kmem_realloc(xfs_uuid_table,
				84	(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	85	0);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	86	hole = xfs_uuid_table_size++;
				87	}
				88	xfs_uuid_table[hole] = *uuid;
				89	mutex_unlock(&xfs_uuid_table_mutex);
				90
				91	return 0;
				92
				93	out_duplicate:
				94	mutex_unlock(&xfs_uuid_table_mutex);
				95	xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
				96	return -EINVAL;
				97	}
				98
				99	STATIC void
				100	xfs_uuid_unmount(
				101	struct xfs_mount *mp)
				102	{
				103	uuid_t *uuid = &mp->m_sb.sb_uuid;
				104	int i;
				105
				106	if (mp->m_flags & XFS_MOUNT_NOUUID)
				107	return;
				108
				109	mutex_lock(&xfs_uuid_table_mutex);
				110	for (i = 0; i < xfs_uuid_table_size; i++) {
				111	if (uuid_is_null(&xfs_uuid_table[i]))
				112	continue;
				113	if (!uuid_equal(uuid, &xfs_uuid_table[i]))
				114	continue;
				115	memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
				116	break;
				117	}
				118	ASSERT(i < xfs_uuid_table_size);
				119	mutex_unlock(&xfs_uuid_table_mutex);
				120	}
				121
				122
				123	STATIC void
				124	__xfs_free_perag(
				125	struct rcu_head *head)
				126	{
				127	struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
				128
				129	ASSERT(atomic_read(&pag->pag_ref) == 0);
				130	kmem_free(pag);
				131	}
				132
				133	/*
				134	* Free up the per-ag resources associated with the mount structure.
				135	*/
				136	STATIC void
				137	xfs_free_perag(
				138	xfs_mount_t *mp)
				139	{
				140	xfs_agnumber_t agno;
				141	struct xfs_perag *pag;
				142
				143	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
				144	spin_lock(&mp->m_perag_lock);
				145	pag = radix_tree_delete(&mp->m_perag_tree, agno);
				146	spin_unlock(&mp->m_perag_lock);
				147	ASSERT(pag);
				148	ASSERT(atomic_read(&pag->pag_ref) == 0);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	149	xfs_iunlink_destroy(pag);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	150	xfs_buf_hash_destroy(pag);
				151	mutex_destroy(&pag->pag_ici_reclaim_lock);
				152	call_rcu(&pag->rcu_head, __xfs_free_perag);
				153	}
				154	}
				155
				156	/*
				157	* Check size of device based on the (data/realtime) block count.
				158	* Note: this check is used by the growfs code as well as mount.
				159	*/
				160	int
				161	xfs_sb_validate_fsb_count(
				162	xfs_sb_t *sbp,
				163	uint64_t nblocks)
				164	{
				165	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
				166	ASSERT(sbp->sb_blocklog >= BBSHIFT);
				167
				168	/* Limited by ULONG_MAX of page cache index */
				169	if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
				170	return -EFBIG;
				171	return 0;
				172	}
				173
				174	int
				175	xfs_initialize_perag(
				176	xfs_mount_t *mp,
				177	xfs_agnumber_t agcount,
				178	xfs_agnumber_t *maxagi)
				179	{
				180	xfs_agnumber_t index;
				181	xfs_agnumber_t first_initialised = NULLAGNUMBER;
				182	xfs_perag_t *pag;
				183	int error = -ENOMEM;
				184
				185	/*
				186	* Walk the current per-ag tree so we don't try to initialise AGs
				187	* that already exist (growfs case). Allocate and insert all the
				188	* AGs we don't find ready for initialisation.
				189	*/
				190	for (index = 0; index < agcount; index++) {
				191	pag = xfs_perag_get(mp, index);
				192	if (pag) {
				193	xfs_perag_put(pag);
				194	continue;
				195	}
				196
				197	pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	198	if (!pag) {
				199	error = -ENOMEM;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	200	goto out_unwind_new_pags;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	201	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	202	pag->pag_agno = index;
				203	pag->pag_mount = mp;
				204	spin_lock_init(&pag->pag_ici_lock);
				205	mutex_init(&pag->pag_ici_reclaim_lock);
				206	INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	207
				208	error = xfs_buf_hash_init(pag);
				209	if (error)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	210	goto out_free_pag;
				211	init_waitqueue_head(&pag->pagb_wait);
				212	spin_lock_init(&pag->pagb_lock);
				213	pag->pagb_count = 0;
				214	pag->pagb_tree = RB_ROOT;
				215
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	216	error = radix_tree_preload(GFP_NOFS);
				217	if (error)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	218	goto out_hash_destroy;
				219
				220	spin_lock(&mp->m_perag_lock);
				221	if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	222	WARN_ON_ONCE(1);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	223	spin_unlock(&mp->m_perag_lock);
				224	radix_tree_preload_end();
				225	error = -EEXIST;
				226	goto out_hash_destroy;
				227	}
				228	spin_unlock(&mp->m_perag_lock);
				229	radix_tree_preload_end();
				230	/* first new pag is fully initialized */
				231	if (first_initialised == NULLAGNUMBER)
				232	first_initialised = index;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	233	error = xfs_iunlink_init(pag);
				234	if (error)
				235	goto out_hash_destroy;
				236	spin_lock_init(&pag->pag_state_lock);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	237	}
				238
				239	index = xfs_set_inode_alloc(mp, agcount);
				240
				241	if (maxagi)
				242	*maxagi = index;
				243
				244	mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
				245	return 0;
				246
				247	out_hash_destroy:
				248	xfs_buf_hash_destroy(pag);
				249	out_free_pag:
				250	mutex_destroy(&pag->pag_ici_reclaim_lock);
				251	kmem_free(pag);
				252	out_unwind_new_pags:
				253	/* unwind any prior newly initialized pags */
				254	for (index = first_initialised; index < agcount; index++) {
				255	pag = radix_tree_delete(&mp->m_perag_tree, index);
				256	if (!pag)
				257	break;
				258	xfs_buf_hash_destroy(pag);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	259	xfs_iunlink_destroy(pag);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	260	mutex_destroy(&pag->pag_ici_reclaim_lock);
				261	kmem_free(pag);
				262	}
				263	return error;
				264	}
				265
/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 *
 * The superblock is read twice: first with a sector size guessed from the
 * block device and no verifier, then again with the sector size recorded in
 * the superblock and a verifier attached.
 *
 * @mp:    mount being set up; mp->m_sb is filled in from disk and, on
 *         success, mp->m_sb_bp holds the (unlocked) superblock buffer
 * @flags: if XFS_MFSI_QUIET is set, warnings are suppressed and the quiet
 *         verifier is used for the second read
 *
 * Returns 0 on success.  On failure returns the error from the buffer read
 * (with -EFSBADCRC reported as -EFSCORRUPTED), -EINVAL for a bad magic
 * number, or -ENOSYS when the device sector size exceeds sb_sectsize.
 */
int
xfs_readsb(
	struct xfs_mount *mp,
	int		flags)
{
	unsigned int	sector_size;
	struct xfs_buf	*bp;
	struct xfs_sb	*sbp = &mp->m_sb;
	int		error;
	int		loud = !(flags & XFS_MFSI_QUIET);
	const struct xfs_buf_ops *buf_ops;

	ASSERT(mp->m_sb_bp == NULL);
	ASSERT(mp->m_ddev_targp != NULL);

	/*
	 * For the initial read, we must guess at the sector
	 * size based on the block device.  It's enough to
	 * get the sb_sectsize out of the superblock and
	 * then reread with the proper length.
	 * We don't verify it yet, because it may not be complete.
	 */
	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
	/* NULL buf_ops doubles as the "first pass" marker tested below. */
	buf_ops = NULL;

	/*
	 * Allocate a (locked) buffer to hold the superblock. This will be kept
	 * around at all times to optimize access to the superblock. Therefore,
	 * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
	 * elevated.
	 */
reread:
	error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
				      BTOBB(sector_size), XBF_NO_IOACCT, &bp,
				      buf_ops);
	if (error) {
		if (loud)
			xfs_warn(mp, "SB validate failed with error %d.", error);
		/* bad CRC means corrupted metadata */
		if (error == -EFSBADCRC)
			error = -EFSCORRUPTED;
		return error;
	}

	/*
	 * Initialize the mount structure from the superblock.
	 */
	xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));

	/*
	 * If we haven't validated the superblock, do so now before we try
	 * to check the sector size and reread the superblock appropriately.
	 */
	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
		if (loud)
			xfs_warn(mp, "Invalid superblock magic number");
		error = -EINVAL;
		goto release_buf;
	}

	/*
	 * We must be able to do sector-sized and sector-aligned IO.
	 */
	if (sector_size > sbp->sb_sectsize) {
		if (loud)
			xfs_warn(mp, "device supports %u byte sectors (not %u)",
				sector_size, sbp->sb_sectsize);
		error = -ENOSYS;
		goto release_buf;
	}

	if (buf_ops == NULL) {
		/*
		 * Re-read the superblock so the buffer is correctly sized,
		 * and properly verified.
		 */
		xfs_buf_relse(bp);
		sector_size = sbp->sb_sectsize;
		buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
		goto reread;
	}

	xfs_reinit_percpu_counters(mp);

	/* no need to be quiet anymore, so reset the buf ops */
	bp->b_ops = &xfs_sb_buf_ops;

	/* Keep the buffer attached to the mount; only drop the lock. */
	mp->m_sb_bp = bp;
	xfs_buf_unlock(bp);
	return 0;

release_buf:
	xfs_buf_relse(bp);
	return error;
}
				366
				367	/*
				368	* Update alignment values based on mount options and sb values
				369	*/
				370	STATIC int
				371	xfs_update_alignment(xfs_mount_t *mp)
				372	{
				373	xfs_sb_t *sbp = &(mp->m_sb);
				374
				375	if (mp->m_dalign) {
				376	/*
				377	* If stripe unit and stripe width are not multiples
				378	* of the fs blocksize turn off alignment.
				379	*/
				380	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) \|\|
				381	(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
				382	xfs_warn(mp,
				383	"alignment check failed: sunit/swidth vs. blocksize(%d)",
				384	sbp->sb_blocksize);
				385	return -EINVAL;
				386	} else {
				387	/*
				388	* Convert the stripe unit and width to FSBs.
				389	*/
				390	mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
				391	if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				392	xfs_warn(mp,
				393	"alignment check failed: sunit/swidth vs. agsize(%d)",
				394	sbp->sb_agblocks);
				395	return -EINVAL;
				396	} else if (mp->m_dalign) {
				397	mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
				398	} else {
				399	xfs_warn(mp,
				400	"alignment check failed: sunit(%d) less than bsize(%d)",
				401	mp->m_dalign, sbp->sb_blocksize);
				402	return -EINVAL;
				403	}
				404	}
				405
				406	/*
				407	* Update superblock with new values
				408	* and log changes
				409	*/
				410	if (xfs_sb_version_hasdalign(sbp)) {
				411	if (sbp->sb_unit != mp->m_dalign) {
				412	sbp->sb_unit = mp->m_dalign;
				413	mp->m_update_sb = true;
				414	}
				415	if (sbp->sb_width != mp->m_swidth) {
				416	sbp->sb_width = mp->m_swidth;
				417	mp->m_update_sb = true;
				418	}
				419	} else {
				420	xfs_warn(mp,
				421	"cannot change alignment: superblock does not support data alignment");
				422	return -EINVAL;
				423	}
				424	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
				425	xfs_sb_version_hasdalign(&mp->m_sb)) {
				426	mp->m_dalign = sbp->sb_unit;
				427	mp->m_swidth = sbp->sb_width;
				428	}
				429
				430	return 0;
				431	}
				432
				433	/*
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	434	* Set the default minimum read and write sizes unless
				435	* already specified in a mount option.
				436	* We use smaller I/O sizes when the file system
				437	* is being used for NFS service (wsync mount option).
				438	*/
				439	STATIC void
				440	xfs_set_rw_sizes(xfs_mount_t *mp)
				441	{
				442	xfs_sb_t *sbp = &(mp->m_sb);
				443	int readio_log, writeio_log;
				444
				445	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
				446	if (mp->m_flags & XFS_MOUNT_WSYNC) {
				447	readio_log = XFS_WSYNC_READIO_LOG;
				448	writeio_log = XFS_WSYNC_WRITEIO_LOG;
				449	} else {
				450	readio_log = XFS_READIO_LOG_LARGE;
				451	writeio_log = XFS_WRITEIO_LOG_LARGE;
				452	}
				453	} else {
				454	readio_log = mp->m_readio_log;
				455	writeio_log = mp->m_writeio_log;
				456	}
				457
				458	if (sbp->sb_blocklog > readio_log) {
				459	mp->m_readio_log = sbp->sb_blocklog;
				460	} else {
				461	mp->m_readio_log = readio_log;
				462	}
				463	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
				464	if (sbp->sb_blocklog > writeio_log) {
				465	mp->m_writeio_log = sbp->sb_blocklog;
				466	} else {
				467	mp->m_writeio_log = writeio_log;
				468	}
				469	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
				470	}
				471
				472	/*
				473	* precalculate the low space thresholds for dynamic speculative preallocation.
				474	*/
				475	void
				476	xfs_set_low_space_thresholds(
				477	struct xfs_mount *mp)
				478	{
				479	int i;
				480
				481	for (i = 0; i < XFS_LOWSP_MAX; i++) {
				482	uint64_t space = mp->m_sb.sb_dblocks;
				483
				484	do_div(space, 100);
				485	mp->m_low_space[i] = space * (i + 1);
				486	}
				487	}
				488
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	489	/*
				490	* Check that the data (and log if separate) is an ok size.
				491	*/
				492	STATIC int
				493	xfs_check_sizes(
				494	struct xfs_mount *mp)
				495	{
				496	struct xfs_buf *bp;
				497	xfs_daddr_t d;
				498	int error;
				499
				500	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
				501	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
				502	xfs_warn(mp, "filesystem size mismatch detected");
				503	return -EFBIG;
				504	}
				505	error = xfs_buf_read_uncached(mp->m_ddev_targp,
				506	d - XFS_FSS_TO_BB(mp, 1),
				507	XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
				508	if (error) {
				509	xfs_warn(mp, "last sector read failed");
				510	return error;
				511	}
				512	xfs_buf_relse(bp);
				513
				514	if (mp->m_logdev_targp == mp->m_ddev_targp)
				515	return 0;
				516
				517	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
				518	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
				519	xfs_warn(mp, "log size mismatch detected");
				520	return -EFBIG;
				521	}
				522	error = xfs_buf_read_uncached(mp->m_logdev_targp,
				523	d - XFS_FSB_TO_BB(mp, 1),
				524	XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
				525	if (error) {
				526	xfs_warn(mp, "log device read failed");
				527	return error;
				528	}
				529	xfs_buf_relse(bp);
				530	return 0;
				531	}
				532
				533	/*
				534	* Clear the quotaflags in memory and in the superblock.
				535	*/
				536	int
				537	xfs_mount_reset_sbqflags(
				538	struct xfs_mount *mp)
				539	{
				540	mp->m_qflags = 0;
				541
				542	/* It is OK to look at sb_qflags in the mount path without m_sb_lock. */
				543	if (mp->m_sb.sb_qflags == 0)
				544	return 0;
				545	spin_lock(&mp->m_sb_lock);
				546	mp->m_sb.sb_qflags = 0;
				547	spin_unlock(&mp->m_sb_lock);
				548
				549	if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
				550	return 0;
				551
				552	return xfs_sync_sb(mp, false);
				553	}
				554
				555	uint64_t
				556	xfs_default_resblks(xfs_mount_t *mp)
				557	{
				558	uint64_t resblks;
				559
				560	/*
				561	* We default to 5% or 8192 fsbs of space reserved, whichever is
				562	* smaller. This is intended to cover concurrent allocation
				563	* transactions when we initially hit enospc. These each require a 4
				564	* block reservation. Hence by default we cover roughly 2000 concurrent
				565	* allocation reservations.
				566	*/
				567	resblks = mp->m_sb.sb_dblocks;
				568	do_div(resblks, 20);
				569	resblks = min_t(uint64_t, resblks, 8192);
				570	return resblks;
				571	}
				572
				573	/* Ensure the summary counts are correct. */
				574	STATIC int
				575	xfs_check_summary_counts(
				576	struct xfs_mount *mp)
				577	{
				578	/*
				579	* The AG0 superblock verifier rejects in-progress filesystems,
				580	* so we should never see the flag set this far into mounting.
				581	*/
				582	if (mp->m_sb.sb_inprogress) {
				583	xfs_err(mp, "sb_inprogress set after log recovery??");
				584	WARN_ON(1);
				585	return -EFSCORRUPTED;
				586	}
				587
				588	/*
				589	* Now the log is mounted, we know if it was an unclean shutdown or
				590	* not. If it was, with the first phase of recovery has completed, we
				591	* have consistent AG blocks on disk. We have not recovered EFIs yet,
				592	* but they are recovered transactionally in the second recovery phase
				593	* later.
				594	*
				595	* If the log was clean when we mounted, we can check the summary
				596	* counters. If any of them are obviously incorrect, we can recompute
				597	* them from the AGF headers in the next step.
				598	*/
				599	if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
				600	(mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks \|\|
				601	!xfs_verify_icount(mp, mp->m_sb.sb_icount) \|\|
				602	mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	603	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	604
				605	/*
				606	* We can safely re-initialise incore superblock counters from the
				607	* per-ag data. These may not be correct if the filesystem was not
				608	* cleanly unmounted, so we waited for recovery to finish before doing
				609	* this.
				610	*
				611	* If the filesystem was cleanly unmounted or the previous check did
				612	* not flag anything weird, then we can trust the values in the
				613	* superblock to be correct and we don't need to do anything here.
				614	* Otherwise, recalculate the summary counters.
				615	*/
				616	if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) \|\|
				617	XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	618	!xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	619	return 0;
				620
				621	return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
				622	}
				623
				624	/*
				625	* This function does the following on an initial mount of a file system:
				626	* - reads the superblock from disk and init the mount struct
				627	* - if we're a 32-bit kernel, do a size check on the superblock
				628	* so we don't mount terabyte filesystems
				629	* - init mount struct realtime fields
				630	* - allocate inode hash table for fs
				631	* - init directory manager
				632	* - perform recovery and init the log manager
				633	*/
				634	int
				635	xfs_mountfs(
				636	struct xfs_mount *mp)
				637	{
				638	struct xfs_sb *sbp = &(mp->m_sb);
				639	struct xfs_inode *rip;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	640	struct xfs_ino_geometry *igeo = M_IGEO(mp);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	641	uint64_t resblks;
				642	uint quotamount = 0;
				643	uint quotaflags = 0;
				644	int error = 0;
				645
				646	xfs_sb_mount_common(mp, sbp);
				647
				648	/*
				649	* Check for a mismatched features2 values. Older kernels read & wrote
				650	* into the wrong sb offset for sb_features2 on some platforms due to
				651	* xfs_sb_t not being 64bit size aligned when sb_features2 was added,
				652	* which made older superblock reading/writing routines swap it as a
				653	* 64-bit value.
				654	*
				655	* For backwards compatibility, we make both slots equal.
				656	*
				657	* If we detect a mismatched field, we OR the set bits into the existing
				658	* features2 field in case it has already been modified; we don't want
				659	* to lose any features. We then update the bad location with the ORed
				660	* value so that older kernels will see any features2 flags. The
				661	* superblock writeback code ensures the new sb_features2 is copied to
				662	* sb_bad_features2 before it is logged or written to disk.
				663	*/
				664	if (xfs_sb_has_mismatched_features2(sbp)) {
				665	xfs_warn(mp, "correcting sb_features alignment problem");
				666	sbp->sb_features2 \|= sbp->sb_bad_features2;
				667	mp->m_update_sb = true;
				668
				669	/*
				670	* Re-check for ATTR2 in case it was found in bad_features2
				671	* slot.
				672	*/
				673	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
				674	!(mp->m_flags & XFS_MOUNT_NOATTR2))
				675	mp->m_flags \|= XFS_MOUNT_ATTR2;
				676	}
				677
				678	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
				679	(mp->m_flags & XFS_MOUNT_NOATTR2)) {
				680	xfs_sb_version_removeattr2(&mp->m_sb);
				681	mp->m_update_sb = true;
				682
				683	/* update sb_versionnum for the clearing of the morebits */
				684	if (!sbp->sb_features2)
				685	mp->m_update_sb = true;
				686	}
				687
				688	/* always use v2 inodes by default now */
				689	if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
				690	mp->m_sb.sb_versionnum \|= XFS_SB_VERSION_NLINKBIT;
				691	mp->m_update_sb = true;
				692	}
				693
				694	/*
				695	* Check if sb_agblocks is aligned at stripe boundary
				696	* If sb_agblocks is NOT aligned turn off m_dalign since
				697	* allocator alignment is within an ag, therefore ag has
				698	* to be aligned at stripe boundary.
				699	*/
				700	error = xfs_update_alignment(mp);
				701	if (error)
				702	goto out;
				703
				704	xfs_alloc_compute_maxlevels(mp);
				705	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
				706	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	707	xfs_ialloc_setup_geometry(mp);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	708	xfs_rmapbt_compute_maxlevels(mp);
				709	xfs_refcountbt_compute_maxlevels(mp);
				710
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	711	/* enable fail_at_unmount as default */
				712	mp->m_fail_unmount = true;
				713
				714	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
				715	if (error)
				716	goto out;
				717
				718	error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
				719	&mp->m_kobj, "stats");
				720	if (error)
				721	goto out_remove_sysfs;
				722
				723	error = xfs_error_sysfs_init(mp);
				724	if (error)
				725	goto out_del_stats;
				726
				727	error = xfs_errortag_init(mp);
				728	if (error)
				729	goto out_remove_error_sysfs;
				730
				731	error = xfs_uuid_mount(mp);
				732	if (error)
				733	goto out_remove_errortag;
				734
				735	/*
				736	* Set the minimum read and write sizes
				737	*/
				738	xfs_set_rw_sizes(mp);
				739
				740	/* set the low space thresholds for dynamic preallocation */
				741	xfs_set_low_space_thresholds(mp);
				742
				743	/*
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	744	* If enabled, sparse inode chunk alignment is expected to match the
				745	* cluster size. Full inode chunk alignment must match the chunk size,
				746	* but that is checked on sb read verification...
				747	*/
				748	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
				749	mp->m_sb.sb_spino_align !=
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	750	XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	751	xfs_warn(mp,
				752	"Sparse inode block alignment (%u) must match cluster size (%llu).",
				753	mp->m_sb.sb_spino_align,
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	754	XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	755	error = -EINVAL;
				756	goto out_remove_uuid;
				757	}
				758
				759	/*
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	760	* Check that the data (and log if separate) is an ok size.
				761	*/
				762	error = xfs_check_sizes(mp);
				763	if (error)
				764	goto out_remove_uuid;
				765
				766	/*
				767	* Initialize realtime fields in the mount structure
				768	*/
				769	error = xfs_rtmount_init(mp);
				770	if (error) {
				771	xfs_warn(mp, "RT mount failed");
				772	goto out_remove_uuid;
				773	}
				774
				775	/*
				776	* Copies the low order bits of the timestamp and the randomly
				777	* set "sequence" number out of a UUID.
				778	*/
				779	mp->m_fixedfsid[0] =
				780	(get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) \|
				781	get_unaligned_be16(&sbp->sb_uuid.b[4]);
				782	mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
				783
				784	error = xfs_da_mount(mp);
				785	if (error) {
				786	xfs_warn(mp, "Failed dir/attr init: %d", error);
				787	goto out_remove_uuid;
				788	}
				789
				790	/*
				791	* Initialize the precomputed transaction reservations values.
				792	*/
				793	xfs_trans_init(mp);
				794
				795	/*
				796	* Allocate and initialize the per-ag data.
				797	*/
				798	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
				799	if (error) {
				800	xfs_warn(mp, "Failed per-ag init: %d", error);
				801	goto out_free_dir;
				802	}
				803
				804	if (!sbp->sb_logblocks) {
				805	xfs_warn(mp, "no log defined");
				806	XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
				807	error = -EFSCORRUPTED;
				808	goto out_free_perag;
				809	}
				810
				811	/*
				812	* Log's mount-time initialization. The first part of recovery can place
				813	* some items on the AIL, to be handled when recovery is finished or
				814	* cancelled.
				815	*/
				816	error = xfs_log_mount(mp, mp->m_logdev_targp,
				817	XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
				818	XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
				819	if (error) {
				820	xfs_warn(mp, "log mount failed");
				821	goto out_fail_wait;
				822	}
				823
				824	/* Make sure the summary counts are ok. */
				825	error = xfs_check_summary_counts(mp);
				826	if (error)
				827	goto out_log_dealloc;
				828
				829	/*
				830	* Get and sanity-check the root inode.
				831	* Save the pointer to it in the mount structure.
				832	*/
				833	error = xfs_iget(mp, NULL, sbp->sb_rootino, XFS_IGET_UNTRUSTED,
				834	XFS_ILOCK_EXCL, &rip);
				835	if (error) {
				836	xfs_warn(mp,
				837	"Failed to read root inode 0x%llx, error %d",
				838	sbp->sb_rootino, -error);
				839	goto out_log_dealloc;
				840	}
				841
				842	ASSERT(rip != NULL);
				843
				844	if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) {
				845	xfs_warn(mp, "corrupted root inode %llu: not a directory",
				846	(unsigned long long)rip->i_ino);
				847	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				848	XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				849	mp);
				850	error = -EFSCORRUPTED;
				851	goto out_rele_rip;
				852	}
				853	mp->m_rootip = rip; /* save it */
				854
				855	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				856
				857	/*
				858	* Initialize realtime inode pointers in the mount structure
				859	*/
				860	error = xfs_rtmount_inodes(mp);
				861	if (error) {
				862	/*
				863	* Free up the root inode.
				864	*/
				865	xfs_warn(mp, "failed to read RT inodes");
				866	goto out_rele_rip;
				867	}
				868
				869	/*
				870	* If this is a read-only mount defer the superblock updates until
				871	* the next remount into writeable mode. Otherwise we would never
				872	* perform the update e.g. for the root filesystem.
				873	*/
				874	if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
				875	error = xfs_sync_sb(mp, false);
				876	if (error) {
				877	xfs_warn(mp, "failed to write sb changes");
				878	goto out_rtunmount;
				879	}
				880	}
				881
				882	/*
				883	* Initialise the XFS quota management subsystem for this mount
				884	*/
				885	if (XFS_IS_QUOTA_RUNNING(mp)) {
				886	error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
				887	if (error)
				888	goto out_rtunmount;
				889	} else {
				890	ASSERT(!XFS_IS_QUOTA_ON(mp));
				891
				892	/*
				893	* If a file system had quotas running earlier, but decided to
				894	* mount without -o uquota/pquota/gquota options, revoke the
				895	* quotachecked license.
				896	*/
				897	if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
				898	xfs_notice(mp, "resetting quota flags");
				899	error = xfs_mount_reset_sbqflags(mp);
				900	if (error)
				901	goto out_rtunmount;
				902	}
				903	}
				904
				905	/*
				906	* Finish recovering the file system. This part needed to be delayed
				907	* until after the root and real-time bitmap inodes were consistently
				908	* read in.
				909	*/
				910	error = xfs_log_mount_finish(mp);
				911	if (error) {
				912	xfs_warn(mp, "log mount finish failed");
				913	goto out_rtunmount;
				914	}
				915
				916	/*
				917	* Now the log is fully replayed, we can transition to full read-only
				918	* mode for read-only mounts. This will sync all the metadata and clean
				919	* the log so that the recovery we just performed does not have to be
				920	* replayed again on the next mount.
				921	*
				922	* We use the same quiesce mechanism as the rw->ro remount, as they are
				923	* semantically identical operations.
				924	*/
				925	if ((mp->m_flags & (XFS_MOUNT_RDONLY\|XFS_MOUNT_NORECOVERY)) ==
				926	XFS_MOUNT_RDONLY) {
				927	xfs_quiesce_attr(mp);
				928	}
				929
				930	/*
				931	* Complete the quota initialisation, post-log-replay component.
				932	*/
				933	if (quotamount) {
				934	ASSERT(mp->m_qflags == 0);
				935	mp->m_qflags = quotaflags;
				936
				937	xfs_qm_mount_quotas(mp);
				938	}
				939
				940	/*
				941	* Now we are mounted, reserve a small amount of unused space for
				942	* privileged transactions. This is needed so that transaction
				943	* space required for critical operations can dip into this pool
				944	* when at ENOSPC. This is needed for operations like create with
				945	* attr, unwritten extent conversion at ENOSPC, etc. Data allocations
				946	* are not allowed to use this reserved space.
				947	*
				948	* This may drive us straight to ENOSPC on mount, but that implies
				949	* we were already there on the last unmount. Warn if this occurs.
				950	*/
				951	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
				952	resblks = xfs_default_resblks(mp);
				953	error = xfs_reserve_blocks(mp, &resblks, NULL);
				954	if (error)
				955	xfs_warn(mp,
				956	"Unable to allocate reserve blocks. Continuing without reserve pool.");
				957
				958	/* Recover any CoW blocks that never got remapped. */
				959	error = xfs_reflink_recover_cow(mp);
				960	if (error) {
				961	xfs_err(mp,
				962	"Error %d recovering leftover CoW allocations.", error);
				963	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
				964	goto out_quota;
				965	}
				966
				967	/* Reserve AG blocks for future btree expansion. */
				968	error = xfs_fs_reserve_ag_blocks(mp);
				969	if (error && error != -ENOSPC)
				970	goto out_agresv;
				971	}
				972
				973	return 0;
				974
				975	out_agresv:
				976	xfs_fs_unreserve_ag_blocks(mp);
				977	out_quota:
				978	xfs_qm_unmount_quotas(mp);
				979	out_rtunmount:
				980	xfs_rtunmount_inodes(mp);
				981	out_rele_rip:
				982	xfs_irele(rip);
				983	/* Clean out dquots that might be in memory after quotacheck. */
				984	xfs_qm_unmount(mp);
				985	/*
				986	* Cancel all delayed reclaim work and reclaim the inodes directly.
				987	* We have to do this /after/ rtunmount and qm_unmount because those
				988	* two will have scheduled delayed reclaim for the rt/quota inodes.
				989	*
				990	* This is slightly different from the unmountfs call sequence
				991	* because we could be tearing down a partially set up mount. In
				992	* particular, if log_mount_finish fails we bail out without calling
				993	* qm_unmount_quotas and therefore rely on qm_unmount to release the
				994	* quota inodes.
				995	*/
				996	cancel_delayed_work_sync(&mp->m_reclaim_work);
				997	xfs_reclaim_inodes(mp, SYNC_WAIT);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	998	xfs_health_unmount(mp);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	999	out_log_dealloc:
				1000	mp->m_flags \|= XFS_MOUNT_UNMOUNTING;
				1001	xfs_log_mount_cancel(mp);
				1002	out_fail_wait:
				1003	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
				1004	xfs_wait_buftarg(mp->m_logdev_targp);
				1005	xfs_wait_buftarg(mp->m_ddev_targp);
				1006	out_free_perag:
				1007	xfs_free_perag(mp);
				1008	out_free_dir:
				1009	xfs_da_unmount(mp);
				1010	out_remove_uuid:
				1011	xfs_uuid_unmount(mp);
				1012	out_remove_errortag:
				1013	xfs_errortag_del(mp);
				1014	out_remove_error_sysfs:
				1015	xfs_error_sysfs_del(mp);
				1016	out_del_stats:
				1017	xfs_sysfs_del(&mp->m_stats.xs_kobj);
				1018	out_remove_sysfs:
				1019	xfs_sysfs_del(&mp->m_kobj);
				1020	out:
				1021	return error;
				1022	}
				1023
				1024	/*
				1025	* This flushes out the inodes,dquots and the superblock, unmounts the
				1026	* log and makes sure that incore structures are freed.
				1027	*/
				1028	void
				1029	xfs_unmountfs(
				1030	struct xfs_mount *mp)
				1031	{
				1032	uint64_t resblks;
				1033	int error;
				1034
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1035	xfs_stop_block_reaping(mp);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1036	xfs_fs_unreserve_ag_blocks(mp);
				1037	xfs_qm_unmount_quotas(mp);
				1038	xfs_rtunmount_inodes(mp);
				1039	xfs_irele(mp->m_rootip);
				1040
				1041	/*
				1042	* We can potentially deadlock here if we have an inode cluster
				1043	* that has been freed has its buffer still pinned in memory because
				1044	* the transaction is still sitting in a iclog. The stale inodes
				1045	* on that buffer will have their flush locks held until the
				1046	* transaction hits the disk and the callbacks run. the inode
				1047	* flush takes the flush lock unconditionally and with nothing to
				1048	* push out the iclog we will never get that unlocked. hence we
				1049	* need to force the log first.
				1050	*/
				1051	xfs_log_force(mp, XFS_LOG_SYNC);
				1052
				1053	/*
				1054	* Wait for all busy extents to be freed, including completion of
				1055	* any discard operation.
				1056	*/
				1057	xfs_extent_busy_wait_all(mp);
				1058	flush_workqueue(xfs_discard_wq);
				1059
				1060	/*
				1061	* We now need to tell the world we are unmounting. This will allow
				1062	* us to detect that the filesystem is going away and we should error
				1063	* out anything that we have been retrying in the background. This will
				1064	* prevent neverending retries in AIL pushing from hanging the unmount.
				1065	*/
				1066	mp->m_flags \|= XFS_MOUNT_UNMOUNTING;
				1067
				1068	/*
				1069	* Flush all pending changes from the AIL.
				1070	*/
				1071	xfs_ail_push_all_sync(mp->m_ail);
				1072
				1073	/*
				1074	* And reclaim all inodes. At this point there should be no dirty
				1075	* inodes and none should be pinned or locked, but use synchronous
				1076	* reclaim just to be sure. We can stop background inode reclaim
				1077	* here as well if it is still running.
				1078	*/
				1079	cancel_delayed_work_sync(&mp->m_reclaim_work);
				1080	xfs_reclaim_inodes(mp, SYNC_WAIT);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1081	xfs_health_unmount(mp);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1082
				1083	xfs_qm_unmount(mp);
				1084
				1085	/*
				1086	* Unreserve any blocks we have so that when we unmount we don't account
				1087	* the reserved free space as used. This is really only necessary for
				1088	* lazy superblock counting because it trusts the incore superblock
				1089	* counters to be absolutely correct on clean unmount.
				1090	*
				1091	* We don't bother correcting this elsewhere for lazy superblock
				1092	* counting because on mount of an unclean filesystem we reconstruct the
				1093	* correct counter value and this is irrelevant.
				1094	*
				1095	* For non-lazy counter filesystems, this doesn't matter at all because
				1096	* we only every apply deltas to the superblock and hence the incore
				1097	* value does not matter....
				1098	*/
				1099	resblks = 0;
				1100	error = xfs_reserve_blocks(mp, &resblks, NULL);
				1101	if (error)
				1102	xfs_warn(mp, "Unable to free reserved block pool. "
				1103	"Freespace may not be correct on next mount.");
				1104
				1105	error = xfs_log_sbcount(mp);
				1106	if (error)
				1107	xfs_warn(mp, "Unable to update superblock counters. "
				1108	"Freespace may not be correct on next mount.");
				1109
				1110
				1111	xfs_log_unmount(mp);
				1112	xfs_da_unmount(mp);
				1113	xfs_uuid_unmount(mp);
				1114
				1115	#if defined(DEBUG)
				1116	xfs_errortag_clearall(mp);
				1117	#endif
				1118	xfs_free_perag(mp);
				1119
				1120	xfs_errortag_del(mp);
				1121	xfs_error_sysfs_del(mp);
				1122	xfs_sysfs_del(&mp->m_stats.xs_kobj);
				1123	xfs_sysfs_del(&mp->m_kobj);
				1124	}
				1125
				1126	/*
				1127	* Determine whether modifications can proceed. The caller specifies the minimum
				1128	* freeze level for which modifications should not be allowed. This allows
				1129	* certain operations to proceed while the freeze sequence is in progress, if
				1130	* necessary.
				1131	*/
				1132	bool
				1133	xfs_fs_writable(
				1134	struct xfs_mount *mp,
				1135	int level)
				1136	{
				1137	ASSERT(level > SB_UNFROZEN);
				1138	if ((mp->m_super->s_writers.frozen >= level) \|\|
				1139	XFS_FORCED_SHUTDOWN(mp) \|\| (mp->m_flags & XFS_MOUNT_RDONLY))
				1140	return false;
				1141
				1142	return true;
				1143	}
				1144
				1145	/*
				1146	* xfs_log_sbcount
				1147	*
				1148	* Sync the superblock counters to disk.
				1149	*
				1150	* Note this code can be called during the process of freezing, so we use the
				1151	* transaction allocator that does not block when the transaction subsystem is
				1152	* in its frozen state.
				1153	*/
				1154	int
				1155	xfs_log_sbcount(xfs_mount_t *mp)
				1156	{
				1157	/* allow this to proceed during the freeze sequence... */
				1158	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
				1159	return 0;
				1160
				1161	/*
				1162	* we don't need to do this if we are updating the superblock
				1163	* counters on every modification.
				1164	*/
				1165	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
				1166	return 0;
				1167
				1168	return xfs_sync_sb(mp, true);
				1169	}
				1170
				1171	/*
				1172	* Deltas for the inode count are +/-64, hence we use a large batch size
				1173	* of 128 so we don't need to take the counter lock on every update.
				1174	*/
				1175	#define XFS_ICOUNT_BATCH 128
				1176	int
				1177	xfs_mod_icount(
				1178	struct xfs_mount *mp,
				1179	int64_t delta)
				1180	{
				1181	percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
				1182	if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
				1183	ASSERT(0);
				1184	percpu_counter_add(&mp->m_icount, -delta);
				1185	return -EINVAL;
				1186	}
				1187	return 0;
				1188	}
				1189
				1190	int
				1191	xfs_mod_ifree(
				1192	struct xfs_mount *mp,
				1193	int64_t delta)
				1194	{
				1195	percpu_counter_add(&mp->m_ifree, delta);
				1196	if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
				1197	ASSERT(0);
				1198	percpu_counter_add(&mp->m_ifree, -delta);
				1199	return -EINVAL;
				1200	}
				1201	return 0;
				1202	}
				1203
				1204	/*
				1205	* Deltas for the block count can vary from 1 to very large, but lock contention
				1206	* only occurs on frequent small block count updates such as in the delayed
				1207	* allocation path for buffered writes (page a time updates). Hence we set
				1208	* a large batch count (1024) to minimise global counter updates except when
				1209	* we get near to ENOSPC and we have to be very accurate with our updates.
				1210	*/
				1211	#define XFS_FDBLOCKS_BATCH 1024
				1212	int
				1213	xfs_mod_fdblocks(
				1214	struct xfs_mount *mp,
				1215	int64_t delta,
				1216	bool rsvd)
				1217	{
				1218	int64_t lcounter;
				1219	long long res_used;
				1220	s32 batch;
				1221
				1222	if (delta > 0) {
				1223	/*
				1224	* If the reserve pool is depleted, put blocks back into it
				1225	* first. Most of the time the pool is full.
				1226	*/
				1227	if (likely(mp->m_resblks == mp->m_resblks_avail)) {
				1228	percpu_counter_add(&mp->m_fdblocks, delta);
				1229	return 0;
				1230	}
				1231
				1232	spin_lock(&mp->m_sb_lock);
				1233	res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
				1234
				1235	if (res_used > delta) {
				1236	mp->m_resblks_avail += delta;
				1237	} else {
				1238	delta -= res_used;
				1239	mp->m_resblks_avail = mp->m_resblks;
				1240	percpu_counter_add(&mp->m_fdblocks, delta);
				1241	}
				1242	spin_unlock(&mp->m_sb_lock);
				1243	return 0;
				1244	}
				1245
				1246	/*
				1247	* Taking blocks away, need to be more accurate the closer we
				1248	* are to zero.
				1249	*
				1250	* If the counter has a value of less than 2 * max batch size,
				1251	* then make everything serialise as we are real close to
				1252	* ENOSPC.
				1253	*/
				1254	if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
				1255	XFS_FDBLOCKS_BATCH) < 0)
				1256	batch = 1;
				1257	else
				1258	batch = XFS_FDBLOCKS_BATCH;
				1259
				1260	percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
				1261	if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
				1262	XFS_FDBLOCKS_BATCH) >= 0) {
				1263	/* we had space! */
				1264	return 0;
				1265	}
				1266
				1267	/*
				1268	* lock up the sb for dipping into reserves before releasing the space
				1269	* that took us to ENOSPC.
				1270	*/
				1271	spin_lock(&mp->m_sb_lock);
				1272	percpu_counter_add(&mp->m_fdblocks, -delta);
				1273	if (!rsvd)
				1274	goto fdblocks_enospc;
				1275
				1276	lcounter = (long long)mp->m_resblks_avail + delta;
				1277	if (lcounter >= 0) {
				1278	mp->m_resblks_avail = lcounter;
				1279	spin_unlock(&mp->m_sb_lock);
				1280	return 0;
				1281	}
				1282	printk_once(KERN_WARNING
				1283	"Filesystem \"%s\": reserve blocks depleted! "
				1284	"Consider increasing reserve pool size.",
				1285	mp->m_fsname);
				1286	fdblocks_enospc:
				1287	spin_unlock(&mp->m_sb_lock);
				1288	return -ENOSPC;
				1289	}
				1290
				1291	int
				1292	xfs_mod_frextents(
				1293	struct xfs_mount *mp,
				1294	int64_t delta)
				1295	{
				1296	int64_t lcounter;
				1297	int ret = 0;
				1298
				1299	spin_lock(&mp->m_sb_lock);
				1300	lcounter = mp->m_sb.sb_frextents + delta;
				1301	if (lcounter < 0)
				1302	ret = -ENOSPC;
				1303	else
				1304	mp->m_sb.sb_frextents = lcounter;
				1305	spin_unlock(&mp->m_sb_lock);
				1306	return ret;
				1307	}
				1308
				1309	/*
				1310	* xfs_getsb() is called to obtain the buffer for the superblock.
				1311	* The buffer is returned locked and read in from disk.
				1312	* The buffer should be released with a call to xfs_brelse().
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1313	*/
				1314	struct xfs_buf *
				1315	xfs_getsb(
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1316	struct xfs_mount *mp)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1317	{
				1318	struct xfs_buf *bp = mp->m_sb_bp;
				1319
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1320	xfs_buf_lock(bp);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1321	xfs_buf_hold(bp);
				1322	ASSERT(bp->b_flags & XBF_DONE);
				1323	return bp;
				1324	}
				1325
				1326	/*
				1327	* Used to free the superblock along various error paths.
				1328	*/
				1329	void
				1330	xfs_freesb(
				1331	struct xfs_mount *mp)
				1332	{
				1333	struct xfs_buf *bp = mp->m_sb_bp;
				1334
				1335	xfs_buf_lock(bp);
				1336	mp->m_sb_bp = NULL;
				1337	xfs_buf_relse(bp);
				1338	}
				1339
				1340	/*
				1341	* If the underlying (data/log/rt) device is readonly, there are some
				1342	* operations that cannot proceed.
				1343	*/
				1344	int
				1345	xfs_dev_is_read_only(
				1346	struct xfs_mount *mp,
				1347	char *message)
				1348	{
				1349	if (xfs_readonly_buftarg(mp->m_ddev_targp) \|\|
				1350	xfs_readonly_buftarg(mp->m_logdev_targp) \|\|
				1351	(mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
				1352	xfs_notice(mp, "%s required on read-only device.", message);
				1353	xfs_notice(mp, "write access unavailable, cannot proceed.");
				1354	return -EROFS;
				1355	}
				1356	return 0;
				1357	}
				1358
				1359	/* Force the summary counters to be recalculated at next mount. */
				1360	void
				1361	xfs_force_summary_recalc(
				1362	struct xfs_mount *mp)
				1363	{
				1364	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
				1365	return;
				1366
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1367	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
				1368	}
				1369
				1370	/*
				1371	* Update the in-core delayed block counter.
				1372	*
				1373	* We prefer to update the counter without having to take a spinlock for every
				1374	* counter update (i.e. batching). Each change to delayed allocation
				1375	* reservations can change can easily exceed the default percpu counter
				1376	* batching, so we use a larger batch factor here.
				1377	*
				1378	* Note that we don't currently have any callers requiring fast summation
				1379	* (e.g. percpu_counter_read) so we can use a big batch value here.
				1380	*/
				1381	#define XFS_DELALLOC_BATCH (4096)
				1382	void
				1383	xfs_mod_delalloc(
				1384	struct xfs_mount *mp,
				1385	int64_t delta)
				1386	{
				1387	percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
				1388	XFS_DELALLOC_BATCH);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1389	}