Blame - fs/xfs/scrub/repair.c - hafnium/third_party/linux.git

blob: 9f08dd9bf1d553934bf2aa2f8a108ab8435bc19c [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0+
				2	/*
				3	* Copyright (C) 2018 Oracle. All Rights Reserved.
				4	* Author: Darrick J. Wong <darrick.wong@oracle.com>
				5	*/
				6	#include "xfs.h"
				7	#include "xfs_fs.h"
				8	#include "xfs_shared.h"
				9	#include "xfs_format.h"
				10	#include "xfs_trans_resv.h"
				11	#include "xfs_mount.h"
				12	#include "xfs_defer.h"
				13	#include "xfs_btree.h"
				14	#include "xfs_bit.h"
				15	#include "xfs_log_format.h"
				16	#include "xfs_trans.h"
				17	#include "xfs_sb.h"
				18	#include "xfs_inode.h"
				19	#include "xfs_icache.h"
				20	#include "xfs_alloc.h"
				21	#include "xfs_alloc_btree.h"
				22	#include "xfs_ialloc.h"
				23	#include "xfs_ialloc_btree.h"
				24	#include "xfs_rmap.h"
				25	#include "xfs_rmap_btree.h"
				26	#include "xfs_refcount.h"
				27	#include "xfs_refcount_btree.h"
				28	#include "xfs_extent_busy.h"
				29	#include "xfs_ag_resv.h"
				30	#include "xfs_trans_space.h"
				31	#include "xfs_quota.h"
				32	#include "scrub/xfs_scrub.h"
				33	#include "scrub/scrub.h"
				34	#include "scrub/common.h"
				35	#include "scrub/trace.h"
				36	#include "scrub/repair.h"
				37	#include "scrub/bitmap.h"
				38
				39	/*
				40	* Attempt to repair some metadata, if the metadata is corrupt and userspace
				41	* told us to fix it. This function returns -EAGAIN to mean "re-run scrub",
				42	* and will set *fixed to true if it thinks it repaired anything.
				43	*/
				44	int
				45	xrep_attempt(
				46	struct xfs_inode *ip,
				47	struct xfs_scrub *sc,
				48	bool *fixed)
				49	{
				50	int error = 0;
				51
				52	trace_xrep_attempt(ip, sc->sm, error);
				53
				54	xchk_ag_btcur_free(&sc->sa);
				55
				56	/* Repair whatever's broken. */
				57	ASSERT(sc->ops->repair);
				58	error = sc->ops->repair(sc);
				59	trace_xrep_done(ip, sc->sm, error);
				60	switch (error) {
				61	case 0:
				62	/*
				63	* Repair succeeded. Commit the fixes and perform a second
				64	* scrub so that we can tell userspace if we fixed the problem.
				65	*/
				66	sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
				67	*fixed = true;
				68	return -EAGAIN;
				69	case -EDEADLOCK:
				70	case -EAGAIN:
				71	/* Tell the caller to try again having grabbed all the locks. */
				72	if (!sc->try_harder) {
				73	sc->try_harder = true;
				74	return -EAGAIN;
				75	}
				76	/*
				77	* We tried harder but still couldn't grab all the resources
				78	* we needed to fix it. The corruption has not been fixed,
				79	* so report back to userspace.
				80	*/
				81	return -EFSCORRUPTED;
				82	default:
				83	return error;
				84	}
				85	}
				86
				87	/*
				88	* Complain about unfixable problems in the filesystem. We don't log
				89	* corruptions when IFLAG_REPAIR wasn't set on the assumption that the driver
				90	* program is xfs_scrub, which will call back with IFLAG_REPAIR set if the
				91	* administrator isn't running xfs_scrub in no-repairs mode.
				92	*
				93	* Use this helper function because _ratelimited silently declares a static
				94	* structure to track rate limiting information.
				95	*/
				96	void
				97	xrep_failure(
				98	struct xfs_mount *mp)
				99	{
				100	xfs_alert_ratelimited(mp,
				101	"Corruption not fixed during online repair. Unmount and run xfs_repair.");
				102	}
				103
				104	/*
				105	* Repair probe -- userspace uses this to probe if we're willing to repair a
				106	* given mountpoint.
				107	*/
				108	int
				109	xrep_probe(
				110	struct xfs_scrub *sc)
				111	{
				112	int error = 0;
				113
				114	if (xchk_should_terminate(sc, &error))
				115	return error;
				116
				117	return 0;
				118	}
				119
				120	/*
				121	* Roll a transaction, keeping the AG headers locked and reinitializing
				122	* the btree cursors.
				123	*/
				124	int
				125	xrep_roll_ag_trans(
				126	struct xfs_scrub *sc)
				127	{
				128	int error;
				129
				130	/* Keep the AG header buffers locked so we can keep going. */
				131	if (sc->sa.agi_bp)
				132	xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
				133	if (sc->sa.agf_bp)
				134	xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
				135	if (sc->sa.agfl_bp)
				136	xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
				137
				138	/* Roll the transaction. */
				139	error = xfs_trans_roll(&sc->tp);
				140	if (error)
				141	goto out_release;
				142
				143	/* Join AG headers to the new transaction. */
				144	if (sc->sa.agi_bp)
				145	xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
				146	if (sc->sa.agf_bp)
				147	xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
				148	if (sc->sa.agfl_bp)
				149	xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
				150
				151	return 0;
				152
				153	out_release:
				154	/*
				155	* Rolling failed, so release the hold on the buffers. The
				156	* buffers will be released during teardown on our way out
				157	* of the kernel.
				158	*/
				159	if (sc->sa.agi_bp)
				160	xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
				161	if (sc->sa.agf_bp)
				162	xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
				163	if (sc->sa.agfl_bp)
				164	xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
				165
				166	return error;
				167	}
				168
				169	/*
				170	* Does the given AG have enough space to rebuild a btree? Neither AG
				171	* reservation can be critical, and we must have enough space (factoring
				172	* in AG reservations) to construct a whole btree.
				173	*/
				174	bool
				175	xrep_ag_has_space(
				176	struct xfs_perag *pag,
				177	xfs_extlen_t nr_blocks,
				178	enum xfs_ag_resv_type type)
				179	{
				180	return !xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) &&
				181	!xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA) &&
				182	pag->pagf_freeblks > xfs_ag_resv_needed(pag, type) + nr_blocks;
				183	}
				184
				185	/*
				186	* Figure out how many blocks to reserve for an AG repair. We calculate the
				187	* worst case estimate for the number of blocks we'd need to rebuild one of
				188	* any type of per-AG btree.
				189	*/
				190	xfs_extlen_t
				191	xrep_calc_ag_resblks(
				192	struct xfs_scrub *sc)
				193	{
				194	struct xfs_mount *mp = sc->mp;
				195	struct xfs_scrub_metadata *sm = sc->sm;
				196	struct xfs_perag *pag;
				197	struct xfs_buf *bp;
				198	xfs_agino_t icount = NULLAGINO;
				199	xfs_extlen_t aglen = NULLAGBLOCK;
				200	xfs_extlen_t usedlen;
				201	xfs_extlen_t freelen;
				202	xfs_extlen_t bnobt_sz;
				203	xfs_extlen_t inobt_sz;
				204	xfs_extlen_t rmapbt_sz;
				205	xfs_extlen_t refcbt_sz;
				206	int error;
				207
				208	if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
				209	return 0;
				210
				211	pag = xfs_perag_get(mp, sm->sm_agno);
				212	if (pag->pagi_init) {
				213	/* Use in-core icount if possible. */
				214	icount = pag->pagi_count;
				215	} else {
				216	/* Try to get the actual counters from disk. */
				217	error = xfs_ialloc_read_agi(mp, NULL, sm->sm_agno, &bp);
				218	if (!error) {
				219	icount = pag->pagi_count;
				220	xfs_buf_relse(bp);
				221	}
				222	}
				223
				224	/* Now grab the block counters from the AGF. */
				225	error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp);
				226	if (!error) {
				227	aglen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_length);
				228	freelen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_freeblks);
				229	usedlen = aglen - freelen;
				230	xfs_buf_relse(bp);
				231	}
				232	xfs_perag_put(pag);
				233
				234	/* If the icount is impossible, make some worst-case assumptions. */
				235	if (icount == NULLAGINO \|\|
				236	!xfs_verify_agino(mp, sm->sm_agno, icount)) {
				237	xfs_agino_t first, last;
				238
				239	xfs_agino_range(mp, sm->sm_agno, &first, &last);
				240	icount = last - first + 1;
				241	}
				242
				243	/* If the block counts are impossible, make worst-case assumptions. */
				244	if (aglen == NULLAGBLOCK \|\|
				245	aglen != xfs_ag_block_count(mp, sm->sm_agno) \|\|
				246	freelen >= aglen) {
				247	aglen = xfs_ag_block_count(mp, sm->sm_agno);
				248	freelen = aglen;
				249	usedlen = aglen;
				250	}
				251
				252	trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
				253	freelen, usedlen);
				254
				255	/*
				256	* Figure out how many blocks we'd need worst case to rebuild
				257	* each type of btree. Note that we can only rebuild the
				258	* bnobt/cntbt or inobt/finobt as pairs.
				259	*/
				260	bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen);
				261	if (xfs_sb_version_hassparseinodes(&mp->m_sb))
				262	inobt_sz = xfs_iallocbt_calc_size(mp, icount /
				263	XFS_INODES_PER_HOLEMASK_BIT);
				264	else
				265	inobt_sz = xfs_iallocbt_calc_size(mp, icount /
				266	XFS_INODES_PER_CHUNK);
				267	if (xfs_sb_version_hasfinobt(&mp->m_sb))
				268	inobt_sz *= 2;
				269	if (xfs_sb_version_hasreflink(&mp->m_sb))
				270	refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen);
				271	else
				272	refcbt_sz = 0;
				273	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
				274	/*
				275	* Guess how many blocks we need to rebuild the rmapbt.
				276	* For non-reflink filesystems we can't have more records than
				277	* used blocks. However, with reflink it's possible to have
				278	* more than one rmap record per AG block. We don't know how
				279	* many rmaps there could be in the AG, so we start off with
				280	* what we hope is an generous over-estimation.
				281	*/
				282	if (xfs_sb_version_hasreflink(&mp->m_sb))
				283	rmapbt_sz = xfs_rmapbt_calc_size(mp,
				284	(unsigned long long)aglen * 2);
				285	else
				286	rmapbt_sz = xfs_rmapbt_calc_size(mp, usedlen);
				287	} else {
				288	rmapbt_sz = 0;
				289	}
				290
				291	trace_xrep_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
				292	inobt_sz, rmapbt_sz, refcbt_sz);
				293
				294	return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
				295	}
				296
				297	/* Allocate a block in an AG. */
				298	int
				299	xrep_alloc_ag_block(
				300	struct xfs_scrub *sc,
				301	struct xfs_owner_info *oinfo,
				302	xfs_fsblock_t *fsbno,
				303	enum xfs_ag_resv_type resv)
				304	{
				305	struct xfs_alloc_arg args = {0};
				306	xfs_agblock_t bno;
				307	int error;
				308
				309	switch (resv) {
				310	case XFS_AG_RESV_AGFL:
				311	case XFS_AG_RESV_RMAPBT:
				312	error = xfs_alloc_get_freelist(sc->tp, sc->sa.agf_bp, &bno, 1);
				313	if (error)
				314	return error;
				315	if (bno == NULLAGBLOCK)
				316	return -ENOSPC;
				317	xfs_extent_busy_reuse(sc->mp, sc->sa.agno, bno,
				318	1, false);
				319	*fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, bno);
				320	if (resv == XFS_AG_RESV_RMAPBT)
				321	xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.agno);
				322	return 0;
				323	default:
				324	break;
				325	}
				326
				327	args.tp = sc->tp;
				328	args.mp = sc->mp;
				329	args.oinfo = *oinfo;
				330	args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.agno, 0);
				331	args.minlen = 1;
				332	args.maxlen = 1;
				333	args.prod = 1;
				334	args.type = XFS_ALLOCTYPE_THIS_AG;
				335	args.resv = resv;
				336
				337	error = xfs_alloc_vextent(&args);
				338	if (error)
				339	return error;
				340	if (args.fsbno == NULLFSBLOCK)
				341	return -ENOSPC;
				342	ASSERT(args.len == 1);
				343	*fsbno = args.fsbno;
				344
				345	return 0;
				346	}
				347
				348	/* Initialize a new AG btree root block with zero entries. */
				349	int
				350	xrep_init_btblock(
				351	struct xfs_scrub *sc,
				352	xfs_fsblock_t fsb,
				353	struct xfs_buf **bpp,
				354	xfs_btnum_t btnum,
				355	const struct xfs_buf_ops *ops)
				356	{
				357	struct xfs_trans *tp = sc->tp;
				358	struct xfs_mount *mp = sc->mp;
				359	struct xfs_buf *bp;
				360
				361	trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
				362	XFS_FSB_TO_AGBNO(mp, fsb), btnum);
				363
				364	ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
				365	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
				366	XFS_FSB_TO_BB(mp, 1), 0);
				367	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
				368	xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0);
				369	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
				370	xfs_trans_log_buf(tp, bp, 0, bp->b_length);
				371	bp->b_ops = ops;
				372	*bpp = bp;
				373
				374	return 0;
				375	}
				376
				377	/*
				378	* Reconstructing per-AG Btrees
				379	*
				380	* When a space btree is corrupt, we don't bother trying to fix it. Instead,
				381	* we scan secondary space metadata to derive the records that should be in
				382	* the damaged btree, initialize a fresh btree root, and insert the records.
				383	* Note that for rebuilding the rmapbt we scan all the primary data to
				384	* generate the new records.
				385	*
				386	* However, that leaves the matter of removing all the metadata describing the
				387	* old broken structure. For primary metadata we use the rmap data to collect
				388	* every extent with a matching rmap owner (bitmap); we then iterate all other
				389	* metadata structures with the same rmap owner to collect the extents that
				390	* cannot be removed (sublist). We then subtract sublist from bitmap to
				391	* derive the blocks that were used by the old btree. These blocks can be
				392	* reaped.
				393	*
				394	* For rmapbt reconstructions we must use different tactics for extent
				395	* collection. First we iterate all primary metadata (this excludes the old
				396	* rmapbt, obviously) to generate new rmap records. The gaps in the rmap
				397	* records are collected as bitmap. The bnobt records are collected as
				398	* sublist. As with the other btrees we subtract sublist from bitmap, and the
				399	* result (since the rmapbt lives in the free space) is the set of blocks
				400	* from the old rmapbt.
				401	*
				402	* Disposal of Blocks from Old per-AG Btrees
				403	*
				404	* Now that we've constructed a new btree to replace the damaged one, we want
				405	* to dispose of the blocks that (we think) the old btree was using.
				406	* Previously, we used the rmapbt to collect the extents (bitmap) with the
				407	* rmap owner corresponding to the tree we rebuilt, collected extents for any
				408	* blocks with the same rmap owner that are owned by another data structure
				409	* (sublist), and subtracted sublist from bitmap. In theory the extents
				410	* remaining in bitmap are the old btree's blocks.
				411	*
				412	* Unfortunately, it's possible that the btree was crosslinked with other
				413	* blocks on disk. The rmap data can tell us if there are multiple owners, so
				414	* if the rmapbt says there is an owner of this block other than @oinfo, then
				415	* the block is crosslinked. Remove the reverse mapping and continue.
				416	*
				417	* If there is one rmap record, we can free the block, which removes the
				418	* reverse mapping but doesn't add the block to the free space. Our repair
				419	* strategy is to hope the other metadata objects crosslinked on this block
				420	* will be rebuilt (atop different blocks), thereby removing all the cross
				421	* links.
				422	*
				423	* If there are no rmap records at all, we also free the block. If the btree
				424	* being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
				425	* supposed to be a rmap record and everything is ok. For other btrees there
				426	* had to have been an rmap entry for the block to have ended up on @bitmap,
				427	* so if it's gone now there's something wrong and the fs will shut down.
				428	*
				429	* Note: If there are multiple rmap records with only the same rmap owner as
				430	* the btree we're trying to rebuild and the block is indeed owned by another
				431	* data structure with the same rmap owner, then the block will be in sublist
				432	* and therefore doesn't need disposal. If there are multiple rmap records
				433	* with only the same rmap owner but the block is not owned by something with
				434	* the same rmap owner, the block will be freed.
				435	*
				436	* The caller is responsible for locking the AG headers for the entire rebuild
				437	* operation so that nothing else can sneak in and change the AG state while
				438	* we're not looking. We also assume that the caller already invalidated any
				439	* buffers associated with @bitmap.
				440	*/
				441
				442	/*
				443	* Invalidate buffers for per-AG btree blocks we're dumping. This function
				444	* is not intended for use with file data repairs; we have bunmapi for that.
				445	*/
				446	int
				447	xrep_invalidate_blocks(
				448	struct xfs_scrub *sc,
				449	struct xfs_bitmap *bitmap)
				450	{
				451	struct xfs_bitmap_range *bmr;
				452	struct xfs_bitmap_range *n;
				453	struct xfs_buf *bp;
				454	xfs_fsblock_t fsbno;
				455
				456	/*
				457	* For each block in each extent, see if there's an incore buffer for
				458	* exactly that block; if so, invalidate it. The buffer cache only
				459	* lets us look for one buffer at a time, so we have to look one block
				460	* at a time. Avoid invalidating AG headers and post-EOFS blocks
				461	* because we never own those; and if we can't TRYLOCK the buffer we
				462	* assume it's owned by someone else.
				463	*/
				464	for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) {
				465	/* Skip AG headers and post-EOFS blocks */
				466	if (!xfs_verify_fsbno(sc->mp, fsbno))
				467	continue;
				468	bp = xfs_buf_incore(sc->mp->m_ddev_targp,
				469	XFS_FSB_TO_DADDR(sc->mp, fsbno),
				470	XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK);
				471	if (bp) {
				472	xfs_trans_bjoin(sc->tp, bp);
				473	xfs_trans_binval(sc->tp, bp);
				474	}
				475	}
				476
				477	return 0;
				478	}
				479
				480	/* Ensure the freelist is the correct size. */
				481	int
				482	xrep_fix_freelist(
				483	struct xfs_scrub *sc,
				484	bool can_shrink)
				485	{
				486	struct xfs_alloc_arg args = {0};
				487
				488	args.mp = sc->mp;
				489	args.tp = sc->tp;
				490	args.agno = sc->sa.agno;
				491	args.alignment = 1;
				492	args.pag = sc->sa.pag;
				493
				494	return xfs_alloc_fix_freelist(&args,
				495	can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
				496	}
				497
				498	/*
				499	* Put a block back on the AGFL.
				500	*/
				501	STATIC int
				502	xrep_put_freelist(
				503	struct xfs_scrub *sc,
				504	xfs_agblock_t agbno)
				505	{
				506	struct xfs_owner_info oinfo;
				507	int error;
				508
				509	/* Make sure there's space on the freelist. */
				510	error = xrep_fix_freelist(sc, true);
				511	if (error)
				512	return error;
				513
				514	/*
				515	* Since we're "freeing" a lost block onto the AGFL, we have to
				516	* create an rmap for the block prior to merging it or else other
				517	* parts will break.
				518	*/
				519	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
				520	error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno, agbno, 1,
				521	&oinfo);
				522	if (error)
				523	return error;
				524
				525	/* Put the block on the AGFL. */
				526	error = xfs_alloc_put_freelist(sc->tp, sc->sa.agf_bp, sc->sa.agfl_bp,
				527	agbno, 0);
				528	if (error)
				529	return error;
				530	xfs_extent_busy_insert(sc->tp, sc->sa.agno, agbno, 1,
				531	XFS_EXTENT_BUSY_SKIP_DISCARD);
				532
				533	return 0;
				534	}
				535
				536	/* Dispose of a single block. */
				537	STATIC int
				538	xrep_reap_block(
				539	struct xfs_scrub *sc,
				540	xfs_fsblock_t fsbno,
				541	struct xfs_owner_info *oinfo,
				542	enum xfs_ag_resv_type resv)
				543	{
				544	struct xfs_btree_cur *cur;
				545	struct xfs_buf *agf_bp = NULL;
				546	xfs_agnumber_t agno;
				547	xfs_agblock_t agbno;
				548	bool has_other_rmap;
				549	int error;
				550
				551	agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
				552	agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
				553
				554	/*
				555	* If we are repairing per-inode metadata, we need to read in the AGF
				556	* buffer. Otherwise, we're repairing a per-AG structure, so reuse
				557	* the AGF buffer that the setup functions already grabbed.
				558	*/
				559	if (sc->ip) {
				560	error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf_bp);
				561	if (error)
				562	return error;
				563	if (!agf_bp)
				564	return -ENOMEM;
				565	} else {
				566	agf_bp = sc->sa.agf_bp;
				567	}
				568	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf_bp, agno);
				569
				570	/* Can we find any other rmappings? */
				571	error = xfs_rmap_has_other_keys(cur, agbno, 1, oinfo, &has_other_rmap);
				572	xfs_btree_del_cursor(cur, error);
				573	if (error)
				574	goto out_free;
				575
				576	/*
				577	* If there are other rmappings, this block is cross linked and must
				578	* not be freed. Remove the reverse mapping and move on. Otherwise,
				579	* we were the only owner of the block, so free the extent, which will
				580	* also remove the rmap.
				581	*
				582	* XXX: XFS doesn't support detecting the case where a single block
				583	* metadata structure is crosslinked with a multi-block structure
				584	* because the buffer cache doesn't detect aliasing problems, so we
				585	* can't fix 100% of crosslinking problems (yet). The verifiers will
				586	* blow on writeout, the filesystem will shut down, and the admin gets
				587	* to run xfs_repair.
				588	*/
				589	if (has_other_rmap)
				590	error = xfs_rmap_free(sc->tp, agf_bp, agno, agbno, 1, oinfo);
				591	else if (resv == XFS_AG_RESV_AGFL)
				592	error = xrep_put_freelist(sc, agbno);
				593	else
				594	error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
				595	if (agf_bp != sc->sa.agf_bp)
				596	xfs_trans_brelse(sc->tp, agf_bp);
				597	if (error)
				598	return error;
				599
				600	if (sc->ip)
				601	return xfs_trans_roll_inode(&sc->tp, sc->ip);
				602	return xrep_roll_ag_trans(sc);
				603
				604	out_free:
				605	if (agf_bp != sc->sa.agf_bp)
				606	xfs_trans_brelse(sc->tp, agf_bp);
				607	return error;
				608	}
				609
				610	/* Dispose of every block of every extent in the bitmap. */
				611	int
				612	xrep_reap_extents(
				613	struct xfs_scrub *sc,
				614	struct xfs_bitmap *bitmap,
				615	struct xfs_owner_info *oinfo,
				616	enum xfs_ag_resv_type type)
				617	{
				618	struct xfs_bitmap_range *bmr;
				619	struct xfs_bitmap_range *n;
				620	xfs_fsblock_t fsbno;
				621	int error = 0;
				622
				623	ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
				624
				625	for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) {
				626	ASSERT(sc->ip != NULL \|\|
				627	XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.agno);
				628	trace_xrep_dispose_btree_extent(sc->mp,
				629	XFS_FSB_TO_AGNO(sc->mp, fsbno),
				630	XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);
				631
				632	error = xrep_reap_block(sc, fsbno, oinfo, type);
				633	if (error)
				634	goto out;
				635	}
				636
				637	out:
				638	xfs_bitmap_destroy(bitmap);
				639	return error;
				640	}
				641
				642	/*
				643	* Finding per-AG Btree Roots for AGF/AGI Reconstruction
				644	*
				645	* If the AGF or AGI become slightly corrupted, it may be necessary to rebuild
				646	* the AG headers by using the rmap data to rummage through the AG looking for
				647	* btree roots. This is not guaranteed to work if the AG is heavily damaged
				648	* or the rmap data are corrupt.
				649	*
				650	* Callers of xrep_find_ag_btree_roots must lock the AGF and AGFL
				651	* buffers if the AGF is being rebuilt; or the AGF and AGI buffers if the
				652	* AGI is being rebuilt. It must maintain these locks until it's safe for
				653	* other threads to change the btrees' shapes. The caller provides
				654	* information about the btrees to look for by passing in an array of
				655	* xrep_find_ag_btree with the (rmap owner, buf_ops, magic) fields set.
				656	* The (root, height) fields will be set on return if anything is found. The
				657	* last element of the array should have a NULL buf_ops to mark the end of the
				658	* array.
				659	*
				660	* For every rmapbt record matching any of the rmap owners in btree_info,
				661	* read each block referenced by the rmap record. If the block is a btree
				662	* block from this filesystem matching any of the magic numbers and has a
				663	* level higher than what we've already seen, remember the block and the
				664	* height of the tree required to have such a block. When the call completes,
				665	* we return the highest block we've found for each btree description; those
				666	* should be the roots.
				667	*/
				668
				669	struct xrep_findroot {
				670	struct xfs_scrub *sc;
				671	struct xfs_buf *agfl_bp;
				672	struct xfs_agf *agf;
				673	struct xrep_find_ag_btree *btree_info;
				674	};
				675
				676	/* See if our block is in the AGFL. */
				677	STATIC int
				678	xrep_findroot_agfl_walk(
				679	struct xfs_mount *mp,
				680	xfs_agblock_t bno,
				681	void *priv)
				682	{
				683	xfs_agblock_t *agbno = priv;
				684
				685	return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0;
				686	}
				687
				688	/* Does this block match the btree information passed in? */
				689	STATIC int
				690	xrep_findroot_block(
				691	struct xrep_findroot *ri,
				692	struct xrep_find_ag_btree *fab,
				693	uint64_t owner,
				694	xfs_agblock_t agbno,
				695	bool *found_it)
				696	{
				697	struct xfs_mount *mp = ri->sc->mp;
				698	struct xfs_buf *bp;
				699	struct xfs_btree_block *btblock;
				700	xfs_daddr_t daddr;
				701	int error;
				702
				703	daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno);
				704
				705	/*
				706	* Blocks in the AGFL have stale contents that might just happen to
				707	* have a matching magic and uuid. We don't want to pull these blocks
				708	* in as part of a tree root, so we have to filter out the AGFL stuff
				709	* here. If the AGFL looks insane we'll just refuse to repair.
				710	*/
				711	if (owner == XFS_RMAP_OWN_AG) {
				712	error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
				713	xrep_findroot_agfl_walk, &agbno);
				714	if (error == XFS_BTREE_QUERY_RANGE_ABORT)
				715	return 0;
				716	if (error)
				717	return error;
				718	}
				719
				720	error = xfs_trans_read_buf(mp, ri->sc->tp, mp->m_ddev_targp, daddr,
				721	mp->m_bsize, 0, &bp, NULL);
				722	if (error)
				723	return error;
				724
				725	/*
				726	* Does this look like a block matching our fs and higher than any
				727	* other block we've found so far? If so, reattach buffer verifiers
				728	* so the AIL won't complain if the buffer is also dirty.
				729	*/
				730	btblock = XFS_BUF_TO_BLOCK(bp);
				731	if (be32_to_cpu(btblock->bb_magic) != fab->magic)
				732	goto out;
				733	if (xfs_sb_version_hascrc(&mp->m_sb) &&
				734	!uuid_equal(&btblock->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
				735	goto out;
				736	bp->b_ops = fab->buf_ops;
				737
				738	/* Ignore this block if it's lower in the tree than we've seen. */
				739	if (fab->root != NULLAGBLOCK &&
				740	xfs_btree_get_level(btblock) < fab->height)
				741	goto out;
				742
				743	/* Make sure we pass the verifiers. */
				744	bp->b_ops->verify_read(bp);
				745	if (bp->b_error)
				746	goto out;
				747	fab->root = agbno;
				748	fab->height = xfs_btree_get_level(btblock) + 1;
				749	*found_it = true;
				750
				751	trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno,
				752	be32_to_cpu(btblock->bb_magic), fab->height - 1);
				753	out:
				754	xfs_trans_brelse(ri->sc->tp, bp);
				755	return error;
				756	}
				757
				758	/*
				759	* Do any of the blocks in this rmap record match one of the btrees we're
				760	* looking for?
				761	*/
				762	STATIC int
				763	xrep_findroot_rmap(
				764	struct xfs_btree_cur *cur,
				765	struct xfs_rmap_irec *rec,
				766	void *priv)
				767	{
				768	struct xrep_findroot *ri = priv;
				769	struct xrep_find_ag_btree *fab;
				770	xfs_agblock_t b;
				771	bool found_it;
				772	int error = 0;
				773
				774	/* Ignore anything that isn't AG metadata. */
				775	if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner))
				776	return 0;
				777
				778	/* Otherwise scan each block + btree type. */
				779	for (b = 0; b < rec->rm_blockcount; b++) {
				780	found_it = false;
				781	for (fab = ri->btree_info; fab->buf_ops; fab++) {
				782	if (rec->rm_owner != fab->rmap_owner)
				783	continue;
				784	error = xrep_findroot_block(ri, fab,
				785	rec->rm_owner, rec->rm_startblock + b,
				786	&found_it);
				787	if (error)
				788	return error;
				789	if (found_it)
				790	break;
				791	}
				792	}
				793
				794	return 0;
				795	}
				796
				797	/* Find the roots of the per-AG btrees described in btree_info. */
				798	int
				799	xrep_find_ag_btree_roots(
				800	struct xfs_scrub *sc,
				801	struct xfs_buf *agf_bp,
				802	struct xrep_find_ag_btree *btree_info,
				803	struct xfs_buf *agfl_bp)
				804	{
				805	struct xfs_mount *mp = sc->mp;
				806	struct xrep_findroot ri;
				807	struct xrep_find_ag_btree *fab;
				808	struct xfs_btree_cur *cur;
				809	int error;
				810
				811	ASSERT(xfs_buf_islocked(agf_bp));
				812	ASSERT(agfl_bp == NULL \|\| xfs_buf_islocked(agfl_bp));
				813
				814	ri.sc = sc;
				815	ri.btree_info = btree_info;
				816	ri.agf = XFS_BUF_TO_AGF(agf_bp);
				817	ri.agfl_bp = agfl_bp;
				818	for (fab = btree_info; fab->buf_ops; fab++) {
				819	ASSERT(agfl_bp \|\| fab->rmap_owner != XFS_RMAP_OWN_AG);
				820	ASSERT(XFS_RMAP_NON_INODE_OWNER(fab->rmap_owner));
				821	fab->root = NULLAGBLOCK;
				822	fab->height = 0;
				823	}
				824
				825	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
				826	error = xfs_rmap_query_all(cur, xrep_findroot_rmap, &ri);
				827	xfs_btree_del_cursor(cur, error);
				828
				829	return error;
				830	}
				831
				832	/* Force a quotacheck the next time we mount. */
				833	void
				834	xrep_force_quotacheck(
				835	struct xfs_scrub *sc,
				836	uint dqtype)
				837	{
				838	uint flag;
				839
				840	flag = xfs_quota_chkd_flag(dqtype);
				841	if (!(flag & sc->mp->m_qflags))
				842	return;
				843
				844	sc->mp->m_qflags &= ~flag;
				845	spin_lock(&sc->mp->m_sb_lock);
				846	sc->mp->m_sb.sb_qflags &= ~flag;
				847	spin_unlock(&sc->mp->m_sb_lock);
				848	xfs_log_sb(sc->tp);
				849	}
				850
				851	/*
				852	* Attach dquots to this inode, or schedule quotacheck to fix them.
				853	*
				854	* This function ensures that the appropriate dquots are attached to an inode.
				855	* We cannot allow the dquot code to allocate an on-disk dquot block here
				856	* because we're already in transaction context with the inode locked. The
				857	* on-disk dquot should already exist anyway. If the quota code signals
				858	* corruption or missing quota information, schedule quotacheck, which will
				859	* repair corruptions in the quota metadata.
				860	*/
				861	int
				862	xrep_ino_dqattach(
				863	struct xfs_scrub *sc)
				864	{
				865	int error;
				866
				867	error = xfs_qm_dqattach_locked(sc->ip, false);
				868	switch (error) {
				869	case -EFSBADCRC:
				870	case -EFSCORRUPTED:
				871	case -ENOENT:
				872	xfs_err_ratelimited(sc->mp,
				873	"inode %llu repair encountered quota error %d, quotacheck forced.",
				874	(unsigned long long)sc->ip->i_ino, error);
				875	if (XFS_IS_UQUOTA_ON(sc->mp) && !sc->ip->i_udquot)
				876	xrep_force_quotacheck(sc, XFS_DQ_USER);
				877	if (XFS_IS_GQUOTA_ON(sc->mp) && !sc->ip->i_gdquot)
				878	xrep_force_quotacheck(sc, XFS_DQ_GROUP);
				879	if (XFS_IS_PQUOTA_ON(sc->mp) && !sc->ip->i_pdquot)
				880	xrep_force_quotacheck(sc, XFS_DQ_PROJ);
				881	/* fall through */
				882	case -ESRCH:
				883	error = 0;
				884	break;
				885	default:
				886	break;
				887	}
				888
				889	return error;
				890	}