Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 3068a93..ba0f747 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -9,20 +9,13 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
-#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Superblock */
@@ -32,7 +25,6 @@
struct xfs_scrub *sc,
struct xfs_buf *bp)
{
- struct xfs_owner_info oinfo;
struct xfs_mount *mp = sc->mp;
xfs_agnumber_t agno = sc->sm->sm_agno;
xfs_agblock_t agbno;
@@ -49,8 +41,7 @@
xchk_xref_is_used_space(sc, agbno, 1);
xchk_xref_is_not_inode_chunk(sc, agbno, 1);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
- xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_FS);
xchk_xref_is_not_shared(sc, agbno, 1);
/* scrub teardown will take care of sc->sa for us */
@@ -401,7 +392,7 @@
if (!xchk_should_check_xref(sc, &error, &sc->sa.cnt_cur))
return;
if (!have) {
- if (agf->agf_freeblks != be32_to_cpu(0))
+ if (agf->agf_freeblks != cpu_to_be32(0))
xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
return;
}
@@ -484,7 +475,6 @@
xchk_agf_xref(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
struct xfs_mount *mp = sc->mp;
xfs_agblock_t agbno;
int error;
@@ -502,8 +492,7 @@
xchk_agf_xref_freeblks(sc);
xchk_agf_xref_cntbt(sc);
xchk_xref_is_not_inode_chunk(sc, agbno, 1);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
- xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_FS);
xchk_agf_xref_btreeblks(sc);
xchk_xref_is_not_shared(sc, agbno, 1);
xchk_agf_xref_refcblks(sc);
@@ -518,6 +507,7 @@
{
struct xfs_mount *mp = sc->mp;
struct xfs_agf *agf;
+ struct xfs_perag *pag;
xfs_agnumber_t agno;
xfs_agblock_t agbno;
xfs_agblock_t eoag;
@@ -590,6 +580,16 @@
if (agfl_count != 0 && fl_count != agfl_count)
xchk_block_set_corrupt(sc, sc->sa.agf_bp);
+ /* Do the incore counters match? */
+ pag = xfs_perag_get(mp, agno);
+ if (pag->pagf_freeblks != be32_to_cpu(agf->agf_freeblks))
+ xchk_block_set_corrupt(sc, sc->sa.agf_bp);
+ if (pag->pagf_flcount != be32_to_cpu(agf->agf_flcount))
+ xchk_block_set_corrupt(sc, sc->sa.agf_bp);
+ if (pag->pagf_btreeblks != be32_to_cpu(agf->agf_btreeblks))
+ xchk_block_set_corrupt(sc, sc->sa.agf_bp);
+ xfs_perag_put(pag);
+
xchk_agf_xref(sc);
out:
return error;
@@ -598,7 +598,6 @@
/* AGFL */
struct xchk_agfl_info {
- struct xfs_owner_info oinfo;
unsigned int sz_entries;
unsigned int nr_entries;
xfs_agblock_t *entries;
@@ -609,15 +608,14 @@
STATIC void
xchk_agfl_block_xref(
struct xfs_scrub *sc,
- xfs_agblock_t agbno,
- struct xfs_owner_info *oinfo)
+ xfs_agblock_t agbno)
{
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return;
xchk_xref_is_used_space(sc, agbno, 1);
xchk_xref_is_not_inode_chunk(sc, agbno, 1);
- xchk_xref_is_owned_by(sc, agbno, 1, oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_AG);
xchk_xref_is_not_shared(sc, agbno, 1);
}
@@ -638,10 +636,10 @@
else
xchk_block_set_corrupt(sc, sc->sa.agfl_bp);
- xchk_agfl_block_xref(sc, agbno, priv);
+ xchk_agfl_block_xref(sc, agbno);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return -ECANCELED;
return 0;
}
@@ -662,7 +660,6 @@
xchk_agfl_xref(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
struct xfs_mount *mp = sc->mp;
xfs_agblock_t agbno;
int error;
@@ -678,8 +675,7 @@
xchk_xref_is_used_space(sc, agbno, 1);
xchk_xref_is_not_inode_chunk(sc, agbno, 1);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
- xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_FS);
xchk_xref_is_not_shared(sc, agbno, 1);
/*
@@ -732,10 +728,9 @@
}
/* Check the blocks in the AGFL. */
- xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG);
error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
sc->sa.agfl_bp, xchk_agfl_block, &sai);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+ if (error == -ECANCELED) {
error = 0;
goto out_free;
}
@@ -791,7 +786,6 @@
xchk_agi_xref(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
struct xfs_mount *mp = sc->mp;
xfs_agblock_t agbno;
int error;
@@ -808,8 +802,7 @@
xchk_xref_is_used_space(sc, agbno, 1);
xchk_xref_is_not_inode_chunk(sc, agbno, 1);
xchk_agi_xref_icounts(sc);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
- xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_FS);
xchk_xref_is_not_shared(sc, agbno, 1);
/* scrub teardown will take care of sc->sa for us */
@@ -822,6 +815,7 @@
{
struct xfs_mount *mp = sc->mp;
struct xfs_agi *agi;
+ struct xfs_perag *pag;
xfs_agnumber_t agno;
xfs_agblock_t agbno;
xfs_agblock_t eoag;
@@ -875,25 +869,31 @@
/* Check inode pointers */
agino = be32_to_cpu(agi->agi_newino);
- if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino))
+ if (!xfs_verify_agino_or_null(mp, agno, agino))
xchk_block_set_corrupt(sc, sc->sa.agi_bp);
agino = be32_to_cpu(agi->agi_dirino);
- if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino))
+ if (!xfs_verify_agino_or_null(mp, agno, agino))
xchk_block_set_corrupt(sc, sc->sa.agi_bp);
/* Check unlinked inode buckets */
for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
agino = be32_to_cpu(agi->agi_unlinked[i]);
- if (agino == NULLAGINO)
- continue;
- if (!xfs_verify_agino(mp, agno, agino))
+ if (!xfs_verify_agino_or_null(mp, agno, agino))
xchk_block_set_corrupt(sc, sc->sa.agi_bp);
}
if (agi->agi_pad32 != cpu_to_be32(0))
xchk_block_set_corrupt(sc, sc->sa.agi_bp);
+ /* Do the incore counters match? */
+ pag = xfs_perag_get(mp, agno);
+ if (pag->pagi_count != be32_to_cpu(agi->agi_count))
+ xchk_block_set_corrupt(sc, sc->sa.agi_bp);
+ if (pag->pagi_freecount != be32_to_cpu(agi->agi_freecount))
+ xchk_block_set_corrupt(sc, sc->sa.agi_bp);
+ xfs_perag_put(pag);
+
xchk_agi_xref(sc);
out:
return error;
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index f7568a4..7a1a38b 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -9,22 +9,17 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
-#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -341,23 +336,19 @@
struct xrep_find_ag_btree fab[XREP_AGF_MAX] = {
[XREP_AGF_BNOBT] = {
.rmap_owner = XFS_RMAP_OWN_AG,
- .buf_ops = &xfs_allocbt_buf_ops,
- .magic = XFS_ABTB_CRC_MAGIC,
+ .buf_ops = &xfs_bnobt_buf_ops,
},
[XREP_AGF_CNTBT] = {
.rmap_owner = XFS_RMAP_OWN_AG,
- .buf_ops = &xfs_allocbt_buf_ops,
- .magic = XFS_ABTC_CRC_MAGIC,
+ .buf_ops = &xfs_cntbt_buf_ops,
},
[XREP_AGF_RMAPBT] = {
.rmap_owner = XFS_RMAP_OWN_AG,
.buf_ops = &xfs_rmapbt_buf_ops,
- .magic = XFS_RMAP_CRC_MAGIC,
},
[XREP_AGF_REFCOUNTBT] = {
.rmap_owner = XFS_RMAP_OWN_REFC,
.buf_ops = &xfs_refcountbt_buf_ops,
- .magic = XFS_REFC_CRC_MAGIC,
},
[XREP_AGF_END] = {
.buf_ops = NULL,
@@ -646,7 +637,6 @@
xrep_agfl(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
struct xfs_bitmap agfl_extents;
struct xfs_mount *mp = sc->mp;
struct xfs_buf *agf_bp;
@@ -708,8 +698,8 @@
goto err;
/* Dump any AGFL overflow. */
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
- return xrep_reap_extents(sc, &agfl_extents, &oinfo, XFS_AG_RESV_AGFL);
+ return xrep_reap_extents(sc, &agfl_extents, &XFS_RMAP_OINFO_AG,
+ XFS_AG_RESV_AGFL);
err:
xfs_bitmap_destroy(&agfl_extents);
return error;
@@ -876,12 +866,10 @@
[XREP_AGI_INOBT] = {
.rmap_owner = XFS_RMAP_OWN_INOBT,
.buf_ops = &xfs_inobt_buf_ops,
- .magic = XFS_IBT_CRC_MAGIC,
},
[XREP_AGI_FINOBT] = {
.rmap_owner = XFS_RMAP_OWN_INOBT,
- .buf_ops = &xfs_inobt_buf_ops,
- .magic = XFS_FIBT_CRC_MAGIC,
+ .buf_ops = &xfs_finobt_buf_ops,
},
[XREP_AGI_END] = {
.buf_ops = NULL
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 376bcb5..5533e48 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -9,19 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Set us up to scrub free space btrees.
@@ -104,7 +97,6 @@
xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
xfs_agblock_t bno;
xfs_extlen_t len;
- int error = 0;
bno = be32_to_cpu(rec->alloc.ar_startblock);
len = be32_to_cpu(rec->alloc.ar_blockcount);
@@ -116,7 +108,7 @@
xchk_allocbt_xref(bs->sc, bno, len);
- return error;
+ return 0;
}
/* Scrub the freespace btrees for some AG. */
@@ -125,12 +117,10 @@
struct xfs_scrub *sc,
xfs_btnum_t which)
{
- struct xfs_owner_info oinfo;
struct xfs_btree_cur *cur;
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur;
- return xchk_btree(sc, cur, xchk_allocbt_rec, &oinfo, NULL);
+ return xchk_btree(sc, cur, xchk_allocbt_rec, &XFS_RMAP_OINFO_AG, NULL);
}
int
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 81d5e90..0edc7f8 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -9,26 +9,62 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
-#include "xfs_dir2.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/dabtree.h"
-#include "scrub/trace.h"
+#include "scrub/attr.h"
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
+/*
+ * Allocate enough memory to hold an attr value and attr block bitmaps,
+ * reallocating the buffer if necessary. Buffer contents are not preserved
+ * across a reallocation.
+ */
+int
+xchk_setup_xattr_buf(
+ struct xfs_scrub *sc,
+ size_t value_size,
+ xfs_km_flags_t flags)
+{
+ size_t sz;
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ /*
+ * We need enough space to read an xattr value from the file or enough
+ * space to hold three copies of the xattr free space bitmap. We don't
+ * need the buffer space for both purposes at the same time.
+ */
+ sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+ sz = max_t(size_t, sz, value_size);
+
+ /*
+ * If there's already a buffer, figure out if we need to reallocate it
+ * to accommodate a larger size.
+ */
+ if (ab) {
+ if (sz <= ab->sz)
+ return 0;
+ kmem_free(ab);
+ sc->buf = NULL;
+ }
+
+ /*
+ * Don't zero the buffer upon allocation to avoid runtime overhead.
+ * All users must be careful never to read uninitialized contents.
+ */
+ ab = kmem_alloc_large(sizeof(*ab) + sz, flags);
+ if (!ab)
+ return -ENOMEM;
+
+ ab->sz = sz;
+ sc->buf = ab;
+ return 0;
+}
/* Set us up to scrub an inode's extended attributes. */
int
@@ -36,19 +72,18 @@
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
- size_t sz;
+ int error;
/*
- * Allocate the buffer without the inode lock held. We need enough
- * space to read every xattr value in the file or enough space to
- * hold three copies of the xattr free space bitmap. (Not both at
- * the same time.)
+ * We failed to get memory while checking attrs, so this time try to
+ * get all the memory we're ever going to need. Allocate the buffer
+ * without the inode lock held, which means we can sleep.
*/
- sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) *
- BITS_TO_LONGS(sc->mp->m_attr_geo->blksize));
- sc->buf = kmem_zalloc_large(sz, KM_SLEEP);
- if (!sc->buf)
- return -ENOMEM;
+ if (sc->flags & XCHK_TRY_HARDER) {
+ error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, 0);
+ if (error)
+ return error;
+ }
return xchk_setup_inode_contents(sc, ip, 0);
}
@@ -82,12 +117,36 @@
sx = container_of(context, struct xchk_xattr, context);
+ if (xchk_should_terminate(sx->sc, &error)) {
+ context->seen_enough = error;
+ return;
+ }
+
if (flags & XFS_ATTR_INCOMPLETE) {
/* Incomplete attr key, just mark the inode for preening. */
xchk_ino_set_preen(sx->sc, context->dp->i_ino);
return;
}
+ /* Does this name make sense? */
+ if (!xfs_attr_namecheck(name, namelen)) {
+ xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno);
+ return;
+ }
+
+ /*
+ * Try to allocate enough memory to extract the attr value. If that
+ * doesn't work, we overload the seen_enough variable to convey
+ * the error message back to the main scrub function.
+ */
+ error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL);
+ if (error == -ENOMEM)
+ error = -EDEADLOCK;
+ if (error) {
+ context->seen_enough = error;
+ return;
+ }
+
args.flags = ATTR_KERNOTIME;
if (flags & XFS_ATTR_ROOT)
args.flags |= ATTR_ROOT;
@@ -100,12 +159,10 @@
args.namelen = namelen;
args.hashval = xfs_da_hashname(args.name, args.namelen);
args.trans = context->tp;
- args.value = sx->sc->buf;
- args.valuelen = XATTR_SIZE_MAX;
+ args.value = xchk_xattr_valuebuf(sx->sc);
+ args.valuelen = valuelen;
error = xfs_attr_get_ilocked(context->dp, &args);
- if (error == -EEXIST)
- error = 0;
if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno,
&error))
goto fail_xref;
@@ -159,13 +216,12 @@
unsigned long *map,
struct xfs_attr3_icleaf_hdr *leafhdr)
{
- unsigned long *freemap;
- unsigned long *dstmap;
+ unsigned long *freemap = xchk_xattr_freemap(sc);
+ unsigned long *dstmap = xchk_xattr_dstmap(sc);
unsigned int mapsize = sc->mp->m_attr_geo->blksize;
int i;
/* Construct bitmap of freemap contents. */
- freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize);
bitmap_zero(freemap, mapsize);
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
if (!xchk_xattr_set_map(sc, freemap,
@@ -175,7 +231,6 @@
}
/* Look for bits that are set in freemap and are marked in use. */
- dstmap = freemap + BITS_TO_LONGS(mapsize);
return bitmap_and(dstmap, freemap, map, mapsize) == 0;
}
@@ -190,13 +245,13 @@
char *buf_end,
struct xfs_attr_leafblock *leaf,
struct xfs_attr3_icleaf_hdr *leafhdr,
- unsigned long *usedmap,
struct xfs_attr_leaf_entry *ent,
int idx,
unsigned int *usedbytes,
__u32 *last_hashval)
{
struct xfs_mount *mp = ds->state->mp;
+ unsigned long *usedmap = xchk_xattr_usedmap(ds->sc);
char *name_end;
struct xfs_attr_leaf_name_local *lentry;
struct xfs_attr_leaf_name_remote *rentry;
@@ -256,16 +311,26 @@
struct xfs_attr_leafblock *leaf = bp->b_addr;
struct xfs_attr_leaf_entry *ent;
struct xfs_attr_leaf_entry *entries;
- unsigned long *usedmap = ds->sc->buf;
+ unsigned long *usedmap;
char *buf_end;
size_t off;
__u32 last_hashval = 0;
unsigned int usedbytes = 0;
unsigned int hdrsize;
int i;
+ int error;
if (*last_checked == blk->blkno)
return 0;
+
+ /* Allocate memory for block usage checking. */
+ error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL);
+ if (error == -ENOMEM)
+ return -EDEADLOCK;
+ if (error)
+ return error;
+ usedmap = xchk_xattr_usedmap(ds->sc);
+
*last_checked = blk->blkno;
bitmap_zero(usedmap, mp->m_attr_geo->blksize);
@@ -313,7 +378,7 @@
/* Check the entry and nameval. */
xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
- usedmap, ent, i, &usedbytes, &last_hashval);
+ ent, i, &usedbytes, &last_hashval);
if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out;
@@ -453,6 +518,10 @@
error = xfs_attr_list_int_ilocked(&sx.context);
if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
goto out;
+
+ /* Did our listent function try to return any errors? */
+ if (sx.context.seen_enough < 0)
+ error = sx.context.seen_enough;
out:
return error;
}
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h
new file mode 100644
index 0000000..13a1d2e
--- /dev/null
+++ b/fs/xfs/scrub/attr.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_SCRUB_ATTR_H__
+#define __XFS_SCRUB_ATTR_H__
+
+/*
+ * Temporary storage for online scrub and repair of extended attributes.
+ */
+struct xchk_xattr_buf {
+ /* Size of @buf, in bytes. */
+ size_t sz;
+
+ /*
+ * Memory buffer -- either used for extracting attr values while
+ * walking the attributes; or for computing attr block bitmaps when
+ * checking the attribute tree.
+ *
+ * Each bitmap contains enough bits to track every byte in an attr
+ * block (rounded up to the size of an unsigned long). The attr block
+ * used space bitmap starts at the beginning of the buffer; the free
+ * space bitmap follows immediately after; and we have a third buffer
+ * for storing intermediate bitmap results.
+ */
+ uint8_t buf[0];
+};
+
+/* A place to store attribute values. */
+static inline uint8_t *
+xchk_xattr_valuebuf(
+ struct xfs_scrub *sc)
+{
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ return ab->buf;
+}
+
+/* A bitmap of space usage computed by walking an attr leaf block. */
+static inline unsigned long *
+xchk_xattr_usedmap(
+ struct xfs_scrub *sc)
+{
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ return (unsigned long *)ab->buf;
+}
+
+/* A bitmap of free space computed by walking attr leaf block free info. */
+static inline unsigned long *
+xchk_xattr_freemap(
+ struct xfs_scrub *sc)
+{
+ return xchk_xattr_usedmap(sc) +
+ BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+}
+
+/* A bitmap used to hold temporary results. */
+static inline unsigned long *
+xchk_xattr_dstmap(
+ struct xfs_scrub *sc)
+{
+ return xchk_xattr_freemap(sc) +
+ BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+}
+
+int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size,
+ xfs_km_flags_t flags);
+
+#endif /* __XFS_SCRUB_ATTR_H__ */
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index fdadc9e..3d47d11 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -10,11 +10,6 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
-#include "scrub/xfs_scrub.h"
-#include "scrub/scrub.h"
-#include "scrub/common.h"
-#include "scrub/trace.h"
-#include "scrub/repair.h"
#include "scrub/bitmap.h"
/*
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index e1d11f3..fa6ea64 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -9,27 +9,19 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/* Set us up with an inode's bmap. */
int
@@ -83,6 +75,7 @@
xfs_fileoff_t lastoff;
bool is_rt;
bool is_shared;
+ bool was_loaded;
int whichfork;
};
@@ -221,25 +214,20 @@
/* Cross-reference a single rtdev extent record. */
STATIC void
-xchk_bmap_rt_extent_xref(
- struct xchk_bmap_info *info,
+xchk_bmap_rt_iextent_xref(
struct xfs_inode *ip,
- struct xfs_btree_cur *cur,
+ struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
- if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return;
-
xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
irec->br_blockcount);
}
/* Cross-reference a single datadev extent record. */
STATIC void
-xchk_bmap_extent_xref(
- struct xchk_bmap_info *info,
+xchk_bmap_iextent_xref(
struct xfs_inode *ip,
- struct xfs_btree_cur *cur,
+ struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
struct xfs_mount *mp = info->sc->mp;
@@ -248,9 +236,6 @@
xfs_extlen_t len;
int error;
- if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return;
-
agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
len = irec->br_blockcount;
@@ -281,22 +266,42 @@
xchk_ag_free(info->sc, &info->sc->sa);
}
+/*
+ * Directories and attr forks should never have blocks that can't be addressed
+ * by a xfs_dablk_t.
+ */
+STATIC void
+xchk_bmap_dirattr_extent(
+ struct xfs_inode *ip,
+ struct xchk_bmap_info *info,
+ struct xfs_bmbt_irec *irec)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t off;
+
+ if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
+ return;
+
+ if (!xfs_verify_dablk(mp, irec->br_startoff))
+ xchk_fblock_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+
+ off = irec->br_startoff + irec->br_blockcount - 1;
+ if (!xfs_verify_dablk(mp, off))
+ xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
+}
+
/* Scrub a single extent record. */
STATIC int
-xchk_bmap_extent(
+xchk_bmap_iextent(
struct xfs_inode *ip,
- struct xfs_btree_cur *cur,
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
struct xfs_mount *mp = info->sc->mp;
- struct xfs_buf *bp = NULL;
xfs_filblks_t end;
int error = 0;
- if (cur)
- xfs_btree_get_block(cur, 0, &bp);
-
/*
* Check for out-of-order extents. This record could have come
* from the incore list, for which there is no ordering check.
@@ -305,6 +310,8 @@
xchk_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
+ xchk_bmap_dirattr_extent(ip, info, irec);
+
/* There should never be a "hole" extent in either extent list. */
if (irec->br_startblock == HOLESTARTBLOCK)
xchk_fblock_set_corrupt(info->sc, info->whichfork,
@@ -345,10 +352,13 @@
xchk_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
+ if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return 0;
+
if (info->is_rt)
- xchk_bmap_rt_extent_xref(info, ip, cur, irec);
+ xchk_bmap_rt_iextent_xref(ip, info, irec);
else
- xchk_bmap_extent_xref(info, ip, cur, irec);
+ xchk_bmap_iextent_xref(ip, info, irec);
info->lastoff = irec->br_startoff + irec->br_blockcount;
return error;
@@ -361,10 +371,13 @@
union xfs_btree_rec *rec)
{
struct xfs_bmbt_irec irec;
+ struct xfs_bmbt_irec iext_irec;
+ struct xfs_iext_cursor icur;
struct xchk_bmap_info *info = bs->private;
struct xfs_inode *ip = bs->cur->bc_private.b.ip;
struct xfs_buf *bp = NULL;
struct xfs_btree_block *block;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, info->whichfork);
uint64_t owner;
int i;
@@ -383,9 +396,26 @@
}
}
- /* Set up the in-core record and scrub it. */
+ /*
+ * Check that the incore extent tree contains an extent that matches
+ * this one exactly. We validate those cached bmaps later, so we don't
+ * need to check them here. If the incore extent tree was just loaded
+ * from disk by the scrubber, we assume that its contents match what's
+ * on disk (we still hold the ILOCK) and skip the equivalence check.
+ */
+ if (!info->was_loaded)
+ return 0;
+
xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
- return xchk_bmap_extent(ip, bs->cur, info, &irec);
+ if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
+ &iext_irec) ||
+ irec.br_startoff != iext_irec.br_startoff ||
+ irec.br_startblock != iext_irec.br_startblock ||
+ irec.br_blockcount != iext_irec.br_blockcount ||
+ irec.br_state != iext_irec.br_state)
+ xchk_fblock_set_corrupt(bs->sc, info->whichfork,
+ irec.br_startoff);
+ return 0;
}
/* Scan the btree records. */
@@ -396,15 +426,26 @@
struct xchk_bmap_info *info)
{
struct xfs_owner_info oinfo;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(sc->ip, whichfork);
struct xfs_mount *mp = sc->mp;
struct xfs_inode *ip = sc->ip;
struct xfs_btree_cur *cur;
int error;
+ /* Load the incore bmap cache if it's not loaded. */
+ info->was_loaded = ifp->if_flags & XFS_IFEXTENTS;
+ if (!info->was_loaded) {
+ error = xfs_iread_extents(sc->tp, ip, whichfork);
+ if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
+ goto out;
+ }
+
+ /* Check the btree structure. */
cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
xfs_btree_del_cursor(cur, error);
+out:
return error;
}
@@ -481,7 +522,7 @@
out:
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return -ECANCELED;
return 0;
}
@@ -510,7 +551,7 @@
sbcri.sc = sc;
sbcri.whichfork = whichfork;
error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error == -ECANCELED)
error = 0;
xfs_btree_del_cursor(cur, error);
@@ -652,13 +693,6 @@
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out;
- /* Now try to scrub the in-memory extent list. */
- if (!(ifp->if_flags & XFS_IFEXTENTS)) {
- error = xfs_iread_extents(sc->tp, ip, whichfork);
- if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
- goto out;
- }
-
/* Find the offset of the last extent in the mapping. */
error = xfs_bmap_last_offset(ip, &endoff, whichfork);
if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
@@ -670,7 +704,7 @@
for_each_xfs_iext(ifp, &icur, &irec) {
if (xchk_should_terminate(sc, &error) ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
- break;
+ goto out;
if (isnullstartblock(irec.br_startblock))
continue;
if (irec.br_startoff >= endoff) {
@@ -678,7 +712,7 @@
irec.br_startoff);
goto out;
}
- error = xchk_bmap_extent(ip, NULL, &info, &irec);
+ error = xchk_bmap_iextent(ip, &info, &irec);
if (error)
goto out;
}
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 4ae959f..f52a7b8 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -9,14 +9,7 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
@@ -415,8 +408,17 @@
struct xfs_btree_cur *cur = bs->cur;
struct check_owner *co;
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
+ /*
+ * In theory, xfs_btree_get_block should only give us a null buffer
+ * pointer for the root of a root-in-inode btree type, but we need
+ * to check defensively here in case the cursor state is also screwed
+ * up.
+ */
+ if (bp == NULL) {
+ if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
+ xchk_btree_set_corrupt(bs->sc, bs->cur, level);
return 0;
+ }
/*
* We want to cross-reference each btree block with the bnobt
@@ -583,31 +585,32 @@
*/
int
xchk_btree(
- struct xfs_scrub *sc,
- struct xfs_btree_cur *cur,
- xchk_btree_rec_fn scrub_fn,
- struct xfs_owner_info *oinfo,
- void *private)
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ xchk_btree_rec_fn scrub_fn,
+ const struct xfs_owner_info *oinfo,
+ void *private)
{
- struct xchk_btree bs = { NULL };
- union xfs_btree_ptr ptr;
- union xfs_btree_ptr *pp;
- union xfs_btree_rec *recp;
- struct xfs_btree_block *block;
- int level;
- struct xfs_buf *bp;
- struct check_owner *co;
- struct check_owner *n;
- int i;
- int error = 0;
+ struct xchk_btree bs = {
+ .cur = cur,
+ .scrub_rec = scrub_fn,
+ .oinfo = oinfo,
+ .firstrec = true,
+ .private = private,
+ .sc = sc,
+ };
+ union xfs_btree_ptr ptr;
+ union xfs_btree_ptr *pp;
+ union xfs_btree_rec *recp;
+ struct xfs_btree_block *block;
+ int level;
+ struct xfs_buf *bp;
+ struct check_owner *co;
+ struct check_owner *n;
+ int i;
+ int error = 0;
/* Initialize scrub state */
- bs.cur = cur;
- bs.scrub_rec = scrub_fn;
- bs.oinfo = oinfo;
- bs.firstrec = true;
- bs.private = private;
- bs.sc = sc;
for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
bs.firstkey[i] = true;
INIT_LIST_HEAD(&bs.to_check);
diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
index aada763..5572e47 100644
--- a/fs/xfs/scrub/btree.h
+++ b/fs/xfs/scrub/btree.h
@@ -31,21 +31,21 @@
struct xchk_btree {
/* caller-provided scrub state */
- struct xfs_scrub *sc;
- struct xfs_btree_cur *cur;
- xchk_btree_rec_fn scrub_rec;
- struct xfs_owner_info *oinfo;
- void *private;
+ struct xfs_scrub *sc;
+ struct xfs_btree_cur *cur;
+ xchk_btree_rec_fn scrub_rec;
+ const struct xfs_owner_info *oinfo;
+ void *private;
/* internal scrub state */
- union xfs_btree_rec lastrec;
- bool firstrec;
- union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS];
- bool firstkey[XFS_BTREE_MAXLEVELS];
- struct list_head to_check;
+ union xfs_btree_rec lastrec;
+ bool firstrec;
+ union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS];
+ bool firstkey[XFS_BTREE_MAXLEVELS];
+ struct list_head to_check;
};
int xchk_btree(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
- xchk_btree_rec_fn scrub_fn, struct xfs_owner_info *oinfo,
+ xchk_btree_rec_fn scrub_fn, const struct xfs_owner_info *oinfo,
void *private);
#endif /* __XFS_SCRUB_BTREE_H__ */
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 346b02a..1887605 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -9,22 +9,16 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
-#include "xfs_itable.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
@@ -32,12 +26,11 @@
#include "xfs_trans_priv.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
-#include "scrub/btree.h"
#include "scrub/repair.h"
+#include "scrub/health.h"
/* Common code for the metadata scrubbers. */
@@ -208,6 +201,15 @@
trace_xchk_ino_preen(sc, ino, __return_address);
}
+/* Record something being wrong with the filesystem primary superblock. */
+void
+xchk_set_corrupt(
+ struct xfs_scrub *sc)
+{
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ trace_xchk_fs_error(sc, 0, __return_address);
+}
+
/* Record a corrupt block. */
void
xchk_block_set_corrupt(
@@ -313,8 +315,8 @@
*/
struct xchk_rmap_ownedby_info {
- struct xfs_owner_info *oinfo;
- xfs_filblks_t *blocks;
+ const struct xfs_owner_info *oinfo;
+ xfs_filblks_t *blocks;
};
STATIC int
@@ -347,15 +349,15 @@
xchk_count_rmap_ownedby_ag(
struct xfs_scrub *sc,
struct xfs_btree_cur *cur,
- struct xfs_owner_info *oinfo,
+ const struct xfs_owner_info *oinfo,
xfs_filblks_t *blocks)
{
- struct xchk_rmap_ownedby_info sroi;
+ struct xchk_rmap_ownedby_info sroi = {
+ .oinfo = oinfo,
+ .blocks = blocks,
+ };
- sroi.oinfo = oinfo;
*blocks = 0;
- sroi.blocks = blocks;
-
return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
&sroi);
}
@@ -458,13 +460,18 @@
struct xfs_mount *mp = sc->mp;
xfs_agnumber_t agno = sa->agno;
- if (sa->agf_bp) {
+ xchk_perag_get(sc->mp, sa);
+ if (sa->agf_bp &&
+ xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) {
/* Set up a bnobt cursor for cross-referencing. */
sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
agno, XFS_BTNUM_BNO);
if (!sa->bno_cur)
goto err;
+ }
+ if (sa->agf_bp &&
+ xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) {
/* Set up a cntbt cursor for cross-referencing. */
sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
agno, XFS_BTNUM_CNT);
@@ -473,7 +480,8 @@
}
/* Set up a inobt cursor for cross-referencing. */
- if (sa->agi_bp) {
+ if (sa->agi_bp &&
+ xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
agno, XFS_BTNUM_INO);
if (!sa->ino_cur)
@@ -481,7 +489,8 @@
}
/* Set up a finobt cursor for cross-referencing. */
- if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
+ if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb) &&
+ xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
agno, XFS_BTNUM_FINO);
if (!sa->fino_cur)
@@ -489,7 +498,8 @@
}
/* Set up a rmapbt cursor for cross-referencing. */
- if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+ xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
agno);
if (!sa->rmap_cur)
@@ -497,7 +507,8 @@
}
/* Set up a refcountbt cursor for cross-referencing. */
- if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
+ if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb) &&
+ xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
sa->agf_bp, agno);
if (!sa->refc_cur)
@@ -884,3 +895,21 @@
}
return -EDEADLOCK;
}
+
+/* Pause background reaping of resources. */
+void
+xchk_stop_reaping(
+ struct xfs_scrub *sc)
+{
+ sc->flags |= XCHK_REAPING_DISABLED;
+ xfs_stop_block_reaping(sc->mp);
+}
+
+/* Restart background reaping of resources. */
+void
+xchk_start_reaping(
+ struct xfs_scrub *sc)
+{
+ xfs_start_block_reaping(sc->mp);
+ sc->flags &= ~XCHK_REAPING_DISABLED;
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 2d4324d..003a772 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -39,6 +39,7 @@
struct xfs_buf *bp);
void xchk_ino_set_preen(struct xfs_scrub *sc, xfs_ino_t ino);
+void xchk_set_corrupt(struct xfs_scrub *sc);
void xchk_block_set_corrupt(struct xfs_scrub *sc,
struct xfs_buf *bp);
void xchk_ino_set_corrupt(struct xfs_scrub *sc, xfs_ino_t ino);
@@ -105,6 +106,7 @@
return -ENOENT;
}
#endif
+int xchk_setup_fscounters(struct xfs_scrub *sc, struct xfs_inode *ip);
void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa);
int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno,
@@ -116,7 +118,7 @@
void xchk_ag_btcur_free(struct xchk_ag *sa);
int xchk_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa);
int xchk_count_rmap_ownedby_ag(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
- struct xfs_owner_info *oinfo, xfs_filblks_t *blocks);
+ const struct xfs_owner_info *oinfo, xfs_filblks_t *blocks);
int xchk_setup_ag_btree(struct xfs_scrub *sc, struct xfs_inode *ip,
bool force_log);
@@ -137,5 +139,7 @@
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
+void xchk_stop_reaping(struct xfs_scrub *sc);
+void xchk_start_reaping(struct xfs_scrub *sc);
#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index f1260b4..77ff9f9 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -9,20 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_attr_leaf.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -286,7 +278,11 @@
/* Compare upper level pointer to sibling pointer. */
if (ds->state->altpath.blk[level].blkno != sibling)
xchk_da_set_corrupt(ds, level);
- xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp);
+ if (ds->state->altpath.blk[level].bp) {
+ xfs_trans_brelse(ds->dargs.trans,
+ ds->state->altpath.blk[level].bp);
+ ds->state->altpath.blk[level].bp = NULL;
+ }
out:
return error;
}
@@ -574,6 +570,11 @@
/* Drill another level deeper. */
blkno = be32_to_cpu(key->before);
level++;
+ if (level >= XFS_DA_NODE_MAXDEPTH) {
+ /* Too deep! */
+ xchk_da_set_corrupt(&ds, level - 1);
+ break;
+ }
ds.tree_level--;
error = xchk_da_btree_block(&ds, level, blkno);
if (error)
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index cd3e4d7..1e2e117 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -9,24 +9,14 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
-#include "xfs_itable.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
#include "scrub/dabtree.h"
/* Set us up to scrub directories. */
@@ -129,6 +119,12 @@
goto out;
}
+ /* Does this name make sense? */
+ if (!xfs_dir2_namecheck(name, namelen)) {
+ xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
+ goto out;
+ }
+
if (!strncmp(".", name, namelen)) {
/* If this is "." then check that the inum matches the dir. */
if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
new file mode 100644
index 0000000..98f82d7
--- /dev/null
+++ b/fs/xfs/scrub/fscounters.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_health.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+
+/*
+ * FS Summary Counters
+ * ===================
+ *
+ * The basics of filesystem summary counter checking are that we iterate the
+ * AGs counting the number of free blocks, free space btree blocks, per-AG
+ * reservations, inodes, delayed allocation reservations, and free inodes.
+ * Then we compare what we computed against the in-core counters.
+ *
+ * However, the reality is that summary counters are a tricky beast to check.
+ * While we /could/ freeze the filesystem and scramble around the AGs counting
+ * the free blocks, in practice we prefer not to do that for a scan because
+ * freezing is costly. To get around this, we added a per-cpu counter of the
+ * delalloc reservations so that we can rotor around the AGs relatively
+ * quickly, and we allow the counts to be slightly off because we're not taking
+ * any locks while we do this.
+ *
+ * So the first thing we do is warm up the buffer cache in the setup routine by
+ * walking all the AGs to make sure the incore per-AG structure has been
+ * initialized. The expected value calculation then iterates the incore per-AG
+ * structures as quickly as it can. We snapshot the percpu counters before and
+ * after this operation and use the difference in counter values to guess at
+ * our tolerance for mismatch between expected and actual counter values.
+ */
+
+/*
+ * Since the expected value computation is lockless but only browses incore
+ * values, the percpu counters should be fairly close to each other. However,
+ * we'll allow ourselves to be off by at least this (arbitrary) amount.
+ */
+#define XCHK_FSCOUNT_MIN_VARIANCE (512)
+
+/*
+ * Make sure the per-AG structure has been initialized from the on-disk header
+ * contents and trust that the incore counters match the ondisk counters. (The
+ * AGF and AGI scrubbers check them, and a normal xfs_scrub run checks the
+ * summary counters after checking all AG headers). Do this from the setup
+ * function so that the inner AG aggregation loop runs as quickly as possible.
+ *
+ * This function runs during the setup phase /before/ we start checking any
+ * metadata.
+ */
+STATIC int
+xchk_fscount_warmup(
+ struct xfs_scrub *sc)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_buf *agi_bp = NULL;
+ struct xfs_buf *agf_bp = NULL;
+ struct xfs_perag *pag = NULL;
+ xfs_agnumber_t agno;
+ int error = 0;
+
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ pag = xfs_perag_get(mp, agno);
+
+ if (pag->pagi_init && pag->pagf_init)
+ goto next_loop_perag;
+
+ /* Lock both AG headers. */
+ error = xfs_ialloc_read_agi(mp, sc->tp, agno, &agi_bp);
+ if (error)
+ break;
+ error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &agf_bp);
+ if (error)
+ break;
+ error = -ENOMEM;
+ if (!agf_bp || !agi_bp)
+ break;
+
+ /*
+ * These are supposed to be initialized by the header read
+ * function.
+ */
+ error = -EFSCORRUPTED;
+ if (!pag->pagi_init || !pag->pagf_init)
+ break;
+
+ xfs_buf_relse(agf_bp);
+ agf_bp = NULL;
+ xfs_buf_relse(agi_bp);
+ agi_bp = NULL;
+next_loop_perag:
+ xfs_perag_put(pag);
+ pag = NULL;
+ error = 0;
+
+ if (fatal_signal_pending(current))
+ break;
+ }
+
+ if (agf_bp)
+ xfs_buf_relse(agf_bp);
+ if (agi_bp)
+ xfs_buf_relse(agi_bp);
+ if (pag)
+ xfs_perag_put(pag);
+ return error;
+}
+
+int
+xchk_setup_fscounters(
+ struct xfs_scrub *sc,
+ struct xfs_inode *ip)
+{
+ struct xchk_fscounters *fsc;
+ int error;
+
+ sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), 0);
+ if (!sc->buf)
+ return -ENOMEM;
+ fsc = sc->buf;
+
+ xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max);
+
+ /* We must get the incore counters set up before we can proceed. */
+ error = xchk_fscount_warmup(sc);
+ if (error)
+ return error;
+
+ /*
+ * Pause background reclaim while we're scrubbing to reduce the
+ * likelihood of background perturbations to the counters throwing off
+ * our calculations.
+ */
+ xchk_stop_reaping(sc);
+
+ return xchk_trans_alloc(sc, 0);
+}
+
+/*
+ * Calculate what the global in-core counters ought to be from the incore
+ * per-AG structure. Callers can compare this to the actual in-core counters
+ * to estimate by how much both in-core and on-disk counters need to be
+ * adjusted.
+ */
+STATIC int
+xchk_fscount_aggregate_agcounts(
+ struct xfs_scrub *sc,
+ struct xchk_fscounters *fsc)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_perag *pag;
+ uint64_t delayed;
+ xfs_agnumber_t agno;
+ int tries = 8;
+
+retry:
+ fsc->icount = 0;
+ fsc->ifree = 0;
+ fsc->fdblocks = 0;
+
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ pag = xfs_perag_get(mp, agno);
+
+ /* This somehow got unset since the warmup? */
+ if (!pag->pagi_init || !pag->pagf_init) {
+ xfs_perag_put(pag);
+ return -EFSCORRUPTED;
+ }
+
+ /* Count all the inodes */
+ fsc->icount += pag->pagi_count;
+ fsc->ifree += pag->pagi_freecount;
+
+ /* Add up the free/freelist/bnobt/cntbt blocks */
+ fsc->fdblocks += pag->pagf_freeblks;
+ fsc->fdblocks += pag->pagf_flcount;
+ fsc->fdblocks += pag->pagf_btreeblks;
+
+ /*
+ * Per-AG reservations are taken out of the incore counters,
+ * so they must be left out of the free blocks computation.
+ */
+ fsc->fdblocks -= pag->pag_meta_resv.ar_reserved;
+ fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved;
+
+ xfs_perag_put(pag);
+
+ if (fatal_signal_pending(current))
+ break;
+ }
+
+ /*
+ * The global incore space reservation is taken from the incore
+ * counters, so leave that out of the computation.
+ */
+ fsc->fdblocks -= mp->m_resblks_avail;
+
+ /*
+ * Delayed allocation reservations are taken out of the incore counters
+ * but not recorded on disk, so leave them and their indlen blocks out
+ * of the computation.
+ */
+ delayed = percpu_counter_sum(&mp->m_delalloc_blks);
+ fsc->fdblocks -= delayed;
+
+ trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, fsc->fdblocks,
+ delayed);
+
+
+ /* Bail out if the values we compute are totally nonsense. */
+ if (fsc->icount < fsc->icount_min || fsc->icount > fsc->icount_max ||
+ fsc->fdblocks > mp->m_sb.sb_dblocks ||
+ fsc->ifree > fsc->icount_max)
+ return -EFSCORRUPTED;
+
+ /*
+ * If ifree > icount then we probably had some perturbation in the
+ * counters while we were calculating things. We'll try a few times
+ * to maintain ifree <= icount before giving up.
+ */
+ if (fsc->ifree > fsc->icount) {
+ if (tries--)
+ goto retry;
+ xchk_set_incomplete(sc);
+ return 0;
+ }
+
+ return 0;
+}
+
+/*
+ * Is the @counter reasonably close to the @expected value?
+ *
+ * We neither locked nor froze anything in the filesystem while aggregating the
+ * per-AG data to compute the @expected value, which means that the counter
+ * could have changed. We know the @old_value of the summation of the counter
+ * before the aggregation, and we re-sum the counter now. If the expected
+ * value falls between the two summations, we're ok.
+ *
+ * Otherwise, we /might/ have a problem. If the change in the summations is
+ * more than we want to tolerate, the filesystem is probably busy and we should
+ * just send back INCOMPLETE and see if userspace will try again.
+ */
+static inline bool
+xchk_fscount_within_range(
+ struct xfs_scrub *sc,
+ const int64_t old_value,
+ struct percpu_counter *counter,
+ uint64_t expected)
+{
+ int64_t min_value, max_value;
+ int64_t curr_value = percpu_counter_sum(counter);
+
+ trace_xchk_fscounters_within_range(sc->mp, expected, curr_value,
+ old_value);
+
+ /* Negative values are always wrong. */
+ if (curr_value < 0)
+ return false;
+
+ /* Exact matches are always ok. */
+ if (curr_value == expected)
+ return true;
+
+ min_value = min(old_value, curr_value);
+ max_value = max(old_value, curr_value);
+
+ /* Within the before-and-after range is ok. */
+ if (expected >= min_value && expected <= max_value)
+ return true;
+
+ /*
+ * If the difference between the two summations is too large, the fs
+ * might just be busy and so we'll mark the scrub incomplete. Return
+ * true here so that we don't mark the counter corrupt.
+ *
+ * XXX: In the future when userspace can grant scrub permission to
+ * quiesce the filesystem to solve the outsized variance problem, this
+ * check should be moved up and the return code changed to signal to
+ * userspace that we need quiesce permission.
+ */
+ if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
+ xchk_set_incomplete(sc);
+ return true;
+ }
+
+ return false;
+}
+
+/* Check the superblock counters. */
+int
+xchk_fscounters(
+ struct xfs_scrub *sc)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xchk_fscounters *fsc = sc->buf;
+ int64_t icount, ifree, fdblocks;
+ int error;
+
+ /* Snapshot the percpu counters. */
+ icount = percpu_counter_sum(&mp->m_icount);
+ ifree = percpu_counter_sum(&mp->m_ifree);
+ fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+
+ /* No negative values, please! */
+ if (icount < 0 || ifree < 0 || fdblocks < 0)
+ xchk_set_corrupt(sc);
+
+ /* See if icount is obviously wrong. */
+ if (icount < fsc->icount_min || icount > fsc->icount_max)
+ xchk_set_corrupt(sc);
+
+ /* See if fdblocks is obviously wrong. */
+ if (fdblocks > mp->m_sb.sb_dblocks)
+ xchk_set_corrupt(sc);
+
+ /*
+ * If ifree exceeds icount by more than the minimum variance then
+ * something's probably wrong with the counters.
+ */
+ if (ifree > icount && ifree - icount > XCHK_FSCOUNT_MIN_VARIANCE)
+ xchk_set_corrupt(sc);
+
+ /* Walk the incore AG headers to calculate the expected counters. */
+ error = xchk_fscount_aggregate_agcounts(sc, fsc);
+ if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
+ return error;
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
+ return 0;
+
+ /* Compare the in-core counters with whatever we counted. */
+ if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
+ xchk_set_corrupt(sc);
+
+ if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
+ xchk_set_corrupt(sc);
+
+ if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
+ fsc->fdblocks))
+ xchk_set_corrupt(sc);
+
+ return 0;
+}
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
new file mode 100644
index 0000000..b2f6028
--- /dev/null
+++ b/fs/xfs/scrub/health.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_btree.h"
+#include "xfs_sb.h"
+#include "xfs_health.h"
+#include "scrub/scrub.h"
+
+/*
+ * Scrub and In-Core Filesystem Health Assessments
+ * ===============================================
+ *
+ * Online scrub and repair have the time and the ability to perform stronger
+ * checks than we can do from the metadata verifiers, because they can
+ * cross-reference records between data structures. Therefore, scrub is in a
+ * good position to update the online filesystem health assessments to reflect
+ * the good/bad state of the data structure.
+ *
+ * We therefore extend scrub in the following ways to achieve this:
+ *
+ * 1. Create a "sick_mask" field in the scrub context. When we're setting up a
+ * scrub call, set this to the default XFS_SICK_* flag(s) for the selected
+ * scrub type (call it A). Scrub and repair functions can override the default
+ * sick_mask value if they choose.
+ *
+ * 2. If the scrubber returns a runtime error code, we exit making no changes
+ * to the incore sick state.
+ *
+ * 3. If the scrubber finds that A is clean, use sick_mask to clear the incore
+ * sick flags before exiting.
+ *
+ * 4. If the scrubber finds that A is corrupt, use sick_mask to set the incore
+ * sick flags. If the user didn't want to repair then we exit, leaving the
+ * metadata structure unfixed and the sick flag set.
+ *
+ * 5. Now we know that A is corrupt and the user wants to repair, so run the
+ * repairer. If the repairer returns an error code, we exit with that error
+ * code, having made no further changes to the incore sick state.
+ *
+ * 6. If repair rebuilds A correctly and the subsequent re-scrub of A is clean,
+ * use sick_mask to clear the incore sick flags. This should have the effect
+ * that A is no longer marked sick.
+ *
+ * 7. If repair rebuilds A incorrectly, the re-scrub will find it corrupt and
+ * use sick_mask to set the incore sick flags. This should have no externally
+ * visible effect since we already set them in step (4).
+ *
+ * There are some complications to this story, however. For certain types of
+ * complementary metadata indices (e.g. inobt/finobt), it is easier to rebuild
+ * both structures at the same time. The following principles apply to this
+ * type of repair strategy:
+ *
+ * 8. Any repair function that rebuilds multiple structures should update
+ * sick_mask to reflect whatever other structures are rebuilt, and
+ * verify that all the rebuilt structures can pass a scrub check. The outcomes
+ * of 5-7 still apply, but with a sick_mask that covers everything being
+ * rebuilt.
+ */
+
+/* Map our scrub type to a sick mask and a set of health update functions. */
+
+enum xchk_health_group {
+ XHG_FS = 1,
+ XHG_RT,
+ XHG_AG,
+ XHG_INO,
+};
+
+struct xchk_health_map {
+ enum xchk_health_group group;
+ unsigned int sick_mask;
+};
+
+static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
+ [XFS_SCRUB_TYPE_SB] = { XHG_AG, XFS_SICK_AG_SB },
+ [XFS_SCRUB_TYPE_AGF] = { XHG_AG, XFS_SICK_AG_AGF },
+ [XFS_SCRUB_TYPE_AGFL] = { XHG_AG, XFS_SICK_AG_AGFL },
+ [XFS_SCRUB_TYPE_AGI] = { XHG_AG, XFS_SICK_AG_AGI },
+ [XFS_SCRUB_TYPE_BNOBT] = { XHG_AG, XFS_SICK_AG_BNOBT },
+ [XFS_SCRUB_TYPE_CNTBT] = { XHG_AG, XFS_SICK_AG_CNTBT },
+ [XFS_SCRUB_TYPE_INOBT] = { XHG_AG, XFS_SICK_AG_INOBT },
+ [XFS_SCRUB_TYPE_FINOBT] = { XHG_AG, XFS_SICK_AG_FINOBT },
+ [XFS_SCRUB_TYPE_RMAPBT] = { XHG_AG, XFS_SICK_AG_RMAPBT },
+ [XFS_SCRUB_TYPE_REFCNTBT] = { XHG_AG, XFS_SICK_AG_REFCNTBT },
+ [XFS_SCRUB_TYPE_INODE] = { XHG_INO, XFS_SICK_INO_CORE },
+ [XFS_SCRUB_TYPE_BMBTD] = { XHG_INO, XFS_SICK_INO_BMBTD },
+ [XFS_SCRUB_TYPE_BMBTA] = { XHG_INO, XFS_SICK_INO_BMBTA },
+ [XFS_SCRUB_TYPE_BMBTC] = { XHG_INO, XFS_SICK_INO_BMBTC },
+ [XFS_SCRUB_TYPE_DIR] = { XHG_INO, XFS_SICK_INO_DIR },
+ [XFS_SCRUB_TYPE_XATTR] = { XHG_INO, XFS_SICK_INO_XATTR },
+ [XFS_SCRUB_TYPE_SYMLINK] = { XHG_INO, XFS_SICK_INO_SYMLINK },
+ [XFS_SCRUB_TYPE_PARENT] = { XHG_INO, XFS_SICK_INO_PARENT },
+ [XFS_SCRUB_TYPE_RTBITMAP] = { XHG_RT, XFS_SICK_RT_BITMAP },
+ [XFS_SCRUB_TYPE_RTSUM] = { XHG_RT, XFS_SICK_RT_SUMMARY },
+ [XFS_SCRUB_TYPE_UQUOTA] = { XHG_FS, XFS_SICK_FS_UQUOTA },
+ [XFS_SCRUB_TYPE_GQUOTA] = { XHG_FS, XFS_SICK_FS_GQUOTA },
+ [XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA },
+ [XFS_SCRUB_TYPE_FSCOUNTERS] = { XHG_FS, XFS_SICK_FS_COUNTERS },
+};
+
+/* Return the health status mask for this scrub type. */
+unsigned int
+xchk_health_mask_for_scrub_type(
+ __u32 scrub_type)
+{
+ return type_to_health_flag[scrub_type].sick_mask;
+}
+
+/*
+ * Update filesystem health assessments based on what we found and did.
+ *
+ * If the scrubber finds errors, we mark sick whatever's mentioned in
+ * sick_mask, no matter whether this is a first scan or an
+ * evaluation of repair effectiveness.
+ *
+ * Otherwise, no direct corruption was found, so mark whatever's in
+ * sick_mask as healthy.
+ */
+void
+xchk_update_health(
+ struct xfs_scrub *sc)
+{
+ struct xfs_perag *pag;
+ bool bad;
+
+ if (!sc->sick_mask)
+ return;
+
+ bad = (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT);
+ switch (type_to_health_flag[sc->sm->sm_type].group) {
+ case XHG_AG:
+ pag = xfs_perag_get(sc->mp, sc->sm->sm_agno);
+ if (bad)
+ xfs_ag_mark_sick(pag, sc->sick_mask);
+ else
+ xfs_ag_mark_healthy(pag, sc->sick_mask);
+ xfs_perag_put(pag);
+ break;
+ case XHG_INO:
+ if (!sc->ip)
+ return;
+ if (bad)
+ xfs_inode_mark_sick(sc->ip, sc->sick_mask);
+ else
+ xfs_inode_mark_healthy(sc->ip, sc->sick_mask);
+ break;
+ case XHG_FS:
+ if (bad)
+ xfs_fs_mark_sick(sc->mp, sc->sick_mask);
+ else
+ xfs_fs_mark_healthy(sc->mp, sc->sick_mask);
+ break;
+ case XHG_RT:
+ if (bad)
+ xfs_rt_mark_sick(sc->mp, sc->sick_mask);
+ else
+ xfs_rt_mark_healthy(sc->mp, sc->sick_mask);
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+}
+
+/* Is the given per-AG btree healthy enough for scanning? */
+bool
+xchk_ag_btree_healthy_enough(
+ struct xfs_scrub *sc,
+ struct xfs_perag *pag,
+ xfs_btnum_t btnum)
+{
+ unsigned int mask = 0;
+
+ /*
+ * We always want the cursor if it's the same type as whatever we're
+ * scrubbing, even if we already know the structure is corrupt.
+ *
+ * Otherwise, we're only interested in the btree for cross-referencing.
+ * If we know the btree is bad then don't bother, just set XFAIL.
+ */
+ switch (btnum) {
+ case XFS_BTNUM_BNO:
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT)
+ return true;
+ mask = XFS_SICK_AG_BNOBT;
+ break;
+ case XFS_BTNUM_CNT:
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_CNTBT)
+ return true;
+ mask = XFS_SICK_AG_CNTBT;
+ break;
+ case XFS_BTNUM_INO:
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT)
+ return true;
+ mask = XFS_SICK_AG_INOBT;
+ break;
+ case XFS_BTNUM_FINO:
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT)
+ return true;
+ mask = XFS_SICK_AG_FINOBT;
+ break;
+ case XFS_BTNUM_RMAP:
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_RMAPBT)
+ return true;
+ mask = XFS_SICK_AG_RMAPBT;
+ break;
+ case XFS_BTNUM_REFC:
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_REFCNTBT)
+ return true;
+ mask = XFS_SICK_AG_REFCNTBT;
+ break;
+ default:
+ ASSERT(0);
+ return true;
+ }
+
+ if (xfs_ag_has_sickness(pag, mask)) {
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
+ return false;
+ }
+
+ return true;
+}
diff --git a/fs/xfs/scrub/health.h b/fs/xfs/scrub/health.h
new file mode 100644
index 0000000..d0b938d
--- /dev/null
+++ b/fs/xfs/scrub/health.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_SCRUB_HEALTH_H__
+#define __XFS_SCRUB_HEALTH_H__
+
+unsigned int xchk_health_mask_for_scrub_type(__u32 scrub_type);
+void xchk_update_health(struct xfs_scrub *sc);
+bool xchk_ag_btree_healthy_enough(struct xfs_scrub *sc, struct xfs_perag *pag,
+ xfs_btnum_t btnum);
+
+#endif /* __XFS_SCRUB_HEALTH_H__ */
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 224dba9..6817587 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -9,21 +9,14 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
-#include "xfs_log.h"
-#include "xfs_trans_priv.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
@@ -39,11 +32,22 @@
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
- return xchk_setup_ag_btree(sc, ip, sc->try_harder);
+ return xchk_setup_ag_btree(sc, ip, sc->flags & XCHK_TRY_HARDER);
}
/* Inode btree scrubber. */
+struct xchk_iallocbt {
+ /* Number of inodes we see while scanning inobt. */
+ unsigned long long inodes;
+
+ /* Expected next startino, for big block filesystems. */
+ xfs_agino_t next_startino;
+
+ /* Expected end of the current inode cluster. */
+ xfs_agino_t next_cluster_ino;
+};
+
/*
* If we're checking the finobt, cross-reference with the inobt.
* Otherwise we're checking the inobt; if there is an finobt, make sure
@@ -82,15 +86,12 @@
xfs_agblock_t agbno,
xfs_extlen_t len)
{
- struct xfs_owner_info oinfo;
-
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return;
xchk_xref_is_used_space(sc, agbno, len);
xchk_iallocbt_chunk_xref_other(sc, irec, agino);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
- xchk_xref_is_owned_by(sc, agbno, len, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, len, &XFS_RMAP_OINFO_INODES);
xchk_xref_is_not_shared(sc, agbno, len);
}
@@ -126,42 +127,58 @@
return hweight64(freemask);
}
-/* Check a particular inode with ir_free. */
+/*
+ * Check that an inode's allocation status matches ir_free in the inobt
+ * record. First we try querying the in-core inode state, and if the inode
+ * isn't loaded we examine the on-disk inode directly.
+ *
+ * Since there can be 1:M and M:1 mappings between inobt records and inode
+ * clusters, we pass in the inode location information as an inobt record;
+ * the index of an inode cluster within the inobt record (as well as the
+ * cluster buffer itself); and the index of the inode within the cluster.
+ *
+ * @irec is the inobt record.
+ * @irec_ino is the inode offset from the start of the record.
+ * @dip is the on-disk inode.
+ */
STATIC int
-xchk_iallocbt_check_cluster_freemask(
+xchk_iallocbt_check_cluster_ifree(
struct xchk_btree *bs,
- xfs_ino_t fsino,
- xfs_agino_t chunkino,
- xfs_agino_t clusterino,
struct xfs_inobt_rec_incore *irec,
- struct xfs_buf *bp)
+ unsigned int irec_ino,
+ struct xfs_dinode *dip)
{
- struct xfs_dinode *dip;
struct xfs_mount *mp = bs->cur->bc_mp;
- bool inode_is_free = false;
+ xfs_ino_t fsino;
+ xfs_agino_t agino;
+ bool irec_free;
+ bool ino_inuse;
bool freemask_ok;
- bool inuse;
int error = 0;
if (xchk_should_terminate(bs->sc, &error))
return error;
- dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize);
+ /*
+ * Given an inobt record and the offset of an inode from the start of
+ * the record, compute which fs inode we're talking about.
+ */
+ agino = irec->ir_startino + irec_ino;
+ fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
+ irec_free = (irec->ir_free & XFS_INOBT_MASK(irec_ino));
+
if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
- (dip->di_version >= 3 &&
- be64_to_cpu(dip->di_ino) != fsino + clusterino)) {
+ (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
goto out;
}
- if (irec->ir_free & XFS_INOBT_MASK(chunkino + clusterino))
- inode_is_free = true;
- error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp,
- fsino + clusterino, &inuse);
+ error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp, fsino,
+ &ino_inuse);
if (error == -ENODATA) {
/* Not cached, just read the disk buffer */
- freemask_ok = inode_is_free ^ !!(dip->di_mode);
- if (!bs->sc->try_harder && !freemask_ok)
+ freemask_ok = irec_free ^ !!(dip->di_mode);
+ if (!(bs->sc->flags & XCHK_TRY_HARDER) && !freemask_ok)
return -EDEADLOCK;
} else if (error < 0) {
/*
@@ -172,7 +189,7 @@
goto out;
} else {
/* Inode is all there. */
- freemask_ok = inode_is_free ^ inuse;
+ freemask_ok = irec_free ^ ino_inuse;
}
if (!freemask_ok)
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
@@ -180,91 +197,225 @@
return 0;
}
-/* Make sure the free mask is consistent with what the inodes think. */
+/*
+ * Check that the holemask and freemask of a hypothetical inode cluster match
+ * what's actually on disk. If sparse inodes are enabled, the cluster does
+ * not actually have to map to inodes if the corresponding holemask bit is set.
+ *
+ * @cluster_base is the first inode in the cluster within the @irec.
+ */
STATIC int
-xchk_iallocbt_check_freemask(
+xchk_iallocbt_check_cluster(
struct xchk_btree *bs,
- struct xfs_inobt_rec_incore *irec)
+ struct xfs_inobt_rec_incore *irec,
+ unsigned int cluster_base)
{
- struct xfs_owner_info oinfo;
struct xfs_imap imap;
struct xfs_mount *mp = bs->cur->bc_mp;
struct xfs_dinode *dip;
- struct xfs_buf *bp;
- xfs_ino_t fsino;
- xfs_agino_t nr_inodes;
- xfs_agino_t agino;
- xfs_agino_t chunkino;
- xfs_agino_t clusterino;
+ struct xfs_buf *cluster_bp;
+ unsigned int nr_inodes;
+ xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
xfs_agblock_t agbno;
- int blks_per_cluster;
- uint16_t holemask;
+ unsigned int cluster_index;
+ uint16_t cluster_mask = 0;
uint16_t ir_holemask;
int error = 0;
- /* Make sure the freemask matches the inode records. */
- blks_per_cluster = xfs_icluster_size_fsb(mp);
- nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+ nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK,
+ M_IGEO(mp)->inodes_per_cluster);
- for (agino = irec->ir_startino;
- agino < irec->ir_startino + XFS_INODES_PER_CHUNK;
- agino += blks_per_cluster * mp->m_sb.sb_inopblock) {
- fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
- chunkino = agino - irec->ir_startino;
- agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+ /* Map this inode cluster */
+ agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base);
- /* Compute the holemask mask for this cluster. */
- for (clusterino = 0, holemask = 0; clusterino < nr_inodes;
- clusterino += XFS_INODES_PER_HOLEMASK_BIT)
- holemask |= XFS_INOBT_MASK((chunkino + clusterino) /
- XFS_INODES_PER_HOLEMASK_BIT);
+ /* Compute a bitmask for this cluster that can be used for holemask. */
+ for (cluster_index = 0;
+ cluster_index < nr_inodes;
+ cluster_index += XFS_INODES_PER_HOLEMASK_BIT)
+ cluster_mask |= XFS_INOBT_MASK((cluster_base + cluster_index) /
+ XFS_INODES_PER_HOLEMASK_BIT);
- /* The whole cluster must be a hole or not a hole. */
- ir_holemask = (irec->ir_holemask & holemask);
- if (ir_holemask != holemask && ir_holemask != 0) {
+ /*
+ * Map the first inode of this cluster to a buffer and offset.
+ * Be careful about inobt records that don't align with the start of
+ * the inode buffer when block sizes are large enough to hold multiple
+ * inode chunks. When this happens, cluster_base will be zero but
+ * ir_startino can be large enough to make im_boffset nonzero.
+ */
+ ir_holemask = (irec->ir_holemask & cluster_mask);
+ imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
+ imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) <<
+ mp->m_sb.sb_inodelog;
+
+ if (imap.im_boffset != 0 && cluster_base != 0) {
+ ASSERT(imap.im_boffset == 0 || cluster_base == 0);
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ return 0;
+ }
+
+ trace_xchk_iallocbt_check_cluster(mp, agno, irec->ir_startino,
+ imap.im_blkno, imap.im_len, cluster_base, nr_inodes,
+ cluster_mask, ir_holemask,
+ XFS_INO_TO_OFFSET(mp, irec->ir_startino +
+ cluster_base));
+
+ /* The whole cluster must be a hole or not a hole. */
+ if (ir_holemask != cluster_mask && ir_holemask != 0) {
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ return 0;
+ }
+
+ /* If any part of this is a hole, skip it. */
+ if (ir_holemask) {
+ xchk_xref_is_not_owned_by(bs->sc, agbno,
+ M_IGEO(mp)->blocks_per_cluster,
+ &XFS_RMAP_OINFO_INODES);
+ return 0;
+ }
+
+ xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster,
+ &XFS_RMAP_OINFO_INODES);
+
+ /* Grab the inode cluster buffer. */
+ error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &cluster_bp,
+ 0, 0);
+ if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0, &error))
+ return error;
+
+ /* Check free status of each inode within this cluster. */
+ for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
+ struct xfs_dinode *dip;
+
+ if (imap.im_boffset >= BBTOB(cluster_bp->b_length)) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- continue;
+ break;
}
- /* If any part of this is a hole, skip it. */
- if (ir_holemask) {
- xchk_xref_is_not_owned_by(bs->sc, agbno,
- blks_per_cluster, &oinfo);
- continue;
- }
+ dip = xfs_buf_offset(cluster_bp, imap.im_boffset);
+ error = xchk_iallocbt_check_cluster_ifree(bs, irec,
+ cluster_base + cluster_index, dip);
+ if (error)
+ break;
+ imap.im_boffset += mp->m_sb.sb_inodesize;
+ }
- xchk_xref_is_owned_by(bs->sc, agbno, blks_per_cluster,
- &oinfo);
+ xfs_trans_brelse(bs->cur->bc_tp, cluster_bp);
+ return error;
+}
- /* Grab the inode cluster buffer. */
- imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
- agbno);
- imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
- imap.im_boffset = 0;
+/*
+ * For all the inode clusters that could map to this inobt record, make sure
+ * that the holemask makes sense and that the allocation status of each inode
+ * matches the freemask.
+ */
+STATIC int
+xchk_iallocbt_check_clusters(
+ struct xchk_btree *bs,
+ struct xfs_inobt_rec_incore *irec)
+{
+ unsigned int cluster_base;
+ int error = 0;
- error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
- &dip, &bp, 0, 0);
- if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0,
- &error))
- continue;
-
- /* Which inodes are free? */
- for (clusterino = 0; clusterino < nr_inodes; clusterino++) {
- error = xchk_iallocbt_check_cluster_freemask(bs,
- fsino, chunkino, clusterino, irec, bp);
- if (error) {
- xfs_trans_brelse(bs->cur->bc_tp, bp);
- return error;
- }
- }
-
- xfs_trans_brelse(bs->cur->bc_tp, bp);
+ /*
+ * For the common case where this inobt record maps to multiple inode
+ * clusters this will call _check_cluster for each cluster.
+ *
+ * For the case that multiple inobt records map to a single cluster,
+ * this will call _check_cluster once.
+ */
+ for (cluster_base = 0;
+ cluster_base < XFS_INODES_PER_CHUNK;
+ cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) {
+ error = xchk_iallocbt_check_cluster(bs, irec, cluster_base);
+ if (error)
+ break;
}
return error;
}
+/*
+ * Make sure this inode btree record is aligned properly. Because a fs block
+ * contains multiple inodes, we check that the inobt record is aligned to the
+ * correct inode, not just the correct block on disk. This results in a finer
+ * grained corruption check.
+ */
+STATIC void
+xchk_iallocbt_rec_alignment(
+ struct xchk_btree *bs,
+ struct xfs_inobt_rec_incore *irec)
+{
+ struct xfs_mount *mp = bs->sc->mp;
+ struct xchk_iallocbt *iabt = bs->private;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+
+ /*
+ * finobt records have different positioning requirements than inobt
+ * records: each finobt record must have a corresponding inobt record.
+ * That is checked in the xref function, so for now we only catch the
+ * obvious case where the record isn't at all aligned properly.
+ *
+ * Note that if a fs block contains more than a single chunk of inodes,
+ * we will have finobt records only for those chunks containing free
+ * inodes, and therefore expect chunk alignment of finobt records.
+ * Otherwise, we expect that the finobt record is aligned to the
+ * cluster alignment as told by the superblock.
+ */
+ if (bs->cur->bc_btnum == XFS_BTNUM_FINO) {
+ unsigned int imask;
+
+ imask = min_t(unsigned int, XFS_INODES_PER_CHUNK,
+ igeo->cluster_align_inodes) - 1;
+ if (irec->ir_startino & imask)
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ return;
+ }
+
+ if (iabt->next_startino != NULLAGINO) {
+ /*
+ * We're midway through a cluster of inodes that is mapped by
+ * multiple inobt records. Did we get the record for the next
+ * irec in the sequence?
+ */
+ if (irec->ir_startino != iabt->next_startino) {
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ return;
+ }
+
+ iabt->next_startino += XFS_INODES_PER_CHUNK;
+
+ /* Are we done with the cluster? */
+ if (iabt->next_startino >= iabt->next_cluster_ino) {
+ iabt->next_startino = NULLAGINO;
+ iabt->next_cluster_ino = NULLAGINO;
+ }
+ return;
+ }
+
+ /* inobt records must be aligned to cluster and inoalignmnt size. */
+ if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) {
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ return;
+ }
+
+ if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) {
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ return;
+ }
+
+ if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK)
+ return;
+
+ /*
+ * If this is the start of an inode cluster that can be mapped by
+ * multiple inobt records, the next inobt record must follow exactly
+ * after this one.
+ */
+ iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK;
+ iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster;
+}
+
/* Scrub an inobt/finobt record. */
STATIC int
xchk_iallocbt_rec(
@@ -272,12 +423,11 @@
union xfs_btree_rec *rec)
{
struct xfs_mount *mp = bs->cur->bc_mp;
- xfs_filblks_t *inode_blocks = bs->private;
+ struct xchk_iallocbt *iabt = bs->private;
struct xfs_inobt_rec_incore irec;
uint64_t holes;
xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
xfs_agino_t agino;
- xfs_agblock_t agbno;
xfs_extlen_t len;
int holecount;
int i;
@@ -304,14 +454,11 @@
goto out;
}
- /* Make sure this record is aligned to cluster and inoalignmnt size. */
- agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino);
- if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) ||
- (agbno & (xfs_icluster_size_fsb(mp) - 1)))
- xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ xchk_iallocbt_rec_alignment(bs, &irec);
+ if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ goto out;
- *inode_blocks += XFS_B_TO_FSB(mp,
- irec.ir_count * mp->m_sb.sb_inodesize);
+ iabt->inodes += irec.ir_count;
/* Handle non-sparse inodes */
if (!xfs_inobt_issparse(irec.ir_holemask)) {
@@ -322,7 +469,7 @@
if (!xchk_iallocbt_chunk(bs, &irec, agino, len))
goto out;
- goto check_freemask;
+ goto check_clusters;
}
/* Check each chunk of a sparse inode cluster. */
@@ -348,8 +495,8 @@
holecount + irec.ir_count != XFS_INODES_PER_CHUNK)
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
-check_freemask:
- error = xchk_iallocbt_check_freemask(bs, &irec);
+check_clusters:
+ error = xchk_iallocbt_check_clusters(bs, &irec);
if (error)
goto out;
@@ -366,7 +513,6 @@
struct xfs_scrub *sc,
int which)
{
- struct xfs_owner_info oinfo;
xfs_filblks_t blocks;
xfs_extlen_t inobt_blocks = 0;
xfs_extlen_t finobt_blocks = 0;
@@ -388,9 +534,8 @@
return;
}
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
- error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
- &blocks);
+ error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
+ &XFS_RMAP_OINFO_INOBT, &blocks);
if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
return;
if (blocks != inobt_blocks + finobt_blocks)
@@ -405,21 +550,21 @@
xchk_iallocbt_xref_rmap_inodes(
struct xfs_scrub *sc,
int which,
- xfs_filblks_t inode_blocks)
+ unsigned long long inodes)
{
- struct xfs_owner_info oinfo;
xfs_filblks_t blocks;
+ xfs_filblks_t inode_blocks;
int error;
if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
return;
/* Check that we saw as many inode blocks as the rmap knows about. */
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
- error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
- &blocks);
+ error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
+ &XFS_RMAP_OINFO_INODES, &blocks);
if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
return;
+ inode_blocks = XFS_B_TO_FSB(sc->mp, inodes * sc->mp->m_sb.sb_inodesize);
if (blocks != inode_blocks)
xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}
@@ -431,14 +576,16 @@
xfs_btnum_t which)
{
struct xfs_btree_cur *cur;
- struct xfs_owner_info oinfo;
- xfs_filblks_t inode_blocks = 0;
+ struct xchk_iallocbt iabt = {
+ .inodes = 0,
+ .next_startino = NULLAGINO,
+ .next_cluster_ino = NULLAGINO,
+ };
int error;
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
- error = xchk_btree(sc, cur, xchk_iallocbt_rec, &oinfo,
- &inode_blocks);
+ error = xchk_btree(sc, cur, xchk_iallocbt_rec, &XFS_RMAP_OINFO_INOBT,
+ &iabt);
if (error)
return error;
@@ -452,7 +599,7 @@
* to inode chunks with free inodes.
*/
if (which == XFS_BTNUM_INO)
- xchk_iallocbt_xref_rmap_inodes(sc, which, inode_blocks);
+ xchk_iallocbt_xref_rmap_inodes(sc, which, iabt.inodes);
return error;
}
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index e386c9b..6d483ab 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -9,27 +9,17 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_icache.h"
-#include "xfs_inode_buf.h"
-#include "xfs_inode_fork.h"
#include "xfs_ialloc.h"
#include "xfs_da_format.h"
#include "xfs_reflink.h"
#include "xfs_rmap.h"
-#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Grab total control of the inode metadata. It doesn't matter here if
@@ -509,7 +499,6 @@
xfs_ino_t ino,
struct xfs_dinode *dip)
{
- struct xfs_owner_info oinfo;
xfs_agnumber_t agno;
xfs_agblock_t agbno;
int error;
@@ -526,8 +515,7 @@
xchk_xref_is_used_space(sc, agbno, 1);
xchk_inode_xref_finobt(sc, ino);
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
- xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES);
xchk_xref_is_not_shared(sc, agbno, 1);
xchk_inode_xref_bmap(sc, dip);
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 1c9d7c7..c962bd5 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -9,21 +9,13 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Set us up to scrub parents. */
int
@@ -320,7 +312,7 @@
* If we failed to lock the parent inode even after a retry, just mark
* this scrub incomplete and return.
*/
- if (sc->try_harder && error == -EDEADLOCK) {
+ if ((sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
error = 0;
xchk_set_incomplete(sc);
}
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 782d582..0a33b44 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -9,24 +9,13 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
-#include "xfs_dquot.h"
-#include "xfs_dquot_item.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Convert a scrub type code to a DQ flag, or return 0 if error. */
static inline uint
@@ -60,7 +49,7 @@
dqtype = xchk_quota_to_dqtype(sc);
if (dqtype == 0)
return -EINVAL;
- sc->has_quotaofflock = true;
+ sc->flags |= XCHK_HAS_QUOTAOFFLOCK;
mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
if (!xfs_this_quota_on(sc->mp, dqtype))
return -ENOENT;
@@ -144,7 +133,7 @@
if (bsoft > bhard)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
- if (ihard > mp->m_maxicount)
+ if (ihard > M_IGEO(mp)->maxicount)
xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
if (isoft > ihard)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index e8c82b0..0cab11a 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -7,22 +7,12 @@
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Set us up to scrub reference count btrees.
@@ -351,7 +341,6 @@
xfs_extlen_t len;
xfs_nlink_t refcount;
bool has_cowflag;
- int error = 0;
bno = be32_to_cpu(rec->refc.rc_startblock);
len = be32_to_cpu(rec->refc.rc_blockcount);
@@ -376,14 +365,13 @@
xchk_refcountbt_xref(bs->sc, bno, len, refcount);
- return error;
+ return 0;
}
/* Make sure we have as many refc blocks as the rmap says. */
STATIC void
xchk_refcount_xref_rmap(
struct xfs_scrub *sc,
- struct xfs_owner_info *oinfo,
xfs_filblks_t cow_blocks)
{
xfs_extlen_t refcbt_blocks = 0;
@@ -397,17 +385,16 @@
error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
if (!xchk_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
return;
- error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
- &blocks);
+ error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
+ &XFS_RMAP_OINFO_REFC, &blocks);
if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
return;
if (blocks != refcbt_blocks)
xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
/* Check that we saw as many cow blocks as the rmap knows about. */
- xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
- error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
- &blocks);
+ error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
+ &XFS_RMAP_OINFO_COW, &blocks);
if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
return;
if (blocks != cow_blocks)
@@ -419,17 +406,15 @@
xchk_refcountbt(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
xfs_agblock_t cow_blocks = 0;
int error;
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
error = xchk_btree(sc, sc->sa.refc_cur, xchk_refcountbt_rec,
- &oinfo, &cow_blocks);
+ &XFS_RMAP_OINFO_REFC, &cow_blocks);
if (error)
return error;
- xchk_refcount_xref_rmap(sc, &oinfo, cow_blocks);
+ xchk_refcount_xref_rmap(sc, cow_blocks);
return 0;
}
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 9f08dd9..b70a88b 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -9,27 +9,21 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_icache.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_extent_busy.h"
#include "xfs_ag_resv.h"
-#include "xfs_trans_space.h"
#include "xfs_quota.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -44,8 +38,7 @@
int
xrep_attempt(
struct xfs_inode *ip,
- struct xfs_scrub *sc,
- bool *fixed)
+ struct xfs_scrub *sc)
{
int error = 0;
@@ -64,13 +57,13 @@
* scrub so that we can tell userspace if we fixed the problem.
*/
sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
- *fixed = true;
+ sc->flags |= XREP_ALREADY_FIXED;
return -EAGAIN;
case -EDEADLOCK:
case -EAGAIN:
/* Tell the caller to try again having grabbed all the locks. */
- if (!sc->try_harder) {
- sc->try_harder = true;
+ if (!(sc->flags & XCHK_TRY_HARDER)) {
+ sc->flags |= XCHK_TRY_HARDER;
return -EAGAIN;
}
/*
@@ -135,10 +128,16 @@
if (sc->sa.agfl_bp)
xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
- /* Roll the transaction. */
+ /*
+ * Roll the transaction. We still own the buffer and the buffer lock
+ * regardless of whether or not the roll succeeds. If the roll fails,
+ * the buffers will be released during teardown on our way out of the
+ * kernel. If it succeeds, we join them to the new transaction and
+ * move on.
+ */
error = xfs_trans_roll(&sc->tp);
if (error)
- goto out_release;
+ return error;
/* Join AG headers to the new transaction. */
if (sc->sa.agi_bp)
@@ -149,21 +148,6 @@
xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
return 0;
-
-out_release:
- /*
- * Rolling failed, so release the hold on the buffers. The
- * buffers will be released during teardown on our way out
- * of the kernel.
- */
- if (sc->sa.agi_bp)
- xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
- if (sc->sa.agf_bp)
- xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
- if (sc->sa.agfl_bp)
- xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
-
- return error;
}
/*
@@ -297,14 +281,14 @@
/* Allocate a block in an AG. */
int
xrep_alloc_ag_block(
- struct xfs_scrub *sc,
- struct xfs_owner_info *oinfo,
- xfs_fsblock_t *fsbno,
- enum xfs_ag_resv_type resv)
+ struct xfs_scrub *sc,
+ const struct xfs_owner_info *oinfo,
+ xfs_fsblock_t *fsbno,
+ enum xfs_ag_resv_type resv)
{
- struct xfs_alloc_arg args = {0};
- xfs_agblock_t bno;
- int error;
+ struct xfs_alloc_arg args = {0};
+ xfs_agblock_t bno;
+ int error;
switch (resv) {
case XFS_AG_RESV_AGFL:
@@ -365,9 +349,9 @@
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
XFS_FSB_TO_BB(mp, 1), 0);
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
- xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0);
+ xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
- xfs_trans_log_buf(tp, bp, 0, bp->b_length);
+ xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1);
bp->b_ops = ops;
*bpp = bp;
@@ -503,7 +487,6 @@
struct xfs_scrub *sc,
xfs_agblock_t agbno)
{
- struct xfs_owner_info oinfo;
int error;
/* Make sure there's space on the freelist. */
@@ -516,9 +499,8 @@
* create an rmap for the block prior to merging it or else other
* parts will break.
*/
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno, agbno, 1,
- &oinfo);
+ &XFS_RMAP_OINFO_AG);
if (error)
return error;
@@ -536,17 +518,17 @@
/* Dispose of a single block. */
STATIC int
xrep_reap_block(
- struct xfs_scrub *sc,
- xfs_fsblock_t fsbno,
- struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type resv)
+ struct xfs_scrub *sc,
+ xfs_fsblock_t fsbno,
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type resv)
{
- struct xfs_btree_cur *cur;
- struct xfs_buf *agf_bp = NULL;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- bool has_other_rmap;
- int error;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agf_bp = NULL;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ bool has_other_rmap;
+ int error;
agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
@@ -610,15 +592,15 @@
/* Dispose of every block of every extent in the bitmap. */
int
xrep_reap_extents(
- struct xfs_scrub *sc,
- struct xfs_bitmap *bitmap,
- struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type type)
+ struct xfs_scrub *sc,
+ struct xfs_bitmap *bitmap,
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type)
{
- struct xfs_bitmap_range *bmr;
- struct xfs_bitmap_range *n;
- xfs_fsblock_t fsbno;
- int error = 0;
+ struct xfs_bitmap_range *bmr;
+ struct xfs_bitmap_range *n;
+ xfs_fsblock_t fsbno;
+ int error = 0;
ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
@@ -682,7 +664,7 @@
{
xfs_agblock_t *agbno = priv;
- return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0;
+ return (*agbno == bno) ? -ECANCELED : 0;
}
/* Does this block match the btree information passed in? */
@@ -692,13 +674,14 @@
struct xrep_find_ag_btree *fab,
uint64_t owner,
xfs_agblock_t agbno,
- bool *found_it)
+ bool *done_with_block)
{
struct xfs_mount *mp = ri->sc->mp;
struct xfs_buf *bp;
struct xfs_btree_block *btblock;
xfs_daddr_t daddr;
- int error;
+ int block_level;
+ int error = 0;
daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno);
@@ -711,42 +694,123 @@
if (owner == XFS_RMAP_OWN_AG) {
error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
xrep_findroot_agfl_walk, &agbno);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error == -ECANCELED)
return 0;
if (error)
return error;
}
+ /*
+ * Read the buffer into memory so that we can see if it's a match for
+ * our btree type. We have no clue if it is beforehand, and we want to
+ * avoid xfs_trans_read_buf's behavior of dumping the DONE state (which
+ * will cause needless disk reads in subsequent calls to this function)
+ * and logging metadata verifier failures.
+ *
+ * Therefore, pass in NULL buffer ops. If the buffer was already in
+ * memory from some other caller it will already have b_ops assigned.
+ * If it was in memory from a previous unsuccessful findroot_block
+ * call, the buffer won't have b_ops but it should be clean and ready
+ * for us to try to verify if the read call succeeds. The same applies
+ * if the buffer wasn't in memory at all.
+ *
+ * Note: If we never match a btree type with this buffer, it will be
+ * left in memory with NULL b_ops. This shouldn't be a problem unless
+ * the buffer gets written.
+ */
error = xfs_trans_read_buf(mp, ri->sc->tp, mp->m_ddev_targp, daddr,
mp->m_bsize, 0, &bp, NULL);
if (error)
return error;
- /*
- * Does this look like a block matching our fs and higher than any
- * other block we've found so far? If so, reattach buffer verifiers
- * so the AIL won't complain if the buffer is also dirty.
- */
+ /* Ensure the block magic matches the btree type we're looking for. */
btblock = XFS_BUF_TO_BLOCK(bp);
- if (be32_to_cpu(btblock->bb_magic) != fab->magic)
- goto out;
- if (xfs_sb_version_hascrc(&mp->m_sb) &&
- !uuid_equal(&btblock->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- goto out;
- bp->b_ops = fab->buf_ops;
-
- /* Ignore this block if it's lower in the tree than we've seen. */
- if (fab->root != NULLAGBLOCK &&
- xfs_btree_get_level(btblock) < fab->height)
+ ASSERT(fab->buf_ops->magic[1] != 0);
+ if (btblock->bb_magic != fab->buf_ops->magic[1])
goto out;
- /* Make sure we pass the verifiers. */
- bp->b_ops->verify_read(bp);
- if (bp->b_error)
+ /*
+ * If the buffer already has ops applied and they're not the ones for
+ * this btree type, we know this block doesn't match the btree and we
+ * can bail out.
+ *
+ * If the buffer ops match ours, someone else has already validated
+ * the block for us, so we can move on to checking if this is a root
+ * block candidate.
+ *
+ * If the buffer does not have ops, nobody has successfully validated
+ * the contents and the buffer cannot be dirty. If the magic, uuid,
+ * and structure match this btree type then we'll move on to checking
+ * if it's a root block candidate. If there is no match, bail out.
+ */
+ if (bp->b_ops) {
+ if (bp->b_ops != fab->buf_ops)
+ goto out;
+ } else {
+ ASSERT(!xfs_trans_buf_is_dirty(bp));
+ if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
+ &mp->m_sb.sb_meta_uuid))
+ goto out;
+ /*
+ * Read verifiers can reference b_ops, so we set the pointer
+ * here. If the verifier fails we'll reset the buffer state
+ * to what it was before we touched the buffer.
+ */
+ bp->b_ops = fab->buf_ops;
+ fab->buf_ops->verify_read(bp);
+ if (bp->b_error) {
+ bp->b_ops = NULL;
+ bp->b_error = 0;
+ goto out;
+ }
+
+ /*
+ * Some read verifiers will (re)set b_ops, so we must be
+ * careful not to change b_ops after running the verifier.
+ */
+ }
+
+ /*
+ * This block passes the magic/uuid and verifier tests for this btree
+ * type. We don't need the caller to try the other tree types.
+ */
+ *done_with_block = true;
+
+ /*
+ * Compare this btree block's level to the height of the current
+ * candidate root block.
+ *
+ * If the level matches the root we found previously, throw away both
+ * blocks because there can't be two candidate roots.
+ *
+ * If level is lower in the tree than the root we found previously,
+ * ignore this block.
+ */
+ block_level = xfs_btree_get_level(btblock);
+ if (block_level + 1 == fab->height) {
+ fab->root = NULLAGBLOCK;
goto out;
- fab->root = agbno;
- fab->height = xfs_btree_get_level(btblock) + 1;
- *found_it = true;
+ } else if (block_level < fab->height) {
+ goto out;
+ }
+
+ /*
+ * This is the highest block in the tree that we've found so far.
+ * Update the btree height to reflect what we've learned from this
+ * block.
+ */
+ fab->height = block_level + 1;
+
+ /*
+ * If this block doesn't have sibling pointers, then it's the new root
+ * block candidate. Otherwise, the root will be found farther up the
+ * tree.
+ */
+ if (btblock->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) &&
+ btblock->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
+ fab->root = agbno;
+ else
+ fab->root = NULLAGBLOCK;
trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno,
be32_to_cpu(btblock->bb_magic), fab->height - 1);
@@ -768,7 +832,7 @@
struct xrep_findroot *ri = priv;
struct xrep_find_ag_btree *fab;
xfs_agblock_t b;
- bool found_it;
+ bool done;
int error = 0;
/* Ignore anything that isn't AG metadata. */
@@ -777,16 +841,16 @@
/* Otherwise scan each block + btree type. */
for (b = 0; b < rec->rm_blockcount; b++) {
- found_it = false;
+ done = false;
for (fab = ri->btree_info; fab->buf_ops; fab++) {
if (rec->rm_owner != fab->rmap_owner)
continue;
error = xrep_findroot_block(ri, fab,
rec->rm_owner, rec->rm_startblock + b,
- &found_it);
+ &done);
if (error)
return error;
- if (found_it)
+ if (done)
break;
}
}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 9de321e..60c61d7 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -15,14 +15,15 @@
/* Repair helpers */
-int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc, bool *fixed);
+int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc);
void xrep_failure(struct xfs_mount *mp);
int xrep_roll_ag_trans(struct xfs_scrub *sc);
bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
enum xfs_ag_resv_type type);
xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub *sc);
-int xrep_alloc_ag_block(struct xfs_scrub *sc, struct xfs_owner_info *oinfo,
- xfs_fsblock_t *fsbno, enum xfs_ag_resv_type resv);
+int xrep_alloc_ag_block(struct xfs_scrub *sc,
+ const struct xfs_owner_info *oinfo, xfs_fsblock_t *fsbno,
+ enum xfs_ag_resv_type resv);
int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
struct xfs_buf **bpp, xfs_btnum_t btnum,
const struct xfs_buf_ops *ops);
@@ -32,7 +33,7 @@
int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
int xrep_invalidate_blocks(struct xfs_scrub *sc, struct xfs_bitmap *btlist);
int xrep_reap_extents(struct xfs_scrub *sc, struct xfs_bitmap *exlist,
- struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
+ const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
struct xrep_find_ag_btree {
/* in: rmap owner of the btree we're looking for */
@@ -41,9 +42,6 @@
/* in: buffer ops */
const struct xfs_buf_ops *buf_ops;
- /* in: magic number of the btree */
- uint32_t magic;
-
/* out: the highest btree block found and the tree height */
xfs_agblock_t root;
unsigned int height;
@@ -66,8 +64,7 @@
static inline int xrep_attempt(
struct xfs_inode *ip,
- struct xfs_scrub *sc,
- bool *fixed)
+ struct xfs_scrub *sc)
{
return -EOPNOTSUPP;
}
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 5e293c1..8d4cefd 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -9,21 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Set us up to scrub reverse mapping btrees.
@@ -174,24 +165,21 @@
xchk_rmapbt(
struct xfs_scrub *sc)
{
- struct xfs_owner_info oinfo;
-
- xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
return xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec,
- &oinfo, NULL);
+ &XFS_RMAP_OINFO_AG, NULL);
}
/* xref check that the extent is owned by a given owner */
static inline void
xchk_xref_check_owner(
- struct xfs_scrub *sc,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- struct xfs_owner_info *oinfo,
- bool should_have_rmap)
+ struct xfs_scrub *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo,
+ bool should_have_rmap)
{
- bool has_rmap;
- int error;
+ bool has_rmap;
+ int error;
if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
return;
@@ -207,10 +195,10 @@
/* xref check that the extent is owned by a given owner */
void
xchk_xref_is_owned_by(
- struct xfs_scrub *sc,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- struct xfs_owner_info *oinfo)
+ struct xfs_scrub *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo)
{
xchk_xref_check_owner(sc, bno, len, oinfo, true);
}
@@ -218,10 +206,10 @@
/* xref check that the extent is not owned by a given owner */
void
xchk_xref_is_not_owned_by(
- struct xfs_scrub *sc,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- struct xfs_owner_info *oinfo)
+ struct xfs_scrub *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo)
{
xchk_xref_check_owner(sc, bno, len, oinfo, false);
}
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 665d4bb..c642bc2 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -9,19 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Set us up with the realtime metadata locked. */
int
@@ -141,9 +134,8 @@
startext = fsbno;
endext = fsbno + len - 1;
do_div(startext, sc->mp->m_sb.sb_rextsize);
- if (do_div(endext, sc->mp->m_sb.sb_rextsize))
- endext++;
- extcount = endext - startext;
+ do_div(endext, sc->mp->m_sb.sb_rextsize);
+ extcount = endext - startext + 1;
xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext, extcount,
&is_free);
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 4bfae1e..15c8c5f 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -9,37 +9,18 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_icache.h"
-#include "xfs_itable.h"
-#include "xfs_alloc.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_refcount.h"
-#include "xfs_refcount_btree.h"
-#include "xfs_rmap.h"
-#include "xfs_rmap_btree.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
-#include "xfs_log.h"
-#include "xfs_trans_priv.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
-#include "scrub/btree.h"
#include "scrub/repair.h"
+#include "scrub/health.h"
/*
* Online Scrub and Repair
@@ -186,8 +167,12 @@
xfs_irele(sc->ip);
sc->ip = NULL;
}
- if (sc->has_quotaofflock)
+ if (sc->flags & XCHK_REAPING_DISABLED)
+ xchk_start_reaping(sc);
+ if (sc->flags & XCHK_HAS_QUOTAOFFLOCK) {
mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
+ sc->flags &= ~XCHK_HAS_QUOTAOFFLOCK;
+ }
if (sc->buf) {
kmem_free(sc->buf);
sc->buf = NULL;
@@ -347,6 +332,12 @@
.scrub = xchk_quota,
.repair = xrep_notsupported,
},
+ [XFS_SCRUB_TYPE_FSCOUNTERS] = { /* fs summary counters */
+ .type = ST_FS,
+ .setup = xchk_setup_fscounters,
+ .scrub = xchk_fscounters,
+ .repair = xrep_notsupported,
+ },
};
/* This isn't a stable feature, warn once per day. */
@@ -412,19 +403,6 @@
goto out;
}
- error = -EOPNOTSUPP;
- /*
- * We won't scrub any filesystem that doesn't have the ability
- * to record unwritten extents. The option was made default in
- * 2003, removed from mkfs in 2007, and cannot be disabled in
- * v5, so if we find a filesystem without this flag it's either
- * really old or totally unsupported. Avoid it either way.
- * We also don't support v1-v3 filesystems, which aren't
- * mountable.
- */
- if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
- goto out;
-
/*
* We only want to repair read-write v5+ filesystems. Defer the check
* for ops->repair until after our scrub confirms that we need to
@@ -479,10 +457,14 @@
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm)
{
- struct xfs_scrub sc;
+ struct xfs_scrub sc = {
+ .mp = ip->i_mount,
+ .sm = sm,
+ .sa = {
+ .agno = NULLAGNUMBER,
+ },
+ };
struct xfs_mount *mp = ip->i_mount;
- bool try_harder = false;
- bool already_fixed = false;
int error = 0;
BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
@@ -504,21 +486,17 @@
xchk_experimental_warning(mp);
+ sc.ops = &meta_scrub_ops[sm->sm_type];
+ sc.sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
retry_op:
/* Set up for the operation. */
- memset(&sc, 0, sizeof(sc));
- sc.mp = ip->i_mount;
- sc.sm = sm;
- sc.ops = &meta_scrub_ops[sm->sm_type];
- sc.try_harder = try_harder;
- sc.sa.agno = NULLAGNUMBER;
error = sc.ops->setup(&sc, ip);
if (error)
goto out_teardown;
/* Scrub for errors. */
error = sc.ops->scrub(&sc);
- if (!try_harder && error == -EDEADLOCK) {
+ if (!(sc.flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
/*
* Scrubbers return -EDEADLOCK to mean 'try harder'.
* Tear down everything we hold, then set up again with
@@ -527,12 +505,15 @@
error = xchk_teardown(&sc, ip, 0);
if (error)
goto out;
- try_harder = true;
+ sc.flags |= XCHK_TRY_HARDER;
goto retry_op;
} else if (error)
goto out_teardown;
- if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && !already_fixed) {
+ xchk_update_health(&sc);
+
+ if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
+ !(sc.flags & XREP_ALREADY_FIXED)) {
bool needs_fix;
/* Let debug users force us into the repair routines. */
@@ -555,10 +536,13 @@
* If it's broken, userspace wants us to fix it, and we haven't
* already tried to fix it, then attempt a repair.
*/
- error = xrep_attempt(ip, &sc, &already_fixed);
+ error = xrep_attempt(ip, &sc);
if (error == -EAGAIN) {
- if (sc.try_harder)
- try_harder = true;
+ /*
+ * Either the repair function succeeded or it couldn't
+ * get all the resources it needs; either way, we go
+ * back to the beginning and call the scrub function.
+ */
error = xchk_teardown(&sc, ip, 0);
if (error) {
xrep_failure(mp);
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index af323b2..ad1ceb4 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -62,13 +62,27 @@
struct xfs_inode *ip;
void *buf;
uint ilock_flags;
- bool try_harder;
- bool has_quotaofflock;
+
+ /* See the XCHK/XREP state flags below. */
+ unsigned int flags;
+
+ /*
+ * The XFS_SICK_* flags that correspond to the metadata being scrubbed
+ * or repaired. We will use this mask to update the in-core fs health
+ * status with whatever we find.
+ */
+ unsigned int sick_mask;
/* State tracking for single-AG operations. */
struct xchk_ag sa;
};
+/* XCHK state flags grow up from zero, XREP state flags grow down from 2^31 */
+#define XCHK_TRY_HARDER (1U << 0) /* can't get resources, try again */
+#define XCHK_HAS_QUOTAOFFLOCK (1U << 1) /* we hold the quotaoff lock */
+#define XCHK_REAPING_DISABLED (1U << 2) /* background block reaping paused */
+#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */
+
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
int xchk_superblock(struct xfs_scrub *sc);
@@ -113,6 +127,7 @@
return -ENOENT;
}
#endif
+int xchk_fscounters(struct xfs_scrub *sc);
/* cross-referencing helpers */
void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno,
@@ -122,9 +137,9 @@
void xchk_xref_is_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
xfs_extlen_t len);
void xchk_xref_is_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
- xfs_extlen_t len, struct xfs_owner_info *oinfo);
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_is_not_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
- xfs_extlen_t len, struct xfs_owner_info *oinfo);
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_has_no_owner(struct xfs_scrub *sc, xfs_agblock_t agbno,
xfs_extlen_t len);
void xchk_xref_is_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
@@ -138,4 +153,12 @@
# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
#endif
+struct xchk_fscounters {
+ uint64_t icount;
+ uint64_t ifree;
+ uint64_t fdblocks;
+ unsigned long long icount_min;
+ unsigned long long icount_max;
+};
+
#endif /* __XFS_SCRUB_SCRUB_H__ */
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index f7ebaa9..5641ae5 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -9,19 +9,11 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
#include "xfs_symlink.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Set us up to scrub a symbolic link. */
int
@@ -30,7 +22,7 @@
struct xfs_inode *ip)
{
/* Allocate the buffer without the inode lock held. */
- sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, KM_SLEEP);
+ sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, 0);
if (!sc->buf)
return -ENOMEM;
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 96feaf8..9eaab2e 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -10,15 +10,9 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_trans.h"
-#include "xfs_bit.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
-#include "scrub/common.h"
/* Figure out which block the btree cursor was pointing to. */
static inline xfs_fsblock_t
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 4e20f0e..3362bae 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -12,6 +12,73 @@
#include <linux/tracepoint.h>
#include "xfs_bit.h"
+/*
+ * ftrace's __print_symbolic requires that all enum values be wrapped in the
+ * TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace
+ * ring buffer. Somehow this was only worth mentioning in the ftrace sample
+ * code.
+ */
+TRACE_DEFINE_ENUM(XFS_BTNUM_BNOi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_CNTi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_BMAPi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_INOi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_FINOi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_RMAPi);
+TRACE_DEFINE_ENUM(XFS_BTNUM_REFCi);
+
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PROBE);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_SB);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_AGF);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_AGFL);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_AGI);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BNOBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_CNTBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_INOBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FINOBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RMAPBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_REFCNTBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_INODE);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BMBTD);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BMBTA);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BMBTC);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIR);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_XATTR);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_SYMLINK);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PARENT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTBITMAP);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTSUM);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_UQUOTA);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_GQUOTA);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PQUOTA);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
+
+#define XFS_SCRUB_TYPE_STRINGS \
+ { XFS_SCRUB_TYPE_PROBE, "probe" }, \
+ { XFS_SCRUB_TYPE_SB, "sb" }, \
+ { XFS_SCRUB_TYPE_AGF, "agf" }, \
+ { XFS_SCRUB_TYPE_AGFL, "agfl" }, \
+ { XFS_SCRUB_TYPE_AGI, "agi" }, \
+ { XFS_SCRUB_TYPE_BNOBT, "bnobt" }, \
+ { XFS_SCRUB_TYPE_CNTBT, "cntbt" }, \
+ { XFS_SCRUB_TYPE_INOBT, "inobt" }, \
+ { XFS_SCRUB_TYPE_FINOBT, "finobt" }, \
+ { XFS_SCRUB_TYPE_RMAPBT, "rmapbt" }, \
+ { XFS_SCRUB_TYPE_REFCNTBT, "refcountbt" }, \
+ { XFS_SCRUB_TYPE_INODE, "inode" }, \
+ { XFS_SCRUB_TYPE_BMBTD, "bmapbtd" }, \
+ { XFS_SCRUB_TYPE_BMBTA, "bmapbta" }, \
+ { XFS_SCRUB_TYPE_BMBTC, "bmapbtc" }, \
+ { XFS_SCRUB_TYPE_DIR, "directory" }, \
+ { XFS_SCRUB_TYPE_XATTR, "xattr" }, \
+ { XFS_SCRUB_TYPE_SYMLINK, "symlink" }, \
+ { XFS_SCRUB_TYPE_PARENT, "parent" }, \
+ { XFS_SCRUB_TYPE_RTBITMAP, "rtbitmap" }, \
+ { XFS_SCRUB_TYPE_RTSUM, "rtsummary" }, \
+ { XFS_SCRUB_TYPE_UQUOTA, "usrquota" }, \
+ { XFS_SCRUB_TYPE_GQUOTA, "grpquota" }, \
+ { XFS_SCRUB_TYPE_PQUOTA, "prjquota" }, \
+ { XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" }
+
DECLARE_EVENT_CLASS(xchk_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
int error),
@@ -36,10 +103,10 @@
__entry->flags = sm->sm_flags;
__entry->error = error;
),
- TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d",
+ TP_printk("dev %d:%d ino 0x%llx type %s agno %u inum %llu gen %u flags 0x%x error %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->agno,
__entry->inum,
__entry->gen,
@@ -78,9 +145,9 @@
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS",
+ TP_printk("dev %d:%d type %s agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->agno,
__entry->bno,
__entry->error,
@@ -109,11 +176,11 @@
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %s offset %llu error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->offset,
__entry->error,
__entry->ret_ip)
@@ -144,9 +211,9 @@
__entry->bno = bno;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS",
+ TP_printk("dev %d:%d type %s agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->agno,
__entry->bno,
__entry->ret_ip)
@@ -158,6 +225,7 @@
void *ret_ip), \
TP_ARGS(sc, daddr, ret_ip))
+DEFINE_SCRUB_BLOCK_ERROR_EVENT(xchk_fs_error);
DEFINE_SCRUB_BLOCK_ERROR_EVENT(xchk_block_error);
DEFINE_SCRUB_BLOCK_ERROR_EVENT(xchk_block_preen);
@@ -176,10 +244,10 @@
__entry->type = sc->sm->sm_type;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx type %u ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx type %s ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->ret_ip)
)
@@ -213,11 +281,11 @@
__entry->offset = offset;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->offset,
__entry->ret_ip)
);
@@ -244,9 +312,9 @@
__entry->type = sc->sm->sm_type;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u ret_ip %pS",
+ TP_printk("dev %d:%d type %s ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->ret_ip)
);
@@ -278,10 +346,10 @@
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
+ TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
- __entry->btnum,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->ptr,
__entry->agno,
@@ -321,12 +389,12 @@
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %s btree %s level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
- __entry->type,
- __entry->btnum,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->ptr,
__entry->agno,
@@ -360,10 +428,10 @@
__entry->ptr = cur->bc_ptrs[level];
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
+ TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
- __entry->btnum,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->ptr,
__entry->agno,
@@ -400,12 +468,12 @@
__entry->ptr = cur->bc_ptrs[level];
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %s btree %s level %d ptr %d agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
- __entry->type,
- __entry->btnum,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->ptr,
__entry->agno,
@@ -439,10 +507,10 @@
__entry->nlevels = cur->bc_nlevels;
__entry->ptr = cur->bc_ptrs[level];
),
- TP_printk("dev %d:%d type %u btnum %d agno %u agbno %u level %d nlevels %d ptr %d",
+ TP_printk("dev %d:%d type %s btree %s agno %u agbno %u level %d nlevels %d ptr %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
- __entry->btnum,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->agno,
__entry->bno,
__entry->level,
@@ -473,13 +541,116 @@
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u xref error %d ret_ip %pF",
+ TP_printk("dev %d:%d type %s xref error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->error,
__entry->ret_ip)
);
+TRACE_EVENT(xchk_iallocbt_check_cluster,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agino_t startino, xfs_daddr_t map_daddr,
+ unsigned short map_len, unsigned int chunk_ino,
+ unsigned int nr_inodes, uint16_t cluster_mask,
+ uint16_t holemask, unsigned int cluster_ino),
+ TP_ARGS(mp, agno, startino, map_daddr, map_len, chunk_ino, nr_inodes,
+ cluster_mask, holemask, cluster_ino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, startino)
+ __field(xfs_daddr_t, map_daddr)
+ __field(unsigned short, map_len)
+ __field(unsigned int, chunk_ino)
+ __field(unsigned int, nr_inodes)
+ __field(unsigned int, cluster_ino)
+ __field(uint16_t, cluster_mask)
+ __field(uint16_t, holemask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->startino = startino;
+ __entry->map_daddr = map_daddr;
+ __entry->map_len = map_len;
+ __entry->chunk_ino = chunk_ino;
+ __entry->nr_inodes = nr_inodes;
+ __entry->cluster_mask = cluster_mask;
+ __entry->holemask = holemask;
+ __entry->cluster_ino = cluster_ino;
+ ),
+ TP_printk("dev %d:%d agno %u startino %u daddr 0x%llx len %d chunkino %u nr_inodes %u cluster_mask 0x%x holemask 0x%x cluster_ino %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->startino,
+ __entry->map_daddr,
+ __entry->map_len,
+ __entry->chunk_ino,
+ __entry->nr_inodes,
+ __entry->cluster_mask,
+ __entry->holemask,
+ __entry->cluster_ino)
+)
+
+TRACE_EVENT(xchk_fscounters_calc,
+ TP_PROTO(struct xfs_mount *mp, uint64_t icount, uint64_t ifree,
+ uint64_t fdblocks, uint64_t delalloc),
+ TP_ARGS(mp, icount, ifree, fdblocks, delalloc),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int64_t, icount_sb)
+ __field(uint64_t, icount_calculated)
+ __field(int64_t, ifree_sb)
+ __field(uint64_t, ifree_calculated)
+ __field(int64_t, fdblocks_sb)
+ __field(uint64_t, fdblocks_calculated)
+ __field(uint64_t, delalloc)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->icount_sb = mp->m_sb.sb_icount;
+ __entry->icount_calculated = icount;
+ __entry->ifree_sb = mp->m_sb.sb_ifree;
+ __entry->ifree_calculated = ifree;
+ __entry->fdblocks_sb = mp->m_sb.sb_fdblocks;
+ __entry->fdblocks_calculated = fdblocks;
+ __entry->delalloc = delalloc;
+ ),
+ TP_printk("dev %d:%d icount %lld:%llu ifree %lld:%llu fdblocks %lld:%llu delalloc %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->icount_sb,
+ __entry->icount_calculated,
+ __entry->ifree_sb,
+ __entry->ifree_calculated,
+ __entry->fdblocks_sb,
+ __entry->fdblocks_calculated,
+ __entry->delalloc)
+)
+
+TRACE_EVENT(xchk_fscounters_within_range,
+ TP_PROTO(struct xfs_mount *mp, uint64_t expected, int64_t curr_value,
+ int64_t old_value),
+ TP_ARGS(mp, expected, curr_value, old_value),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(uint64_t, expected)
+ __field(int64_t, curr_value)
+ __field(int64_t, old_value)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->expected = expected;
+ __entry->curr_value = curr_value;
+ __entry->old_value = old_value;
+ ),
+ TP_printk("dev %d:%d expected %llu curr_value %lld old_value %lld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->expected,
+ __entry->curr_value,
+ __entry->old_value)
+)
+
/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
@@ -598,11 +769,11 @@
__entry->agbno = agbno;
__entry->btnum = btnum;
),
- TP_printk("dev %d:%d agno %u agbno %u btnum %d",
+ TP_printk("dev %d:%d agno %u agbno %u btree %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
- __entry->btnum)
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS))
)
TRACE_EVENT(xrep_findroot_block,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,