blob: e386c9b0b4ab7de6bc2d42fddcf204539d8d2a28 [file] [log] [blame]
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h"
19#include "xfs_icache.h"
20#include "xfs_inode_buf.h"
21#include "xfs_inode_fork.h"
22#include "xfs_ialloc.h"
23#include "xfs_da_format.h"
24#include "xfs_reflink.h"
25#include "xfs_rmap.h"
26#include "xfs_bmap.h"
27#include "xfs_bmap_util.h"
28#include "scrub/xfs_scrub.h"
29#include "scrub/scrub.h"
30#include "scrub/common.h"
31#include "scrub/btree.h"
32#include "scrub/trace.h"
33
34/*
35 * Grab total control of the inode metadata. It doesn't matter here if
36 * the file data is still changing; exclusive access to the metadata is
37 * the goal.
38 */
39int
40xchk_setup_inode(
41 struct xfs_scrub *sc,
42 struct xfs_inode *ip)
43{
44 int error;
45
46 /*
47 * Try to get the inode. If the verifiers fail, we try again
48 * in raw mode.
49 */
50 error = xchk_get_inode(sc, ip);
51 switch (error) {
52 case 0:
53 break;
54 case -EFSCORRUPTED:
55 case -EFSBADCRC:
56 return xchk_trans_alloc(sc, 0);
57 default:
58 return error;
59 }
60
61 /* Got the inode, lock it and we're ready to go. */
62 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
63 xfs_ilock(sc->ip, sc->ilock_flags);
64 error = xchk_trans_alloc(sc, 0);
65 if (error)
66 goto out;
67 sc->ilock_flags |= XFS_ILOCK_EXCL;
68 xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
69
70out:
71 /* scrub teardown will unlock and release the inode for us */
72 return error;
73}
74
75/* Inode core */
76
77/* Validate di_extsize hint. */
78STATIC void
79xchk_inode_extsize(
80 struct xfs_scrub *sc,
81 struct xfs_dinode *dip,
82 xfs_ino_t ino,
83 uint16_t mode,
84 uint16_t flags)
85{
86 xfs_failaddr_t fa;
87
88 fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
89 mode, flags);
90 if (fa)
91 xchk_ino_set_corrupt(sc, ino);
92}
93
94/*
95 * Validate di_cowextsize hint.
96 *
97 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
98 * These functions must be kept in sync with each other.
99 */
100STATIC void
101xchk_inode_cowextsize(
102 struct xfs_scrub *sc,
103 struct xfs_dinode *dip,
104 xfs_ino_t ino,
105 uint16_t mode,
106 uint16_t flags,
107 uint64_t flags2)
108{
109 xfs_failaddr_t fa;
110
111 fa = xfs_inode_validate_cowextsize(sc->mp,
112 be32_to_cpu(dip->di_cowextsize), mode, flags,
113 flags2);
114 if (fa)
115 xchk_ino_set_corrupt(sc, ino);
116}
117
118/* Make sure the di_flags make sense for the inode. */
119STATIC void
120xchk_inode_flags(
121 struct xfs_scrub *sc,
122 struct xfs_dinode *dip,
123 xfs_ino_t ino,
124 uint16_t mode,
125 uint16_t flags)
126{
127 struct xfs_mount *mp = sc->mp;
128
129 /* di_flags are all taken, last bit cannot be used */
130 if (flags & ~XFS_DIFLAG_ANY)
131 goto bad;
132
133 /* rt flags require rt device */
134 if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) &&
135 !mp->m_rtdev_targp)
136 goto bad;
137
138 /* new rt bitmap flag only valid for rbmino */
139 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino)
140 goto bad;
141
142 /* directory-only flags */
143 if ((flags & (XFS_DIFLAG_RTINHERIT |
144 XFS_DIFLAG_EXTSZINHERIT |
145 XFS_DIFLAG_PROJINHERIT |
146 XFS_DIFLAG_NOSYMLINKS)) &&
147 !S_ISDIR(mode))
148 goto bad;
149
150 /* file-only flags */
151 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) &&
152 !S_ISREG(mode))
153 goto bad;
154
155 /* filestreams and rt make no sense */
156 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME))
157 goto bad;
158
159 return;
160bad:
161 xchk_ino_set_corrupt(sc, ino);
162}
163
164/* Make sure the di_flags2 make sense for the inode. */
165STATIC void
166xchk_inode_flags2(
167 struct xfs_scrub *sc,
168 struct xfs_dinode *dip,
169 xfs_ino_t ino,
170 uint16_t mode,
171 uint16_t flags,
172 uint64_t flags2)
173{
174 struct xfs_mount *mp = sc->mp;
175
176 /* Unknown di_flags2 could be from a future kernel */
177 if (flags2 & ~XFS_DIFLAG2_ANY)
178 xchk_ino_set_warning(sc, ino);
179
180 /* reflink flag requires reflink feature */
181 if ((flags2 & XFS_DIFLAG2_REFLINK) &&
182 !xfs_sb_version_hasreflink(&mp->m_sb))
183 goto bad;
184
185 /* cowextsize flag is checked w.r.t. mode separately */
186
187 /* file/dir-only flags */
188 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode)))
189 goto bad;
190
191 /* file-only flags */
192 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode))
193 goto bad;
194
195 /* realtime and reflink make no sense, currently */
196 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK))
197 goto bad;
198
199 /* dax and reflink make no sense, currently */
200 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
201 goto bad;
202
203 return;
204bad:
205 xchk_ino_set_corrupt(sc, ino);
206}
207
208/* Scrub all the ondisk inode fields. */
209STATIC void
210xchk_dinode(
211 struct xfs_scrub *sc,
212 struct xfs_dinode *dip,
213 xfs_ino_t ino)
214{
215 struct xfs_mount *mp = sc->mp;
216 size_t fork_recs;
217 unsigned long long isize;
218 uint64_t flags2;
219 uint32_t nextents;
220 uint16_t flags;
221 uint16_t mode;
222
223 flags = be16_to_cpu(dip->di_flags);
224 if (dip->di_version >= 3)
225 flags2 = be64_to_cpu(dip->di_flags2);
226 else
227 flags2 = 0;
228
229 /* di_mode */
230 mode = be16_to_cpu(dip->di_mode);
231 switch (mode & S_IFMT) {
232 case S_IFLNK:
233 case S_IFREG:
234 case S_IFDIR:
235 case S_IFCHR:
236 case S_IFBLK:
237 case S_IFIFO:
238 case S_IFSOCK:
239 /* mode is recognized */
240 break;
241 default:
242 xchk_ino_set_corrupt(sc, ino);
243 break;
244 }
245
246 /* v1/v2 fields */
247 switch (dip->di_version) {
248 case 1:
249 /*
250 * We autoconvert v1 inodes into v2 inodes on writeout,
251 * so just mark this inode for preening.
252 */
253 xchk_ino_set_preen(sc, ino);
254 break;
255 case 2:
256 case 3:
257 if (dip->di_onlink != 0)
258 xchk_ino_set_corrupt(sc, ino);
259
260 if (dip->di_mode == 0 && sc->ip)
261 xchk_ino_set_corrupt(sc, ino);
262
263 if (dip->di_projid_hi != 0 &&
264 !xfs_sb_version_hasprojid32bit(&mp->m_sb))
265 xchk_ino_set_corrupt(sc, ino);
266 break;
267 default:
268 xchk_ino_set_corrupt(sc, ino);
269 return;
270 }
271
272 /*
273 * di_uid/di_gid -- -1 isn't invalid, but there's no way that
274 * userspace could have created that.
275 */
276 if (dip->di_uid == cpu_to_be32(-1U) ||
277 dip->di_gid == cpu_to_be32(-1U))
278 xchk_ino_set_warning(sc, ino);
279
280 /* di_format */
281 switch (dip->di_format) {
282 case XFS_DINODE_FMT_DEV:
283 if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
284 !S_ISFIFO(mode) && !S_ISSOCK(mode))
285 xchk_ino_set_corrupt(sc, ino);
286 break;
287 case XFS_DINODE_FMT_LOCAL:
288 if (!S_ISDIR(mode) && !S_ISLNK(mode))
289 xchk_ino_set_corrupt(sc, ino);
290 break;
291 case XFS_DINODE_FMT_EXTENTS:
292 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
293 xchk_ino_set_corrupt(sc, ino);
294 break;
295 case XFS_DINODE_FMT_BTREE:
296 if (!S_ISREG(mode) && !S_ISDIR(mode))
297 xchk_ino_set_corrupt(sc, ino);
298 break;
299 case XFS_DINODE_FMT_UUID:
300 default:
301 xchk_ino_set_corrupt(sc, ino);
302 break;
303 }
304
305 /* di_[amc]time.nsec */
306 if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
307 xchk_ino_set_corrupt(sc, ino);
308 if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
309 xchk_ino_set_corrupt(sc, ino);
310 if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
311 xchk_ino_set_corrupt(sc, ino);
312
313 /*
314 * di_size. xfs_dinode_verify checks for things that screw up
315 * the VFS such as the upper bit being set and zero-length
316 * symlinks/directories, but we can do more here.
317 */
318 isize = be64_to_cpu(dip->di_size);
319 if (isize & (1ULL << 63))
320 xchk_ino_set_corrupt(sc, ino);
321
322 /* Devices, fifos, and sockets must have zero size */
323 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
324 xchk_ino_set_corrupt(sc, ino);
325
326 /* Directories can't be larger than the data section size (32G) */
327 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
328 xchk_ino_set_corrupt(sc, ino);
329
330 /* Symlinks can't be larger than SYMLINK_MAXLEN */
331 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
332 xchk_ino_set_corrupt(sc, ino);
333
334 /*
335 * Warn if the running kernel can't handle the kinds of offsets
336 * needed to deal with the file size. In other words, if the
337 * pagecache can't cache all the blocks in this file due to
338 * overly large offsets, flag the inode for admin review.
339 */
340 if (isize >= mp->m_super->s_maxbytes)
341 xchk_ino_set_warning(sc, ino);
342
343 /* di_nblocks */
344 if (flags2 & XFS_DIFLAG2_REFLINK) {
345 ; /* nblocks can exceed dblocks */
346 } else if (flags & XFS_DIFLAG_REALTIME) {
347 /*
348 * nblocks is the sum of data extents (in the rtdev),
349 * attr extents (in the datadev), and both forks' bmbt
350 * blocks (in the datadev). This clumsy check is the
351 * best we can do without cross-referencing with the
352 * inode forks.
353 */
354 if (be64_to_cpu(dip->di_nblocks) >=
355 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
356 xchk_ino_set_corrupt(sc, ino);
357 } else {
358 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
359 xchk_ino_set_corrupt(sc, ino);
360 }
361
362 xchk_inode_flags(sc, dip, ino, mode, flags);
363
364 xchk_inode_extsize(sc, dip, ino, mode, flags);
365
366 /* di_nextents */
367 nextents = be32_to_cpu(dip->di_nextents);
368 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
369 switch (dip->di_format) {
370 case XFS_DINODE_FMT_EXTENTS:
371 if (nextents > fork_recs)
372 xchk_ino_set_corrupt(sc, ino);
373 break;
374 case XFS_DINODE_FMT_BTREE:
375 if (nextents <= fork_recs)
376 xchk_ino_set_corrupt(sc, ino);
377 break;
378 default:
379 if (nextents != 0)
380 xchk_ino_set_corrupt(sc, ino);
381 break;
382 }
383
384 /* di_forkoff */
385 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
386 xchk_ino_set_corrupt(sc, ino);
387 if (dip->di_anextents != 0 && dip->di_forkoff == 0)
388 xchk_ino_set_corrupt(sc, ino);
389 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
390 xchk_ino_set_corrupt(sc, ino);
391
392 /* di_aformat */
393 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
394 dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
395 dip->di_aformat != XFS_DINODE_FMT_BTREE)
396 xchk_ino_set_corrupt(sc, ino);
397
398 /* di_anextents */
399 nextents = be16_to_cpu(dip->di_anextents);
400 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
401 switch (dip->di_aformat) {
402 case XFS_DINODE_FMT_EXTENTS:
403 if (nextents > fork_recs)
404 xchk_ino_set_corrupt(sc, ino);
405 break;
406 case XFS_DINODE_FMT_BTREE:
407 if (nextents <= fork_recs)
408 xchk_ino_set_corrupt(sc, ino);
409 break;
410 default:
411 if (nextents != 0)
412 xchk_ino_set_corrupt(sc, ino);
413 }
414
415 if (dip->di_version >= 3) {
416 if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
417 xchk_ino_set_corrupt(sc, ino);
418 xchk_inode_flags2(sc, dip, ino, mode, flags, flags2);
419 xchk_inode_cowextsize(sc, dip, ino, mode, flags,
420 flags2);
421 }
422}
423
424/*
425 * Make sure the finobt doesn't think this inode is free.
426 * We don't have to check the inobt ourselves because we got the inode via
427 * IGET_UNTRUSTED, which checks the inobt for us.
428 */
429static void
430xchk_inode_xref_finobt(
431 struct xfs_scrub *sc,
432 xfs_ino_t ino)
433{
434 struct xfs_inobt_rec_incore rec;
435 xfs_agino_t agino;
436 int has_record;
437 int error;
438
439 if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm))
440 return;
441
442 agino = XFS_INO_TO_AGINO(sc->mp, ino);
443
444 /*
445 * Try to get the finobt record. If we can't get it, then we're
446 * in good shape.
447 */
448 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE,
449 &has_record);
450 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
451 !has_record)
452 return;
453
454 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record);
455 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
456 !has_record)
457 return;
458
459 /*
460 * Otherwise, make sure this record either doesn't cover this inode,
461 * or that it does but it's marked present.
462 */
463 if (rec.ir_startino > agino ||
464 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
465 return;
466
467 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))
468 xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0);
469}
470
471/* Cross reference the inode fields with the forks. */
472STATIC void
473xchk_inode_xref_bmap(
474 struct xfs_scrub *sc,
475 struct xfs_dinode *dip)
476{
477 xfs_extnum_t nextents;
478 xfs_filblks_t count;
479 xfs_filblks_t acount;
480 int error;
481
482 if (xchk_skip_xref(sc->sm))
483 return;
484
485 /* Walk all the extents to check nextents/naextents/nblocks. */
486 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
487 &nextents, &count);
488 if (!xchk_should_check_xref(sc, &error, NULL))
489 return;
490 if (nextents < be32_to_cpu(dip->di_nextents))
491 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
492
493 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
494 &nextents, &acount);
495 if (!xchk_should_check_xref(sc, &error, NULL))
496 return;
497 if (nextents != be16_to_cpu(dip->di_anextents))
498 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
499
500 /* Check nblocks against the inode. */
501 if (count + acount != be64_to_cpu(dip->di_nblocks))
502 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
503}
504
505/* Cross-reference with the other btrees. */
506STATIC void
507xchk_inode_xref(
508 struct xfs_scrub *sc,
509 xfs_ino_t ino,
510 struct xfs_dinode *dip)
511{
512 struct xfs_owner_info oinfo;
513 xfs_agnumber_t agno;
514 xfs_agblock_t agbno;
515 int error;
516
517 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
518 return;
519
520 agno = XFS_INO_TO_AGNO(sc->mp, ino);
521 agbno = XFS_INO_TO_AGBNO(sc->mp, ino);
522
523 error = xchk_ag_init(sc, agno, &sc->sa);
524 if (!xchk_xref_process_error(sc, agno, agbno, &error))
525 return;
526
527 xchk_xref_is_used_space(sc, agbno, 1);
528 xchk_inode_xref_finobt(sc, ino);
529 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
530 xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
531 xchk_xref_is_not_shared(sc, agbno, 1);
532 xchk_inode_xref_bmap(sc, dip);
533
534 xchk_ag_free(sc, &sc->sa);
535}
536
537/*
538 * If the reflink iflag disagrees with a scan for shared data fork extents,
539 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o
540 * any shared extents). We already checked for reflink iflag set on a non
541 * reflink filesystem.
542 */
543static void
544xchk_inode_check_reflink_iflag(
545 struct xfs_scrub *sc,
546 xfs_ino_t ino)
547{
548 struct xfs_mount *mp = sc->mp;
549 bool has_shared;
550 int error;
551
552 if (!xfs_sb_version_hasreflink(&mp->m_sb))
553 return;
554
555 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
556 &has_shared);
557 if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
558 XFS_INO_TO_AGBNO(mp, ino), &error))
559 return;
560 if (xfs_is_reflink_inode(sc->ip) && !has_shared)
561 xchk_ino_set_preen(sc, ino);
562 else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
563 xchk_ino_set_corrupt(sc, ino);
564}
565
566/* Scrub an inode. */
567int
568xchk_inode(
569 struct xfs_scrub *sc)
570{
571 struct xfs_dinode di;
572 int error = 0;
573
574 /*
575 * If sc->ip is NULL, that means that the setup function called
576 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED
577 * and a NULL inode, so flag the corruption error and return.
578 */
579 if (!sc->ip) {
580 xchk_ino_set_corrupt(sc, sc->sm->sm_ino);
581 return 0;
582 }
583
584 /* Scrub the inode core. */
585 xfs_inode_to_disk(sc->ip, &di, 0);
586 xchk_dinode(sc, &di, sc->ip->i_ino);
587 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
588 goto out;
589
590 /*
591 * Look for discrepancies between file's data blocks and the reflink
592 * iflag. We already checked the iflag against the file mode when
593 * we scrubbed the dinode.
594 */
595 if (S_ISREG(VFS_I(sc->ip)->i_mode))
596 xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino);
597
598 xchk_inode_xref(sc, sc->ip->i_ino, &di);
599out:
600 return error;
601}