Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 533b04a..436f686 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2209,6 +2209,7 @@
new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
new->xefi_blockcount = 1;
new->xefi_oinfo = *oinfo;
+ new->xefi_skip_discard = false;
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
@@ -2598,6 +2599,13 @@
be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
return __this_address;
+ if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
+ return __this_address;
+
+ if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
+ be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
+ return __this_address;
+
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
@@ -2609,6 +2617,10 @@
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
return __this_address;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+ be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
+ return __this_address;
+
/*
* during growfs operations, the perag is not fully initialised,
* so we can't use it for any useful checking. growfs ensures we can't
@@ -2623,6 +2635,11 @@
return __this_address;
if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+ be32_to_cpu(agf->agf_refcount_blocks) >
+ be32_to_cpu(agf->agf_length))
+ return __this_address;
+
+ if (xfs_sb_version_hasreflink(&mp->m_sb) &&
(be32_to_cpu(agf->agf_refcount_level) < 1 ||
be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
return __this_address;
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 94badfa..91c2cb1 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -26,7 +26,7 @@
*========================================================================*/
-#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */
+#define ATTR_DONTFOLLOW 0x0001 /* -- ignored, from IRIX -- */
#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */
#define ATTR_TRUST 0x0004 /* -- unused, from IRIX -- */
#define ATTR_SECURE 0x0008 /* use attrs in security namespace */
@@ -37,7 +37,10 @@
#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
#define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */
-#define ATTR_ALLOC 0x8000 /* allocate xattr buffer on demand */
+#define ATTR_ALLOC 0x8000 /* [kernel] allocate xattr buffer on demand */
+
+#define ATTR_KERNEL_FLAGS \
+ (ATTR_KERNOTIME | ATTR_KERNOVAL | ATTR_INCOMPLETE | ATTR_ALLOC)
#define XFS_ATTR_FLAGS \
{ ATTR_DONTFOLLOW, "DONTFOLLOW" }, \
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index f0089e8..de33efc 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -453,13 +453,15 @@
* special case for dev/uuid inodes, they have fixed size data forks.
*/
int
-xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
+xfs_attr_shortform_bytesfit(
+ struct xfs_inode *dp,
+ int bytes)
{
- int offset;
- int minforkoff; /* lower limit on valid forkoff locations */
- int maxforkoff; /* upper limit on valid forkoff locations */
- int dsize;
- xfs_mount_t *mp = dp->i_mount;
+ struct xfs_mount *mp = dp->i_mount;
+ int64_t dsize;
+ int minforkoff;
+ int maxforkoff;
+ int offset;
/* rounded down */
offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3;
@@ -525,7 +527,7 @@
* A data fork btree root must have space for at least
* MINDBTPTRS key/ptr pairs if the data fork is small or empty.
*/
- minforkoff = max(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
+ minforkoff = max_t(int64_t, dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
minforkoff = roundup(minforkoff, 8) >> 3;
/* attr fork btree root can have at least this many key/ptr pairs */
@@ -583,8 +585,8 @@
ASSERT(ifp->if_flags & XFS_IFINLINE);
}
xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK);
- hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data;
- hdr->count = 0;
+ hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data;
+ memset(hdr, 0, sizeof(*hdr));
hdr->totsize = cpu_to_be16(sizeof(*hdr));
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
@@ -924,7 +926,7 @@
char *endp;
struct xfs_ifork *ifp;
int i;
- int size;
+ int64_t size;
ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL);
ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
@@ -946,8 +948,10 @@
* struct xfs_attr_sf_entry has a variable length.
* Check the fixed-offset parts of the structure are
* within the data buffer.
+ * xfs_attr_sf_entry is defined with a 1-byte variable
+ * array at the end, so we must subtract that off.
*/
- if (((char *)sfep + sizeof(*sfep)) >= endp)
+ if (((char *)sfep + sizeof(*sfep) - 1) >= endp)
return __this_address;
/* Don't allow names with known bad length. */
@@ -1447,7 +1451,9 @@
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
if (ichdr->freemap[i].base == tmp) {
ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
- ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t);
+ ichdr->freemap[i].size -=
+ min_t(uint16_t, ichdr->freemap[i].size,
+ sizeof(xfs_attr_leaf_entry_t));
}
}
ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 02469d5..c114d24 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4985,20 +4985,25 @@
flags = XFS_ILOG_CORE;
if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
- xfs_fsblock_t bno;
xfs_filblks_t len;
xfs_extlen_t mod;
- bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize,
- &mod);
- ASSERT(mod == 0);
len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
&mod);
ASSERT(mod == 0);
- error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
- if (error)
- goto done;
+ if (!(bflags & XFS_BMAPI_REMAP)) {
+ xfs_fsblock_t bno;
+
+ bno = div_u64_rem(del->br_startblock,
+ mp->m_sb.sb_rextsize, &mod);
+ ASSERT(mod == 0);
+
+ error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+ if (error)
+ goto done;
+ }
+
do_fx = 0;
nblks = len * mp->m_sb.sb_rextsize;
qfield = XFS_TRANS_DQ_RTBCOUNT;
@@ -5300,7 +5305,7 @@
* Make sure we don't touch multiple AGF headers out of order
* in a single transaction, as that could cause AB-BA deadlocks.
*/
- if (!wasdel) {
+ if (!wasdel && !isrt) {
agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
if (prev_agno != NULLAGNUMBER && prev_agno > agno)
break;
@@ -5376,16 +5381,17 @@
}
div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
if (mod) {
+ xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
+
/*
* Realtime extent is lined up at the end but not
* at the front. We'll get rid of full extents if
* we can.
*/
- mod = mp->m_sb.sb_rextsize - mod;
- if (del.br_blockcount > mod) {
- del.br_blockcount -= mod;
- del.br_startoff += mod;
- del.br_startblock += mod;
+ if (del.br_blockcount > off) {
+ del.br_blockcount -= off;
+ del.br_startoff += off;
+ del.br_startblock += off;
} else if (del.br_startoff == start &&
(del.br_state == XFS_EXT_UNWRITTEN ||
tp->t_blk_res == 0)) {
@@ -5403,6 +5409,7 @@
continue;
} else if (del.br_state == XFS_EXT_UNWRITTEN) {
struct xfs_bmbt_irec prev;
+ xfs_fileoff_t unwrite_start;
/*
* This one is already unwritten.
@@ -5416,12 +5423,13 @@
ASSERT(!isnullstartblock(prev.br_startblock));
ASSERT(del.br_startblock ==
prev.br_startblock + prev.br_blockcount);
- if (prev.br_startoff < start) {
- mod = start - prev.br_startoff;
- prev.br_blockcount -= mod;
- prev.br_startblock += mod;
- prev.br_startoff = start;
- }
+ unwrite_start = max3(start,
+ del.br_startoff - mod,
+ prev.br_startoff);
+ mod = unwrite_start - prev.br_startoff;
+ prev.br_startoff = unwrite_start;
+ prev.br_startblock += mod;
+ prev.br_blockcount -= mod;
prev.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp,
ip, whichfork, &icur, &cur,
@@ -6179,7 +6187,7 @@
isrt = XFS_IS_REALTIME_INODE(ip);
endfsb = irec->br_startblock + irec->br_blockcount - 1;
- if (isrt) {
+ if (isrt && whichfork == XFS_DATA_FORK) {
if (!xfs_verify_rtbno(mp, irec->br_startblock))
return __this_address;
if (!xfs_verify_rtbno(mp, endfsb))
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index e2798c6..093716a 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -52,9 +52,9 @@
{
xfs_fsblock_t xefi_startblock;/* starting fs block number */
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
+ bool xefi_skip_discard;
struct list_head xefi_list;
struct xfs_owner_info xefi_oinfo; /* extent owner */
- bool xefi_skip_discard;
};
#define XFS_BMAP_MAX_NMAP 4
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 705c4f5..99d5b2e 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -210,6 +210,7 @@
if (fa) {
xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
xfs_trans_brelse(tp, *bpp);
+ *bpp = NULL;
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 85f14fc..ae16ca7 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -628,7 +628,7 @@
int i;
int i8count;
int offset;
- int size;
+ int64_t size;
int error;
uint8_t filetype;
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 588d446..443cf33 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -679,7 +679,7 @@
args.minalignslop = igeo->cluster_align - 1;
/* Allow space for the inode btree to split. */
- args.minleft = igeo->inobt_maxlevels - 1;
+ args.minleft = igeo->inobt_maxlevels;
if ((error = xfs_alloc_vextent(&args)))
return error;
@@ -727,7 +727,7 @@
/*
* Allow space for the inode btree to split.
*/
- args.minleft = igeo->inobt_maxlevels - 1;
+ args.minleft = igeo->inobt_maxlevels;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 7bc8740..5245180 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -596,7 +596,7 @@
struct xfs_ifork *ifp,
struct xfs_iext_cursor *cur)
{
- size_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
+ int64_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
void *new;
/* account for the prev/next pointers */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index c643bee..8fdd042 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -129,7 +129,7 @@
struct xfs_inode *ip,
int whichfork,
const void *data,
- int size)
+ int64_t size)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
int mem_size = size, real_size = 0;
@@ -467,11 +467,11 @@
void
xfs_idata_realloc(
struct xfs_inode *ip,
- int byte_diff,
+ int64_t byte_diff,
int whichfork)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
- int new_size = (int)ifp->if_bytes + byte_diff;
+ int64_t new_size = ifp->if_bytes + byte_diff;
ASSERT(new_size >= 0);
ASSERT(new_size <= XFS_IFORK_SIZE(ip, whichfork));
@@ -552,7 +552,7 @@
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_iext_cursor icur;
struct xfs_bmbt_irec rec;
- int copied = 0;
+ int64_t copied = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
ASSERT(ifp->if_bytes > 0);
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 00c62ce..7b845c0 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -13,16 +13,16 @@
* File incore extent information, present for each of data & attr forks.
*/
struct xfs_ifork {
- int if_bytes; /* bytes in if_u1 */
- unsigned int if_seq; /* fork mod counter */
+ int64_t if_bytes; /* bytes in if_u1 */
struct xfs_btree_block *if_broot; /* file's incore btree root */
- short if_broot_bytes; /* bytes allocated for root */
- unsigned char if_flags; /* per-fork flags */
+ unsigned int if_seq; /* fork mod counter */
int if_height; /* height of the extent tree */
union {
void *if_root; /* extent tree root */
char *if_data; /* inline file data */
} if_u1;
+ short if_broot_bytes; /* bytes allocated for root */
+ unsigned char if_flags; /* per-fork flags */
};
/*
@@ -93,12 +93,14 @@
void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
struct xfs_inode_log_item *, int);
void xfs_idestroy_fork(struct xfs_inode *, int);
-void xfs_idata_realloc(struct xfs_inode *, int, int);
+void xfs_idata_realloc(struct xfs_inode *ip, int64_t byte_diff,
+ int whichfork);
void xfs_iroot_realloc(struct xfs_inode *, int, int);
int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,
int);
-void xfs_init_local_fork(struct xfs_inode *, int, const void *, int);
+void xfs_init_local_fork(struct xfs_inode *ip, int whichfork,
+ const void *data, int64_t size);
xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp);
void xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur,
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 38e9414..9d3c67b 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -1379,7 +1379,7 @@
* record for our insertion point. This will also give us the record for
* start block contiguity tests.
*/
- error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
+ error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext,
&PREV, &i);
if (error)
goto done;
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 8ea1efc..42085e7 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1018,7 +1018,6 @@
struct xfs_mount *mp = tp->t_mountp;
xfs_rtblock_t rtstart;
xfs_rtblock_t rtend;
- xfs_rtblock_t rem;
int is_free;
int error = 0;
@@ -1027,13 +1026,12 @@
if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
low_rec->ar_startext == high_rec->ar_startext)
return 0;
- if (high_rec->ar_startext > mp->m_sb.sb_rextents)
- high_rec->ar_startext = mp->m_sb.sb_rextents;
+ high_rec->ar_startext = min(high_rec->ar_startext,
+ mp->m_sb.sb_rextents - 1);
/* Iterate the bitmap, looking for discrepancies. */
rtstart = low_rec->ar_startext;
- rem = high_rec->ar_startext - rtstart;
- while (rem) {
+ while (rtstart <= high_rec->ar_startext) {
/* Is the first block free? */
error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
&is_free);
@@ -1042,7 +1040,7 @@
/* How long does the extent go for? */
error = xfs_rtfind_forw(mp, tp, rtstart,
- high_rec->ar_startext - 1, &rtend);
+ high_rec->ar_startext, &rtend);
if (error)
break;
@@ -1055,7 +1053,6 @@
break;
}
- rem -= rtend - rtstart + 1;
rtstart = rtend + 1;
}
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index a9ad909..0ba7368 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -36,6 +36,7 @@
ASSERT(iip->ili_lock_flags == 0);
iip->ili_lock_flags = lock_flags;
+ ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
/*
* Get a log_item_desc to point at the new item.
@@ -91,6 +92,7 @@
ASSERT(ip->i_itemp != NULL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
/*
* Don't bother with i_lock for the I_DIRTY_TIME check here, as races
@@ -98,9 +100,9 @@
* to log the timestamps, or will clear already cleared fields in the
* worst case.
*/
- if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
+ if (inode->i_state & I_DIRTY_TIME) {
spin_lock(&inode->i_lock);
- inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+ inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
}
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index d12bbd5..b3584cd 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -197,6 +197,24 @@
}
/*
+ * Per-extent log reservation for the btree changes involved in freeing or
+ * allocating a realtime extent. We have to be able to log as many rtbitmap
+ * blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents,
+ * as well as the realtime summary block.
+ */
+unsigned int
+xfs_rtalloc_log_count(
+ struct xfs_mount *mp,
+ unsigned int num_ops)
+{
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+ unsigned int rtbmp_bytes;
+
+ rtbmp_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY;
+ return (howmany(rtbmp_bytes, blksz) + 1) * num_ops;
+}
+
+/*
* Various log reservation values.
*
* These are based on the size of the file system block because that is what
@@ -218,13 +236,21 @@
/*
* In a write transaction we can allocate a maximum of 2
- * extents. This gives:
+ * extents. This gives (t1):
* the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
+ * Or, if we're writing to a realtime file (t2):
+ * the inode getting the new extents: inode size
+ * the inode's bmap btree: max depth * block size
+ * the agfs of the ags from which the extents are allocated: 2 * sector
+ * the superblock free block counter: sector size
+ * the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 1 block
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join (t3):
* the agfs of the ags containing the blocks: 2 * sector size
* the agfls of the ags containing the blocks: 2 * sector size
* the super block free block counter: sector size
@@ -234,40 +260,72 @@
xfs_calc_write_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 1) +
+ unsigned int t1, t2, t3;
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
+ t1 = xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
+ xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ t2 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
- XFS_FSB_TO_B(mp, 1)) +
+ blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))));
+ xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
+ } else {
+ t2 = 0;
+ }
+
+ t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+ return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}
/*
- * In truncating a file we free up to two extents at once. We can modify:
+ * In truncating a file we free up to two extents at once. We can modify (t1):
* the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
* the agf for each of the ags: 4 * sector size
* the agfl for each of the ags: 4 * sector size
* the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size
+ * Or, if it's a realtime file (t3):
+ * the agf for each of the ags: 2 * sector size
+ * the agfl for each of the ags: 2 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 2 exts * 1 block
+ * worst case split in allocation btrees per extent assuming 2 extents:
+ * 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_itruncate_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int t1, t2, t3;
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
+ t1 = xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
+
+ t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
+
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+ } else {
+ t3 = 0;
+ }
+
+ return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}
/*
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index 88221c7..7ad3659 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -57,8 +57,8 @@
XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
#define XFS_IALLOC_SPACE_RES(mp) \
(M_IGEO(mp)->ialloc_blks + \
- (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \
- (M_IGEO(mp)->inobt_maxlevels - 1)))
+ ((xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1) * \
+ M_IGEO(mp)->inobt_maxlevels))
/*
* Space reservation values for various transactions.
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index fa6ea64..52892f4 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -45,9 +45,27 @@
*/
if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
+ struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
+
inode_dio_wait(VFS_I(sc->ip));
- error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
- if (error)
+
+ /*
+ * Try to flush all incore state to disk before we examine the
+ * space mappings for the data fork. Leave accumulated errors
+ * in the mapping for the writer threads to consume.
+ *
+ * On ENOSPC or EIO writeback errors, we continue into the
+ * extent mapping checks because write failures do not
+ * necessarily imply anything about the correctness of the file
+ * metadata. The metadata and the file data could be on
+ * completely separate devices; a media failure might only
+ * affect a subset of the disk, etc. We can handle delalloc
+ * extents in the scrubber, so leaving them in memory is fine.
+ */
+ error = filemap_fdatawrite(mapping);
+ if (!error)
+ error = filemap_fdatawait_keep_errors(mapping);
+ if (error && (error != -ENOSPC && error != -EIO))
goto out;
}
@@ -95,6 +113,8 @@
if (info->whichfork == XFS_ATTR_FORK)
rflags |= XFS_RMAP_ATTR_FORK;
+ if (irec->br_state == XFS_EXT_UNWRITTEN)
+ rflags |= XFS_RMAP_UNWRITTEN;
/*
* CoW staging extents are owned (on disk) by the refcountbt, so
@@ -198,13 +218,13 @@
* which doesn't track unwritten state.
*/
if (owner != XFS_RMAP_OWN_COW &&
- irec->br_state == XFS_EXT_UNWRITTEN &&
- !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
+ !!(irec->br_state == XFS_EXT_UNWRITTEN) !=
+ !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
- if (info->whichfork == XFS_ATTR_FORK &&
- !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
+ if (!!(info->whichfork == XFS_ATTR_FORK) !=
+ !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index f52a7b8..debf392 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -452,32 +452,41 @@
int level,
struct xfs_btree_block *block)
{
- unsigned int numrecs;
- int ok_level;
-
- numrecs = be16_to_cpu(block->bb_numrecs);
+ struct xfs_btree_cur *cur = bs->cur;
+ unsigned int root_level = cur->bc_nlevels - 1;
+ unsigned int numrecs = be16_to_cpu(block->bb_numrecs);
/* More records than minrecs means the block is ok. */
- if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level))
+ if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
return;
/*
- * Certain btree blocks /can/ have fewer than minrecs records. Any
- * level greater than or equal to the level of the highest dedicated
- * btree block are allowed to violate this constraint.
- *
- * For a btree rooted in a block, the btree root can have fewer than
- * minrecs records. If the btree is rooted in an inode and does not
- * store records in the root, the direct children of the root and the
- * root itself can have fewer than minrecs records.
+ * For btrees rooted in the inode, it's possible that the root block
+ * contents spilled into a regular ondisk block because there wasn't
+ * enough space in the inode root. The number of records in that
+ * child block might be less than the standard minrecs, but that's ok
+ * provided that there's only one direct child of the root.
*/
- ok_level = bs->cur->bc_nlevels - 1;
- if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
- ok_level--;
- if (level >= ok_level)
- return;
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ level == cur->bc_nlevels - 2) {
+ struct xfs_btree_block *root_block;
+ struct xfs_buf *root_bp;
+ int root_maxrecs;
- xchk_btree_set_corrupt(bs->sc, bs->cur, level);
+ root_block = xfs_btree_get_block(cur, root_level, &root_bp);
+ root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
+ if (be16_to_cpu(root_block->bb_numrecs) != 1 ||
+ numrecs <= root_maxrecs)
+ xchk_btree_set_corrupt(bs->sc, cur, level);
+ return;
+ }
+
+ /*
+ * Otherwise, only the root level is allowed to have fewer than minrecs
+ * records or keyptrs.
+ */
+ if (level < root_level)
+ xchk_btree_set_corrupt(bs->sc, cur, level);
}
/*
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 003a772..2e50d14 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -14,8 +14,15 @@
static inline bool
xchk_should_terminate(
struct xfs_scrub *sc,
- int *error)
+ int *error)
{
+ /*
+ * If preemption is disabled, we need to yield to the scheduler every
+ * few seconds so that we don't run afoul of the soft lockup watchdog
+ * or RCU stall detector.
+ */
+ cond_resched();
+
if (fatal_signal_pending(current)) {
if (*error == 0)
*error = -EAGAIN;
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 1e2e117..20eca2d 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -152,6 +152,9 @@
xname.type = XFS_DIR3_FT_UNKNOWN;
error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL);
+ /* ENOENT means the hash lookup failed and the dir is corrupt */
+ if (error == -ENOENT)
+ error = -EFSCORRUPTED;
if (!xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
&error))
goto out;
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 6d483ab..1bea029 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -121,8 +121,7 @@
goto bad;
/* rt flags require rt device */
- if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) &&
- !mp->m_rtdev_targp)
+ if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
goto bad;
/* new rt bitmap flag only valid for rbmino */
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 0cab11a..5c6b71b 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -170,7 +170,6 @@
*/
INIT_LIST_HEAD(&worklist);
rbno = NULLAGBLOCK;
- nr = 1;
/* Make sure the fragments actually /are/ in agbno order. */
bno = 0;
@@ -184,15 +183,14 @@
* Find all the rmaps that start at or before the refc extent,
* and put them on the worklist.
*/
+ nr = 0;
list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
- if (frag->rm.rm_startblock > refchk->bno)
- goto done;
+ if (frag->rm.rm_startblock > refchk->bno || nr > target_nr)
+ break;
bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
if (bno < rbno)
rbno = bno;
list_move_tail(&frag->list, &worklist);
- if (nr == target_nr)
- break;
nr++;
}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 15c8c5f..720bef5 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -167,6 +167,7 @@
xfs_irele(sc->ip);
sc->ip = NULL;
}
+ sb_end_write(sc->mp->m_super);
if (sc->flags & XCHK_REAPING_DISABLED)
xchk_start_reaping(sc);
if (sc->flags & XCHK_HAS_QUOTAOFFLOCK) {
@@ -489,6 +490,14 @@
sc.ops = &meta_scrub_ops[sm->sm_type];
sc.sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
retry_op:
+ /*
+ * If freeze runs concurrently with a scrub, the freeze can be delayed
+ * indefinitely as we walk the filesystem and iterate over metadata
+ * buffers. Freeze quiesces the log (which waits for the buffer LRU to
+ * be emptied) and that won't happen while checking is running.
+ */
+ sb_start_write(mp->m_super);
+
/* Set up for the operation. */
error = sc.ops->setup(&sc, ip);
if (error)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 4f44370..d6d78e1 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1039,6 +1039,7 @@
goto out_unlock;
}
+/* Caller must first wait for the completion of any pending DIOs if required. */
int
xfs_flush_unmap_range(
struct xfs_inode *ip,
@@ -1050,9 +1051,6 @@
xfs_off_t rounding, start, end;
int error;
- /* wait for the completion of any pending DIOs */
- inode_dio_wait(inode);
-
rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
start = round_down(offset, rounding);
end = round_up(offset + len, rounding) - 1;
@@ -1084,10 +1082,6 @@
if (len <= 0) /* if nothing being freed */
return 0;
- error = xfs_flush_unmap_range(ip, offset, len);
- if (error)
- return error;
-
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
@@ -1760,7 +1754,7 @@
if (xfs_inode_has_cow_data(tip)) {
error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
if (error)
- return error;
+ goto out_unlock;
}
/*
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 0abba17..1264ac6 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1162,8 +1162,10 @@
bp->b_ops->verify_read(bp);
}
- if (!bp->b_error)
+ if (!bp->b_error) {
+ bp->b_flags &= ~XBF_WRITE_FAIL;
bp->b_flags |= XBF_DONE;
+ }
if (bp->b_iodone)
(*(bp->b_iodone))(bp);
@@ -1223,7 +1225,7 @@
bp->b_flags |= XBF_WRITE;
bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
- XBF_WRITE_FAIL | XBF_DONE);
+ XBF_DONE);
error = xfs_buf_submit(bp);
if (error)
@@ -1929,7 +1931,7 @@
* synchronously. Otherwise, drop the buffer from the delwri
* queue and submit async.
*/
- bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL);
+ bp->b_flags &= ~_XBF_DELWRI_Q;
bp->b_flags |= XBF_WRITE;
if (wait_list) {
bp->b_flags &= ~XBF_ASYNC;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index aeb95e7..3cbf248 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1116,13 +1116,12 @@
dqb = bp->b_addr + dqp->q_bufoffset;
ddqp = &dqb->dd_diskdq;
- /*
- * A simple sanity check in case we got a corrupted dquot.
- */
- fa = xfs_dqblk_verify(mp, dqb, be32_to_cpu(ddqp->d_id), 0);
+ /* sanity check the in-core structure before we flush */
+ fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(dqp->q_core.d_id),
+ 0);
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
- be32_to_cpu(ddqp->d_id), fa);
+ be32_to_cpu(dqp->q_core.d_id), fa);
xfs_buf_relse(bp);
xfs_dqfunlock(dqp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1ffb179..203065a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -818,6 +818,36 @@
if (error)
goto out_unlock;
+ /*
+ * Must wait for all AIO to complete before we continue as AIO can
+ * change the file size on completion without holding any locks we
+ * currently hold. We must do this first because AIO can update both
+ * the on disk and in memory inode sizes, and the operations that follow
+ * require the in-memory size to be fully up-to-date.
+ */
+ inode_dio_wait(inode);
+
+ /*
+ * Now AIO and DIO has drained we flush and (if necessary) invalidate
+ * the cached range over the first operation we are about to run.
+ *
+ * We care about zero and collapse here because they both run a hole
+ * punch over the range first. Because that can zero data, and the range
+ * of invalidation for the shift operations is much larger, we still do
+ * the required flush for collapse in xfs_prepare_shift().
+ *
+ * Insert has the same range requirements as collapse, and we extend the
+ * file first which can zero data. Hence insert has the same
+ * flush/invalidate requirements as collapse and so they are both
+ * handled at the right time by xfs_prepare_shift().
+ */
+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
+ FALLOC_FL_COLLAPSE_RANGE)) {
+ error = xfs_flush_unmap_range(ip, offset, len);
+ if (error)
+ goto out_unlock;
+ }
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
error = xfs_free_file_space(ip, offset, len);
if (error)
@@ -1172,6 +1202,14 @@
return ret;
}
+static inline bool
+xfs_is_write_fault(
+ struct vm_fault *vmf)
+{
+ return (vmf->flags & FAULT_FLAG_WRITE) &&
+ (vmf->vma->vm_flags & VM_SHARED);
+}
+
static vm_fault_t
xfs_filemap_fault(
struct vm_fault *vmf)
@@ -1179,7 +1217,7 @@
/* DAX can shortcut the normal fault path on write faults! */
return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
IS_DAX(file_inode(vmf->vma->vm_file)) &&
- (vmf->flags & FAULT_FLAG_WRITE));
+ xfs_is_write_fault(vmf));
}
static vm_fault_t
@@ -1192,7 +1230,7 @@
/* DAX can shortcut the normal fault path on write faults! */
return __xfs_filemap_fault(vmf, pe_size,
- (vmf->flags & FAULT_FLAG_WRITE));
+ xfs_is_write_fault(vmf));
}
static vm_fault_t
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index d082143..01c0933 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -26,7 +26,7 @@
#include "xfs_rtalloc.h"
/* Convert an xfs_fsmap to an fsmap. */
-void
+static void
xfs_fsmap_from_internal(
struct fsmap *dest,
struct xfs_fsmap *src)
@@ -154,8 +154,7 @@
/* getfsmap query state */
struct xfs_getfsmap_info {
struct xfs_fsmap_head *head;
- xfs_fsmap_format_t formatter; /* formatting fn */
- void *format_arg; /* format buffer */
+ struct fsmap *fsmap_recs; /* mapping records */
struct xfs_buf *agf_bp; /* AGF, for refcount queries */
xfs_daddr_t next_daddr; /* next daddr we expect */
u64 missing_owner; /* owner of holes */
@@ -223,6 +222,20 @@
return 0;
}
+static inline void
+xfs_getfsmap_format(
+ struct xfs_mount *mp,
+ struct xfs_fsmap *xfm,
+ struct xfs_getfsmap_info *info)
+{
+ struct fsmap *rec;
+
+ trace_xfs_getfsmap_mapping(mp, xfm);
+
+ rec = &info->fsmap_recs[info->head->fmh_entries++];
+ xfs_fsmap_from_internal(rec, xfm);
+}
+
/*
* Format a reverse mapping for getfsmap, having translated rm_startblock
* into the appropriate daddr units.
@@ -255,6 +268,9 @@
/* Are we just counting mappings? */
if (info->head->fmh_count == 0) {
+ if (info->head->fmh_entries == UINT_MAX)
+ return -ECANCELED;
+
if (rec_daddr > info->next_daddr)
info->head->fmh_entries++;
@@ -284,10 +300,7 @@
fmr.fmr_offset = 0;
fmr.fmr_length = rec_daddr - info->next_daddr;
fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
- error = info->formatter(&fmr, info->format_arg);
- if (error)
- return error;
- info->head->fmh_entries++;
+ xfs_getfsmap_format(mp, &fmr, info);
}
if (info->last)
@@ -319,11 +332,8 @@
if (shared)
fmr.fmr_flags |= FMR_OF_SHARED;
}
- error = info->formatter(&fmr, info->format_arg);
- if (error)
- return error;
- info->head->fmh_entries++;
+ xfs_getfsmap_format(mp, &fmr, info);
out:
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
if (info->next_daddr < rec_daddr)
@@ -791,11 +801,11 @@
#endif /* CONFIG_XFS_RT */
/*
- * Get filesystem's extents as described in head, and format for
- * output. Calls formatter to fill the user's buffer until all
- * extents are mapped, until the passed-in head->fmh_count slots have
- * been filled, or until the formatter short-circuits the loop, if it
- * is tracking filled-in extents on its own.
+ * Get filesystem's extents as described in head, and format for output. Fills
+ * in the supplied records array until there are no more reverse mappings to
+ * return or head.fmh_entries == head.fmh_count. In the second case, this
+ * function returns -ECANCELED to indicate that more records would have been
+ * returned.
*
* Key to Confusion
* ----------------
@@ -815,8 +825,7 @@
xfs_getfsmap(
struct xfs_mount *mp,
struct xfs_fsmap_head *head,
- xfs_fsmap_format_t formatter,
- void *arg)
+ struct fsmap *fsmap_recs)
{
struct xfs_trans *tp = NULL;
struct xfs_fsmap dkeys[2]; /* per-dev keys */
@@ -891,10 +900,17 @@
info.next_daddr = head->fmh_keys[0].fmr_physical +
head->fmh_keys[0].fmr_length;
- info.formatter = formatter;
- info.format_arg = arg;
+ info.fsmap_recs = fsmap_recs;
info.head = head;
+ /*
+ * If fsmap runs concurrently with a scrub, the freeze can be delayed
+ * indefinitely as we walk the rmapbt and iterate over metadata
+ * buffers. Freeze quiesces the log (which waits for the buffer LRU to
+ * be emptied) and that won't happen while we're reading buffers.
+ */
+ sb_start_write(mp->m_super);
+
/* For each device we support... */
for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
/* Is this device within the range the user asked for? */
@@ -934,6 +950,7 @@
if (tp)
xfs_trans_cancel(tp);
+ sb_end_write(mp->m_super);
head->fmh_oflags = FMH_OF_DEV_T;
return error;
}
diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h
index c6c5773..a077578 100644
--- a/fs/xfs/xfs_fsmap.h
+++ b/fs/xfs/xfs_fsmap.h
@@ -27,13 +27,9 @@
struct xfs_fsmap fmh_keys[2]; /* low and high keys */
};
-void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src);
void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src);
-/* fsmap to userspace formatter - copy to user & advance pointer */
-typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *);
-
int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head,
- xfs_fsmap_format_t formatter, void *arg);
+ struct fsmap *out_recs);
#endif /* __XFS_FSMAP_H__ */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 944add5..a1135b8 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -907,7 +907,12 @@
{
struct xfs_mount *mp = container_of(to_delayed_work(work),
struct xfs_mount, m_eofblocks_work);
+
+ if (!sb_start_write_trylock(mp->m_super))
+ return;
xfs_icache_free_eofblocks(mp, NULL);
+ sb_end_write(mp->m_super);
+
xfs_queue_eofblocks(mp);
}
@@ -934,7 +939,12 @@
{
struct xfs_mount *mp = container_of(to_delayed_work(work),
struct xfs_mount, m_cowblocks_work);
+
+ if (!sb_start_write_trylock(mp->m_super))
+ return;
xfs_icache_free_cowblocks(mp, NULL);
+ sb_end_write(mp->m_super);
+
xfs_queue_cowblocks(mp);
}
@@ -1122,7 +1132,7 @@
goto out_ifunlock;
xfs_iunpin_wait(ip);
}
- if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) {
+ if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
goto reclaim;
}
@@ -1209,6 +1219,7 @@
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_qm_dqdetach(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ ASSERT(xfs_inode_clean(ip));
__xfs_inode_free(ip);
return error;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 18f4b26..b339ff9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1761,10 +1761,31 @@
return error;
}
+ /*
+ * We do not hold the inode locked across the entire rolling transaction
+ * here. We only need to hold it for the first transaction that
+ * xfs_ifree() builds, which may mark the inode XFS_ISTALE if the
+ * underlying cluster buffer is freed. Relogging an XFS_ISTALE inode
+ * here breaks the relationship between cluster buffer invalidation and
+ * stale inode invalidation on cluster buffer item journal commit
+ * completion, and can result in leaving dirty stale inodes hanging
+ * around in memory.
+ *
+ * We have no need for serialising this inode operation against other
+ * operations - we freed the inode and hence reallocation is required
+ * and that will serialise on reallocating the space the deferops need
+ * to free. Hence we can unlock the inode on the first commit of
+ * the transaction rather than roll it right through the deferops. This
+ * avoids relogging the XFS_ISTALE inode.
+ *
+ * We check that xfs_ifree() hasn't grown an internal transaction roll
+ * by asserting that the inode is still locked when it returns.
+ */
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, 0);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
error = xfs_ifree(tp, ip);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (error) {
/*
* If we fail to free the inode, shut down. The cancel
@@ -1777,7 +1798,6 @@
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
}
xfs_trans_cancel(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
@@ -1795,7 +1815,6 @@
xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
__func__, error);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
return 0;
}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d58f0d6..b3021d9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -536,6 +536,8 @@
error = 0;
for (i = 0; i < am_hreq.opcount; i++) {
+ ops[i].am_flags &= ~ATTR_KERNEL_FLAGS;
+
ops[i].am_error = strncpy_from_user((char *)attr_name,
ops[i].am_attrname, MAXNAMELEN);
if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
@@ -665,6 +667,31 @@
goto out_unlock;
}
+ /*
+ * Must wait for all AIO to complete before we continue as AIO can
+ * change the file size on completion without holding any locks we
+ * currently hold. We must do this first because AIO can update both
+ * the on disk and in memory inode sizes, and the operations that follow
+ * require the in-memory size to be fully up-to-date.
+ */
+ inode_dio_wait(inode);
+
+ /*
+ * Now that AIO and DIO has drained we can flush and (if necessary)
+ * invalidate the cached range over the first operation we are about to
+ * run. We include zero range here because it starts with a hole punch
+ * over the target range.
+ */
+ switch (cmd) {
+ case XFS_IOC_ZERO_RANGE:
+ case XFS_IOC_UNRESVSP:
+ case XFS_IOC_UNRESVSP64:
+ error = xfs_flush_unmap_range(ip, bf->l_start, bf->l_len);
+ if (error)
+ goto out_unlock;
+ break;
+ }
+
switch (cmd) {
case XFS_IOC_ZERO_RANGE:
flags |= XFS_PREALLOC_SET;
@@ -1829,39 +1856,17 @@
return error;
}
-struct getfsmap_info {
- struct xfs_mount *mp;
- struct fsmap_head __user *data;
- unsigned int idx;
- __u32 last_flags;
-};
-
-STATIC int
-xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
-{
- struct getfsmap_info *info = priv;
- struct fsmap fm;
-
- trace_xfs_getfsmap_mapping(info->mp, xfm);
-
- info->last_flags = xfm->fmr_flags;
- xfs_fsmap_from_internal(&fm, xfm);
- if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm,
- sizeof(struct fsmap)))
- return -EFAULT;
-
- return 0;
-}
-
STATIC int
xfs_ioc_getfsmap(
struct xfs_inode *ip,
struct fsmap_head __user *arg)
{
- struct getfsmap_info info = { NULL };
struct xfs_fsmap_head xhead = {0};
struct fsmap_head head;
- bool aborted = false;
+ struct fsmap *recs;
+ unsigned int count;
+ __u32 last_flags = 0;
+ bool done = false;
int error;
if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
@@ -1873,38 +1878,112 @@
sizeof(head.fmh_keys[1].fmr_reserved)))
return -EINVAL;
+ /*
+ * Use an internal memory buffer so that we don't have to copy fsmap
+ * data to userspace while holding locks. Start by trying to allocate
+ * up to 128k for the buffer, but fall back to a single page if needed.
+ */
+ count = min_t(unsigned int, head.fmh_count,
+ 131072 / sizeof(struct fsmap));
+ recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
+ if (!recs) {
+ count = min_t(unsigned int, head.fmh_count,
+ PAGE_SIZE / sizeof(struct fsmap));
+ recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
+ if (!recs)
+ return -ENOMEM;
+ }
+
xhead.fmh_iflags = head.fmh_iflags;
- xhead.fmh_count = head.fmh_count;
xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
- info.mp = ip->i_mount;
- info.data = arg;
- error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
- if (error == -ECANCELED) {
- error = 0;
- aborted = true;
- } else if (error)
- return error;
+ head.fmh_entries = 0;
+ do {
+ struct fsmap __user *user_recs;
+ struct fsmap *last_rec;
- /* If we didn't abort, set the "last" flag in the last fmx */
- if (!aborted && info.idx) {
- info.last_flags |= FMR_OF_LAST;
- if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags,
- &info.last_flags, sizeof(info.last_flags)))
- return -EFAULT;
+ user_recs = &arg->fmh_recs[head.fmh_entries];
+ xhead.fmh_entries = 0;
+ xhead.fmh_count = min_t(unsigned int, count,
+ head.fmh_count - head.fmh_entries);
+
+ /* Run query, record how many entries we got. */
+ error = xfs_getfsmap(ip->i_mount, &xhead, recs);
+ switch (error) {
+ case 0:
+ /*
+ * There are no more records in the result set. Copy
+ * whatever we got to userspace and break out.
+ */
+ done = true;
+ break;
+ case -ECANCELED:
+ /*
+ * The internal memory buffer is full. Copy whatever
+ * records we got to userspace and go again if we have
+ * not yet filled the userspace buffer.
+ */
+ error = 0;
+ break;
+ default:
+ goto out_free;
+ }
+ head.fmh_entries += xhead.fmh_entries;
+ head.fmh_oflags = xhead.fmh_oflags;
+
+ /*
+ * If the caller wanted a record count or there aren't any
+ * new records to return, we're done.
+ */
+ if (head.fmh_count == 0 || xhead.fmh_entries == 0)
+ break;
+
+ /* Copy all the records we got out to userspace. */
+ if (copy_to_user(user_recs, recs,
+ xhead.fmh_entries * sizeof(struct fsmap))) {
+ error = -EFAULT;
+ goto out_free;
+ }
+
+ /* Remember the last record flags we copied to userspace. */
+ last_rec = &recs[xhead.fmh_entries - 1];
+ last_flags = last_rec->fmr_flags;
+
+ /* Set up the low key for the next iteration. */
+ xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
+ trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
+ } while (!done && head.fmh_entries < head.fmh_count);
+
+ /*
+ * If there are no more records in the query result set and we're not
+ * in counting mode, mark the last record returned with the LAST flag.
+ */
+ if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
+ struct fsmap __user *user_rec;
+
+ last_flags |= FMR_OF_LAST;
+ user_rec = &arg->fmh_recs[head.fmh_entries - 1];
+
+ if (copy_to_user(&user_rec->fmr_flags, &last_flags,
+ sizeof(last_flags))) {
+ error = -EFAULT;
+ goto out_free;
+ }
}
/* copy back header */
- head.fmh_entries = xhead.fmh_entries;
- head.fmh_oflags = xhead.fmh_oflags;
- if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
- return -EFAULT;
+ if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
+ error = -EFAULT;
+ goto out_free;
+ }
- return 0;
+out_free:
+ kmem_free(recs);
+ return error;
}
STATIC int
@@ -2399,7 +2478,10 @@
if (error)
return error;
- return xfs_icache_free_eofblocks(mp, &keofb);
+ sb_start_write(mp->m_super);
+ error = xfs_icache_free_eofblocks(mp, &keofb);
+ sb_end_write(mp->m_super);
+ return error;
}
default:
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 1e08bf7..e61cc41 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -450,6 +450,8 @@
error = 0;
for (i = 0; i < am_hreq.opcount; i++) {
+ ops[i].am_flags &= ~ATTR_KERNEL_FLAGS;
+
ops[i].am_error = strncpy_from_user((char *)attr_name,
compat_ptr(ops[i].am_attrname),
MAXNAMELEN);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f780e22..239c954 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1002,9 +1002,15 @@
* I/O, which must be block aligned, we need to report the
* newly allocated address. If the data fork has a hole, copy
* the COW fork mapping to avoid allocating to the data fork.
+ *
+ * Otherwise, ensure that the imap range does not extend past
+ * the range allocated/found in cmap.
*/
if (directio || imap.br_startblock == HOLESTARTBLOCK)
imap = cmap;
+ else
+ xfs_trim_extent(&imap, cmap.br_startoff,
+ cmap.br_blockcount);
end_fsb = imap.br_startoff + imap.br_blockcount;
length = XFS_FSB_TO_B(mp, end_fsb) - offset;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index fe285d1..ca8c763 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -839,7 +839,7 @@
ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
ASSERT(S_ISREG(inode->i_mode));
ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
- ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+ ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);
oldsize = inode->i_size;
newsize = iattr->ia_size;
@@ -885,6 +885,16 @@
error = iomap_zero_range(inode, oldsize, newsize - oldsize,
&did_zeroing, &xfs_iomap_ops);
} else {
+ /*
+ * iomap won't detect a dirty page over an unwritten block (or a
+ * cow block over a hole) and subsequently skips zeroing the
+ * newly post-EOF portion of the page. Flush the new EOF to
+ * convert the block before the pagecache truncate.
+ */
+ error = filemap_write_and_wait_range(inode->i_mapping, newsize,
+ newsize);
+ if (error)
+ return error;
error = iomap_truncate_page(inode, newsize, &did_zeroing,
&xfs_iomap_ops);
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 641d07f..7b0d9ad 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1495,6 +1495,8 @@
prev_iclog = iclog->ic_next;
kmem_free(iclog->ic_data);
kmem_free(iclog);
+ if (prev_iclog == log->l_iclog)
+ break;
}
out_free_log:
kmem_free(log);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index ba5b6f3..5a0ce0c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -195,21 +195,26 @@
}
pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
- if (!pag)
+ if (!pag) {
+ error = -ENOMEM;
goto out_unwind_new_pags;
+ }
pag->pag_agno = index;
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
- if (xfs_buf_hash_init(pag))
+
+ error = xfs_buf_hash_init(pag);
+ if (error)
goto out_free_pag;
init_waitqueue_head(&pag->pagb_wait);
spin_lock_init(&pag->pagb_lock);
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
- if (radix_tree_preload(GFP_NOFS))
+ error = radix_tree_preload(GFP_NOFS);
+ if (error)
goto out_hash_destroy;
spin_lock(&mp->m_perag_lock);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index a339bd5..f63fe8d 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -134,7 +134,7 @@
goto out_unlock;
error = invalidate_inode_pages2(inode->i_mapping);
if (WARN_ON_ONCE(error))
- return error;
+ goto out_unlock;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index cd6c721..c7de17d 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -201,6 +201,9 @@
if (XFS_IS_QUOTA_ON(mp))
return -EINVAL;
+ if (uflags & ~(FS_USER_QUOTA | FS_GROUP_QUOTA | FS_PROJ_QUOTA))
+ return -EINVAL;
+
if (uflags & FS_USER_QUOTA)
flags |= XFS_DQ_USER;
if (uflags & FS_GROUP_QUOTA)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 0f08153..904d828 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1005,6 +1005,7 @@
xfs_filblks_t rlen;
xfs_filblks_t unmap_len;
xfs_off_t newlen;
+ int64_t qres;
int error;
unmap_len = irec->br_startoff + irec->br_blockcount - destoff;
@@ -1027,13 +1028,19 @@
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- /* If we're not just clearing space, then do we have enough quota? */
- if (real_extent) {
- error = xfs_trans_reserve_quota_nblks(tp, ip,
- irec->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS);
- if (error)
- goto out_cancel;
- }
+ /*
+ * Reserve quota for this operation. We don't know if the first unmap
+ * in the dest file will cause a bmap btree split, so we always reserve
+ * at least enough blocks for that split. If the extent being mapped
+ * in is written, we need to reserve quota for that too.
+ */
+ qres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
+ if (real_extent)
+ qres += irec->br_blockcount;
+ error = xfs_trans_reserve_quota_nblks(tp, ip, qres, 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto out_cancel;
trace_xfs_reflink_remap(ip, irec->br_startoff,
irec->br_blockcount, irec->br_startblock);
@@ -1053,6 +1060,7 @@
uirec.br_startblock = irec->br_startblock + rlen;
uirec.br_startoff = irec->br_startoff + rlen;
uirec.br_blockcount = unmap_len - rlen;
+ uirec.br_state = irec->br_state;
unmap_len = rlen;
/* If this isn't a real mapping, we're done. */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 4a48a8c..6d5ddc4 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -247,6 +247,9 @@
end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1;
i <= end;
i++) {
+ /* Make sure we don't scan off the end of the rt volume. */
+ maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i;
+
/*
* See if there's a free extent of maxlen starting at i.
* If it's not so then next will contain the first non-free.
@@ -442,6 +445,14 @@
*/
if (bno >= mp->m_sb.sb_rextents)
bno = mp->m_sb.sb_rextents - 1;
+
+ /* Make sure we don't run off the end of the rt volume. */
+ maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno;
+ if (maxlen < minlen) {
+ *rtblock = NULLRTBLOCK;
+ return 0;
+ }
+
/*
* Try the exact allocation first.
*/
@@ -1010,10 +1021,13 @@
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
/*
- * Update the bitmap inode's size.
+ * Update the bitmap inode's size ondisk and incore. We need
+ * to update the incore size so that inode inactivation won't
+ * punch what it thinks are "posteof" blocks.
*/
mp->m_rbmip->i_d.di_size =
nsbp->sb_rbmblocks * nsbp->sb_blocksize;
+ i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
/*
* Get the summary inode into the transaction.
@@ -1021,9 +1035,12 @@
xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
/*
- * Update the summary inode's size.
+ * Update the summary inode's size. We need to update the
+ * incore size so that inode inactivation won't punch what it
+ * thinks are "posteof" blocks.
*/
mp->m_rsumip->i_d.di_size = nmp->m_rsumsize;
+ i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);
/*
* Copy summary data from old to new sizes.
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index e9f810f..4358585 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -32,9 +32,11 @@
struct xfs_kobj *parent_kobj,
const char *name)
{
+ struct kobject *parent;
+
+ parent = parent_kobj ? &parent_kobj->kobject : NULL;
init_completion(&kobj->complete);
- return kobject_init_and_add(&kobj->kobject, ktype,
- &parent_kobj->kobject, "%s", name);
+ return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
}
static inline void
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f4795fd..b32a664 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -306,6 +306,11 @@
*
* Note the zero-length reservation; this transaction MUST be cancelled
* without any dirty data.
+ *
+ * Callers should obtain freeze protection to avoid two conflicts with fs
+ * freezing: (1) having active transactions trip the m_active_trans ASSERTs;
+ * and (2) grabbing buffers at the same time that freeze is trying to drain
+ * the buffer LRU list.
*/
int
xfs_trans_alloc_empty(
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6ccfd75..812108f 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -529,8 +529,9 @@
{
struct xfs_ail *ailp = data;
long tout = 0; /* milliseconds */
+ unsigned int noreclaim_flag;
- current->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
set_freezable();
while (1) {
@@ -601,6 +602,7 @@
tout = xfsaild_push(ailp);
}
+ memalloc_noreclaim_restore(noreclaim_flag);
return 0;
}
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 1645746..904780d 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -646,7 +646,7 @@
}
}
if (ninos > 0) {
- total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
+ total_count = dqp->q_res_icount + ninos;
timer = be32_to_cpu(dqp->q_core.d_itimer);
warns = be16_to_cpu(dqp->q_core.d_iwarns);
warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;