Blame - fs/xfs/xfs_iomap.c - hafnium/third_party/linux

blob: 6320aca39f39415257f3bbb9b0313dbc26284861 [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* Copyright (c) 2000-2006 Silicon Graphics, Inc.
				4	* Copyright (c) 2016-2018 Christoph Hellwig.
				5	* All Rights Reserved.
				6	*/
				7	#include <linux/iomap.h>
				8	#include "xfs.h"
				9	#include "xfs_fs.h"
				10	#include "xfs_shared.h"
				11	#include "xfs_format.h"
				12	#include "xfs_log_format.h"
				13	#include "xfs_trans_resv.h"
				14	#include "xfs_mount.h"
				15	#include "xfs_defer.h"
				16	#include "xfs_inode.h"
				17	#include "xfs_btree.h"
				18	#include "xfs_bmap_btree.h"
				19	#include "xfs_bmap.h"
				20	#include "xfs_bmap_util.h"
				21	#include "xfs_errortag.h"
				22	#include "xfs_error.h"
				23	#include "xfs_trans.h"
				24	#include "xfs_trans_space.h"
				25	#include "xfs_inode_item.h"
				26	#include "xfs_iomap.h"
				27	#include "xfs_trace.h"
				28	#include "xfs_icache.h"
				29	#include "xfs_quota.h"
				30	#include "xfs_dquot_item.h"
				31	#include "xfs_dquot.h"
				32	#include "xfs_reflink.h"
				33
				34
				35	#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
				36	<< mp->m_writeio_log)
				37
				38	void
				39	xfs_bmbt_to_iomap(
				40	struct xfs_inode *ip,
				41	struct iomap *iomap,
				42	struct xfs_bmbt_irec *imap)
				43	{
				44	struct xfs_mount *mp = ip->i_mount;
				45
				46	if (imap->br_startblock == HOLESTARTBLOCK) {
				47	iomap->addr = IOMAP_NULL_ADDR;
				48	iomap->type = IOMAP_HOLE;
				49	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
				50	iomap->addr = IOMAP_NULL_ADDR;
				51	iomap->type = IOMAP_DELALLOC;
				52	} else {
				53	iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
				54	if (imap->br_state == XFS_EXT_UNWRITTEN)
				55	iomap->type = IOMAP_UNWRITTEN;
				56	else
				57	iomap->type = IOMAP_MAPPED;
				58	}
				59	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
				60	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
				61	iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
				62	iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
				63	}
				64
				65	xfs_extlen_t
				66	xfs_eof_alignment(
				67	struct xfs_inode *ip,
				68	xfs_extlen_t extsize)
				69	{
				70	struct xfs_mount *mp = ip->i_mount;
				71	xfs_extlen_t align = 0;
				72
				73	if (!XFS_IS_REALTIME_INODE(ip)) {
				74	/*
				75	* Round up the allocation request to a stripe unit
				76	* (m_dalign) boundary if the file size is >= stripe unit
				77	* size, and we are allocating past the allocation eof.
				78	*
				79	* If mounted with the "-o swalloc" option the alignment is
				80	* increased from the strip unit size to the stripe width.
				81	*/
				82	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
				83	align = mp->m_swidth;
				84	else if (mp->m_dalign)
				85	align = mp->m_dalign;
				86
				87	if (align && XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, align))
				88	align = 0;
				89	}
				90
				91	/*
				92	* Always round up the allocation request to an extent boundary
				93	* (when file on a real-time subvolume or has di_extsize hint).
				94	*/
				95	if (extsize) {
				96	if (align)
				97	align = roundup_64(align, extsize);
				98	else
				99	align = extsize;
				100	}
				101
				102	return align;
				103	}
				104
				105	STATIC int
				106	xfs_iomap_eof_align_last_fsb(
				107	struct xfs_inode *ip,
				108	xfs_extlen_t extsize,
				109	xfs_fileoff_t *last_fsb)
				110	{
				111	xfs_extlen_t align = xfs_eof_alignment(ip, extsize);
				112
				113	if (align) {
				114	xfs_fileoff_t new_last_fsb = roundup_64(*last_fsb, align);
				115	int eof, error;
				116
				117	error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
				118	if (error)
				119	return error;
				120	if (eof)
				121	*last_fsb = new_last_fsb;
				122	}
				123	return 0;
				124	}
				125
				126	STATIC int
				127	xfs_alert_fsblock_zero(
				128	xfs_inode_t *ip,
				129	xfs_bmbt_irec_t *imap)
				130	{
				131	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
				132	"Access to block zero in inode %llu "
				133	"start_block: %llx start_off: %llx "
				134	"blkcnt: %llx extent-state: %x",
				135	(unsigned long long)ip->i_ino,
				136	(unsigned long long)imap->br_startblock,
				137	(unsigned long long)imap->br_startoff,
				138	(unsigned long long)imap->br_blockcount,
				139	imap->br_state);
				140	return -EFSCORRUPTED;
				141	}
				142
				143	int
				144	xfs_iomap_write_direct(
				145	xfs_inode_t *ip,
				146	xfs_off_t offset,
				147	size_t count,
				148	xfs_bmbt_irec_t *imap,
				149	int nmaps)
				150	{
				151	xfs_mount_t *mp = ip->i_mount;
				152	xfs_fileoff_t offset_fsb;
				153	xfs_fileoff_t last_fsb;
				154	xfs_filblks_t count_fsb, resaligned;
				155	xfs_extlen_t extsz;
				156	int nimaps;
				157	int quota_flag;
				158	int rt;
				159	xfs_trans_t *tp;
				160	uint qblocks, resblks, resrtextents;
				161	int error;
				162	int lockmode;
				163	int bmapi_flags = XFS_BMAPI_PREALLOC;
				164	uint tflags = 0;
				165
				166	rt = XFS_IS_REALTIME_INODE(ip);
				167	extsz = xfs_get_extsz_hint(ip);
				168	lockmode = XFS_ILOCK_SHARED; /* locked by caller */
				169
				170	ASSERT(xfs_isilocked(ip, lockmode));
				171
				172	offset_fsb = XFS_B_TO_FSBT(mp, offset);
				173	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
				174	if ((offset + count) > XFS_ISIZE(ip)) {
				175	/*
				176	* Assert that the in-core extent list is present since this can
				177	* call xfs_iread_extents() and we only have the ilock shared.
				178	* This should be safe because the lock was held around a bmapi
				179	* call in the caller and we only need it to access the in-core
				180	* list.
				181	*/
				182	ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
				183	XFS_IFEXTENTS);
				184	error = xfs_iomap_eof_align_last_fsb(ip, extsz, &last_fsb);
				185	if (error)
				186	goto out_unlock;
				187	} else {
				188	if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
				189	last_fsb = min(last_fsb, (xfs_fileoff_t)
				190	imap->br_blockcount +
				191	imap->br_startoff);
				192	}
				193	count_fsb = last_fsb - offset_fsb;
				194	ASSERT(count_fsb > 0);
				195	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz);
				196
				197	if (unlikely(rt)) {
				198	resrtextents = qblocks = resaligned;
				199	resrtextents /= mp->m_sb.sb_rextsize;
				200	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
				201	quota_flag = XFS_QMOPT_RES_RTBLKS;
				202	} else {
				203	resrtextents = 0;
				204	resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
				205	quota_flag = XFS_QMOPT_RES_REGBLKS;
				206	}
				207
				208	/*
				209	* Drop the shared lock acquired by the caller, attach the dquot if
				210	* necessary and move on to transaction setup.
				211	*/
				212	xfs_iunlock(ip, lockmode);
				213	error = xfs_qm_dqattach(ip);
				214	if (error)
				215	return error;
				216
				217	/*
				218	* For DAX, we do not allocate unwritten extents, but instead we zero
				219	* the block before we commit the transaction. Ideally we'd like to do
				220	* this outside the transaction context, but if we commit and then crash
				221	* we may not have zeroed the blocks and this will be exposed on
				222	* recovery of the allocation. Hence we must zero before commit.
				223	*
				224	* Further, if we are mapping unwritten extents here, we need to zero
				225	* and convert them to written so that we don't need an unwritten extent
				226	* callback for DAX. This also means that we need to be able to dip into
				227	* the reserve block pool for bmbt block allocation if there is no space
				228	* left but we need to do unwritten extent conversion.
				229	*/
				230	if (IS_DAX(VFS_I(ip))) {
				231	bmapi_flags = XFS_BMAPI_CONVERT \| XFS_BMAPI_ZERO;
				232	if (imap->br_state == XFS_EXT_UNWRITTEN) {
				233	tflags \|= XFS_TRANS_RESERVE;
				234	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
				235	}
				236	}
				237	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, resrtextents,
				238	tflags, &tp);
				239	if (error)
				240	return error;
				241
				242	lockmode = XFS_ILOCK_EXCL;
				243	xfs_ilock(ip, lockmode);
				244
				245	error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
				246	if (error)
				247	goto out_trans_cancel;
				248
				249	xfs_trans_ijoin(tp, ip, 0);
				250
				251	/*
				252	* From this point onwards we overwrite the imap pointer that the
				253	* caller gave to us.
				254	*/
				255	nimaps = 1;
				256	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
				257	bmapi_flags, resblks, imap, &nimaps);
				258	if (error)
				259	goto out_res_cancel;
				260
				261	/*
				262	* Complete the transaction
				263	*/
				264	error = xfs_trans_commit(tp);
				265	if (error)
				266	goto out_unlock;
				267
				268	/*
				269	* Copy any maps to caller's array and return any error.
				270	*/
				271	if (nimaps == 0) {
				272	error = -ENOSPC;
				273	goto out_unlock;
				274	}
				275
				276	if (!(imap->br_startblock \|\| XFS_IS_REALTIME_INODE(ip)))
				277	error = xfs_alert_fsblock_zero(ip, imap);
				278
				279	out_unlock:
				280	xfs_iunlock(ip, lockmode);
				281	return error;
				282
				283	out_res_cancel:
				284	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
				285	out_trans_cancel:
				286	xfs_trans_cancel(tp);
				287	goto out_unlock;
				288	}
				289
				290	STATIC bool
				291	xfs_quota_need_throttle(
				292	struct xfs_inode *ip,
				293	int type,
				294	xfs_fsblock_t alloc_blocks)
				295	{
				296	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
				297
				298	if (!dq \|\| !xfs_this_quota_on(ip->i_mount, type))
				299	return false;
				300
				301	/* no hi watermark, no throttle */
				302	if (!dq->q_prealloc_hi_wmark)
				303	return false;
				304
				305	/* under the lo watermark, no throttle */
				306	if (dq->q_res_bcount + alloc_blocks < dq->q_prealloc_lo_wmark)
				307	return false;
				308
				309	return true;
				310	}
				311
				312	STATIC void
				313	xfs_quota_calc_throttle(
				314	struct xfs_inode *ip,
				315	int type,
				316	xfs_fsblock_t *qblocks,
				317	int *qshift,
				318	int64_t *qfreesp)
				319	{
				320	int64_t freesp;
				321	int shift = 0;
				322	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
				323
				324	/* no dq, or over hi wmark, squash the prealloc completely */
				325	if (!dq \|\| dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
				326	*qblocks = 0;
				327	*qfreesp = 0;
				328	return;
				329	}
				330
				331	freesp = dq->q_prealloc_hi_wmark - dq->q_res_bcount;
				332	if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
				333	shift = 2;
				334	if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
				335	shift += 2;
				336	if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
				337	shift += 2;
				338	}
				339
				340	if (freesp < *qfreesp)
				341	*qfreesp = freesp;
				342
				343	/* only overwrite the throttle values if we are more aggressive */
				344	if ((freesp >> shift) < (qblocks >> qshift)) {
				345	*qblocks = freesp;
				346	*qshift = shift;
				347	}
				348	}
				349
				350	/*
				351	* If we are doing a write at the end of the file and there are no allocations
				352	* past this one, then extend the allocation out to the file system's write
				353	* iosize.
				354	*
				355	* If we don't have a user specified preallocation size, dynamically increase
				356	* the preallocation size as the size of the file grows. Cap the maximum size
				357	* at a single extent or less if the filesystem is near full. The closer the
				358	* filesystem is to full, the smaller the maximum prealocation.
				359	*
				360	* As an exception we don't do any preallocation at all if the file is smaller
				361	* than the minimum preallocation and we are using the default dynamic
				362	* preallocation scheme, as it is likely this is the only write to the file that
				363	* is going to be done.
				364	*
				365	* We clean up any extra space left over when the file is closed in
				366	* xfs_inactive().
				367	*/
				368	STATIC xfs_fsblock_t
				369	xfs_iomap_prealloc_size(
				370	struct xfs_inode *ip,
				371	loff_t offset,
				372	loff_t count,
				373	struct xfs_iext_cursor *icur)
				374	{
				375	struct xfs_mount *mp = ip->i_mount;
				376	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
				377	xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
				378	struct xfs_bmbt_irec prev;
				379	int shift = 0;
				380	int64_t freesp;
				381	xfs_fsblock_t qblocks;
				382	int qshift = 0;
				383	xfs_fsblock_t alloc_blocks = 0;
				384
				385	if (offset + count <= XFS_ISIZE(ip))
				386	return 0;
				387
				388	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) &&
				389	(XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)))
				390	return 0;
				391
				392	/*
				393	* If an explicit allocsize is set, the file is small, or we
				394	* are writing behind a hole, then use the minimum prealloc:
				395	*/
				396	if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) \|\|
				397	XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) \|\|
				398	!xfs_iext_peek_prev_extent(ifp, icur, &prev) \|\|
				399	prev.br_startoff + prev.br_blockcount < offset_fsb)
				400	return mp->m_writeio_blocks;
				401
				402	/*
				403	* Determine the initial size of the preallocation. We are beyond the
				404	* current EOF here, but we need to take into account whether this is
				405	* a sparse write or an extending write when determining the
				406	* preallocation size. Hence we need to look up the extent that ends
				407	* at the current write offset and use the result to determine the
				408	* preallocation size.
				409	*
				410	* If the extent is a hole, then preallocation is essentially disabled.
				411	* Otherwise we take the size of the preceding data extent as the basis
				412	* for the preallocation size. If the size of the extent is greater than
				413	* half the maximum extent length, then use the current offset as the
				414	* basis. This ensures that for large files the preallocation size
				415	* always extends to MAXEXTLEN rather than falling short due to things
				416	* like stripe unit/width alignment of real extents.
				417	*/
				418	if (prev.br_blockcount <= (MAXEXTLEN >> 1))
				419	alloc_blocks = prev.br_blockcount << 1;
				420	else
				421	alloc_blocks = XFS_B_TO_FSB(mp, offset);
				422	if (!alloc_blocks)
				423	goto check_writeio;
				424	qblocks = alloc_blocks;
				425
				426	/*
				427	* MAXEXTLEN is not a power of two value but we round the prealloc down
				428	* to the nearest power of two value after throttling. To prevent the
				429	* round down from unconditionally reducing the maximum supported prealloc
				430	* size, we round up first, apply appropriate throttling, round down and
				431	* cap the value to MAXEXTLEN.
				432	*/
				433	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
				434	alloc_blocks);
				435
				436	freesp = percpu_counter_read_positive(&mp->m_fdblocks);
				437	if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
				438	shift = 2;
				439	if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
				440	shift++;
				441	if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
				442	shift++;
				443	if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
				444	shift++;
				445	if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
				446	shift++;
				447	}
				448
				449	/*
				450	* Check each quota to cap the prealloc size, provide a shift value to
				451	* throttle with and adjust amount of available space.
				452	*/
				453	if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
				454	xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift,
				455	&freesp);
				456	if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
				457	xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift,
				458	&freesp);
				459	if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
				460	xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift,
				461	&freesp);
				462
				463	/*
				464	* The final prealloc size is set to the minimum of free space available
				465	* in each of the quotas and the overall filesystem.
				466	*
				467	* The shift throttle value is set to the maximum value as determined by
				468	* the global low free space values and per-quota low free space values.
				469	*/
				470	alloc_blocks = min(alloc_blocks, qblocks);
				471	shift = max(shift, qshift);
				472
				473	if (shift)
				474	alloc_blocks >>= shift;
				475	/*
				476	* rounddown_pow_of_two() returns an undefined result if we pass in
				477	* alloc_blocks = 0.
				478	*/
				479	if (alloc_blocks)
				480	alloc_blocks = rounddown_pow_of_two(alloc_blocks);
				481	if (alloc_blocks > MAXEXTLEN)
				482	alloc_blocks = MAXEXTLEN;
				483
				484	/*
				485	* If we are still trying to allocate more space than is
				486	* available, squash the prealloc hard. This can happen if we
				487	* have a large file on a small filesystem and the above
				488	* lowspace thresholds are smaller than MAXEXTLEN.
				489	*/
				490	while (alloc_blocks && alloc_blocks >= freesp)
				491	alloc_blocks >>= 4;
				492	check_writeio:
				493	if (alloc_blocks < mp->m_writeio_blocks)
				494	alloc_blocks = mp->m_writeio_blocks;
				495	trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
				496	mp->m_writeio_blocks);
				497	return alloc_blocks;
				498	}
				499
				500	static int
				501	xfs_file_iomap_begin_delay(
				502	struct inode *inode,
				503	loff_t offset,
				504	loff_t count,
				505	struct iomap *iomap)
				506	{
				507	struct xfs_inode *ip = XFS_I(inode);
				508	struct xfs_mount *mp = ip->i_mount;
				509	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
				510	xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
				511	xfs_fileoff_t maxbytes_fsb =
				512	XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
				513	xfs_fileoff_t end_fsb;
				514	int error = 0, eof = 0;
				515	struct xfs_bmbt_irec got;
				516	struct xfs_iext_cursor icur;
				517	xfs_fsblock_t prealloc_blocks = 0;
				518
				519	ASSERT(!XFS_IS_REALTIME_INODE(ip));
				520	ASSERT(!xfs_get_extsz_hint(ip));
				521
				522	xfs_ilock(ip, XFS_ILOCK_EXCL);
				523
				524	if (unlikely(XFS_TEST_ERROR(
				525	(XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
				526	XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
				527	mp, XFS_ERRTAG_BMAPIFORMAT))) {
				528	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
				529	error = -EFSCORRUPTED;
				530	goto out_unlock;
				531	}
				532
				533	XFS_STATS_INC(mp, xs_blk_mapw);
				534
				535	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
				536	error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
				537	if (error)
				538	goto out_unlock;
				539	}
				540
				541	eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got);
				542	if (!eof && got.br_startoff <= offset_fsb) {
				543	if (xfs_is_reflink_inode(ip)) {
				544	bool shared;
				545
				546	end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
				547	maxbytes_fsb);
				548	xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
				549	error = xfs_reflink_reserve_cow(ip, &got, &shared);
				550	if (error)
				551	goto out_unlock;
				552	}
				553
				554	trace_xfs_iomap_found(ip, offset, count, 0, &got);
				555	goto done;
				556	}
				557
				558	error = xfs_qm_dqattach_locked(ip, false);
				559	if (error)
				560	goto out_unlock;
				561
				562	/*
				563	* We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages
				564	* to keep the chunks of work done where somewhat symmetric with the
				565	* work writeback does. This is a completely arbitrary number pulled
				566	* out of thin air as a best guess for initial testing.
				567	*
				568	* Note that the values needs to be less than 32-bits wide until
				569	* the lower level functions are updated.
				570	*/
				571	count = min_t(loff_t, count, 1024 * PAGE_SIZE);
				572	end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
				573
				574	if (eof) {
				575	prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count,
				576	&icur);
				577	if (prealloc_blocks) {
				578	xfs_extlen_t align;
				579	xfs_off_t end_offset;
				580	xfs_fileoff_t p_end_fsb;
				581
				582	end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1);
				583	p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
				584	prealloc_blocks;
				585
				586	align = xfs_eof_alignment(ip, 0);
				587	if (align)
				588	p_end_fsb = roundup_64(p_end_fsb, align);
				589
				590	p_end_fsb = min(p_end_fsb, maxbytes_fsb);
				591	ASSERT(p_end_fsb > offset_fsb);
				592	prealloc_blocks = p_end_fsb - end_fsb;
				593	}
				594	}
				595
				596	retry:
				597	error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
				598	end_fsb - offset_fsb, prealloc_blocks, &got, &icur,
				599	eof);
				600	switch (error) {
				601	case 0:
				602	break;
				603	case -ENOSPC:
				604	case -EDQUOT:
				605	/* retry without any preallocation */
				606	trace_xfs_delalloc_enospc(ip, offset, count);
				607	if (prealloc_blocks) {
				608	prealloc_blocks = 0;
				609	goto retry;
				610	}
				611	/FALLTHRU/
				612	default:
				613	goto out_unlock;
				614	}
				615
				616	/*
				617	* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
				618	* them out if the write happens to fail.
				619	*/
				620	iomap->flags \|= IOMAP_F_NEW;
				621	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
				622	done:
				623	if (isnullstartblock(got.br_startblock))
				624	got.br_startblock = DELAYSTARTBLOCK;
				625
				626	if (!got.br_startblock) {
				627	error = xfs_alert_fsblock_zero(ip, &got);
				628	if (error)
				629	goto out_unlock;
				630	}
				631
				632	xfs_bmbt_to_iomap(ip, iomap, &got);
				633
				634	out_unlock:
				635	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				636	return error;
				637	}
				638
				639	/*
				640	* Pass in a delayed allocate extent, convert it to real extents;
				641	* return to the caller the extent we create which maps on top of
				642	* the originating callers request.
				643	*
				644	* Called without a lock on the inode.
				645	*
				646	* We no longer bother to look at the incoming map - all we have to
				647	* guarantee is that whatever we allocate fills the required range.
				648	*/
				649	int
				650	xfs_iomap_write_allocate(
				651	xfs_inode_t *ip,
				652	int whichfork,
				653	xfs_off_t offset,
				654	xfs_bmbt_irec_t *imap,
				655	unsigned int *cow_seq)
				656	{
				657	xfs_mount_t *mp = ip->i_mount;
				658	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
				659	xfs_fileoff_t offset_fsb, last_block;
				660	xfs_fileoff_t end_fsb, map_start_fsb;
				661	xfs_filblks_t count_fsb;
				662	xfs_trans_t *tp;
				663	int nimaps;
				664	int error = 0;
				665	int flags = XFS_BMAPI_DELALLOC;
				666	int nres;
				667
				668	if (whichfork == XFS_COW_FORK)
				669	flags \|= XFS_BMAPI_COWFORK \| XFS_BMAPI_PREALLOC;
				670
				671	/*
				672	* Make sure that the dquots are there.
				673	*/
				674	error = xfs_qm_dqattach(ip);
				675	if (error)
				676	return error;
				677
				678	offset_fsb = XFS_B_TO_FSBT(mp, offset);
				679	count_fsb = imap->br_blockcount;
				680	map_start_fsb = imap->br_startoff;
				681
				682	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
				683
				684	while (count_fsb != 0) {
				685	/*
				686	* Set up a transaction with which to allocate the
				687	* backing store for the file. Do allocations in a
				688	* loop until we get some space in the range we are
				689	* interested in. The other space that might be allocated
				690	* is in the delayed allocation extent on which we sit
				691	* but before our buffer starts.
				692	*/
				693	nimaps = 0;
				694	while (nimaps == 0) {
				695	nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
				696	/*
				697	* We have already reserved space for the extent and any
				698	* indirect blocks when creating the delalloc extent,
				699	* there is no need to reserve space in this transaction
				700	* again.
				701	*/
				702	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0,
				703	0, XFS_TRANS_RESERVE, &tp);
				704	if (error)
				705	return error;
				706
				707	xfs_ilock(ip, XFS_ILOCK_EXCL);
				708	xfs_trans_ijoin(tp, ip, 0);
				709
				710	/*
				711	* it is possible that the extents have changed since
				712	* we did the read call as we dropped the ilock for a
				713	* while. We have to be careful about truncates or hole
				714	* punchs here - we are not allowed to allocate
				715	* non-delalloc blocks here.
				716	*
				717	* The only protection against truncation is the pages
				718	* for the range we are being asked to convert are
				719	* locked and hence a truncate will block on them
				720	* first.
				721	*
				722	* As a result, if we go beyond the range we really
				723	* need and hit an delalloc extent boundary followed by
				724	* a hole while we have excess blocks in the map, we
				725	* will fill the hole incorrectly and overrun the
				726	* transaction reservation.
				727	*
				728	* Using a single map prevents this as we are forced to
				729	* check each map we look for overlap with the desired
				730	* range and abort as soon as we find it. Also, given
				731	* that we only return a single map, having one beyond
				732	* what we can return is probably a bit silly.
				733	*
				734	* We also need to check that we don't go beyond EOF;
				735	* this is a truncate optimisation as a truncate sets
				736	* the new file size before block on the pages we
				737	* currently have locked under writeback. Because they
				738	* are about to be tossed, we don't need to write them
				739	* back....
				740	*/
				741	nimaps = 1;
				742	end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
				743	error = xfs_bmap_last_offset(ip, &last_block,
				744	XFS_DATA_FORK);
				745	if (error)
				746	goto trans_cancel;
				747
				748	last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
				749	if ((map_start_fsb + count_fsb) > last_block) {
				750	count_fsb = last_block - map_start_fsb;
				751	if (count_fsb == 0) {
				752	error = -EAGAIN;
				753	goto trans_cancel;
				754	}
				755	}
				756
				757	/*
				758	* From this point onwards we overwrite the imap
				759	* pointer that the caller gave to us.
				760	*/
				761	error = xfs_bmapi_write(tp, ip, map_start_fsb,
				762	count_fsb, flags, nres, imap,
				763	&nimaps);
				764	if (error)
				765	goto trans_cancel;
				766
				767	error = xfs_trans_commit(tp);
				768	if (error)
				769	goto error0;
				770
				771	if (whichfork == XFS_COW_FORK)
				772	*cow_seq = READ_ONCE(ifp->if_seq);
				773	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				774	}
				775
				776	/*
				777	* See if we were able to allocate an extent that
				778	* covers at least part of the callers request
				779	*/
				780	if (!(imap->br_startblock \|\| XFS_IS_REALTIME_INODE(ip)))
				781	return xfs_alert_fsblock_zero(ip, imap);
				782
				783	if ((offset_fsb >= imap->br_startoff) &&
				784	(offset_fsb < (imap->br_startoff +
				785	imap->br_blockcount))) {
				786	XFS_STATS_INC(mp, xs_xstrat_quick);
				787	return 0;
				788	}
				789
				790	/*
				791	* So far we have not mapped the requested part of the
				792	* file, just surrounding data, try again.
				793	*/
				794	count_fsb -= imap->br_blockcount;
				795	map_start_fsb = imap->br_startoff + imap->br_blockcount;
				796	}
				797
				798	trans_cancel:
				799	xfs_trans_cancel(tp);
				800	error0:
				801	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				802	return error;
				803	}
				804
				805	int
				806	xfs_iomap_write_unwritten(
				807	xfs_inode_t *ip,
				808	xfs_off_t offset,
				809	xfs_off_t count,
				810	bool update_isize)
				811	{
				812	xfs_mount_t *mp = ip->i_mount;
				813	xfs_fileoff_t offset_fsb;
				814	xfs_filblks_t count_fsb;
				815	xfs_filblks_t numblks_fsb;
				816	int nimaps;
				817	xfs_trans_t *tp;
				818	xfs_bmbt_irec_t imap;
				819	struct inode *inode = VFS_I(ip);
				820	xfs_fsize_t i_size;
				821	uint resblks;
				822	int error;
				823
				824	trace_xfs_unwritten_convert(ip, offset, count);
				825
				826	offset_fsb = XFS_B_TO_FSBT(mp, offset);
				827	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
				828	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
				829
				830	/*
				831	* Reserve enough blocks in this transaction for two complete extent
				832	* btree splits. We may be converting the middle part of an unwritten
				833	* extent and in this case we will insert two new extents in the btree
				834	* each of which could cause a full split.
				835	*
				836	* This reservation amount will be used in the first call to
				837	* xfs_bmbt_split() to select an AG with enough space to satisfy the
				838	* rest of the operation.
				839	*/
				840	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
				841
				842	do {
				843	/*
				844	* Set up a transaction to convert the range of extents
				845	* from unwritten to real. Do allocations in a loop until
				846	* we have covered the range passed in.
				847	*
				848	* Note that we can't risk to recursing back into the filesystem
				849	* here as we might be asked to write out the same inode that we
				850	* complete here and might deadlock on the iolock.
				851	*/
				852	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
				853	XFS_TRANS_RESERVE \| XFS_TRANS_NOFS, &tp);
				854	if (error)
				855	return error;
				856
				857	xfs_ilock(ip, XFS_ILOCK_EXCL);
				858	xfs_trans_ijoin(tp, ip, 0);
				859
				860	/*
				861	* Modify the unwritten extent state of the buffer.
				862	*/
				863	nimaps = 1;
				864	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
				865	XFS_BMAPI_CONVERT, resblks, &imap,
				866	&nimaps);
				867	if (error)
				868	goto error_on_bmapi_transaction;
				869
				870	/*
				871	* Log the updated inode size as we go. We have to be careful
				872	* to only log it up to the actual write offset if it is
				873	* halfway into a block.
				874	*/
				875	i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
				876	if (i_size > offset + count)
				877	i_size = offset + count;
				878	if (update_isize && i_size > i_size_read(inode))
				879	i_size_write(inode, i_size);
				880	i_size = xfs_new_eof(ip, i_size);
				881	if (i_size) {
				882	ip->i_d.di_size = i_size;
				883	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				884	}
				885
				886	error = xfs_trans_commit(tp);
				887	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				888	if (error)
				889	return error;
				890
				891	if (!(imap.br_startblock \|\| XFS_IS_REALTIME_INODE(ip)))
				892	return xfs_alert_fsblock_zero(ip, &imap);
				893
				894	if ((numblks_fsb = imap.br_blockcount) == 0) {
				895	/*
				896	* The numblks_fsb value should always get
				897	* smaller, otherwise the loop is stuck.
				898	*/
				899	ASSERT(imap.br_blockcount);
				900	break;
				901	}
				902	offset_fsb += numblks_fsb;
				903	count_fsb -= numblks_fsb;
				904	} while (count_fsb > 0);
				905
				906	return 0;
				907
				908	error_on_bmapi_transaction:
				909	xfs_trans_cancel(tp);
				910	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				911	return error;
				912	}
				913
				914	static inline bool
				915	imap_needs_alloc(
				916	struct inode *inode,
				917	struct xfs_bmbt_irec *imap,
				918	int nimaps)
				919	{
				920	return !nimaps \|\|
				921	imap->br_startblock == HOLESTARTBLOCK \|\|
				922	imap->br_startblock == DELAYSTARTBLOCK \|\|
				923	(IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
				924	}
				925
				926	static inline bool
				927	needs_cow_for_zeroing(
				928	struct xfs_bmbt_irec *imap,
				929	int nimaps)
				930	{
				931	return nimaps &&
				932	imap->br_startblock != HOLESTARTBLOCK &&
				933	imap->br_state != XFS_EXT_UNWRITTEN;
				934	}
				935
				936	static int
				937	xfs_ilock_for_iomap(
				938	struct xfs_inode *ip,
				939	unsigned flags,
				940	unsigned *lockmode)
				941	{
				942	unsigned mode = XFS_ILOCK_SHARED;
				943	bool is_write = flags & (IOMAP_WRITE \| IOMAP_ZERO);
				944
				945	/*
				946	* COW writes may allocate delalloc space or convert unwritten COW
				947	* extents, so we need to make sure to take the lock exclusively here.
				948	*/
				949	if (xfs_is_reflink_inode(ip) && is_write) {
				950	/*
				951	* FIXME: It could still overwrite on unshared extents and not
				952	* need allocation.
				953	*/
				954	if (flags & IOMAP_NOWAIT)
				955	return -EAGAIN;
				956	mode = XFS_ILOCK_EXCL;
				957	}
				958
				959	/*
				960	* Extents not yet cached requires exclusive access, don't block. This
				961	* is an opencoded xfs_ilock_data_map_shared() call but with
				962	* non-blocking behaviour.
				963	*/
				964	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
				965	if (flags & IOMAP_NOWAIT)
				966	return -EAGAIN;
				967	mode = XFS_ILOCK_EXCL;
				968	}
				969
				970	relock:
				971	if (flags & IOMAP_NOWAIT) {
				972	if (!xfs_ilock_nowait(ip, mode))
				973	return -EAGAIN;
				974	} else {
				975	xfs_ilock(ip, mode);
				976	}
				977
				978	/*
				979	* The reflink iflag could have changed since the earlier unlocked
				980	* check, so if we got ILOCK_SHARED for a write and but we're now a
				981	* reflink inode we have to switch to ILOCK_EXCL and relock.
				982	*/
				983	if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_reflink_inode(ip)) {
				984	xfs_iunlock(ip, mode);
				985	mode = XFS_ILOCK_EXCL;
				986	goto relock;
				987	}
				988
				989	*lockmode = mode;
				990	return 0;
				991	}
				992
				993	static int
				994	xfs_file_iomap_begin(
				995	struct inode *inode,
				996	loff_t offset,
				997	loff_t length,
				998	unsigned flags,
				999	struct iomap *iomap)
				1000	{
				1001	struct xfs_inode *ip = XFS_I(inode);
				1002	struct xfs_mount *mp = ip->i_mount;
				1003	struct xfs_bmbt_irec imap;
				1004	xfs_fileoff_t offset_fsb, end_fsb;
				1005	int nimaps = 1, error = 0;
				1006	bool shared = false, trimmed = false;
				1007	unsigned lockmode;
				1008
				1009	if (XFS_FORCED_SHUTDOWN(mp))
				1010	return -EIO;
				1011
				1012	if (((flags & (IOMAP_WRITE \| IOMAP_DIRECT)) == IOMAP_WRITE) &&
				1013	!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
				1014	/* Reserve delalloc blocks for regular writeback. */
				1015	return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
				1016	}
				1017
				1018	/*
				1019	* Lock the inode in the manner required for the specified operation and
				1020	* check for as many conditions that would result in blocking as
				1021	* possible. This removes most of the non-blocking checks from the
				1022	* mapping code below.
				1023	*/
				1024	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
				1025	if (error)
				1026	return error;
				1027
				1028	ASSERT(offset <= mp->m_super->s_maxbytes);
				1029	if (offset > mp->m_super->s_maxbytes - length)
				1030	length = mp->m_super->s_maxbytes - offset;
				1031	offset_fsb = XFS_B_TO_FSBT(mp, offset);
				1032	end_fsb = XFS_B_TO_FSB(mp, offset + length);
				1033
				1034	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
				1035	&nimaps, 0);
				1036	if (error)
				1037	goto out_unlock;
				1038
				1039	if (flags & IOMAP_REPORT) {
				1040	/* Trim the mapping to the nearest shared extent boundary. */
				1041	error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
				1042	&trimmed);
				1043	if (error)
				1044	goto out_unlock;
				1045	}
				1046
				1047	/* Non-modifying mapping requested, so we are done */
				1048	if (!(flags & (IOMAP_WRITE \| IOMAP_ZERO)))
				1049	goto out_found;
				1050
				1051	/*
				1052	* Break shared extents if necessary. Checks for non-blocking IO have
				1053	* been done up front, so we don't need to do them here.
				1054	*/
				1055	if (xfs_is_reflink_inode(ip)) {
				1056	/* if zeroing doesn't need COW allocation, then we are done. */
				1057	if ((flags & IOMAP_ZERO) &&
				1058	!needs_cow_for_zeroing(&imap, nimaps))
				1059	goto out_found;
				1060
				1061	if (flags & IOMAP_DIRECT) {
				1062	/* may drop and re-acquire the ilock */
				1063	error = xfs_reflink_allocate_cow(ip, &imap, &shared,
				1064	&lockmode);
				1065	if (error)
				1066	goto out_unlock;
				1067	} else {
				1068	error = xfs_reflink_reserve_cow(ip, &imap, &shared);
				1069	if (error)
				1070	goto out_unlock;
				1071	}
				1072
				1073	end_fsb = imap.br_startoff + imap.br_blockcount;
				1074	length = XFS_FSB_TO_B(mp, end_fsb) - offset;
				1075	}
				1076
				1077	/* Don't need to allocate over holes when doing zeroing operations. */
				1078	if (flags & IOMAP_ZERO)
				1079	goto out_found;
				1080
				1081	if (!imap_needs_alloc(inode, &imap, nimaps))
				1082	goto out_found;
				1083
				1084	/* If nowait is set bail since we are going to make allocations. */
				1085	if (flags & IOMAP_NOWAIT) {
				1086	error = -EAGAIN;
				1087	goto out_unlock;
				1088	}
				1089
				1090	/*
				1091	* We cap the maximum length we map to a sane size to keep the chunks
				1092	* of work done where somewhat symmetric with the work writeback does.
				1093	* This is a completely arbitrary number pulled out of thin air as a
				1094	* best guess for initial testing.
				1095	*
				1096	* Note that the values needs to be less than 32-bits wide until the
				1097	* lower level functions are updated.
				1098	*/
				1099	length = min_t(loff_t, length, 1024 * PAGE_SIZE);
				1100
				1101	/*
				1102	* xfs_iomap_write_direct() expects the shared lock. It is unlocked on
				1103	* return.
				1104	*/
				1105	if (lockmode == XFS_ILOCK_EXCL)
				1106	xfs_ilock_demote(ip, lockmode);
				1107	error = xfs_iomap_write_direct(ip, offset, length, &imap,
				1108	nimaps);
				1109	if (error)
				1110	return error;
				1111
				1112	iomap->flags \|= IOMAP_F_NEW;
				1113	trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
				1114
				1115	out_finish:
				1116	if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields
				1117	& ~XFS_ILOG_TIMESTAMP))
				1118	iomap->flags \|= IOMAP_F_DIRTY;
				1119
				1120	xfs_bmbt_to_iomap(ip, iomap, &imap);
				1121
				1122	if (shared)
				1123	iomap->flags \|= IOMAP_F_SHARED;
				1124	return 0;
				1125
				1126	out_found:
				1127	ASSERT(nimaps);
				1128	xfs_iunlock(ip, lockmode);
				1129	trace_xfs_iomap_found(ip, offset, length, 0, &imap);
				1130	goto out_finish;
				1131
				1132	out_unlock:
				1133	xfs_iunlock(ip, lockmode);
				1134	return error;
				1135	}
				1136
				1137	static int
				1138	xfs_file_iomap_end_delalloc(
				1139	struct xfs_inode *ip,
				1140	loff_t offset,
				1141	loff_t length,
				1142	ssize_t written,
				1143	struct iomap *iomap)
				1144	{
				1145	struct xfs_mount *mp = ip->i_mount;
				1146	xfs_fileoff_t start_fsb;
				1147	xfs_fileoff_t end_fsb;
				1148	int error = 0;
				1149
				1150	/*
				1151	* Behave as if the write failed if drop writes is enabled. Set the NEW
				1152	* flag to force delalloc cleanup.
				1153	*/
				1154	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) {
				1155	iomap->flags \|= IOMAP_F_NEW;
				1156	written = 0;
				1157	}
				1158
				1159	/*
				1160	* start_fsb refers to the first unused block after a short write. If
				1161	* nothing was written, round offset down to point at the first block in
				1162	* the range.
				1163	*/
				1164	if (unlikely(!written))
				1165	start_fsb = XFS_B_TO_FSBT(mp, offset);
				1166	else
				1167	start_fsb = XFS_B_TO_FSB(mp, offset + written);
				1168	end_fsb = XFS_B_TO_FSB(mp, offset + length);
				1169
				1170	/*
				1171	* Trim delalloc blocks if they were allocated by this write and we
				1172	* didn't manage to write the whole range.
				1173	*
				1174	* We don't need to care about racing delalloc as we hold i_mutex
				1175	* across the reserve/allocate/unreserve calls. If there are delalloc
				1176	* blocks in the range, they are ours.
				1177	*/
				1178	if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
				1179	truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
				1180	XFS_FSB_TO_B(mp, end_fsb) - 1);
				1181
				1182	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
				1183	end_fsb - start_fsb);
				1184	if (error && !XFS_FORCED_SHUTDOWN(mp)) {
				1185	xfs_alert(mp, "%s: unable to clean up ino %lld",
				1186	__func__, ip->i_ino);
				1187	return error;
				1188	}
				1189	}
				1190
				1191	return 0;
				1192	}
				1193
				1194	static int
				1195	xfs_file_iomap_end(
				1196	struct inode *inode,
				1197	loff_t offset,
				1198	loff_t length,
				1199	ssize_t written,
				1200	unsigned flags,
				1201	struct iomap *iomap)
				1202	{
				1203	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
				1204	return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
				1205	length, written, iomap);
				1206	return 0;
				1207	}
				1208
				1209	const struct iomap_ops xfs_iomap_ops = {
				1210	.iomap_begin = xfs_file_iomap_begin,
				1211	.iomap_end = xfs_file_iomap_end,
				1212	};
				1213
				1214	static int
				1215	xfs_xattr_iomap_begin(
				1216	struct inode *inode,
				1217	loff_t offset,
				1218	loff_t length,
				1219	unsigned flags,
				1220	struct iomap *iomap)
				1221	{
				1222	struct xfs_inode *ip = XFS_I(inode);
				1223	struct xfs_mount *mp = ip->i_mount;
				1224	xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
				1225	xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length);
				1226	struct xfs_bmbt_irec imap;
				1227	int nimaps = 1, error = 0;
				1228	unsigned lockmode;
				1229
				1230	if (XFS_FORCED_SHUTDOWN(mp))
				1231	return -EIO;
				1232
				1233	lockmode = xfs_ilock_attr_map_shared(ip);
				1234
				1235	/* if there are no attribute fork or extents, return ENOENT */
				1236	if (!XFS_IFORK_Q(ip) \|\| !ip->i_d.di_anextents) {
				1237	error = -ENOENT;
				1238	goto out_unlock;
				1239	}
				1240
				1241	ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
				1242	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
				1243	&nimaps, XFS_BMAPI_ATTRFORK);
				1244	out_unlock:
				1245	xfs_iunlock(ip, lockmode);
				1246
				1247	if (!error) {
				1248	ASSERT(nimaps);
				1249	xfs_bmbt_to_iomap(ip, iomap, &imap);
				1250	}
				1251
				1252	return error;
				1253	}
				1254
				1255	const struct iomap_ops xfs_xattr_iomap_ops = {
				1256	.iomap_begin = xfs_xattr_iomap_begin,
				1257	};