// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * Architecture independence:
 * Copyright (c) 2005, Bull S.A.
 * Written by Pierre Peiffer <pierre.peiffer@bull.net>
 */

/*
 * Extents support for EXT4
 *
 * TODO:
 * - ext4*_error() should be used in some situations
 * - analyze all BUG()/BUG_ON(), use -EIO where appropriate
 * - smart tree reduction
 */

#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/fiemap.h>
#include <linux/backing-dev.h>
#include <linux/iomap.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "xattr.h"

#include <trace/events/ext4.h>

/*
 * used by extent splitting.
 */
#define EXT4_EXT_MAY_ZEROOUT	0x1  /* safe to zeroout if split fails \
					due to ENOSPC */
#define EXT4_EXT_MARK_UNWRIT1	0x2  /* mark first half unwritten */
#define EXT4_EXT_MARK_UNWRIT2	0x4  /* mark second half unwritten */

#define EXT4_EXT_DATA_VALID1	0x8  /* first half contains valid data */
#define EXT4_EXT_DATA_VALID2	0x10 /* second half contains valid data */

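/*
 * When the metadata_csum feature is enabled, extent tree blocks end in
 * an ext4_extent_tail holding a checksum of the block contents, seeded
 * with the per-inode checksum seed. The helpers below compute, verify
 * and update that checksum; verify/set are no-ops without the feature.
 */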
static __le32 ext4_extent_block_csum(struct inode *inode,
				     struct ext4_extent_header *eh)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	__u32 csum;

	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh,
			   EXT4_EXTENT_TAIL_OFFSET(eh));
	return cpu_to_le32(csum);
}

static int ext4_extent_block_csum_verify(struct inode *inode,
					 struct ext4_extent_header *eh)
{
	struct ext4_extent_tail *et;

	if (!ext4_has_metadata_csum(inode->i_sb))
		return 1;

	et = find_ext4_extent_tail(eh);
	if (et->et_checksum != ext4_extent_block_csum(inode, eh))
		return 0;
	return 1;
}

static void ext4_extent_block_csum_set(struct inode *inode,
				       struct ext4_extent_header *eh)
{
	struct ext4_extent_tail *et;

	if (!ext4_has_metadata_csum(inode->i_sb))
		return;

	et = find_ext4_extent_tail(eh);
	et->et_checksum = ext4_extent_block_csum(inode, eh);
}

static int ext4_split_extent_at(handle_t *handle,
				struct inode *inode,
				struct ext4_ext_path **ppath,
				ext4_lblk_t split,
				int split_flag,
				int flags);

static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
{
	/*
	 * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
	 * moment, get_block can be called only for blocks inside i_size since
	 * page cache has been already dropped and writes are blocked by
	 * i_mutex. So we can safely drop the i_data_sem here.
	 */
	BUG_ON(EXT4_JOURNAL(inode) == NULL);
	ext4_discard_preallocations(inode, 0);
	up_write(&EXT4_I(inode)->i_data_sem);
	*dropped = 1;
	return 0;
}

/*
 * Make sure 'handle' has at least 'check_cred' credits. If not, restart
 * transaction with 'restart_cred' credits. The function drops i_data_sem
 * when restarting transaction and gets it after transaction is restarted.
 *
 * The function returns 0 on success, 1 if transaction had to be restarted,
 * and < 0 in case of fatal error.
 */
int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
				int check_cred, int restart_cred,
				int revoke_cred)
{
	int ret;
	int dropped = 0;

	ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
		revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
	if (dropped)
		down_write(&EXT4_I(inode)->i_data_sem);
	return ret;
}

/*
 * could return:
 * - EROFS
 * - ENOMEM
 */
static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	int err = 0;

	if (path->p_bh) {
		/* path points to block */
		BUFFER_TRACE(path->p_bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, path->p_bh);
		/*
		 * The extent buffer's verified bit will be set again in
		 * __ext4_ext_dirty(). We could be left with an inconsistent
		 * buffer if the extent-updating procedure breaks off due to
		 * an error, so force the buffer to be checked again.
		 */
		if (!err)
			clear_buffer_verified(path->p_bh);
	}
	/* path points to leaf/index in inode body */
	/* we use in-core data, no need to protect them */
	return err;
}

/*
 * could return:
 * - EROFS
 * - ENOMEM
 * - EIO
 */
static int __ext4_ext_dirty(const char *where, unsigned int line,
			    handle_t *handle, struct inode *inode,
			    struct ext4_ext_path *path)
{
	int err;

	WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
	if (path->p_bh) {
		ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
		/* path points to block */
		err = __ext4_handle_dirty_metadata(where, line, handle,
						   inode, path->p_bh);
		/* Extents updating done, re-set verified flag */
		if (!err)
			set_buffer_verified(path->p_bh);
	} else {
		/* path points to leaf/index in inode body */
		err = ext4_mark_inode_dirty(handle, inode);
	}
	return err;
}

#define ext4_ext_dirty(handle, inode, path) \
		__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))

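/*
 * ext4_ext_find_goal:
 * pick a preferred physical block for an allocation at logical block
 * @block, extrapolating from the extent that @path points to (or from
 * the index block itself), and falling back to the inode's goal block
 * when the tree provides no hint.
 */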
static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
				       struct ext4_ext_path *path,
				       ext4_lblk_t block)
{
	if (path) {
		int depth = path->p_depth;
		struct ext4_extent *ex;

		/*
		 * Try to predict block placement assuming that we are
		 * filling in a file which will eventually be
		 * non-sparse --- i.e., in the case of libbfd writing
		 * an ELF object sections out-of-order but in a way
		 * that eventually results in a contiguous object or
		 * executable file, or some database extending a table
		 * space file.  However, this is actually somewhat
		 * non-ideal if we are writing a sparse file such as
		 * qemu or KVM writing a raw image file that is going
		 * to stay fairly sparse, since it will end up
		 * fragmenting the file system's free space.  Maybe we
		 * should have some heuristics or some way to allow
		 * userspace to pass a hint to file system,
		 * especially if the latter case turns out to be
		 * common.
		 */
		ex = path[depth].p_ext;
		if (ex) {
			ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
			ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);

			if (block > ext_block)
				return ext_pblk + (block - ext_block);
			else
				return ext_pblk - (ext_block - block);
		}

		/* it looks like index is empty;
		 * try to find starting block from index itself */
		if (path[depth].p_bh)
			return path[depth].p_bh->b_blocknr;
	}

	/* OK. use inode's group */
	return ext4_inode_to_goal_block(inode);
}

/*
 * Allocation for a meta data block
 */
static ext4_fsblk_t
ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
			struct ext4_ext_path *path,
			struct ext4_extent *ex, int *err, unsigned int flags)
{
	ext4_fsblk_t goal, newblock;

	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
	newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
					NULL, err);
	return newblock;
}

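/*
 * The ext4_ext_space_*() helpers below return how many extent or index
 * entries fit in a tree node, for both on-disk blocks and the in-inode
 * root (i_data). With AGGRESSIVE_TEST defined the capacity is clamped
 * to small values so that deep trees are exercised in testing.
 */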
static inline int ext4_ext_space_block(struct inode *inode, int check)
{
	int size;

	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
			/ sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 6)
		size = 6;
#endif
	return size;
}

static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
{
	int size;

	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
			/ sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 5)
		size = 5;
#endif
	return size;
}

static inline int ext4_ext_space_root(struct inode *inode, int check)
{
	int size;

	size = sizeof(EXT4_I(inode)->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 3)
		size = 3;
#endif
	return size;
}

static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
{
	int size;

	size = sizeof(EXT4_I(inode)->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 4)
		size = 4;
#endif
	return size;
}

static inline int
ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
			   struct ext4_ext_path **ppath, ext4_lblk_t lblk,
			   int nofail)
{
	struct ext4_ext_path *path = *ppath;
	int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
	int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;

	if (nofail)
		flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;

	return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
			EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
			flags);
}

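/*
 * ext4_ext_max_entries:
 * return the entry limit for a node at @depth: the in-inode root when
 * @depth equals the tree depth, an on-disk block otherwise, holding
 * extents at depth 0 and indexes at any other depth.
 */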
static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
	int max;

	if (depth == ext_depth(inode)) {
		if (depth == 0)
			max = ext4_ext_space_root(inode, 1);
		else
			max = ext4_ext_space_root_idx(inode, 1);
	} else {
		if (depth == 0)
			max = ext4_ext_space_block(inode, 1);
		else
			max = ext4_ext_space_block_idx(inode, 1);
	}

	return max;
}

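/*
 * The helpers below sanity-check a single extent, a single index entry
 * and a whole node's worth of entries (block-range validity, ordering
 * and overlaps) before the tree node is trusted.
 */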
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
	ext4_fsblk_t block = ext4_ext_pblock(ext);
	int len = ext4_ext_get_actual_len(ext);
	ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);

	/*
	 * We allow neither:
	 *  - zero length
	 *  - overflow/wrap-around
	 */
	if (lblock + len <= lblock)
		return 0;
	return ext4_inode_block_valid(inode, block, len);
}

static int ext4_valid_extent_idx(struct inode *inode,
				 struct ext4_extent_idx *ext_idx)
{
	ext4_fsblk_t block = ext4_idx_pblock(ext_idx);

	return ext4_inode_block_valid(inode, block, 1);
}

static int ext4_valid_extent_entries(struct inode *inode,
				     struct ext4_extent_header *eh,
				     ext4_lblk_t lblk, ext4_fsblk_t *pblk,
				     int depth)
{
	unsigned short entries;
	ext4_lblk_t lblock = 0;
	ext4_lblk_t prev = 0;

	if (eh->eh_entries == 0)
		return 1;

	entries = le16_to_cpu(eh->eh_entries);

	if (depth == 0) {
		/* leaf entries */
		struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);

		/*
		 * The logical block in the first entry should be equal to
		 * the number in the index block.
		 */
		if (depth != ext_depth(inode) &&
		    lblk != le32_to_cpu(ext->ee_block))
			return 0;
		while (entries) {
			if (!ext4_valid_extent(inode, ext))
				return 0;

			/* Check for overlapping extents */
			lblock = le32_to_cpu(ext->ee_block);
			if ((lblock <= prev) && prev) {
				*pblk = ext4_ext_pblock(ext);
				return 0;
			}
			prev = lblock + ext4_ext_get_actual_len(ext) - 1;
			ext++;
			entries--;
		}
	} else {
		struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);

		/*
		 * The logical block in the first entry should be equal to
		 * the number in the parent index block.
		 */
		if (depth != ext_depth(inode) &&
		    lblk != le32_to_cpu(ext_idx->ei_block))
			return 0;
		while (entries) {
			if (!ext4_valid_extent_idx(inode, ext_idx))
				return 0;

			/* Check for overlapping index extents */
			lblock = le32_to_cpu(ext_idx->ei_block);
			if ((lblock <= prev) && prev) {
				*pblk = ext4_idx_pblock(ext_idx);
				return 0;
			}
			ext_idx++;
			entries--;
			prev = lblock;
		}
	}
	return 1;
}

static int __ext4_ext_check(const char *function, unsigned int line,
			    struct inode *inode, struct ext4_extent_header *eh,
			    int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk)
{
	const char *error_msg;
	int max = 0, err = -EFSCORRUPTED;

	if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
		error_msg = "invalid magic";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
		error_msg = "unexpected eh_depth";
		goto corrupted;
	}
	if (unlikely(eh->eh_max == 0)) {
		error_msg = "invalid eh_max";
		goto corrupted;
	}
	max = ext4_ext_max_entries(inode, depth);
	if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
		error_msg = "too large eh_max";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
		error_msg = "invalid eh_entries";
		goto corrupted;
	}
	if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
		error_msg = "invalid extent entries";
		goto corrupted;
	}
	if (unlikely(depth > 32)) {
		error_msg = "too large eh_depth";
		goto corrupted;
	}
	/* Verify checksum on non-root extent tree nodes */
	if (ext_depth(inode) != depth &&
	    !ext4_extent_block_csum_verify(inode, eh)) {
		error_msg = "extent tree corrupted";
		err = -EFSBADCRC;
		goto corrupted;
	}
	return 0;

corrupted:
	ext4_error_inode_err(inode, function, line, 0, -err,
			     "pblk %llu bad header/extent: %s - magic %x, "
			     "entries %u, max %u(%u), depth %u(%u)",
			     (unsigned long long) pblk, error_msg,
			     le16_to_cpu(eh->eh_magic),
			     le16_to_cpu(eh->eh_entries),
			     le16_to_cpu(eh->eh_max),
			     max, le16_to_cpu(eh->eh_depth), depth);
	return err;
}

#define ext4_ext_check(inode, eh, depth, pblk)			\
	__ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0)

int ext4_ext_check_inode(struct inode *inode)
{
	return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
}

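/*
 * ext4_cache_extents:
 * feed all entries of a leaf node into the extent status tree, caching
 * written and unwritten extents as well as the holes between them.
 */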
static void ext4_cache_extents(struct inode *inode,
			       struct ext4_extent_header *eh)
{
	struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
	ext4_lblk_t prev = 0;
	int i;

	for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
		unsigned int status = EXTENT_STATUS_WRITTEN;
		ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
		int len = ext4_ext_get_actual_len(ex);

		if (prev && (prev != lblk))
			ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
					     EXTENT_STATUS_HOLE);

		if (ext4_ext_is_unwritten(ex))
			status = EXTENT_STATUS_UNWRITTEN;
		ext4_es_cache_extent(inode, lblk, len,
				     ext4_ext_pblock(ex), status);
		prev = lblk + len;
	}
}

static struct buffer_head *
__read_extent_tree_block(const char *function, unsigned int line,
			 struct inode *inode, struct ext4_extent_idx *idx,
			 int depth, int flags)
{
	struct buffer_head *bh;
	int err;
	gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS;
	ext4_fsblk_t pblk;

	if (flags & EXT4_EX_NOFAIL)
		gfp_flags |= __GFP_NOFAIL;

	pblk = ext4_idx_pblock(idx);
	bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
	if (unlikely(!bh))
		return ERR_PTR(-ENOMEM);

	if (!bh_uptodate_or_lock(bh)) {
		trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
		err = ext4_read_bh(bh, 0, NULL);
		if (err < 0)
			goto errout;
	}
	if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
		return bh;
	err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
			       depth, pblk, le32_to_cpu(idx->ei_block));
	if (err)
		goto errout;
	set_buffer_verified(bh);
	/*
	 * If this is a leaf block, cache all of its entries
	 */
	if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
		struct ext4_extent_header *eh = ext_block_hdr(bh);
		ext4_cache_extents(inode, eh);
	}
	return bh;
errout:
	put_bh(bh);
	return ERR_PTR(err);
}

#define read_extent_tree_block(inode, idx, depth, flags)		\
	__read_extent_tree_block(__func__, __LINE__, (inode), (idx),	\
				 (depth), (flags))

/*
 * This function is called to cache a file's extent information in the
 * extent status tree
 */
int ext4_ext_precache(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_ext_path *path = NULL;
	struct buffer_head *bh;
	int i = 0, depth, ret = 0;

	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		return 0;	/* not an extent-mapped inode */

	down_read(&ei->i_data_sem);
	depth = ext_depth(inode);

	/* Don't cache anything if there are no external extent blocks */
	if (!depth) {
		up_read(&ei->i_data_sem);
		return ret;
	}

	path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
		       GFP_NOFS);
	if (path == NULL) {
		up_read(&ei->i_data_sem);
		return -ENOMEM;
	}

	path[0].p_hdr = ext_inode_hdr(inode);
	ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
	if (ret)
		goto out;
	path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
	while (i >= 0) {
		/*
		 * If this is a leaf block or we've reached the end of
		 * the index block, go up
		 */
		if ((i == depth) ||
		    path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
			brelse(path[i].p_bh);
			path[i].p_bh = NULL;
			i--;
			continue;
		}
		bh = read_extent_tree_block(inode, path[i].p_idx++,
					    depth - i - 1,
					    EXT4_EX_FORCE_CACHE);
		if (IS_ERR(bh)) {
			ret = PTR_ERR(bh);
			break;
		}
		i++;
		path[i].p_bh = bh;
		path[i].p_hdr = ext_block_hdr(bh);
		path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
	}
	ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
out:
	up_read(&ei->i_data_sem);
	ext4_ext_drop_refs(path);
	kfree(path);
	return ret;
}

#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
	int k, l = path->p_depth;

	ext_debug(inode, "path:");
	for (k = 0; k <= l; k++, path++) {
		if (path->p_idx) {
			ext_debug(inode, "  %d->%llu",
				  le32_to_cpu(path->p_idx->ei_block),
				  ext4_idx_pblock(path->p_idx));
		} else if (path->p_ext) {
			ext_debug(inode, "  %d:[%d]%d:%llu ",
				  le32_to_cpu(path->p_ext->ee_block),
				  ext4_ext_is_unwritten(path->p_ext),
				  ext4_ext_get_actual_len(path->p_ext),
				  ext4_ext_pblock(path->p_ext));
		} else
			ext_debug(inode, "  []");
	}
	ext_debug(inode, "\n");
}

static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
	int depth = ext_depth(inode);
	struct ext4_extent_header *eh;
	struct ext4_extent *ex;
	int i;

	if (!path)
		return;

	eh = path[depth].p_hdr;
	ex = EXT_FIRST_EXTENT(eh);

	ext_debug(inode, "Displaying leaf extents\n");

	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
		ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
			  ext4_ext_is_unwritten(ex),
			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
	}
	ext_debug(inode, "\n");
}

static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
			ext4_fsblk_t newblock, int level)
{
	int depth = ext_depth(inode);
	struct ext4_extent *ex;

	if (depth != level) {
		struct ext4_extent_idx *idx;
		idx = path[level].p_idx;
		while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
			ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
				  level, le32_to_cpu(idx->ei_block),
				  ext4_idx_pblock(idx), newblock);
			idx++;
		}

		return;
	}

	ex = path[depth].p_ext;
	while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
		ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
				le32_to_cpu(ex->ee_block),
				ext4_ext_pblock(ex),
				ext4_ext_is_unwritten(ex),
				ext4_ext_get_actual_len(ex),
				newblock);
		ex++;
	}
}

#else
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#define ext4_ext_show_move(inode, path, newblock, level)
#endif

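/*
 * ext4_ext_drop_refs:
 * release the buffer_head reference held at each level of @path.
 */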
void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
	int depth, i;

	if (!path)
		return;
	depth = path->p_depth;
	for (i = 0; i <= depth; i++, path++) {
		brelse(path->p_bh);
		path->p_bh = NULL;
	}
}

/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch_idx(struct inode *inode,
			struct ext4_ext_path *path, ext4_lblk_t block)
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent_idx *r, *l, *m;

	ext_debug(inode, "binsearch for %u(idx):  ", block);

	l = EXT_FIRST_INDEX(eh) + 1;
	r = EXT_LAST_INDEX(eh);
	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ei_block))
			r = m - 1;
		else
			l = m + 1;
		ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
			  le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
			  r, le32_to_cpu(r->ei_block));
	}

	path->p_idx = l - 1;
	ext_debug(inode, "  -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
		  ext4_idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
	{
		struct ext4_extent_idx *chix, *ix;
		int k;

		chix = ix = EXT_FIRST_INDEX(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
			if (k != 0 && le32_to_cpu(ix->ei_block) <=
			    le32_to_cpu(ix[-1].ei_block)) {
				printk(KERN_DEBUG "k=%d, ix=0x%p, "
				       "first=0x%p\n", k,
				       ix, EXT_FIRST_INDEX(eh));
				printk(KERN_DEBUG "%u <= %u\n",
				       le32_to_cpu(ix->ei_block),
				       le32_to_cpu(ix[-1].ei_block));
			}
			BUG_ON(k && le32_to_cpu(ix->ei_block)
					   <= le32_to_cpu(ix[-1].ei_block));
			if (block < le32_to_cpu(ix->ei_block))
				break;
			chix = ix;
		}
		BUG_ON(chix != path->p_idx);
	}
#endif

}

/*
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch(struct inode *inode,
		struct ext4_ext_path *path, ext4_lblk_t block)
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent *r, *l, *m;

	if (eh->eh_entries == 0) {
		/*
		 * this leaf is empty:
		 * we get such a leaf in split/add case
		 */
		return;
	}

	ext_debug(inode, "binsearch for %u:  ", block);

	l = EXT_FIRST_EXTENT(eh) + 1;
	r = EXT_LAST_EXTENT(eh);

	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ee_block))
			r = m - 1;
		else
			l = m + 1;
		ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
			  le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
			  r, le32_to_cpu(r->ee_block));
	}

	path->p_ext = l - 1;
	ext_debug(inode, "  -> %d:%llu:[%d]%d ",
			le32_to_cpu(path->p_ext->ee_block),
			ext4_ext_pblock(path->p_ext),
			ext4_ext_is_unwritten(path->p_ext),
			ext4_ext_get_actual_len(path->p_ext));

#ifdef CHECK_BINSEARCH
	{
		struct ext4_extent *chex, *ex;
		int k;

		chex = ex = EXT_FIRST_EXTENT(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
			BUG_ON(k && le32_to_cpu(ex->ee_block)
					  <= le32_to_cpu(ex[-1].ee_block));
			if (block < le32_to_cpu(ex->ee_block))
				break;
			chex = ex;
		}
		BUG_ON(chex != path->p_ext);
	}
#endif

}

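/*
 * ext4_ext_tree_init:
 * set up an empty extent tree root in the inode body and mark the
 * inode dirty.
 */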
void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
{
	struct ext4_extent_header *eh;

	eh = ext_inode_hdr(inode);
	eh->eh_depth = 0;
	eh->eh_entries = 0;
	eh->eh_magic = EXT4_EXT_MAGIC;
	eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
	eh->eh_generation = 0;
	ext4_mark_inode_dirty(handle, inode);
}

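/*
 * ext4_find_extent:
 * descend the tree from the root to the leaf covering @block, filling
 * in the returned path array with the node visited at each level. An
 * existing path passed in via *orig_path is reused, or reallocated if
 * the tree has grown deeper. On failure the path is freed and an
 * ERR_PTR is returned instead.
 */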
struct ext4_ext_path *
ext4_find_extent(struct inode *inode, ext4_lblk_t block,
		 struct ext4_ext_path **orig_path, int flags)
{
	struct ext4_extent_header *eh;
	struct buffer_head *bh;
	struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
	short int depth, i, ppos = 0;
	int ret;
	gfp_t gfp_flags = GFP_NOFS;

	if (flags & EXT4_EX_NOFAIL)
		gfp_flags |= __GFP_NOFAIL;

	eh = ext_inode_hdr(inode);
	depth = ext_depth(inode);
	if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
		EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
				 depth);
		ret = -EFSCORRUPTED;
		goto err;
	}

	if (path) {
		ext4_ext_drop_refs(path);
		if (depth > path[0].p_maxdepth) {
			kfree(path);
			*orig_path = path = NULL;
		}
	}
	if (!path) {
		/* account possible depth increase */
		path = kcalloc(depth + 2, sizeof(struct ext4_ext_path),
				gfp_flags);
		if (unlikely(!path))
			return ERR_PTR(-ENOMEM);
		path[0].p_maxdepth = depth + 1;
	}
	path[0].p_hdr = eh;
	path[0].p_bh = NULL;

	i = depth;
	if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
		ext4_cache_extents(inode, eh);
	/* walk through the tree */
	while (i) {
		ext_debug(inode, "depth %d: num %d, max %d\n",
			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

		ext4_ext_binsearch_idx(inode, path + ppos, block);
		path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
		path[ppos].p_depth = i;
		path[ppos].p_ext = NULL;

		bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags);
		if (IS_ERR(bh)) {
			ret = PTR_ERR(bh);
			goto err;
		}

		eh = ext_block_hdr(bh);
		ppos++;
		path[ppos].p_bh = bh;
		path[ppos].p_hdr = eh;
	}

	path[ppos].p_depth = i;
	path[ppos].p_ext = NULL;
	path[ppos].p_idx = NULL;

	/* find extent */
	ext4_ext_binsearch(inode, path + ppos, block);
	/* if not an empty leaf */
	if (path[ppos].p_ext)
		path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);

	ext4_ext_show_path(inode, path);

	return path;

err:
	ext4_ext_drop_refs(path);
	kfree(path);
	if (orig_path)
		*orig_path = NULL;
	return ERR_PTR(ret);
}

/*
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
 */
static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
				 struct ext4_ext_path *curp,
				 int logical, ext4_fsblk_t ptr)
{
	struct ext4_extent_idx *ix;
	int len, err;

	err = ext4_ext_get_access(handle, inode, curp);
	if (err)
		return err;

	if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d == ei_block %d!",
				 logical, le32_to_cpu(curp->p_idx->ei_block));
		return -EFSCORRUPTED;
	}

	if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
			     >= le16_to_cpu(curp->p_hdr->eh_max))) {
		EXT4_ERROR_INODE(inode,
				 "eh_entries %d >= eh_max %d!",
				 le16_to_cpu(curp->p_hdr->eh_entries),
				 le16_to_cpu(curp->p_hdr->eh_max));
		return -EFSCORRUPTED;
	}

	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
		/* insert after */
		ext_debug(inode, "insert new index %d after: %llu\n",
			  logical, ptr);
		ix = curp->p_idx + 1;
	} else {
		/* insert before */
		ext_debug(inode, "insert new index %d before: %llu\n",
			  logical, ptr);
		ix = curp->p_idx;
	}

	len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
	BUG_ON(len < 0);
	if (len > 0) {
		ext_debug(inode, "insert new index %d: "
				"move %d indices from 0x%p to 0x%p\n",
				logical, len, ix, ix + 1);
		memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
	}

	if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
		EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
		return -EFSCORRUPTED;
	}

	ix->ei_block = cpu_to_le32(logical);
	ext4_idx_store_pblock(ix, ptr);
	le16_add_cpu(&curp->p_hdr->eh_entries, 1);

	if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
		EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
		return -EFSCORRUPTED;
	}

	err = ext4_ext_dirty(handle, inode, curp);
	ext4_std_error(inode->i_sb, err);

	return err;
}

/*
 * ext4_ext_split:
 * inserts new subtree into the path, using free index entry
 * at depth @at:
 * - allocates all needed blocks (new leaf and all intermediate index blocks)
 * - makes decision where to split
 * - moves remaining extents and index entries (right to the split point)
 *   into the newly allocated blocks
 * - initializes subtree
 */
static int ext4_ext_split(handle_t *handle, struct inode *inode,
			  unsigned int flags,
			  struct ext4_ext_path *path,
			  struct ext4_extent *newext, int at)
{
	struct buffer_head *bh = NULL;
	int depth = ext_depth(inode);
	struct ext4_extent_header *neh;
	struct ext4_extent_idx *fidx;
	int i = at, k, m, a;
	ext4_fsblk_t newblock, oldblock;
	__le32 border;
	ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
	gfp_t gfp_flags = GFP_NOFS;
	int err = 0;
	size_t ext_size = 0;

	if (flags & EXT4_EX_NOFAIL)
		gfp_flags |= __GFP_NOFAIL;

	/* make decision: where to split? */
	/* FIXME: now decision is simplest: at current extent */

	/* if current leaf will be split, then we should use
	 * border from split point */
	if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
		EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
		return -EFSCORRUPTED;
	}
	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
		border = path[depth].p_ext[1].ee_block;
		ext_debug(inode, "leaf will be split."
				" next leaf starts at %d\n",
				  le32_to_cpu(border));
	} else {
		border = newext->ee_block;
		ext_debug(inode, "leaf will be added."
				" next leaf starts at %d\n",
				le32_to_cpu(border));
	}

	/*
	 * If error occurs, then we break processing
	 * and mark filesystem read-only. index won't
	 * be inserted and tree will be in consistent
	 * state. Next mount will repair buffers too.
	 */

	/*
	 * Get array to track all allocated blocks.
	 * We need this to handle errors and free blocks
	 * upon them.
	 */
	ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags);
	if (!ablocks)
		return -ENOMEM;

	/* allocate all needed blocks */
	ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
	for (a = 0; a < depth - at; a++) {
		newblock = ext4_ext_new_meta_block(handle, inode, path,
						   newext, &err, flags);
		if (newblock == 0)
			goto cleanup;
		ablocks[a] = newblock;
	}

	/* initialize new leaf */
	newblock = ablocks[--a];
	if (unlikely(newblock == 0)) {
		EXT4_ERROR_INODE(inode, "newblock == 0!");
		err = -EFSCORRUPTED;
		goto cleanup;
	}
	bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
	if (unlikely(!bh)) {
		err = -ENOMEM;
		goto cleanup;
	}
	lock_buffer(bh);

	err = ext4_journal_get_create_access(handle, bh);
	if (err)
		goto cleanup;

	neh = ext_block_hdr(bh);
	neh->eh_entries = 0;
	neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
	neh->eh_magic = EXT4_EXT_MAGIC;
	neh->eh_depth = 0;
	neh->eh_generation = 0;

	/* move remainder of path[depth] to the new leaf */
	if (unlikely(path[depth].p_hdr->eh_entries !=
		     path[depth].p_hdr->eh_max)) {
		EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
				 path[depth].p_hdr->eh_entries,
				 path[depth].p_hdr->eh_max);
		err = -EFSCORRUPTED;
		goto cleanup;
	}
	/* start copy from next extent */
	m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
	ext4_ext_show_move(inode, path, newblock, depth);
	if (m) {
		struct ext4_extent *ex;
		ex = EXT_FIRST_EXTENT(neh);
		memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
		le16_add_cpu(&neh->eh_entries, m);
	}

	/* zero out unused area in the extent block */
	ext_size = sizeof(struct ext4_extent_header) +
		sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries);
	memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
	ext4_extent_block_csum_set(inode, neh);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto cleanup;
	brelse(bh);
	bh = NULL;

	/* correct old leaf */
	if (m) {
		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			goto cleanup;
		le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
			goto cleanup;

	}

	/* create intermediate indexes */
	k = depth - at - 1;
	if (unlikely(k < 0)) {
		EXT4_ERROR_INODE(inode, "k %d < 0!", k);
		err = -EFSCORRUPTED;
		goto cleanup;
	}
	if (k)
		ext_debug(inode, "create %d intermediate indices\n", k);
	/* insert new index into current index block */
	/* current depth stored in i var */
	i = depth - 1;
	while (k--) {
		oldblock = newblock;
		newblock = ablocks[--a];
		bh = sb_getblk(inode->i_sb, newblock);
		if (unlikely(!bh)) {
			err = -ENOMEM;
			goto cleanup;
		}
		lock_buffer(bh);

		err = ext4_journal_get_create_access(handle, bh);
		if (err)
			goto cleanup;

		neh = ext_block_hdr(bh);
		neh->eh_entries = cpu_to_le16(1);
		neh->eh_magic = EXT4_EXT_MAGIC;
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
		neh->eh_depth = cpu_to_le16(depth - i);
		neh->eh_generation = 0;
		fidx = EXT_FIRST_INDEX(neh);
		fidx->ei_block = border;
		ext4_idx_store_pblock(fidx, oldblock);

		ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n",
				i, newblock, le32_to_cpu(border), oldblock);

		/* move remainder of path[i] to the new index block */
		if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
					EXT_LAST_INDEX(path[i].p_hdr))) {
			EXT4_ERROR_INODE(inode,
					 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
					 le32_to_cpu(path[i].p_ext->ee_block));
			err = -EFSCORRUPTED;
			goto cleanup;
		}
		/* start copy indexes */
		m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
		ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
				EXT_MAX_INDEX(path[i].p_hdr));
		ext4_ext_show_move(inode, path, newblock, i);
		if (m) {
			memmove(++fidx, path[i].p_idx,
				sizeof(struct ext4_extent_idx) * m);
			le16_add_cpu(&neh->eh_entries, m);
		}
		/* zero out unused area in the extent block */
		ext_size = sizeof(struct ext4_extent_header) +
		   (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries));
		memset(bh->b_data + ext_size, 0,
			inode->i_sb->s_blocksize - ext_size);
		ext4_extent_block_csum_set(inode, neh);
		set_buffer_uptodate(bh);
		unlock_buffer(bh);

		err = ext4_handle_dirty_metadata(handle, inode, bh);
		if (err)
			goto cleanup;
		brelse(bh);
		bh = NULL;

		/* correct old index */
		if (m) {
			err = ext4_ext_get_access(handle, inode, path + i);
			if (err)
				goto cleanup;
			le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
			err = ext4_ext_dirty(handle, inode, path + i);
			if (err)
				goto cleanup;
		}

		i--;
	}

	/* insert new index */
	err = ext4_ext_insert_index(handle, inode, path + at,
				    le32_to_cpu(border), newblock);

cleanup:
	if (bh) {
		if (buffer_locked(bh))
			unlock_buffer(bh);
		brelse(bh);
	}

	if (err) {
		/* free all allocated blocks in error case */
		for (i = 0; i < depth; i++) {
			if (!ablocks[i])
				continue;
			ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
					 EXT4_FREE_BLOCKS_METADATA);
		}
	}
	kfree(ablocks);

	return err;
}

/*
 * ext4_ext_grow_indepth:
 * implements tree growing procedure:
 * - allocates new block
 * - moves top-level data (index block or leaf) into the new block
 * - initializes new top-level, creating index that points to the
 *   just created block
 */
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
				 unsigned int flags)
{
	struct ext4_extent_header *neh;
	struct buffer_head *bh;
	ext4_fsblk_t newblock, goal = 0;
	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
	int err = 0;
	size_t ext_size = 0;

	/* Try to prepend new index to old one */
	if (ext_depth(inode))
		goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
	if (goal > le32_to_cpu(es->s_first_data_block)) {
		flags |= EXT4_MB_HINT_TRY_GOAL;
		goal--;
	} else
		goal = ext4_inode_to_goal_block(inode);
	newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
					NULL, &err);
	if (newblock == 0)
		return err;

	bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
	if (unlikely(!bh))
		return -ENOMEM;
	lock_buffer(bh);

	err = ext4_journal_get_create_access(handle, bh);
	if (err) {
		unlock_buffer(bh);
		goto out;
	}

	ext_size = sizeof(EXT4_I(inode)->i_data);
	/* move top-level index/leaf into new block */
	memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size);
	/* zero out unused area in the extent block */
	memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);

	/* set size of new block */
	neh = ext_block_hdr(bh);
	/* old root could have indexes or leaves
	 * so calculate eh_max the right way */
	if (ext_depth(inode))
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
	else
		neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
	neh->eh_magic = EXT4_EXT_MAGIC;
	ext4_extent_block_csum_set(inode, neh);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto out;

	/* Update top-level index: num,max,pointer */
	neh = ext_inode_hdr(inode);
	neh->eh_entries = cpu_to_le16(1);
	ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
	if (neh->eh_depth == 0) {
		/* Root extent block becomes index block */
		neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
		EXT_FIRST_INDEX(neh)->ei_block =
			EXT_FIRST_EXTENT(neh)->ee_block;
	}
	ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n",
		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

	le16_add_cpu(&neh->eh_depth, 1);
	err = ext4_mark_inode_dirty(handle, inode);
out:
	brelse(bh);

	return err;
}

/*
 * ext4_ext_create_new_leaf:
 * finds empty index and adds new leaf.
 * if no free index is found, then it requests in-depth growing.
 */
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
				    unsigned int mb_flags,
				    unsigned int gb_flags,
				    struct ext4_ext_path **ppath,
				    struct ext4_extent *newext)
{
	struct ext4_ext_path *path = *ppath;
	struct ext4_ext_path *curp;
	int depth, i, err = 0;

repeat:
	i = depth = ext_depth(inode);

	/* walk up to the tree and look for free index entry */
	curp = path + depth;
	while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
		i--;
		curp--;
	}

	/* we use already allocated block for index block,
	 * so subsequent data blocks should be contiguous */
	if (EXT_HAS_FREE_INDEX(curp)) {
		/* if we found index with free entry, then use that
		 * entry: create all needed subtree and add new leaf */
		err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
		if (err)
			goto out;

		/* refill path */
		path = ext4_find_extent(inode,
				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    ppath, gb_flags);
		if (IS_ERR(path))
			err = PTR_ERR(path);
	} else {
		/* tree is full, time to grow in depth */
		err = ext4_ext_grow_indepth(handle, inode, mb_flags);
		if (err)
			goto out;

		/* refill path */
		path = ext4_find_extent(inode,
				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    ppath, gb_flags);
		if (IS_ERR(path)) {
			err = PTR_ERR(path);
			goto out;
		}

		/*
		 * only first (depth 0 -> 1) produces free space;
		 * in all other cases we have to split the grown tree
		 */
		depth = ext_depth(inode);
		if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
			/* now we need to split */
			goto repeat;
		}
	}

out:
	return err;
}

/*
 * Search the closest allocated block to the left for *logical
 * and return it at @logical + its physical address at @phys.
 * If *logical is the smallest allocated block, the function
 * returns 0 at @phys.
 * The return value contains 0 (success) or an error code.
 */
static int ext4_ext_search_left(struct inode *inode,
				struct ext4_ext_path *path,
				ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
	struct ext4_extent_idx *ix;
	struct ext4_extent *ex;
	int depth, ee_len;

	if (unlikely(path == NULL)) {
		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
		return -EFSCORRUPTED;
	}
	depth = path->p_depth;
	*phys = 0;

	if (depth == 0 && path->p_ext == NULL)
		return 0;

	/* usually extent in the path covers blocks smaller
	 * than *logical, but it can be that extent is the
	 * first one in the file */

	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
			EXT4_ERROR_INODE(inode,
					 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
					 *logical, le32_to_cpu(ex->ee_block));
			return -EFSCORRUPTED;
		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
				EXT4_ERROR_INODE(inode,
				  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
				  ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
				  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
		le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
				  depth);
				return -EFSCORRUPTED;
			}
		}
		return 0;
	}

	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d < ee_block %d + ee_len %d!",
				 *logical, le32_to_cpu(ex->ee_block), ee_len);
		return -EFSCORRUPTED;
	}

	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
	*phys = ext4_ext_pblock(ex) + ee_len - 1;
	return 0;
}

/*
 * Search the closest allocated block to the right for *logical
 * and return it at @logical + its physical address at @phys.
 * If none exists, return 0 and set @phys to 0. We return 1 when
 * we find an allocated block, in which case ret_ex is valid.
 * Or return a (< 0) error code.
 */
static int ext4_ext_search_right(struct inode *inode,
				 struct ext4_ext_path *path,
				 ext4_lblk_t *logical, ext4_fsblk_t *phys,
				 struct ext4_extent *ret_ex)
{
	struct buffer_head *bh = NULL;
	struct ext4_extent_header *eh;
	struct ext4_extent_idx *ix;
	struct ext4_extent *ex;
	int depth;	/* Note, NOT eh_depth; depth from top of tree */
	int ee_len;

	if (unlikely(path == NULL)) {
		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
		return -EFSCORRUPTED;
	}
	depth = path->p_depth;
	*phys = 0;

	if (depth == 0 && path->p_ext == NULL)
		return 0;

	/* usually extent in the path covers blocks smaller
	 * than *logical, but it can be that extent is the
	 * first one in the file */

	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
			EXT4_ERROR_INODE(inode,
					 "first_extent(path[%d].p_hdr) != ex",
					 depth);
			return -EFSCORRUPTED;
		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
				EXT4_ERROR_INODE(inode,
						 "ix != EXT_FIRST_INDEX *logical %d!",
						 *logical);
				return -EFSCORRUPTED;
			}
		}
		goto found_extent;
	}

	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d < ee_block %d + ee_len %d!",
				 *logical, le32_to_cpu(ex->ee_block), ee_len);
		return -EFSCORRUPTED;
	}

	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
		/* next allocated block in this leaf */
		ex++;
		goto found_extent;
	}

	/* go up and search for index to the right */
	while (--depth >= 0) {
		ix = path[depth].p_idx;
		if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
			goto got_index;
	}

	/* we've gone up to the root and found no index to the right */
	return 0;

got_index:
	/* we've found index to the right, let's
	 * follow it and find the closest allocated
	 * block to the right */
	ix++;
	while (++depth < path->p_depth) {
		/* subtract from p_depth to get proper eh_depth */
		bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
		if (IS_ERR(bh))
			return PTR_ERR(bh);
		eh = ext_block_hdr(bh);
		ix = EXT_FIRST_INDEX(eh);
		put_bh(bh);
	}

	bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
	if (IS_ERR(bh))
		return PTR_ERR(bh);
	eh = ext_block_hdr(bh);
	ex = EXT_FIRST_EXTENT(eh);
found_extent:
	*logical = le32_to_cpu(ex->ee_block);
	*phys = ext4_ext_pblock(ex);
	if (ret_ex)
		*ret_ex = *ex;
	if (bh)
		put_bh(bh);
	return 1;
}

1620/*
1621 * ext4_ext_next_allocated_block:
1622 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
1623 * NOTE: it considers block number from index entry as
1624 * allocated block. Thus, index entries have to be consistent
1625 * with leaves.
1626 */
1627ext4_lblk_t
1628ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1629{
1630 int depth;
1631
1632 BUG_ON(path == NULL);
1633 depth = path->p_depth;
1634
1635 if (depth == 0 && path->p_ext == NULL)
1636 return EXT_MAX_BLOCKS;
1637
1638 while (depth >= 0) {
Olivier Deprez157378f2022-04-04 15:47:50 +02001639 struct ext4_ext_path *p = &path[depth];
1640
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001641 if (depth == path->p_depth) {
1642 /* leaf */
Olivier Deprez157378f2022-04-04 15:47:50 +02001643 if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
1644 return le32_to_cpu(p->p_ext[1].ee_block);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001645 } else {
1646 /* index */
Olivier Deprez157378f2022-04-04 15:47:50 +02001647 if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
1648 return le32_to_cpu(p->p_idx[1].ei_block);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001649 }
1650 depth--;
1651 }
1652
1653 return EXT_MAX_BLOCKS;
1654}
1655
1656/*
1657 * ext4_ext_next_leaf_block:
1658 * returns first allocated block from next leaf or EXT_MAX_BLOCKS
1659 */
1660static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
1661{
1662 int depth;
1663
1664 BUG_ON(path == NULL);
1665 depth = path->p_depth;
1666
1667 /* zero-tree has no leaf blocks at all */
1668 if (depth == 0)
1669 return EXT_MAX_BLOCKS;
1670
1671 /* go to index block */
1672 depth--;
1673
1674 while (depth >= 0) {
1675 if (path[depth].p_idx !=
1676 EXT_LAST_INDEX(path[depth].p_hdr))
1677 return (ext4_lblk_t)
1678 le32_to_cpu(path[depth].p_idx[1].ei_block);
1679 depth--;
1680 }
1681
1682 return EXT_MAX_BLOCKS;
1683}

/*
 * ext4_ext_correct_indexes:
 * if leaf gets modified and modified extent is first in the leaf,
 * then we have to correct all indexes above.
 * TODO: do we need to correct tree in all cases?
 */
static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	struct ext4_extent_header *eh;
	int depth = ext_depth(inode);
	struct ext4_extent *ex;
	__le32 border;
	int k, err = 0;

	eh = path[depth].p_hdr;
	ex = path[depth].p_ext;

	if (unlikely(ex == NULL || eh == NULL)) {
		EXT4_ERROR_INODE(inode,
				 "ex %p == NULL or eh %p == NULL", ex, eh);
		return -EFSCORRUPTED;
	}

	if (depth == 0) {
		/* there is no tree at all */
		return 0;
	}

	if (ex != EXT_FIRST_EXTENT(eh)) {
		/* we correct tree if first leaf got modified only */
		return 0;
	}

	/*
	 * TODO: we need correction if border is smaller than current one
	 */
	k = depth - 1;
	border = path[depth].p_ext->ee_block;
	err = ext4_ext_get_access(handle, inode, path + k);
	if (err)
		return err;
	path[k].p_idx->ei_block = border;
	err = ext4_ext_dirty(handle, inode, path + k);
	if (err)
		return err;

	while (k--) {
		/* change all left-side indexes */
		if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
			break;
		err = ext4_ext_get_access(handle, inode, path + k);
		if (err)
			break;
		path[k].p_idx->ei_block = border;
		err = ext4_ext_dirty(handle, inode, path + k);
		if (err)
			break;
	}

	return err;
}

static int ext4_can_extents_be_merged(struct inode *inode,
				      struct ext4_extent *ex1,
				      struct ext4_extent *ex2)
{
	unsigned short ext1_ee_len, ext2_ee_len;

	if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
		return 0;

	ext1_ee_len = ext4_ext_get_actual_len(ex1);
	ext2_ee_len = ext4_ext_get_actual_len(ex2);

	if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
			le32_to_cpu(ex2->ee_block))
		return 0;

	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
		return 0;

	if (ext4_ext_is_unwritten(ex1) &&
	    ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
		return 0;
#ifdef AGGRESSIVE_TEST
	if (ext1_ee_len >= 4)
		return 0;
#endif

	if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
		return 1;
	return 0;
}
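
/*
 * Worked example (added commentary, not part of the original source):
 * ex1 = {ee_block 100, len 8, pblk 5000} and
 * ex2 = {ee_block 108, len 4, pblk 5008} pass every check above: same
 * written state, logically contiguous (100 + 8 == 108), physically
 * contiguous (5000 + 8 == 5008), and 8 + 4 <= EXT_INIT_MAX_LEN, so they
 * can be merged into {ee_block 100, len 12, pblk 5000}.
 */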

/*
 * This function tries to merge the "ex" extent to the next extent in the tree.
 * It always tries to merge towards right. If you want to merge towards
 * left, pass "ex - 1" as argument instead of "ex".
 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
 * 1 if they got merged.
 */
static int ext4_ext_try_to_merge_right(struct inode *inode,
				 struct ext4_ext_path *path,
				 struct ext4_extent *ex)
{
	struct ext4_extent_header *eh;
	unsigned int depth, len;
	int merge_done = 0, unwritten;

	depth = ext_depth(inode);
	BUG_ON(path[depth].p_hdr == NULL);
	eh = path[depth].p_hdr;

	while (ex < EXT_LAST_EXTENT(eh)) {
		if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
			break;
		/* merge with next extent! */
		unwritten = ext4_ext_is_unwritten(ex);
		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
				+ ext4_ext_get_actual_len(ex + 1));
		if (unwritten)
			ext4_ext_mark_unwritten(ex);

		if (ex + 1 < EXT_LAST_EXTENT(eh)) {
			len = (EXT_LAST_EXTENT(eh) - ex - 1)
				* sizeof(struct ext4_extent);
			memmove(ex + 1, ex + 2, len);
		}
		le16_add_cpu(&eh->eh_entries, -1);
		merge_done = 1;
		WARN_ON(eh->eh_entries == 0);
		if (!eh->eh_entries)
			EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
	}

	return merge_done;
}

/*
 * This function does a very simple check to see if we can collapse
 * an extent tree with a single extent tree leaf block into the inode.
 */
static void ext4_ext_try_to_merge_up(handle_t *handle,
				     struct inode *inode,
				     struct ext4_ext_path *path)
{
	size_t s;
	unsigned max_root = ext4_ext_space_root(inode, 0);
	ext4_fsblk_t blk;

	if ((path[0].p_depth != 1) ||
	    (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
	    (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
		return;

	/*
	 * We need to modify the block allocation bitmap and the block
	 * group descriptor to release the extent tree block.  If we
	 * can't get the journal credits, give up.
	 */
	if (ext4_journal_extend(handle, 2,
			ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
		return;

	/*
	 * Copy the extent data up to the inode
	 */
	blk = ext4_idx_pblock(path[0].p_idx);
	s = le16_to_cpu(path[1].p_hdr->eh_entries) *
		sizeof(struct ext4_extent_idx);
	s += sizeof(struct ext4_extent_header);

	path[1].p_maxdepth = path[0].p_maxdepth;
	memcpy(path[0].p_hdr, path[1].p_hdr, s);
	path[0].p_depth = 0;
	path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
		(path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
	path[0].p_hdr->eh_max = cpu_to_le16(max_root);

	brelse(path[1].p_bh);
	ext4_free_blocks(handle, inode, NULL, blk, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}

/*
 * This function tries to merge the @ex extent to neighbours in the tree, then
 * tries to collapse the extent tree into the inode.
 */
static void ext4_ext_try_to_merge(handle_t *handle,
				  struct inode *inode,
				  struct ext4_ext_path *path,
				  struct ext4_extent *ex)
{
	struct ext4_extent_header *eh;
	unsigned int depth;
	int merge_done = 0;

	depth = ext_depth(inode);
	BUG_ON(path[depth].p_hdr == NULL);
	eh = path[depth].p_hdr;

	if (ex > EXT_FIRST_EXTENT(eh))
		merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);

	if (!merge_done)
		(void) ext4_ext_try_to_merge_right(inode, path, ex);

	ext4_ext_try_to_merge_up(handle, inode, path);
}

/*
 * check if a portion of the "newext" extent overlaps with an
 * existing extent.
 *
 * If there is an overlap discovered, it updates the length of the newext
 * such that there will be no overlap, and then returns 1.
 * If there is no overlap found, it returns 0.
 */
static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
					   struct inode *inode,
					   struct ext4_extent *newext,
					   struct ext4_ext_path *path)
{
	ext4_lblk_t b1, b2;
	unsigned int depth, len1;
	unsigned int ret = 0;

	b1 = le32_to_cpu(newext->ee_block);
	len1 = ext4_ext_get_actual_len(newext);
	depth = ext_depth(inode);
	if (!path[depth].p_ext)
		goto out;
	b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));

	/*
	 * get the next allocated block if the extent in the path
	 * is before the requested block(s)
	 */
	if (b2 < b1) {
		b2 = ext4_ext_next_allocated_block(path);
		if (b2 == EXT_MAX_BLOCKS)
			goto out;
		b2 = EXT4_LBLK_CMASK(sbi, b2);
	}

	/* check for wrap through zero on extent logical start block */
	if (b1 + len1 < b1) {
		len1 = EXT_MAX_BLOCKS - b1;
		newext->ee_len = cpu_to_le16(len1);
		ret = 1;
	}

	/* check for overlap */
	if (b1 + len1 > b2) {
		newext->ee_len = cpu_to_le16(b2 - b1);
		ret = 1;
	}
out:
	return ret;
}
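
/*
 * Worked example (added commentary, not part of the original source):
 * if newext covers [100, 149] (b1 = 100, len1 = 50) and the next
 * allocated block is b2 = 120, then b1 + len1 > b2, so newext->ee_len
 * is trimmed to b2 - b1 = 20 and 1 is returned; the caller maps only
 * the non-overlapping range [100, 119].
 */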

/*
 * ext4_ext_insert_extent:
 * tries to merge requested extent into the existing extent or
 * inserts requested extent as new one into the tree,
 * creating new leaf in the no-space case.
 */
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
				struct ext4_ext_path **ppath,
				struct ext4_extent *newext, int gb_flags)
{
	struct ext4_ext_path *path = *ppath;
	struct ext4_extent_header *eh;
	struct ext4_extent *ex, *fex;
	struct ext4_extent *nearex; /* nearest extent */
	struct ext4_ext_path *npath = NULL;
	int depth, len, err;
	ext4_lblk_t next;
	int mb_flags = 0, unwritten;

	if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
		mb_flags |= EXT4_MB_DELALLOC_RESERVED;
	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
		return -EFSCORRUPTED;
	}
	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	eh = path[depth].p_hdr;
	if (unlikely(path[depth].p_hdr == NULL)) {
		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
		return -EFSCORRUPTED;
	}

	/* try to insert block into found extent and return */
	if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {

		/*
		 * Try to see whether we should rather test the extent on
		 * right from ex, or from the left of ex. This is because
		 * ext4_find_extent() can return either extent on the
		 * left, or on the right from the searched position. This
		 * will make merging more effective.
		 */
		if (ex < EXT_LAST_EXTENT(eh) &&
		    (le32_to_cpu(ex->ee_block) +
		    ext4_ext_get_actual_len(ex) <
		    le32_to_cpu(newext->ee_block))) {
			ex += 1;
			goto prepend;
		} else if ((ex > EXT_FIRST_EXTENT(eh)) &&
			   (le32_to_cpu(newext->ee_block) +
			   ext4_ext_get_actual_len(newext) <
			   le32_to_cpu(ex->ee_block)))
			ex -= 1;

		/* Try to append newex to the ex */
		if (ext4_can_extents_be_merged(inode, ex, newext)) {
			ext_debug(inode, "append [%d]%d block to %u:[%d]%d"
				  "(from %llu)\n",
				  ext4_ext_is_unwritten(newext),
				  ext4_ext_get_actual_len(newext),
				  le32_to_cpu(ex->ee_block),
				  ext4_ext_is_unwritten(ex),
				  ext4_ext_get_actual_len(ex),
				  ext4_ext_pblock(ex));
			err = ext4_ext_get_access(handle, inode,
						  path + depth);
			if (err)
				return err;
			unwritten = ext4_ext_is_unwritten(ex);
			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
					+ ext4_ext_get_actual_len(newext));
			if (unwritten)
				ext4_ext_mark_unwritten(ex);
			eh = path[depth].p_hdr;
			nearex = ex;
			goto merge;
		}

prepend:
		/* Try to prepend newex to the ex */
		if (ext4_can_extents_be_merged(inode, newext, ex)) {
			ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d"
				  "(from %llu)\n",
				  le32_to_cpu(newext->ee_block),
				  ext4_ext_is_unwritten(newext),
				  ext4_ext_get_actual_len(newext),
				  le32_to_cpu(ex->ee_block),
				  ext4_ext_is_unwritten(ex),
				  ext4_ext_get_actual_len(ex),
				  ext4_ext_pblock(ex));
			err = ext4_ext_get_access(handle, inode,
						  path + depth);
			if (err)
				return err;

			unwritten = ext4_ext_is_unwritten(ex);
			ex->ee_block = newext->ee_block;
			ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
					+ ext4_ext_get_actual_len(newext));
			if (unwritten)
				ext4_ext_mark_unwritten(ex);
			eh = path[depth].p_hdr;
			nearex = ex;
			goto merge;
		}
	}

	depth = ext_depth(inode);
	eh = path[depth].p_hdr;
	if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
		goto has_space;

	/* probably next leaf has space for us? */
	fex = EXT_LAST_EXTENT(eh);
	next = EXT_MAX_BLOCKS;
	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
		next = ext4_ext_next_leaf_block(path);
	if (next != EXT_MAX_BLOCKS) {
		ext_debug(inode, "next leaf block - %u\n", next);
		BUG_ON(npath != NULL);
		npath = ext4_find_extent(inode, next, NULL, gb_flags);
		if (IS_ERR(npath))
			return PTR_ERR(npath);
		BUG_ON(npath->p_depth != path->p_depth);
		eh = npath[depth].p_hdr;
		if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
			ext_debug(inode, "next leaf isn't full(%d)\n",
				  le16_to_cpu(eh->eh_entries));
			path = npath;
			goto has_space;
		}
		ext_debug(inode, "next leaf has no free space(%d,%d)\n",
			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
	}

	/*
	 * There is no free space in the found leaf.
	 * We're gonna add a new leaf in the tree.
	 */
	if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
		mb_flags |= EXT4_MB_USE_RESERVED;
	err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
				       ppath, newext);
	if (err)
		goto cleanup;
	depth = ext_depth(inode);
	eh = path[depth].p_hdr;

has_space:
	nearex = path[depth].p_ext;

	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto cleanup;

	if (!nearex) {
		/* there is no extent in this leaf, create first one */
		ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n",
			  le32_to_cpu(newext->ee_block),
			  ext4_ext_pblock(newext),
			  ext4_ext_is_unwritten(newext),
			  ext4_ext_get_actual_len(newext));
		nearex = EXT_FIRST_EXTENT(eh);
	} else {
		if (le32_to_cpu(newext->ee_block)
			   > le32_to_cpu(nearex->ee_block)) {
			/* Insert after */
			ext_debug(inode, "insert %u:%llu:[%d]%d before: "
				  "nearest %p\n",
				  le32_to_cpu(newext->ee_block),
				  ext4_ext_pblock(newext),
				  ext4_ext_is_unwritten(newext),
				  ext4_ext_get_actual_len(newext),
				  nearex);
			nearex++;
		} else {
			/* Insert before */
			BUG_ON(newext->ee_block == nearex->ee_block);
			ext_debug(inode, "insert %u:%llu:[%d]%d after: "
				  "nearest %p\n",
				  le32_to_cpu(newext->ee_block),
				  ext4_ext_pblock(newext),
				  ext4_ext_is_unwritten(newext),
				  ext4_ext_get_actual_len(newext),
				  nearex);
		}
		len = EXT_LAST_EXTENT(eh) - nearex + 1;
		if (len > 0) {
			ext_debug(inode, "insert %u:%llu:[%d]%d: "
				  "move %d extents from 0x%p to 0x%p\n",
				  le32_to_cpu(newext->ee_block),
				  ext4_ext_pblock(newext),
				  ext4_ext_is_unwritten(newext),
				  ext4_ext_get_actual_len(newext),
				  len, nearex, nearex + 1);
			memmove(nearex + 1, nearex,
				len * sizeof(struct ext4_extent));
		}
	}

	le16_add_cpu(&eh->eh_entries, 1);
	path[depth].p_ext = nearex;
	nearex->ee_block = newext->ee_block;
	ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
	nearex->ee_len = newext->ee_len;

merge:
	/* try to merge extents */
	if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
		ext4_ext_try_to_merge(handle, inode, path, nearex);

	/* time to correct all indexes above */
	err = ext4_ext_correct_indexes(handle, inode, path);
	if (err)
		goto cleanup;

	err = ext4_ext_dirty(handle, inode, path + path->p_depth);

cleanup:
	ext4_ext_drop_refs(npath);
	kfree(npath);
	return err;
}

static int ext4_fill_es_cache_info(struct inode *inode,
				   ext4_lblk_t block, ext4_lblk_t num,
				   struct fiemap_extent_info *fieinfo)
{
	ext4_lblk_t next, end = block + num - 1;
	struct extent_status es;
	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
	unsigned int flags;
	int err;

	while (block <= end) {
		next = 0;
		flags = 0;
		if (!ext4_es_lookup_extent(inode, block, &next, &es))
			break;
		if (ext4_es_is_unwritten(&es))
			flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (ext4_es_is_delayed(&es))
			flags |= (FIEMAP_EXTENT_DELALLOC |
				  FIEMAP_EXTENT_UNKNOWN);
		if (ext4_es_is_hole(&es))
			flags |= EXT4_FIEMAP_EXTENT_HOLE;
		if (next == 0)
			flags |= FIEMAP_EXTENT_LAST;
		if (flags & (FIEMAP_EXTENT_DELALLOC|
			     EXT4_FIEMAP_EXTENT_HOLE))
			es.es_pblk = 0;
		else
			es.es_pblk = ext4_es_pblock(&es);
		err = fiemap_fill_next_extent(fieinfo,
				(__u64)es.es_lblk << blksize_bits,
				(__u64)es.es_pblk << blksize_bits,
				(__u64)es.es_len << blksize_bits,
				flags);
		if (next == 0)
			break;
		block = next;
		if (err < 0)
			return err;
		if (err == 1)
			return 0;
	}
	return 0;
}

/*
 * ext4_ext_determine_hole - determine hole around given block
 * @inode:	inode we lookup in
 * @path:	path in extent tree to @lblk
 * @lblk:	pointer to logical block around which we want to determine hole
 *
 * Determine hole length (and start if easily possible) around given logical
 * block. We don't try too hard to find the beginning of the hole but if
 * @path actually points to the extent before @lblk, we provide it.
 *
 * The function returns the length of a hole starting at @lblk. We update @lblk
 * to the beginning of the hole if we managed to find it.
 */
static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
					   struct ext4_ext_path *path,
					   ext4_lblk_t *lblk)
{
	int depth = ext_depth(inode);
	struct ext4_extent *ex;
	ext4_lblk_t len;

	ex = path[depth].p_ext;
	if (ex == NULL) {
		/* there is no extent yet, so gap is [0;-] */
		*lblk = 0;
		len = EXT_MAX_BLOCKS;
	} else if (*lblk < le32_to_cpu(ex->ee_block)) {
		len = le32_to_cpu(ex->ee_block) - *lblk;
	} else if (*lblk >= le32_to_cpu(ex->ee_block)
			+ ext4_ext_get_actual_len(ex)) {
		ext4_lblk_t next;

		*lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
		next = ext4_ext_next_allocated_block(path);
		BUG_ON(next == *lblk);
		len = next - *lblk;
	} else {
		BUG();
	}
	return len;
}
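
/*
 * Illustrative cases (added commentary, not part of the original
 * source), assuming a single extent covering [100, 149]:
 *   - *lblk = 50:  the hole is [50, 99], so 50 is returned;
 *   - *lblk = 200: *lblk is moved back to 150 (the end of the extent)
 *     and the hole runs to the next allocated block or EXT_MAX_BLOCKS;
 *   - *lblk = 120 would hit the BUG(): the block is inside the extent,
 *     so there is no hole to determine.
 */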

/*
 * ext4_ext_put_gap_in_cache:
 * calculate boundaries of the gap that the requested block fits into
 * and cache this gap
 */
static void
ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
			  ext4_lblk_t hole_len)
{
	struct extent_status es;

	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
				  hole_start + hole_len - 1, &es);
	if (es.es_len) {
		/* Is there a delayed extent containing lblock? */
		if (es.es_lblk <= hole_start)
			return;
		hole_len = min(es.es_lblk - hole_start, hole_len);
	}
	ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
	ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
			      EXTENT_STATUS_HOLE);
}

/*
 * ext4_ext_rm_idx:
 * removes index from the index block.
 */
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
			struct ext4_ext_path *path, int depth)
{
	int err;
	ext4_fsblk_t leaf;

	/* free index block */
	depth--;
	path = path + depth;
	leaf = ext4_idx_pblock(path->p_idx);
	if (unlikely(path->p_hdr->eh_entries == 0)) {
		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
		return -EFSCORRUPTED;
	}
	err = ext4_ext_get_access(handle, inode, path);
	if (err)
		return err;

	if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
		int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
		len *= sizeof(struct ext4_extent_idx);
		memmove(path->p_idx, path->p_idx + 1, len);
	}

	le16_add_cpu(&path->p_hdr->eh_entries, -1);
	err = ext4_ext_dirty(handle, inode, path);
	if (err)
		return err;
	ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
	trace_ext4_ext_rm_idx(inode, leaf);

	ext4_free_blocks(handle, inode, NULL, leaf, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);

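	/*
	 * Added commentary (not part of the original source): after the
	 * index block is freed, the loop below propagates the new first
	 * entry's logical block into the parent index blocks, moving up
	 * one level at a time for as long as the entry stays leftmost.
	 */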
	while (--depth >= 0) {
		if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
			break;
		path--;
		err = ext4_ext_get_access(handle, inode, path);
		if (err)
			break;
		path->p_idx->ei_block = (path+1)->p_idx->ei_block;
		err = ext4_ext_dirty(handle, inode, path);
		if (err)
			break;
	}
	return err;
}

/*
 * ext4_ext_calc_credits_for_single_extent:
 * This routine returns the max. credits needed to insert an extent
 * to the extent tree.
 * When passing in the actual path, the caller should calculate credits
 * under i_data_sem.
 */
int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
						struct ext4_ext_path *path)
{
	if (path) {
		int depth = ext_depth(inode);
		int ret = 0;

		/* probably there is space in leaf? */
		if (le16_to_cpu(path[depth].p_hdr->eh_entries)
				< le16_to_cpu(path[depth].p_hdr->eh_max)) {

			/*
			 * There is some space in the leaf, so no
			 * need to account for the leaf block credit.
			 *
			 * bitmaps and block group descriptor blocks
			 * and other metadata blocks still need to be
			 * accounted.
			 */
			/* 1 bitmap, 1 block group descriptor */
			ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
			return ret;
		}
	}

	return ext4_chunk_trans_blocks(inode, nrblocks);
}
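
/*
 * Example (added commentary, not part of the original source): when the
 * leaf in @path still has a free slot, only 2 credits (bitmap + group
 * descriptor) plus EXT4_META_TRANS_BLOCKS() are charged; without a
 * path, ext4_chunk_trans_blocks() must assume the full tree-split cost
 * for @nrblocks blocks.
 */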

/*
 * How many index/leaf blocks need to change/allocate to add @extents extents?
 *
 * If we add a single extent, then in the worst case, each tree level
 * index/leaf needs to be changed in case of the tree split.
 *
 * If more extents are inserted, they could cause the whole tree split more
 * than once, but this is really rare.
 */
int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
{
	int index;
	int depth;

	/* If we are converting the inline data, only one is needed here. */
	if (ext4_has_inline_data(inode))
		return 1;

	depth = ext_depth(inode);

	if (extents <= 1)
		index = depth * 2;
	else
		index = depth * 3;

	return index;
}
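
/*
 * Worked example (added commentary, not part of the original source):
 * for a tree of depth 2, a single inserted extent can touch one index
 * or leaf block per level on a split, so 2 * 2 = 4 blocks are budgeted;
 * multiple extents budget one extra block per level (2 * 3 = 6) for the
 * rare case of the tree splitting more than once.
 */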

static inline int get_default_free_blocks_flags(struct inode *inode)
{
	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
	    ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
		return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
	else if (ext4_should_journal_data(inode))
		return EXT4_FREE_BLOCKS_FORGET;
	return 0;
}

/*
 * ext4_rereserve_cluster - increment the reserved cluster count when
 *                          freeing a cluster with a pending reservation
 *
 * @inode - file containing the cluster
 * @lblk - logical block in cluster to be reserved
 *
 * Increments the reserved cluster count and adjusts quota in a bigalloc
 * file system when freeing a partial cluster containing at least one
 * delayed and unwritten block.  A partial cluster meeting that
 * requirement will have a pending reservation.  If so, the
 * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
 * defer reserved and allocated space accounting to a subsequent call
 * to this function.
 */
static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);

	dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));

	spin_lock(&ei->i_block_reservation_lock);
	ei->i_reserved_data_blocks++;
	percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
	spin_unlock(&ei->i_block_reservation_lock);

	percpu_counter_add(&sbi->s_freeclusters_counter, 1);
	ext4_remove_pending(inode, lblk);
}

static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
			      struct ext4_extent *ex,
			      struct partial_cluster *partial,
			      ext4_lblk_t from, ext4_lblk_t to)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	unsigned short ee_len = ext4_ext_get_actual_len(ex);
	ext4_fsblk_t last_pblk, pblk;
	ext4_lblk_t num;
	int flags;

	/* only extent tail removal is allowed */
	if (from < le32_to_cpu(ex->ee_block) ||
	    to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
		ext4_error(sbi->s_sb,
			   "strange request: removal(2) %u-%u from %u:%u",
			   from, to, le32_to_cpu(ex->ee_block), ee_len);
		return 0;
	}

#ifdef EXTENTS_STATS
	spin_lock(&sbi->s_ext_stats_lock);
	sbi->s_ext_blocks += ee_len;
	sbi->s_ext_extents++;
	if (ee_len < sbi->s_ext_min)
		sbi->s_ext_min = ee_len;
	if (ee_len > sbi->s_ext_max)
		sbi->s_ext_max = ee_len;
	if (ext_depth(inode) > sbi->s_depth_max)
		sbi->s_depth_max = ext_depth(inode);
	spin_unlock(&sbi->s_ext_stats_lock);
#endif

	trace_ext4_remove_blocks(inode, ex, from, to, partial);

	/*
	 * if we have a partial cluster, and it's different from the
	 * cluster of the last block in the extent, we free it
	 */
	last_pblk = ext4_ext_pblock(ex) + ee_len - 1;

	if (partial->state != initial &&
	    partial->pclu != EXT4_B2C(sbi, last_pblk)) {
		if (partial->state == tofree) {
			flags = get_default_free_blocks_flags(inode);
			if (ext4_is_pending(inode, partial->lblk))
				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
			ext4_free_blocks(handle, inode, NULL,
					 EXT4_C2B(sbi, partial->pclu),
					 sbi->s_cluster_ratio, flags);
			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
				ext4_rereserve_cluster(inode, partial->lblk);
		}
		partial->state = initial;
	}

	num = le32_to_cpu(ex->ee_block) + ee_len - from;
	pblk = ext4_ext_pblock(ex) + ee_len - num;

	/*
	 * We free the partial cluster at the end of the extent (if any),
	 * unless the cluster is used by another extent (partial_cluster
	 * state is nofree). If a partial cluster exists here, it must be
	 * shared with the last block in the extent.
	 */
	flags = get_default_free_blocks_flags(inode);

	/* partial, left end cluster aligned, right end unaligned */
	if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
	    (EXT4_LBLK_CMASK(sbi, to) >= from) &&
	    (partial->state != nofree)) {
		if (ext4_is_pending(inode, to))
			flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
		ext4_free_blocks(handle, inode, NULL,
				 EXT4_PBLK_CMASK(sbi, last_pblk),
				 sbi->s_cluster_ratio, flags);
		if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
			ext4_rereserve_cluster(inode, to);
		partial->state = initial;
		flags = get_default_free_blocks_flags(inode);
	}

	flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;

	/*
	 * For bigalloc file systems, we never free a partial cluster
	 * at the beginning of the extent.  Instead, we check to see if we
	 * need to free it on a subsequent call to ext4_remove_blocks,
	 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
	 */
	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
	ext4_free_blocks(handle, inode, NULL, pblk, num, flags);

	/* reset the partial cluster if we've freed past it */
	if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
		partial->state = initial;

	/*
	 * If we've freed the entire extent but the beginning is not left
	 * cluster aligned and is not marked as ineligible for freeing we
	 * record the partial cluster at the beginning of the extent.  It
	 * wasn't freed by the preceding ext4_free_blocks() call, and we
	 * need to look farther to the left to determine if it's to be freed
	 * (not shared with another extent). Else, reset the partial
	 * cluster - we're either done freeing or the beginning of the
	 * extent is left cluster aligned.
	 */
	if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
		if (partial->state == initial) {
			partial->pclu = EXT4_B2C(sbi, pblk);
			partial->lblk = from;
			partial->state = tofree;
		}
	} else {
		partial->state = initial;
	}

	return 0;
}
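
/*
 * Added commentary (not part of the original source): on bigalloc file
 * systems a cluster can straddle two extents, so ext4_remove_blocks()
 * carries a candidate cluster across calls in @partial: "tofree" means
 * the cluster still looks freeable, "nofree" means a neighbouring
 * extent still uses it, and "initial" means nothing is being tracked.
 * The final free happens in ext4_ext_rm_leaf() or
 * ext4_ext_remove_space().
 */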

/*
 * ext4_ext_rm_leaf() Removes the extents associated with the
 * blocks appearing between "start" and "end".  Both "start"
 * and "end" must appear in the same extent or EIO is returned.
 *
 * @handle: The journal handle
 * @inode:  The file's inode
 * @path:   The path to the leaf
 * @partial_cluster: The cluster which we'll have to free if all extents
 *                   have been released from it.  However, if this value is
 *                   negative, it's a cluster just to the right of the
 *                   punched region and it must not be freed.
 * @start:  The first block to remove
 * @end:    The last block to remove
 */
static int
ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
		 struct ext4_ext_path *path,
		 struct partial_cluster *partial,
		 ext4_lblk_t start, ext4_lblk_t end)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	int err = 0, correct_index = 0;
	int depth = ext_depth(inode), credits, revoke_credits;
	struct ext4_extent_header *eh;
	ext4_lblk_t a, b;
	unsigned num;
	ext4_lblk_t ex_ee_block;
	unsigned short ex_ee_len;
	unsigned unwritten = 0;
	struct ext4_extent *ex;
	ext4_fsblk_t pblk;

	/* the header must be checked already in ext4_ext_remove_space() */
	ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
	if (!path[depth].p_hdr)
		path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
	eh = path[depth].p_hdr;
	if (unlikely(path[depth].p_hdr == NULL)) {
		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
		return -EFSCORRUPTED;
	}
	/* find where to start removing */
	ex = path[depth].p_ext;
	if (!ex)
		ex = EXT_LAST_EXTENT(eh);

	ex_ee_block = le32_to_cpu(ex->ee_block);
	ex_ee_len = ext4_ext_get_actual_len(ex);

	trace_ext4_ext_rm_leaf(inode, start, ex, partial);

	while (ex >= EXT_FIRST_EXTENT(eh) &&
	       ex_ee_block + ex_ee_len > start) {

		if (ext4_ext_is_unwritten(ex))
			unwritten = 1;
		else
			unwritten = 0;

		ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block,
			  unwritten, ex_ee_len);
		path[depth].p_ext = ex;

		a = ex_ee_block > start ? ex_ee_block : start;
		b = ex_ee_block+ex_ee_len - 1 < end ?
			ex_ee_block+ex_ee_len - 1 : end;

		ext_debug(inode, "  border %u:%u\n", a, b);

		/* If this extent is beyond the end of the hole, skip it */
		if (end < ex_ee_block) {
			/*
			 * We're going to skip this extent and move to another,
			 * so note that its first cluster is in use to avoid
			 * freeing it when removing blocks.  Eventually, the
			 * right edge of the truncated/punched region will
			 * be just to the left.
			 */
			if (sbi->s_cluster_ratio > 1) {
				pblk = ext4_ext_pblock(ex);
				partial->pclu = EXT4_B2C(sbi, pblk);
				partial->state = nofree;
			}
			ex--;
			ex_ee_block = le32_to_cpu(ex->ee_block);
			ex_ee_len = ext4_ext_get_actual_len(ex);
			continue;
		} else if (b != ex_ee_block + ex_ee_len - 1) {
			EXT4_ERROR_INODE(inode,
					 "can not handle truncate %u:%u "
					 "on extent %u:%u",
					 start, end, ex_ee_block,
					 ex_ee_block + ex_ee_len - 1);
			err = -EFSCORRUPTED;
			goto out;
		} else if (a != ex_ee_block) {
			/* remove tail of the extent */
			num = a - ex_ee_block;
		} else {
			/* remove whole extent: excellent! */
			num = 0;
		}
		/*
		 * 3 for leaf, sb, and inode plus 2 (bmap and group
		 * descriptor) for each block group; assume two block
		 * groups plus ex_ee_len/blocks_per_block_group for
		 * the worst case
		 */
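		/*
		 * Illustrative arithmetic (added commentary, not part of
		 * the original source): on a 4k-block fs with 32768 blocks
		 * per group, removing a 1000-block extent yields
		 * 7 + 2 * (1000 / 32768) = 7 credits here, before the
		 * index-correction and quota credits added below.
		 */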
		credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
		if (ex == EXT_FIRST_EXTENT(eh)) {
			correct_index = 1;
			credits += (ext_depth(inode)) + 1;
		}
		credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
		/*
		 * We may end up freeing some index blocks and data from the
		 * punched range. Note that partial clusters are accounted for
		 * by ext4_free_data_revoke_credits().
		 */
		revoke_credits =
			ext4_free_metadata_revoke_credits(inode->i_sb,
							  ext_depth(inode)) +
			ext4_free_data_revoke_credits(inode, b - a + 1);

		err = ext4_datasem_ensure_credits(handle, inode, credits,
						  credits, revoke_credits);
		if (err) {
			if (err > 0)
				err = -EAGAIN;
			goto out;
		}

		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			goto out;

		err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
		if (err)
			goto out;

		if (num == 0)
			/* this extent is removed; mark slot entirely unused */
			ext4_ext_store_pblock(ex, 0);

		ex->ee_len = cpu_to_le16(num);
		/*
		 * Do not mark unwritten if all the blocks in the
		 * extent have been removed.
		 */
		if (unwritten && num)
			ext4_ext_mark_unwritten(ex);
		/*
		 * If the extent was completely released,
		 * we need to remove it from the leaf
		 */
		if (num == 0) {
			if (end != EXT_MAX_BLOCKS - 1) {
				/*
				 * For hole punching, we need to scoot all the
				 * extents up when an extent is removed so that
				 * we don't have blank extents in the middle
				 */
				memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
					sizeof(struct ext4_extent));

				/* Now get rid of the one at the end */
				memset(EXT_LAST_EXTENT(eh), 0,
				       sizeof(struct ext4_extent));
			}
			le16_add_cpu(&eh->eh_entries, -1);
		}

		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
			goto out;

		ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num,
			  ext4_ext_pblock(ex));
		ex--;
		ex_ee_block = le32_to_cpu(ex->ee_block);
		ex_ee_len = ext4_ext_get_actual_len(ex);
	}

	if (correct_index && eh->eh_entries)
		err = ext4_ext_correct_indexes(handle, inode, path);

	/*
	 * If there's a partial cluster and at least one extent remains in
	 * the leaf, free the partial cluster if it isn't shared with the
	 * current extent.  If it is shared with the current extent
	 * we reset the partial cluster because we've reached the start of the
	 * truncated/punched region and we're done removing blocks.
	 */
	if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
		pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
		if (partial->pclu != EXT4_B2C(sbi, pblk)) {
			int flags = get_default_free_blocks_flags(inode);

			if (ext4_is_pending(inode, partial->lblk))
				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
			ext4_free_blocks(handle, inode, NULL,
					 EXT4_C2B(sbi, partial->pclu),
					 sbi->s_cluster_ratio, flags);
			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
				ext4_rereserve_cluster(inode, partial->lblk);
		}
		partial->state = initial;
	}

	/* if this leaf is free, then we should
	 * remove it from index block above */
	if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
		err = ext4_ext_rm_idx(handle, inode, path, depth);

out:
	return err;
}

/*
 * ext4_ext_more_to_rm:
 * returns 1 if current index has to be freed (even partial)
 */
static int
ext4_ext_more_to_rm(struct ext4_ext_path *path)
{
	BUG_ON(path->p_idx == NULL);

	if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
		return 0;

	/*
	 * if truncate on deeper level happened, it wasn't partial,
	 * so we have to consider current index for truncation
	 */
	if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
		return 0;
	return 1;
}

int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
			  ext4_lblk_t end)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	int depth = ext_depth(inode);
	struct ext4_ext_path *path = NULL;
	struct partial_cluster partial;
	handle_t *handle;
	int i = 0, err = 0;

	partial.pclu = 0;
	partial.lblk = 0;
	partial.state = initial;

	ext_debug(inode, "truncate since %u to %u\n", start, end);

	/* probably first extent we're gonna free will be last in block */
	handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
			depth + 1,
			ext4_free_metadata_revoke_credits(inode->i_sb, depth));
	if (IS_ERR(handle))
		return PTR_ERR(handle);

again:
	trace_ext4_ext_remove_space(inode, start, end, depth);

	/*
	 * Check if we are removing extents inside the extent tree. If that
	 * is the case, we are going to punch a hole inside the extent tree
	 * so we have to check whether we need to split the extent covering
	 * the last block to remove so we can easily remove the part of it
	 * in ext4_ext_rm_leaf().
	 */
	if (end < EXT_MAX_BLOCKS - 1) {
		struct ext4_extent *ex;
		ext4_lblk_t ee_block, ex_end, lblk;
		ext4_fsblk_t pblk;

		/* find extent for or closest extent to this block */
		path = ext4_find_extent(inode, end, NULL,
					EXT4_EX_NOCACHE | EXT4_EX_NOFAIL);
		if (IS_ERR(path)) {
			ext4_journal_stop(handle);
			return PTR_ERR(path);
		}
		depth = ext_depth(inode);
		/* A leaf may not exist only if the inode has no blocks at all */
		ex = path[depth].p_ext;
		if (!ex) {
			if (depth) {
				EXT4_ERROR_INODE(inode,
						 "path[%d].p_hdr == NULL",
						 depth);
				err = -EFSCORRUPTED;
			}
			goto out;
		}

		ee_block = le32_to_cpu(ex->ee_block);
		ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;

		/*
		 * See if the last block is inside the extent, if so split
		 * the extent at 'end' block so we can easily remove the
		 * tail of the first part of the split extent in
		 * ext4_ext_rm_leaf().
		 */
		if (end >= ee_block && end < ex_end) {

			/*
			 * If we're going to split the extent, note that
			 * the cluster containing the block after 'end' is
			 * in use to avoid freeing it when removing blocks.
			 */
			if (sbi->s_cluster_ratio > 1) {
				pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
				partial.pclu = EXT4_B2C(sbi, pblk);
				partial.state = nofree;
			}

			/*
			 * Split the extent in two so that 'end' is the last
			 * block in the first new extent. Also we should not
			 * fail removing space due to ENOSPC so try to use
			 * reserved block if that happens.
			 */
			err = ext4_force_split_extent_at(handle, inode, &path,
							 end + 1, 1);
			if (err < 0)
				goto out;

		} else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
			   partial.state == initial) {
			/*
			 * If we're punching, there's an extent to the right.
			 * If the partial cluster hasn't been set, set it to
			 * that extent's first cluster and its state to nofree
			 * so it won't be freed should it contain blocks to be
			 * removed. If it's already set (tofree/nofree), we're
			 * retrying and keep the original partial cluster info
			 * so a cluster marked tofree as a result of earlier
			 * extent removal is not lost.
			 */
			lblk = ex_end + 1;
			err = ext4_ext_search_right(inode, path, &lblk, &pblk,
						    NULL);
			if (err < 0)
				goto out;
			if (pblk) {
				partial.pclu = EXT4_B2C(sbi, pblk);
				partial.state = nofree;
			}
		}
	}
	/*
	 * We start scanning from right side, freeing all the blocks
	 * after i_size and walking into the tree depth-wise.
	 */
	depth = ext_depth(inode);
	if (path) {
		int k = i = depth;
		while (--k > 0)
			path[k].p_block =
				le16_to_cpu(path[k].p_hdr->eh_entries)+1;
	} else {
		path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
			       GFP_NOFS | __GFP_NOFAIL);
		if (path == NULL) {
			ext4_journal_stop(handle);
			return -ENOMEM;
		}
		path[0].p_maxdepth = path[0].p_depth = depth;
		path[0].p_hdr = ext_inode_hdr(inode);
		i = 0;

		if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
			err = -EFSCORRUPTED;
			goto out;
		}
	}
	err = 0;

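	/*
	 * Added commentary (not part of the original source): the loop
	 * below is an iterative depth-first walk over the tree, with i as
	 * the current level (0 = root, depth = leaf). Leaves are trimmed
	 * right-to-left by ext4_ext_rm_leaf(); an index level is revisited
	 * until ext4_ext_more_to_rm() reports nothing left to free there.
	 */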
2946 if (i == depth) {
2947 /* this is leaf block */
2948 err = ext4_ext_rm_leaf(handle, inode, path,
David Brazdil0f672f62019-12-10 10:32:29 +00002949 &partial, start, end);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002950 /* root level has p_bh == NULL, brelse() eats this */
2951 brelse(path[i].p_bh);
2952 path[i].p_bh = NULL;
2953 i--;
2954 continue;
2955 }
2956
2957 /* this is index block */
2958 if (!path[i].p_hdr) {
Olivier Deprez157378f2022-04-04 15:47:50 +02002959 ext_debug(inode, "initialize header\n");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002960 path[i].p_hdr = ext_block_hdr(path[i].p_bh);
2961 }
2962
2963 if (!path[i].p_idx) {
2964 /* this level hasn't been touched yet */
2965 path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
2966 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
Olivier Deprez157378f2022-04-04 15:47:50 +02002967 ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002968 path[i].p_hdr,
2969 le16_to_cpu(path[i].p_hdr->eh_entries));
2970 } else {
2971 /* we were already here, see at next index */
2972 path[i].p_idx--;
2973 }
2974
Olivier Deprez157378f2022-04-04 15:47:50 +02002975 ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002976 i, EXT_FIRST_INDEX(path[i].p_hdr),
2977 path[i].p_idx);
2978 if (ext4_ext_more_to_rm(path + i)) {
2979 struct buffer_head *bh;
2980 /* go to the next level */
Olivier Deprez157378f2022-04-04 15:47:50 +02002981 ext_debug(inode, "move to level %d (block %llu)\n",
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002982 i + 1, ext4_idx_pblock(path[i].p_idx));
2983 memset(path + i + 1, 0, sizeof(*path));
Olivier Deprez157378f2022-04-04 15:47:50 +02002984 bh = read_extent_tree_block(inode, path[i].p_idx,
2985 depth - i - 1,
2986 EXT4_EX_NOCACHE);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002987 if (IS_ERR(bh)) {
2988 /* should we reset i_size? */
2989 err = PTR_ERR(bh);
2990 break;
2991 }
2992 /* Yield here to deal with large extent trees.
2993 * Should be a no-op if we did IO above. */
2994 cond_resched();
2995 if (WARN_ON(i + 1 > depth)) {
2996 err = -EFSCORRUPTED;
2997 break;
2998 }
2999 path[i + 1].p_bh = bh;
3000
3001 /* save actual number of indexes since this
3002 * number is changed at the next iteration */
3003 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
3004 i++;
3005 } else {
3006 /* we finished processing this index, go up */
3007 if (path[i].p_hdr->eh_entries == 0 && i > 0) {
3008 /* index is empty, remove it;
3009 * handle must be already prepared by the
3010 * truncatei_leaf() */
3011 err = ext4_ext_rm_idx(handle, inode, path, i);
3012 }
3013 /* root level has p_bh == NULL, brelse() eats this */
3014 brelse(path[i].p_bh);
3015 path[i].p_bh = NULL;
3016 i--;
Olivier Deprez157378f2022-04-04 15:47:50 +02003017 ext_debug(inode, "return to level %d\n", i);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003018 }
3019 }
3020
David Brazdil0f672f62019-12-10 10:32:29 +00003021 trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
3022 path->p_hdr->eh_entries);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003023
3024 /*
David Brazdil0f672f62019-12-10 10:32:29 +00003025 * if there's a partial cluster and we have removed the first extent
3026 * in the file, then we also free the partial cluster, if any
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003027 */
David Brazdil0f672f62019-12-10 10:32:29 +00003028 if (partial.state == tofree && err == 0) {
3029 int flags = get_default_free_blocks_flags(inode);
3030
3031 if (ext4_is_pending(inode, partial.lblk))
3032 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003033 ext4_free_blocks(handle, inode, NULL,
David Brazdil0f672f62019-12-10 10:32:29 +00003034 EXT4_C2B(sbi, partial.pclu),
3035 sbi->s_cluster_ratio, flags);
3036 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
3037 ext4_rereserve_cluster(inode, partial.lblk);
3038 partial.state = initial;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003039 }
3040
3041 /* TODO: flexible tree reduction should be here */
3042 if (path->p_hdr->eh_entries == 0) {
3043 /*
3044 * truncate to zero freed all the tree,
3045 * so we need to correct eh_depth
3046 */
3047 err = ext4_ext_get_access(handle, inode, path);
3048 if (err == 0) {
3049 ext_inode_hdr(inode)->eh_depth = 0;
3050 ext_inode_hdr(inode)->eh_max =
3051 cpu_to_le16(ext4_ext_space_root(inode, 0));
3052 err = ext4_ext_dirty(handle, inode, path);
3053 }
3054 }
3055out:
3056 ext4_ext_drop_refs(path);
3057 kfree(path);
3058 path = NULL;
3059 if (err == -EAGAIN)
3060 goto again;
3061 ext4_journal_stop(handle);
3062
3063 return err;
3064}

/*
 * called at mount time
 */
void ext4_ext_init(struct super_block *sb)
{
	/*
	 * possible initialization would be here
	 */

	if (ext4_has_feature_extents(sb)) {
#if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
		printk(KERN_INFO "EXT4-fs: file extents enabled"
#ifdef AGGRESSIVE_TEST
		       ", aggressive tests"
#endif
#ifdef CHECK_BINSEARCH
		       ", check binsearch"
#endif
#ifdef EXTENTS_STATS
		       ", stats"
#endif
		       "\n");
#endif
#ifdef EXTENTS_STATS
		spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
		EXT4_SB(sb)->s_ext_min = 1 << 30;
		EXT4_SB(sb)->s_ext_max = 0;
#endif
	}
}

/*
 * called at umount time
 */
void ext4_ext_release(struct super_block *sb)
{
	if (!ext4_has_feature_extents(sb))
		return;

#ifdef EXTENTS_STATS
	if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
		struct ext4_sb_info *sbi = EXT4_SB(sb);
		printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
			sbi->s_ext_blocks, sbi->s_ext_extents,
			sbi->s_ext_blocks / sbi->s_ext_extents);
		printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
			sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
	}
#endif
}

static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
{
	ext4_lblk_t ee_block;
	ext4_fsblk_t ee_pblock;
	unsigned int ee_len;

	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);
	ee_pblock = ext4_ext_pblock(ex);

	if (ee_len == 0)
		return 0;

	return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
				     EXTENT_STATUS_WRITTEN);
}

/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
	ext4_fsblk_t ee_pblock;
	unsigned int ee_len;

	ee_len = ext4_ext_get_actual_len(ex);
	ee_pblock = ext4_ext_pblock(ex);
	return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
				  ee_len);
}

/*
 * ext4_split_extent_at() splits an extent at given block.
 *
 * @handle: the journal handle
 * @inode: the file inode
 * @path: the path to the extent
 * @split: the logical block where the extent is split.
 * @split_flags: indicates if the extent could be zeroed out if split fails,
 *		 and the states (init or unwritten) of new extents.
 * @flags: flags used to insert new extent to extent tree.
 *
 *
 * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
 * of which are determined by split_flag.
 *
 * There are two cases:
 *  a> the extent is split into two extents.
 *  b> split is not needed, and just mark the extent.
 *
 * return 0 on success.
 */
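/*
 * Illustrative example (added commentary, not part of the original
 * source): splitting extent [100, 199] at @split = 150 shrinks the
 * original to [100, 149] and inserts a new extent [150, 199] (case a);
 * with @split = 100 no new extent is needed and only the extent's
 * written/unwritten state changes (case b).
 */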
static int ext4_split_extent_at(handle_t *handle,
			     struct inode *inode,
			     struct ext4_ext_path **ppath,
			     ext4_lblk_t split,
			     int split_flag,
			     int flags)
{
	struct ext4_ext_path *path = *ppath;
	ext4_fsblk_t newblock;
	ext4_lblk_t ee_block;
	struct ext4_extent *ex, newex, orig_ex, zero_ex;
	struct ext4_extent *ex2 = NULL;
	unsigned int ee_len, depth;
	int err = 0;

	BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
	       (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));

	ext_debug(inode, "logical block %llu\n", (unsigned long long)split);

	ext4_ext_show_leaf(inode, path);

	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);
	newblock = split - ee_block + ext4_ext_pblock(ex);

	BUG_ON(split < ee_block || split >= (ee_block + ee_len));
	BUG_ON(!ext4_ext_is_unwritten(ex) &&
	       split_flag & (EXT4_EXT_MAY_ZEROOUT |
			     EXT4_EXT_MARK_UNWRIT1 |
			     EXT4_EXT_MARK_UNWRIT2));

	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto out;

	if (split == ee_block) {
		/*
		 * case b: block @split is the block that the extent begins with
		 * then we just change the state of the extent, and splitting
		 * is not needed.
		 */
		if (split_flag & EXT4_EXT_MARK_UNWRIT2)
			ext4_ext_mark_unwritten(ex);
		else
			ext4_ext_mark_initialized(ex);

		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
			ext4_ext_try_to_merge(handle, inode, path, ex);

		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
		goto out;
	}

	/* case a */
	memcpy(&orig_ex, ex, sizeof(orig_ex));
	ex->ee_len = cpu_to_le16(split - ee_block);
	if (split_flag & EXT4_EXT_MARK_UNWRIT1)
		ext4_ext_mark_unwritten(ex);

	/*
	 * path may lead to new leaf, not to original leaf any more
	 * after ext4_ext_insert_extent() returns,
	 */
	err = ext4_ext_dirty(handle, inode, path + depth);
	if (err)
		goto fix_extent_len;

	ex2 = &newex;
	ex2->ee_block = cpu_to_le32(split);
	ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
	ext4_ext_store_pblock(ex2, newblock);
	if (split_flag & EXT4_EXT_MARK_UNWRIT2)
		ext4_ext_mark_unwritten(ex2);

	err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
	if (err != -ENOSPC && err != -EDQUOT)
		goto out;

	if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
			if (split_flag & EXT4_EXT_DATA_VALID1) {
				err = ext4_ext_zeroout(inode, ex2);
				zero_ex.ee_block = ex2->ee_block;
				zero_ex.ee_len = cpu_to_le16(
						ext4_ext_get_actual_len(ex2));
				ext4_ext_store_pblock(&zero_ex,
						      ext4_ext_pblock(ex2));
			} else {
				err = ext4_ext_zeroout(inode, ex);
				zero_ex.ee_block = ex->ee_block;
				zero_ex.ee_len = cpu_to_le16(
						ext4_ext_get_actual_len(ex));
				ext4_ext_store_pblock(&zero_ex,
						      ext4_ext_pblock(ex));
			}
		} else {
			err = ext4_ext_zeroout(inode, &orig_ex);
			zero_ex.ee_block = orig_ex.ee_block;
			zero_ex.ee_len = cpu_to_le16(
					ext4_ext_get_actual_len(&orig_ex));
			ext4_ext_store_pblock(&zero_ex,
					      ext4_ext_pblock(&orig_ex));
		}

		if (!err) {
			/* update the extent length and mark as initialized */
			ex->ee_len = cpu_to_le16(ee_len);
			ext4_ext_try_to_merge(handle, inode, path, ex);
			err = ext4_ext_dirty(handle, inode, path + path->p_depth);
			if (!err)
				/* update extent status tree */
				err = ext4_zeroout_es(inode, &zero_ex);
			/* If we failed at this point, we don't know in which
			 * state the extent tree exactly is so don't try to fix
			 * length of the original extent as it may do even more
			 * damage.
			 */
			goto out;
		}
	}

fix_extent_len:
	ex->ee_len = orig_ex.ee_len;
	/*
	 * Ignore ext4_ext_dirty return value since we are already in error path
	 * and err is a non-zero error code.
	 */
	ext4_ext_dirty(handle, inode, path + path->p_depth);
	return err;
out:
	ext4_ext_show_leaf(inode, path);
	return err;
}
3303
3304/*
3305 * ext4_split_extent() splits an extent and marks the extent which is covered
3306 * by @map as split_flag indicates.
3307 *
3308 * It may result in splitting the extent into multiple extents (up to three).
3309 * There are three possibilities:
3310 *   a> There is no split required
3311 *   b> Splits in two extents: Split is happening at either end of the extent
3312 *   c> Splits in three extents: Someone is splitting in the middle of the extent
3313 *
3314 */
3315static int ext4_split_extent(handle_t *handle,
3316 struct inode *inode,
3317 struct ext4_ext_path **ppath,
3318 struct ext4_map_blocks *map,
3319 int split_flag,
3320 int flags)
3321{
3322 struct ext4_ext_path *path = *ppath;
3323 ext4_lblk_t ee_block;
3324 struct ext4_extent *ex;
3325 unsigned int ee_len, depth;
3326 int err = 0;
3327 int unwritten;
3328 int split_flag1, flags1;
3329 int allocated = map->m_len;
3330
3331 depth = ext_depth(inode);
3332 ex = path[depth].p_ext;
3333 ee_block = le32_to_cpu(ex->ee_block);
3334 ee_len = ext4_ext_get_actual_len(ex);
3335 unwritten = ext4_ext_is_unwritten(ex);
3336
3337 if (map->m_lblk + map->m_len < ee_block + ee_len) {
3338 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
3339 flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
3340 if (unwritten)
3341 split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
3342 EXT4_EXT_MARK_UNWRIT2;
3343 if (split_flag & EXT4_EXT_DATA_VALID2)
3344 split_flag1 |= EXT4_EXT_DATA_VALID1;
3345 err = ext4_split_extent_at(handle, inode, ppath,
3346 map->m_lblk + map->m_len, split_flag1, flags1);
3347 if (err)
3348 goto out;
3349 } else {
3350 allocated = ee_len - (map->m_lblk - ee_block);
3351 }
3352 /*
3353	 * Updating the path is required because the previous ext4_split_extent_at()
3354	 * may have resulted in a split of the original leaf or an extent zeroout.
3355 */
Olivier Deprez157378f2022-04-04 15:47:50 +02003356 path = ext4_find_extent(inode, map->m_lblk, ppath, flags);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003357 if (IS_ERR(path))
3358 return PTR_ERR(path);
3359 depth = ext_depth(inode);
3360 ex = path[depth].p_ext;
3361 if (!ex) {
3362 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3363 (unsigned long) map->m_lblk);
3364 return -EFSCORRUPTED;
3365 }
3366 unwritten = ext4_ext_is_unwritten(ex);
3367 split_flag1 = 0;
3368
3369 if (map->m_lblk >= ee_block) {
3370 split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
3371 if (unwritten) {
3372 split_flag1 |= EXT4_EXT_MARK_UNWRIT1;
3373 split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
3374 EXT4_EXT_MARK_UNWRIT2);
3375 }
3376 err = ext4_split_extent_at(handle, inode, ppath,
3377 map->m_lblk, split_flag1, flags);
3378 if (err)
3379 goto out;
3380 }
3381
3382 ext4_ext_show_leaf(inode, path);
3383out:
3384 return err ? err : allocated;
3385}
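
/*
 * Illustrative sketch (userspace, not kernel code): how a requested range
 * [m_lblk, m_lblk + m_len) carves an extent [ee_block, ee_block + ee_len)
 * into up to three pieces, mirroring the two ext4_split_extent_at() passes
 * above (first at the range end, then at the range start). The helper name
 * and the sample values are hypothetical.
 */
#include <stdio.h>

static void show_split(unsigned int ee_block, unsigned int ee_len,
		       unsigned int m_lblk, unsigned int m_len)
{
	unsigned int start = m_lblk, end = m_lblk + m_len;

	if (ee_block < start)				/* left remainder */
		printf("left : [%u, %u)\n", ee_block, start);
	printf("mid  : [%u, %u)\n", start, end);	/* the mapped range */
	if (end < ee_block + ee_len)			/* right remainder */
		printf("right: [%u, %u)\n", end, ee_block + ee_len);
}

int main(void)
{
	show_split(100, 50, 110, 10);	/* case c: three pieces */
	show_split(100, 50, 100, 10);	/* case b: split at one end */
	show_split(100, 50, 100, 50);	/* case a: no split required */
	return 0;
}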
3386
3387/*
3388 * This function is called by ext4_ext_map_blocks() if someone tries to write
3389 * to an unwritten extent. It may result in splitting the unwritten
3390 * extent into multiple extents (up to three - one initialized and two
3391 * unwritten).
3392 * There are three possibilities:
3393 * a> There is no split required: Entire extent should be initialized
3394 * b> Splits in two extents: Write is happening at either end of the extent
3395 *   c> Splits in three extents: Someone is writing in the middle of the extent
3396 *
3397 * Pre-conditions:
3398 * - The extent pointed to by 'path' is unwritten.
3399 * - The extent pointed to by 'path' contains a superset
3400 * of the logical span [map->m_lblk, map->m_lblk + map->m_len).
3401 *
3402 * Post-conditions on success:
3403 *  - the returned value is the number of blocks beyond map->m_lblk
3404 * that are allocated and initialized.
3405 * It is guaranteed to be >= map->m_len.
3406 */
3407static int ext4_ext_convert_to_initialized(handle_t *handle,
3408 struct inode *inode,
3409 struct ext4_map_blocks *map,
3410 struct ext4_ext_path **ppath,
3411 int flags)
3412{
3413 struct ext4_ext_path *path = *ppath;
3414 struct ext4_sb_info *sbi;
3415 struct ext4_extent_header *eh;
3416 struct ext4_map_blocks split_map;
3417 struct ext4_extent zero_ex1, zero_ex2;
3418 struct ext4_extent *ex, *abut_ex;
3419 ext4_lblk_t ee_block, eof_block;
3420 unsigned int ee_len, depth, map_len = map->m_len;
3421 int allocated = 0, max_zeroout = 0;
3422 int err = 0;
3423 int split_flag = EXT4_EXT_DATA_VALID2;
3424
Olivier Deprez157378f2022-04-04 15:47:50 +02003425 ext_debug(inode, "logical block %llu, max_blocks %u\n",
3426 (unsigned long long)map->m_lblk, map_len);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003427
3428 sbi = EXT4_SB(inode->i_sb);
Olivier Deprez0e641232021-09-23 10:07:05 +02003429 eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3430 >> inode->i_sb->s_blocksize_bits;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003431 if (eof_block < map->m_lblk + map_len)
3432 eof_block = map->m_lblk + map_len;
3433
3434 depth = ext_depth(inode);
3435 eh = path[depth].p_hdr;
3436 ex = path[depth].p_ext;
3437 ee_block = le32_to_cpu(ex->ee_block);
3438 ee_len = ext4_ext_get_actual_len(ex);
3439 zero_ex1.ee_len = 0;
3440 zero_ex2.ee_len = 0;
3441
3442 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
3443
3444 /* Pre-conditions */
3445 BUG_ON(!ext4_ext_is_unwritten(ex));
3446 BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
3447
3448 /*
3449 * Attempt to transfer newly initialized blocks from the currently
3450 * unwritten extent to its neighbor. This is much cheaper
3451 * than an insertion followed by a merge as those involve costly
3452 * memmove() calls. Transferring to the left is the common case in
3453 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
3454 * followed by append writes.
3455 *
3456 * Limitations of the current logic:
3457 * - L1: we do not deal with writes covering the whole extent.
3458 * This would require removing the extent if the transfer
3459 * is possible.
3460 * - L2: we only attempt to merge with an extent stored in the
3461 * same extent tree node.
3462 */
3463 if ((map->m_lblk == ee_block) &&
3464 /* See if we can merge left */
3465 (map_len < ee_len) && /*L1*/
3466 (ex > EXT_FIRST_EXTENT(eh))) { /*L2*/
3467 ext4_lblk_t prev_lblk;
3468 ext4_fsblk_t prev_pblk, ee_pblk;
3469 unsigned int prev_len;
3470
3471 abut_ex = ex - 1;
3472 prev_lblk = le32_to_cpu(abut_ex->ee_block);
3473 prev_len = ext4_ext_get_actual_len(abut_ex);
3474 prev_pblk = ext4_ext_pblock(abut_ex);
3475 ee_pblk = ext4_ext_pblock(ex);
3476
3477 /*
3478 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3479 * upon those conditions:
3480 * - C1: abut_ex is initialized,
3481 * - C2: abut_ex is logically abutting ex,
3482 * - C3: abut_ex is physically abutting ex,
3483 * - C4: abut_ex can receive the additional blocks without
3484 * overflowing the (initialized) length limit.
3485 */
3486 if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
3487 ((prev_lblk + prev_len) == ee_block) && /*C2*/
3488 ((prev_pblk + prev_len) == ee_pblk) && /*C3*/
3489 (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3490 err = ext4_ext_get_access(handle, inode, path + depth);
3491 if (err)
3492 goto out;
3493
3494 trace_ext4_ext_convert_to_initialized_fastpath(inode,
3495 map, ex, abut_ex);
3496
3497 /* Shift the start of ex by 'map_len' blocks */
3498 ex->ee_block = cpu_to_le32(ee_block + map_len);
3499 ext4_ext_store_pblock(ex, ee_pblk + map_len);
3500 ex->ee_len = cpu_to_le16(ee_len - map_len);
3501 ext4_ext_mark_unwritten(ex); /* Restore the flag */
3502
3503 /* Extend abut_ex by 'map_len' blocks */
3504 abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
3505
3506 /* Result: number of initialized blocks past m_lblk */
3507 allocated = map_len;
3508 }
3509 } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
3510 (map_len < ee_len) && /*L1*/
3511 ex < EXT_LAST_EXTENT(eh)) { /*L2*/
3512 /* See if we can merge right */
3513 ext4_lblk_t next_lblk;
3514 ext4_fsblk_t next_pblk, ee_pblk;
3515 unsigned int next_len;
3516
3517 abut_ex = ex + 1;
3518 next_lblk = le32_to_cpu(abut_ex->ee_block);
3519 next_len = ext4_ext_get_actual_len(abut_ex);
3520 next_pblk = ext4_ext_pblock(abut_ex);
3521 ee_pblk = ext4_ext_pblock(ex);
3522
3523 /*
3524 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3525 * upon those conditions:
3526 * - C1: abut_ex is initialized,
3527 * - C2: abut_ex is logically abutting ex,
3528 * - C3: abut_ex is physically abutting ex,
3529 * - C4: abut_ex can receive the additional blocks without
3530 * overflowing the (initialized) length limit.
3531 */
3532 if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
3533 ((map->m_lblk + map_len) == next_lblk) && /*C2*/
3534 ((ee_pblk + ee_len) == next_pblk) && /*C3*/
3535 (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3536 err = ext4_ext_get_access(handle, inode, path + depth);
3537 if (err)
3538 goto out;
3539
3540 trace_ext4_ext_convert_to_initialized_fastpath(inode,
3541 map, ex, abut_ex);
3542
3543 /* Shift the start of abut_ex by 'map_len' blocks */
3544 abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
3545 ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
3546 ex->ee_len = cpu_to_le16(ee_len - map_len);
3547 ext4_ext_mark_unwritten(ex); /* Restore the flag */
3548
3549 /* Extend abut_ex by 'map_len' blocks */
3550 abut_ex->ee_len = cpu_to_le16(next_len + map_len);
3551
3552 /* Result: number of initialized blocks past m_lblk */
3553 allocated = map_len;
3554 }
3555 }
3556 if (allocated) {
3557 /* Mark the block containing both extents as dirty */
Olivier Deprez157378f2022-04-04 15:47:50 +02003558 err = ext4_ext_dirty(handle, inode, path + depth);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003559
3560 /* Update path to point to the right extent */
3561 path[depth].p_ext = abut_ex;
3562 goto out;
3563 } else
3564 allocated = ee_len - (map->m_lblk - ee_block);
3565
3566 WARN_ON(map->m_lblk < ee_block);
3567 /*
3568 * It is safe to convert extent to initialized via explicit
3569 * zeroout only if extent is fully inside i_size or new_size.
3570 */
3571 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
3572
3573 if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3574 max_zeroout = sbi->s_extent_max_zeroout_kb >>
3575 (inode->i_sb->s_blocksize_bits - 10);
3576
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003577 /*
3578 * five cases:
3579 * 1. split the extent into three extents.
3580 * 2. split the extent into two extents, zeroout the head of the first
3581 * extent.
3582 * 3. split the extent into two extents, zeroout the tail of the second
3583 * extent.
3584	 * 4. split the extent into two extents without zeroout.
3585 * 5. no splitting needed, just possibly zeroout the head and / or the
3586 * tail of the extent.
3587 */
3588 split_map.m_lblk = map->m_lblk;
3589 split_map.m_len = map->m_len;
3590
3591 if (max_zeroout && (allocated > split_map.m_len)) {
3592 if (allocated <= max_zeroout) {
3593 /* case 3 or 5 */
3594 zero_ex1.ee_block =
3595 cpu_to_le32(split_map.m_lblk +
3596 split_map.m_len);
3597 zero_ex1.ee_len =
3598 cpu_to_le16(allocated - split_map.m_len);
3599 ext4_ext_store_pblock(&zero_ex1,
3600 ext4_ext_pblock(ex) + split_map.m_lblk +
3601 split_map.m_len - ee_block);
3602 err = ext4_ext_zeroout(inode, &zero_ex1);
3603 if (err)
3604 goto out;
3605 split_map.m_len = allocated;
3606 }
3607 if (split_map.m_lblk - ee_block + split_map.m_len <
3608 max_zeroout) {
3609 /* case 2 or 5 */
3610 if (split_map.m_lblk != ee_block) {
3611 zero_ex2.ee_block = ex->ee_block;
3612 zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
3613 ee_block);
3614 ext4_ext_store_pblock(&zero_ex2,
3615 ext4_ext_pblock(ex));
3616 err = ext4_ext_zeroout(inode, &zero_ex2);
3617 if (err)
3618 goto out;
3619 }
3620
3621 split_map.m_len += split_map.m_lblk - ee_block;
3622 split_map.m_lblk = ee_block;
3623 allocated = map->m_len;
3624 }
3625 }
3626
3627 err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
3628 flags);
3629 if (err > 0)
3630 err = 0;
3631out:
3632 /* If we have gotten a failure, don't zero out status tree */
3633 if (!err) {
3634 err = ext4_zeroout_es(inode, &zero_ex1);
3635 if (!err)
3636 err = ext4_zeroout_es(inode, &zero_ex2);
3637 }
3638 return err ? err : allocated;
3639}
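
/*
 * Minimal sketch (userspace, not kernel code) of the max_zeroout conversion
 * above: s_extent_max_zeroout_kb is in KiB, so translating it into
 * filesystem blocks shifts right by (blocksize_bits - 10). The value 32 is
 * only an assumed example; the real knob is the sysfs extent_max_zeroout_kb
 * tunable.
 */
#include <stdio.h>

int main(void)
{
	unsigned int extent_max_zeroout_kb = 32;	/* assumed tunable value */
	unsigned int blocksize_bits;

	for (blocksize_bits = 10; blocksize_bits <= 12; blocksize_bits++) {
		/* same shift as above: KiB -> blocks of 1 << blocksize_bits */
		unsigned int max_zeroout =
			extent_max_zeroout_kb >> (blocksize_bits - 10);

		printf("block size %5u: max_zeroout = %u blocks\n",
		       1U << blocksize_bits, max_zeroout);
	}
	return 0;
}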
3640
3641/*
3642 * This function is called by ext4_ext_map_blocks() from
3643 * ext4_get_blocks_dio_write() when DIO writes
3644 * to an unwritten extent.
3645 *
3646 * Writing to an unwritten extent may result in splitting the unwritten
3647 * extent into multiple initialized/unwritten extents (up to three)
3648 * There are three possibilities:
3649 * a> There is no split required: Entire extent should be unwritten
3650 * b> Splits in two extents: Write is happening at either end of the extent
3651 * c> Splits in three extents: Somone is writing in middle of the extent
3652 *
3653 * This works the same way in the case of initialized -> unwritten conversion.
3654 *
3655 * One or more index blocks may be needed if the extent tree grows after
3656 * the unwritten extent split. To prevent ENOSPC from occurring at IO
3657 * completion, we need to split the unwritten extent before DIO submits
3658 * the IO. The unwritten extent handled at this time will be split
3659 * into (at most) three unwritten extents. After IO completes, the part
3660 * being filled will be converted to initialized by the end_io callback
3661 * via ext4_convert_unwritten_extents().
3662 *
3663 * Returns the size of the unwritten extent to be written on success.
3664 */
3665static int ext4_split_convert_extents(handle_t *handle,
3666 struct inode *inode,
3667 struct ext4_map_blocks *map,
3668 struct ext4_ext_path **ppath,
3669 int flags)
3670{
3671 struct ext4_ext_path *path = *ppath;
3672 ext4_lblk_t eof_block;
3673 ext4_lblk_t ee_block;
3674 struct ext4_extent *ex;
3675 unsigned int ee_len;
3676 int split_flag = 0, depth;
3677
Olivier Deprez157378f2022-04-04 15:47:50 +02003678 ext_debug(inode, "logical block %llu, max_blocks %u\n",
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003679 (unsigned long long)map->m_lblk, map->m_len);
3680
Olivier Deprez0e641232021-09-23 10:07:05 +02003681 eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3682 >> inode->i_sb->s_blocksize_bits;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003683 if (eof_block < map->m_lblk + map->m_len)
3684 eof_block = map->m_lblk + map->m_len;
3685 /*
3686 * It is safe to convert extent to initialized via explicit
Olivier Deprez157378f2022-04-04 15:47:50 +02003687 * zeroout only if extent is fully inside i_size or new_size.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003688 */
3689 depth = ext_depth(inode);
3690 ex = path[depth].p_ext;
3691 ee_block = le32_to_cpu(ex->ee_block);
3692 ee_len = ext4_ext_get_actual_len(ex);
3693
3694 /* Convert to unwritten */
3695 if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
3696 split_flag |= EXT4_EXT_DATA_VALID1;
3697 /* Convert to initialized */
3698 } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
3699 split_flag |= ee_block + ee_len <= eof_block ?
3700 EXT4_EXT_MAY_ZEROOUT : 0;
3701 split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
3702 }
3703 flags |= EXT4_GET_BLOCKS_PRE_IO;
3704 return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
3705}
3706
3707static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3708 struct inode *inode,
3709 struct ext4_map_blocks *map,
3710 struct ext4_ext_path **ppath)
3711{
3712 struct ext4_ext_path *path = *ppath;
3713 struct ext4_extent *ex;
3714 ext4_lblk_t ee_block;
3715 unsigned int ee_len;
3716 int depth;
3717 int err = 0;
3718
3719 depth = ext_depth(inode);
3720 ex = path[depth].p_ext;
3721 ee_block = le32_to_cpu(ex->ee_block);
3722 ee_len = ext4_ext_get_actual_len(ex);
3723
Olivier Deprez157378f2022-04-04 15:47:50 +02003724 ext_debug(inode, "logical block %llu, max_blocks %u\n",
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003725 (unsigned long long)ee_block, ee_len);
3726
3727	/* If the extent is larger than requested, it is a clear sign that we
3728	 * still have some extent state machine issues left. So extent splitting
3729	 * is still required.
3730	 * TODO: Once all related issues are fixed, this situation should be
3731	 * illegal.
3732 */
3733 if (ee_block != map->m_lblk || ee_len > map->m_len) {
David Brazdil0f672f62019-12-10 10:32:29 +00003734#ifdef CONFIG_EXT4_DEBUG
3735 ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu,"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003736 " len %u; IO logical block %llu, len %u",
3737 inode->i_ino, (unsigned long long)ee_block, ee_len,
3738 (unsigned long long)map->m_lblk, map->m_len);
3739#endif
3740 err = ext4_split_convert_extents(handle, inode, map, ppath,
3741 EXT4_GET_BLOCKS_CONVERT);
3742 if (err < 0)
3743 return err;
3744 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3745 if (IS_ERR(path))
3746 return PTR_ERR(path);
3747 depth = ext_depth(inode);
3748 ex = path[depth].p_ext;
3749 }
3750
3751 err = ext4_ext_get_access(handle, inode, path + depth);
3752 if (err)
3753 goto out;
3754 /* first mark the extent as initialized */
3755 ext4_ext_mark_initialized(ex);
3756
3757 /* note: ext4_ext_correct_indexes() isn't needed here because
3758 * borders are not changed
3759 */
3760 ext4_ext_try_to_merge(handle, inode, path, ex);
3761
3762 /* Mark modified extent as dirty */
3763 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3764out:
3765 ext4_ext_show_leaf(inode, path);
3766 return err;
3767}
3768
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003769static int
3770convert_initialized_extent(handle_t *handle, struct inode *inode,
3771 struct ext4_map_blocks *map,
3772 struct ext4_ext_path **ppath,
Olivier Deprez157378f2022-04-04 15:47:50 +02003773 unsigned int *allocated)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003774{
3775 struct ext4_ext_path *path = *ppath;
3776 struct ext4_extent *ex;
3777 ext4_lblk_t ee_block;
3778 unsigned int ee_len;
3779 int depth;
3780 int err = 0;
3781
3782 /*
3783 * Make sure that the extent is no bigger than we support with
3784	 * unwritten extents
3785 */
3786 if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
3787 map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
3788
3789 depth = ext_depth(inode);
3790 ex = path[depth].p_ext;
3791 ee_block = le32_to_cpu(ex->ee_block);
3792 ee_len = ext4_ext_get_actual_len(ex);
3793
Olivier Deprez157378f2022-04-04 15:47:50 +02003794 ext_debug(inode, "logical block %llu, max_blocks %u\n",
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003795 (unsigned long long)ee_block, ee_len);
3796
3797 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3798 err = ext4_split_convert_extents(handle, inode, map, ppath,
3799 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3800 if (err < 0)
3801 return err;
3802 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3803 if (IS_ERR(path))
3804 return PTR_ERR(path);
3805 depth = ext_depth(inode);
3806 ex = path[depth].p_ext;
3807 if (!ex) {
3808 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3809 (unsigned long) map->m_lblk);
3810 return -EFSCORRUPTED;
3811 }
3812 }
3813
3814 err = ext4_ext_get_access(handle, inode, path + depth);
3815 if (err)
3816 return err;
3817 /* first mark the extent as unwritten */
3818 ext4_ext_mark_unwritten(ex);
3819
3820 /* note: ext4_ext_correct_indexes() isn't needed here because
3821 * borders are not changed
3822 */
3823 ext4_ext_try_to_merge(handle, inode, path, ex);
3824
3825 /* Mark modified extent as dirty */
3826 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3827 if (err)
3828 return err;
3829 ext4_ext_show_leaf(inode, path);
3830
3831 ext4_update_inode_fsync_trans(handle, inode, 1);
Olivier Deprez157378f2022-04-04 15:47:50 +02003832
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003833 map->m_flags |= EXT4_MAP_UNWRITTEN;
Olivier Deprez157378f2022-04-04 15:47:50 +02003834 if (*allocated > map->m_len)
3835 *allocated = map->m_len;
3836 map->m_len = *allocated;
3837 return 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003838}
3839
3840static int
3841ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
3842 struct ext4_map_blocks *map,
3843 struct ext4_ext_path **ppath, int flags,
3844 unsigned int allocated, ext4_fsblk_t newblock)
3845{
Olivier Deprez157378f2022-04-04 15:47:50 +02003846 struct ext4_ext_path __maybe_unused *path = *ppath;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003847 int ret = 0;
3848 int err = 0;
3849
Olivier Deprez157378f2022-04-04 15:47:50 +02003850 ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
3851 (unsigned long long)map->m_lblk, map->m_len, flags,
3852 allocated);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003853 ext4_ext_show_leaf(inode, path);
3854
3855 /*
3856 * When writing into unwritten space, we should not fail to
3857 * allocate metadata blocks for the new extent block if needed.
3858 */
3859 flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
3860
3861 trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
3862 allocated, newblock);
3863
Olivier Deprez157378f2022-04-04 15:47:50 +02003864 /* get_block() before submitting IO, split the extent */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003865 if (flags & EXT4_GET_BLOCKS_PRE_IO) {
3866 ret = ext4_split_convert_extents(handle, inode, map, ppath,
3867 flags | EXT4_GET_BLOCKS_CONVERT);
Olivier Deprez157378f2022-04-04 15:47:50 +02003868 if (ret < 0) {
3869 err = ret;
3870 goto out2;
3871 }
3872 /*
3873 * shouldn't get a 0 return when splitting an extent unless
3874 * m_len is 0 (bug) or extent has been corrupted
3875 */
3876 if (unlikely(ret == 0)) {
3877 EXT4_ERROR_INODE(inode,
3878 "unexpected ret == 0, m_len = %u",
3879 map->m_len);
3880 err = -EFSCORRUPTED;
3881 goto out2;
3882 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003883 map->m_flags |= EXT4_MAP_UNWRITTEN;
3884 goto out;
3885 }
3886 /* IO end_io complete, convert the filled extent to written */
3887 if (flags & EXT4_GET_BLOCKS_CONVERT) {
Olivier Deprez157378f2022-04-04 15:47:50 +02003888 err = ext4_convert_unwritten_extents_endio(handle, inode, map,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003889 ppath);
Olivier Deprez157378f2022-04-04 15:47:50 +02003890 if (err < 0)
3891 goto out2;
3892 ext4_update_inode_fsync_trans(handle, inode, 1);
3893 goto map_out;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003894 }
Olivier Deprez157378f2022-04-04 15:47:50 +02003895 /* buffered IO cases */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003896 /*
3897 * repeat fallocate creation request
3898 * we already have an unwritten extent
3899 */
3900 if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
3901 map->m_flags |= EXT4_MAP_UNWRITTEN;
3902 goto map_out;
3903 }
3904
3905 /* buffered READ or buffered write_begin() lookup */
3906 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3907 /*
3908 * We have blocks reserved already. We
3909 * return allocated blocks so that delalloc
3910 * won't do block reservation for us. But
3911 * the buffer head will be unmapped so that
3912 * a read from the block returns 0s.
3913 */
3914 map->m_flags |= EXT4_MAP_UNWRITTEN;
3915 goto out1;
3916 }
3917
Olivier Deprez157378f2022-04-04 15:47:50 +02003918 /*
3919 * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1.
3920 * For buffered writes, at writepage time, etc. Convert a
3921 * discovered unwritten extent to written.
3922 */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003923 ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
Olivier Deprez157378f2022-04-04 15:47:50 +02003924 if (ret < 0) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003925 err = ret;
3926 goto out2;
Olivier Deprez157378f2022-04-04 15:47:50 +02003927 }
3928 ext4_update_inode_fsync_trans(handle, inode, 1);
3929 /*
3930 * shouldn't get a 0 return when converting an unwritten extent
3931 * unless m_len is 0 (bug) or extent has been corrupted
3932 */
3933 if (unlikely(ret == 0)) {
3934 EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u",
3935 map->m_len);
3936 err = -EFSCORRUPTED;
3937 goto out2;
3938 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003939
Olivier Deprez157378f2022-04-04 15:47:50 +02003940out:
3941 allocated = ret;
3942 map->m_flags |= EXT4_MAP_NEW;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003943map_out:
3944 map->m_flags |= EXT4_MAP_MAPPED;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003945out1:
Olivier Deprez157378f2022-04-04 15:47:50 +02003946 map->m_pblk = newblock;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003947 if (allocated > map->m_len)
3948 allocated = map->m_len;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003949 map->m_len = allocated;
Olivier Deprez157378f2022-04-04 15:47:50 +02003950 ext4_ext_show_leaf(inode, path);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003951out2:
3952 return err ? err : allocated;
3953}
3954
3955/*
3956 * get_implied_cluster_alloc - check to see if the requested
3957 * allocation (in the map structure) overlaps with a cluster already
3958 * allocated in an extent.
3959 * @sb The filesystem superblock structure
3960 * @map The requested lblk->pblk mapping
3961 * @ex The extent structure which might contain an implied
3962 * cluster allocation
3963 *
3964 * This function is called by ext4_ext_map_blocks() after we failed to
3965 * find blocks that were already in the inode's extent tree. Hence,
3966 * we know that the beginning of the requested region cannot overlap
3967 * the extent from the inode's extent tree. There are three cases we
3968 * want to catch. The first is this case:
3969 *
3970 * |--- cluster # N--|
3971 * |--- extent ---| |---- requested region ---|
3972 * |==========|
3973 *
3974 * The second case that we need to test for is this one:
3975 *
3976 * |--------- cluster # N ----------------|
3977 * |--- requested region --| |------- extent ----|
3978 * |=======================|
3979 *
3980 * The third case is when the requested region lies between two extents
3981 * within the same cluster:
3982 * |------------- cluster # N-------------|
3983 * |----- ex -----| |---- ex_right ----|
3984 * |------ requested region ------|
3985 * |================|
3986 *
3987 * In each of the above cases, we need to set map->m_pblk and
3988 * map->m_len so that they correspond to the extent labelled as
3989 * "|====|" from cluster #N, since it is already in use for data in
3990 * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
3991 * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
3992 * as a new "allocated" block region. Otherwise, we will return 0 and
3993 * ext4_ext_map_blocks() will then allocate one or more new clusters
3994 * by calling ext4_mb_new_blocks().
3995 */
3996static int get_implied_cluster_alloc(struct super_block *sb,
3997 struct ext4_map_blocks *map,
3998 struct ext4_extent *ex,
3999 struct ext4_ext_path *path)
4000{
4001 struct ext4_sb_info *sbi = EXT4_SB(sb);
4002 ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4003 ext4_lblk_t ex_cluster_start, ex_cluster_end;
4004 ext4_lblk_t rr_cluster_start;
4005 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
4006 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4007 unsigned short ee_len = ext4_ext_get_actual_len(ex);
4008
4009 /* The extent passed in that we are trying to match */
4010 ex_cluster_start = EXT4_B2C(sbi, ee_block);
4011 ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
4012
4013 /* The requested region passed into ext4_map_blocks() */
4014 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
4015
4016 if ((rr_cluster_start == ex_cluster_end) ||
4017 (rr_cluster_start == ex_cluster_start)) {
4018 if (rr_cluster_start == ex_cluster_end)
4019 ee_start += ee_len - 1;
4020 map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
4021 map->m_len = min(map->m_len,
4022 (unsigned) sbi->s_cluster_ratio - c_offset);
4023 /*
4024 * Check for and handle this case:
4025 *
4026 * |--------- cluster # N-------------|
4027 * |------- extent ----|
4028 * |--- requested region ---|
4029 * |===========|
4030 */
4031
4032 if (map->m_lblk < ee_block)
4033 map->m_len = min(map->m_len, ee_block - map->m_lblk);
4034
4035 /*
4036 * Check for the case where there is already another allocated
4037 * block to the right of 'ex' but before the end of the cluster.
4038 *
4039 * |------------- cluster # N-------------|
4040 * |----- ex -----| |---- ex_right ----|
4041 * |------ requested region ------|
4042 * |================|
4043 */
4044 if (map->m_lblk > ee_block) {
4045 ext4_lblk_t next = ext4_ext_next_allocated_block(path);
4046 map->m_len = min(map->m_len, next - map->m_lblk);
4047 }
4048
4049 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
4050 return 1;
4051 }
4052
4053 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
4054 return 0;
4055}
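
/*
 * Illustrative sketch (userspace, not kernel code) of the bigalloc cluster
 * arithmetic used above, assuming the cluster ratio is a power of two as it
 * is in ext4: EXT4_B2C() is a right shift by the cluster bits,
 * EXT4_LBLK_COFF() masks off the offset within a cluster, and
 * EXT4_PBLK_CMASK() rounds a block down to its cluster boundary. The helper
 * names below are stand-ins, not the real macros.
 */
#include <stdio.h>

#define CLUSTER_BITS	4			/* assumed: 16 blocks/cluster */
#define CLUSTER_RATIO	(1U << CLUSTER_BITS)

static unsigned int b2c(unsigned int block)		/* like EXT4_B2C() */
{
	return block >> CLUSTER_BITS;
}

static unsigned int lblk_coff(unsigned int block)	/* like EXT4_LBLK_COFF() */
{
	return block & (CLUSTER_RATIO - 1);
}

static unsigned int pblk_cmask(unsigned int block)	/* like EXT4_PBLK_CMASK() */
{
	return block & ~(CLUSTER_RATIO - 1);
}

int main(void)
{
	unsigned int lblk = 37;

	printf("block %u: cluster %u, offset %u, cluster start %u\n",
	       lblk, b2c(lblk), lblk_coff(lblk), pblk_cmask(lblk));
	return 0;
}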
4056
4057
4058/*
4059 * Block allocation/map/preallocation routine for extents based files
4060 *
4061 *
4062 * Need to be called with
4063 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
4064 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
4065 *
Olivier Deprez157378f2022-04-04 15:47:50 +02004066 * return > 0, number of blocks already mapped/allocated
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004067 * if create == 0 and these are pre-allocated blocks
4068 * buffer head is unmapped
4069 * otherwise blocks are mapped
4070 *
4071 * return = 0, if plain look up failed (blocks have not been allocated)
4072 * buffer head is unmapped
4073 *
4074 * return < 0, error case.
4075 */
4076int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4077 struct ext4_map_blocks *map, int flags)
4078{
4079 struct ext4_ext_path *path = NULL;
Olivier Deprez157378f2022-04-04 15:47:50 +02004080 struct ext4_extent newex, *ex, ex2;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004081 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
Olivier Deprez157378f2022-04-04 15:47:50 +02004082 ext4_fsblk_t newblock = 0, pblk;
4083 int err = 0, depth, ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004084 unsigned int allocated = 0, offset = 0;
4085 unsigned int allocated_clusters = 0;
4086 struct ext4_allocation_request ar;
4087 ext4_lblk_t cluster_offset;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004088
Olivier Deprez157378f2022-04-04 15:47:50 +02004089 ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004090 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
4091
4092 /* find extent for this block */
4093 path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
4094 if (IS_ERR(path)) {
4095 err = PTR_ERR(path);
4096 path = NULL;
Olivier Deprez157378f2022-04-04 15:47:50 +02004097 goto out;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004098 }
4099
4100 depth = ext_depth(inode);
4101
4102 /*
4103 * consistent leaf must not be empty;
4104 * this situation is possible, though, _during_ tree modification;
4105 * this is why assert can't be put in ext4_find_extent()
4106 */
4107 if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
4108 EXT4_ERROR_INODE(inode, "bad extent address "
4109 "lblock: %lu, depth: %d pblock %lld",
4110 (unsigned long) map->m_lblk, depth,
4111 path[depth].p_block);
4112 err = -EFSCORRUPTED;
Olivier Deprez157378f2022-04-04 15:47:50 +02004113 goto out;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004114 }
4115
4116 ex = path[depth].p_ext;
4117 if (ex) {
4118 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
4119 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4120 unsigned short ee_len;
4121
4122
4123 /*
4124 * unwritten extents are treated as holes, except that
4125 * we split out initialized portions during a write.
4126 */
4127 ee_len = ext4_ext_get_actual_len(ex);
4128
4129 trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
4130
4131 /* if found extent covers block, simply return it */
4132 if (in_range(map->m_lblk, ee_block, ee_len)) {
4133 newblock = map->m_lblk - ee_block + ee_start;
4134 /* number of remaining blocks in the extent */
4135 allocated = ee_len - (map->m_lblk - ee_block);
Olivier Deprez157378f2022-04-04 15:47:50 +02004136 ext_debug(inode, "%u fit into %u:%d -> %llu\n",
4137 map->m_lblk, ee_block, ee_len, newblock);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004138
4139 /*
4140 * If the extent is initialized check whether the
4141 * caller wants to convert it to unwritten.
4142 */
4143 if ((!ext4_ext_is_unwritten(ex)) &&
4144 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
Olivier Deprez157378f2022-04-04 15:47:50 +02004145 err = convert_initialized_extent(handle,
4146 inode, map, &path, &allocated);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004147 goto out;
Olivier Deprez157378f2022-04-04 15:47:50 +02004148 } else if (!ext4_ext_is_unwritten(ex)) {
4149 map->m_flags |= EXT4_MAP_MAPPED;
4150 map->m_pblk = newblock;
4151 if (allocated > map->m_len)
4152 allocated = map->m_len;
4153 map->m_len = allocated;
4154 ext4_ext_show_leaf(inode, path);
4155 goto out;
4156 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004157
4158 ret = ext4_ext_handle_unwritten_extents(
4159 handle, inode, map, &path, flags,
4160 allocated, newblock);
4161 if (ret < 0)
4162 err = ret;
4163 else
4164 allocated = ret;
Olivier Deprez157378f2022-04-04 15:47:50 +02004165 goto out;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004166 }
4167 }
4168
4169 /*
4170 * requested block isn't allocated yet;
4171	 * we cannot try to create blocks if the create flag is zero
4172 */
4173 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
4174 ext4_lblk_t hole_start, hole_len;
4175
4176 hole_start = map->m_lblk;
4177 hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
4178 /*
4179 * put just found gap into cache to speed up
4180 * subsequent requests
4181 */
4182 ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
4183
4184 /* Update hole_len to reflect hole size after map->m_lblk */
4185 if (hole_start != map->m_lblk)
4186 hole_len -= map->m_lblk - hole_start;
4187 map->m_pblk = 0;
4188 map->m_len = min_t(unsigned int, map->m_len, hole_len);
4189
Olivier Deprez157378f2022-04-04 15:47:50 +02004190 goto out;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004191 }
4192
4193 /*
4194 * Okay, we need to do block allocation.
4195 */
4196 newex.ee_block = cpu_to_le32(map->m_lblk);
4197 cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4198
4199 /*
4200 * If we are doing bigalloc, check to see if the extent returned
4201 * by ext4_find_extent() implies a cluster we can use.
4202 */
4203 if (cluster_offset && ex &&
4204 get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
4205 ar.len = allocated = map->m_len;
4206 newblock = map->m_pblk;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004207 goto got_allocated_blocks;
4208 }
4209
4210 /* find neighbour allocated blocks */
4211 ar.lleft = map->m_lblk;
4212 err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
4213 if (err)
Olivier Deprez157378f2022-04-04 15:47:50 +02004214 goto out;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004215 ar.lright = map->m_lblk;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004216 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
Olivier Deprez157378f2022-04-04 15:47:50 +02004217 if (err < 0)
4218 goto out;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004219
4220 /* Check if the extent after searching to the right implies a
4221 * cluster we can use. */
Olivier Deprez157378f2022-04-04 15:47:50 +02004222 if ((sbi->s_cluster_ratio > 1) && err &&
4223 get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004224 ar.len = allocated = map->m_len;
4225 newblock = map->m_pblk;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004226 goto got_allocated_blocks;
4227 }
4228
4229 /*
4230 * See if request is beyond maximum number of blocks we can have in
4231 * a single extent. For an initialized extent this limit is
4232 * EXT_INIT_MAX_LEN and for an unwritten extent this limit is
4233 * EXT_UNWRITTEN_MAX_LEN.
4234 */
4235 if (map->m_len > EXT_INIT_MAX_LEN &&
4236 !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4237 map->m_len = EXT_INIT_MAX_LEN;
4238 else if (map->m_len > EXT_UNWRITTEN_MAX_LEN &&
4239 (flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4240 map->m_len = EXT_UNWRITTEN_MAX_LEN;
4241
4242 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
4243 newex.ee_len = cpu_to_le16(map->m_len);
4244 err = ext4_ext_check_overlap(sbi, inode, &newex, path);
4245 if (err)
4246 allocated = ext4_ext_get_actual_len(&newex);
4247 else
4248 allocated = map->m_len;
4249
4250 /* allocate new block */
4251 ar.inode = inode;
4252 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
4253 ar.logical = map->m_lblk;
4254 /*
4255 * We calculate the offset from the beginning of the cluster
4256 * for the logical block number, since when we allocate a
4257 * physical cluster, the physical block should start at the
4258 * same offset from the beginning of the cluster. This is
4259 * needed so that future calls to get_implied_cluster_alloc()
4260 * work correctly.
4261 */
4262 offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4263 ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
4264 ar.goal -= offset;
4265 ar.logical -= offset;
4266 if (S_ISREG(inode->i_mode))
4267 ar.flags = EXT4_MB_HINT_DATA;
4268 else
4269 /* disable in-core preallocation for non-regular files */
4270 ar.flags = 0;
4271 if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
4272 ar.flags |= EXT4_MB_HINT_NOPREALLOC;
4273 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4274 ar.flags |= EXT4_MB_DELALLOC_RESERVED;
4275 if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
4276 ar.flags |= EXT4_MB_USE_RESERVED;
4277 newblock = ext4_mb_new_blocks(handle, &ar, &err);
4278 if (!newblock)
Olivier Deprez157378f2022-04-04 15:47:50 +02004279 goto out;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004280 allocated_clusters = ar.len;
4281 ar.len = EXT4_C2B(sbi, ar.len) - offset;
Olivier Deprez157378f2022-04-04 15:47:50 +02004282 ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n",
4283 ar.goal, newblock, ar.len, allocated);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004284 if (ar.len > allocated)
4285 ar.len = allocated;
4286
4287got_allocated_blocks:
4288 /* try to insert new extent into found leaf and return */
Olivier Deprez157378f2022-04-04 15:47:50 +02004289 pblk = newblock + offset;
4290 ext4_ext_store_pblock(&newex, pblk);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004291 newex.ee_len = cpu_to_le16(ar.len);
4292 /* Mark unwritten */
Olivier Deprez157378f2022-04-04 15:47:50 +02004293 if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004294 ext4_ext_mark_unwritten(&newex);
4295 map->m_flags |= EXT4_MAP_UNWRITTEN;
4296 }
4297
Olivier Deprez157378f2022-04-04 15:47:50 +02004298 err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags);
4299 if (err) {
4300 if (allocated_clusters) {
4301 int fb_flags = 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004302
Olivier Deprez157378f2022-04-04 15:47:50 +02004303 /*
4304 * free data blocks we just allocated.
4305 * not a good idea to call discard here directly,
4306			 * but otherwise we'd need to call it on every free().
4307 */
4308 ext4_discard_preallocations(inode, 0);
4309 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4310 fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
4311 ext4_free_blocks(handle, inode, NULL, newblock,
4312 EXT4_C2B(sbi, allocated_clusters),
4313 fb_flags);
4314 }
4315 goto out;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004316 }
4317
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004318 /*
David Brazdil0f672f62019-12-10 10:32:29 +00004319 * Reduce the reserved cluster count to reflect successful deferred
4320 * allocation of delayed allocated clusters or direct allocation of
4321 * clusters discovered to be delayed allocated. Once allocated, a
4322 * cluster is not included in the reserved count.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004323 */
Olivier Deprez157378f2022-04-04 15:47:50 +02004324 if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) {
David Brazdil0f672f62019-12-10 10:32:29 +00004325 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004326 /*
David Brazdil0f672f62019-12-10 10:32:29 +00004327 * When allocating delayed allocated clusters, simply
4328 * reduce the reserved cluster count and claim quota
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004329 */
4330 ext4_da_update_reserve_space(inode, allocated_clusters,
4331 1);
David Brazdil0f672f62019-12-10 10:32:29 +00004332 } else {
4333 ext4_lblk_t lblk, len;
4334 unsigned int n;
4335
4336 /*
4337 * When allocating non-delayed allocated clusters
4338 * (from fallocate, filemap, DIO, or clusters
4339 * allocated when delalloc has been disabled by
4340 * ext4_nonda_switch), reduce the reserved cluster
4341 * count by the number of allocated clusters that
4342 * have previously been delayed allocated. Quota
4343 * has been claimed by ext4_mb_new_blocks() above,
4344 * so release the quota reservations made for any
4345 * previously delayed allocated clusters.
4346 */
4347 lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
4348 len = allocated_clusters << sbi->s_cluster_bits;
4349 n = ext4_es_delayed_clu(inode, lblk, len);
4350 if (n > 0)
4351 ext4_da_update_reserve_space(inode, (int) n, 0);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004352 }
4353 }
4354
4355 /*
4356 * Cache the extent and update transaction to commit on fdatasync only
4357 * when it is _not_ an unwritten extent.
4358 */
4359 if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)
4360 ext4_update_inode_fsync_trans(handle, inode, 1);
4361 else
4362 ext4_update_inode_fsync_trans(handle, inode, 0);
Olivier Deprez157378f2022-04-04 15:47:50 +02004363
4364 map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED);
4365 map->m_pblk = pblk;
4366 map->m_len = ar.len;
4367 allocated = map->m_len;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004368 ext4_ext_show_leaf(inode, path);
Olivier Deprez157378f2022-04-04 15:47:50 +02004369out:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004370 ext4_ext_drop_refs(path);
4371 kfree(path);
4372
4373 trace_ext4_ext_map_blocks_exit(inode, flags, map,
4374 err ? err : allocated);
4375 return err ? err : allocated;
4376}
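
/*
 * Hedged userspace sketch: one way to observe the mapped/unwritten state
 * that ext4_ext_map_blocks() maintains is the FIEMAP ioctl, which reports
 * extents and their flags (FIEMAP_EXTENT_UNWRITTEN marks fallocated but
 * unwritten ranges). Error handling is minimal and the default file name
 * is a placeholder.
 */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "testfile";	/* placeholder */
	struct fiemap *fm;
	unsigned int i;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0) { perror("open"); return 1; }

	/* Room for up to 32 extent records after the fixed header. */
	fm = calloc(1, sizeof(struct fiemap) +
		       32 * sizeof(struct fiemap_extent));
	if (!fm) { close(fd); return 1; }

	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* whole file */
	fm->fm_flags = FIEMAP_FLAG_SYNC;	/* flush delalloc first */
	fm->fm_extent_count = 32;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}

	for (i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];

		printf("logical %llu physical %llu len %llu%s\n",
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)fe->fe_physical,
		       (unsigned long long)fe->fe_length,
		       (fe->fe_flags & FIEMAP_EXTENT_UNWRITTEN) ?
				" [unwritten]" : "");
	}
	free(fm);
	close(fd);
	return 0;
}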
4377
4378int ext4_ext_truncate(handle_t *handle, struct inode *inode)
4379{
4380 struct super_block *sb = inode->i_sb;
4381 ext4_lblk_t last_block;
4382 int err = 0;
4383
4384 /*
4385 * TODO: optimization is possible here.
4386 * Probably we need not scan at all,
4387 * because page truncation is enough.
4388 */
4389
4390 /* we have to know where to truncate from in crash case */
4391 EXT4_I(inode)->i_disksize = inode->i_size;
4392 err = ext4_mark_inode_dirty(handle, inode);
4393 if (err)
4394 return err;
4395
4396 last_block = (inode->i_size + sb->s_blocksize - 1)
4397 >> EXT4_BLOCK_SIZE_BITS(sb);
4398retry:
4399 err = ext4_es_remove_extent(inode, last_block,
4400 EXT_MAX_BLOCKS - last_block);
4401 if (err == -ENOMEM) {
4402 cond_resched();
4403 congestion_wait(BLK_RW_ASYNC, HZ/50);
4404 goto retry;
4405 }
4406 if (err)
4407 return err;
Olivier Deprez157378f2022-04-04 15:47:50 +02004408retry_remove_space:
4409 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4410 if (err == -ENOMEM) {
4411 cond_resched();
4412 congestion_wait(BLK_RW_ASYNC, HZ/50);
4413 goto retry_remove_space;
4414 }
4415 return err;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004416}
4417
4418static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4419 ext4_lblk_t len, loff_t new_size,
4420 int flags)
4421{
4422 struct inode *inode = file_inode(file);
4423 handle_t *handle;
4424 int ret = 0;
Olivier Deprez157378f2022-04-04 15:47:50 +02004425 int ret2 = 0, ret3 = 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004426 int retries = 0;
4427 int depth = 0;
4428 struct ext4_map_blocks map;
4429 unsigned int credits;
4430 loff_t epos;
4431
4432 BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
4433 map.m_lblk = offset;
4434 map.m_len = len;
4435 /*
4436 * Don't normalize the request if it can fit in one extent so
4437 * that it doesn't get unnecessarily split into multiple
4438 * extents.
4439 */
4440 if (len <= EXT_UNWRITTEN_MAX_LEN)
4441 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4442
4443 /*
4444 * credits to insert 1 extent into extent tree
4445 */
4446 credits = ext4_chunk_trans_blocks(inode, len);
4447 depth = ext_depth(inode);
4448
4449retry:
4450 while (ret >= 0 && len) {
4451 /*
4452 * Recalculate credits when extent tree depth changes.
4453 */
4454 if (depth != ext_depth(inode)) {
4455 credits = ext4_chunk_trans_blocks(inode, len);
4456 depth = ext_depth(inode);
4457 }
4458
4459 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4460 credits);
4461 if (IS_ERR(handle)) {
4462 ret = PTR_ERR(handle);
4463 break;
4464 }
4465 ret = ext4_map_blocks(handle, inode, &map, flags);
4466 if (ret <= 0) {
4467 ext4_debug("inode #%lu: block %u: len %u: "
4468 "ext4_ext_map_blocks returned %d",
4469 inode->i_ino, map.m_lblk,
4470 map.m_len, ret);
4471 ext4_mark_inode_dirty(handle, inode);
4472 ret2 = ext4_journal_stop(handle);
4473 break;
4474 }
4475 map.m_lblk += ret;
4476 map.m_len = len = len - ret;
4477 epos = (loff_t)map.m_lblk << inode->i_blkbits;
4478 inode->i_ctime = current_time(inode);
4479 if (new_size) {
4480 if (epos > new_size)
4481 epos = new_size;
4482 if (ext4_update_inode_size(inode, epos) & 0x1)
4483 inode->i_mtime = inode->i_ctime;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004484 }
Olivier Deprez157378f2022-04-04 15:47:50 +02004485 ret2 = ext4_mark_inode_dirty(handle, inode);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004486 ext4_update_inode_fsync_trans(handle, inode, 1);
Olivier Deprez157378f2022-04-04 15:47:50 +02004487 ret3 = ext4_journal_stop(handle);
4488 ret2 = ret3 ? ret3 : ret2;
4489 if (unlikely(ret2))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004490 break;
4491 }
4492 if (ret == -ENOSPC &&
4493 ext4_should_retry_alloc(inode->i_sb, &retries)) {
4494 ret = 0;
4495 goto retry;
4496 }
4497
4498 return ret > 0 ? ret2 : ret;
4499}
4500
Olivier Deprez157378f2022-04-04 15:47:50 +02004501static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
4502
4503static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
4504
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004505static long ext4_zero_range(struct file *file, loff_t offset,
4506 loff_t len, int mode)
4507{
4508 struct inode *inode = file_inode(file);
4509 handle_t *handle = NULL;
4510 unsigned int max_blocks;
4511 loff_t new_size = 0;
4512 int ret = 0;
4513 int flags;
4514 int credits;
4515 int partial_begin, partial_end;
4516 loff_t start, end;
4517 ext4_lblk_t lblk;
4518 unsigned int blkbits = inode->i_blkbits;
4519
4520 trace_ext4_zero_range(inode, offset, len, mode);
4521
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004522 /* Call ext4_force_commit to flush all data in case of data=journal. */
4523 if (ext4_should_journal_data(inode)) {
4524 ret = ext4_force_commit(inode->i_sb);
4525 if (ret)
4526 return ret;
4527 }
4528
4529 /*
Olivier Deprez157378f2022-04-04 15:47:50 +02004530	 * Round up offset. This is not fallocate; we need to zero out
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004531	 * blocks, so convert the interior block-aligned part of the range to
4532	 * unwritten and possibly manually zero out the unaligned parts of the
4533	 * range.
4534 */
4535 start = round_up(offset, 1 << blkbits);
4536 end = round_down((offset + len), 1 << blkbits);
4537
4538 if (start < offset || end > offset + len)
4539 return -EINVAL;
4540 partial_begin = offset & ((1 << blkbits) - 1);
4541 partial_end = (offset + len) & ((1 << blkbits) - 1);
4542
4543 lblk = start >> blkbits;
4544 max_blocks = (end >> blkbits);
4545 if (max_blocks < lblk)
4546 max_blocks = 0;
4547 else
4548 max_blocks -= lblk;
4549
4550 inode_lock(inode);
4551
4552 /*
Olivier Deprez157378f2022-04-04 15:47:50 +02004553 * Indirect files do not support unwritten extents
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004554 */
4555 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4556 ret = -EOPNOTSUPP;
4557 goto out_mutex;
4558 }
4559
4560 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
Olivier Deprez157378f2022-04-04 15:47:50 +02004561 (offset + len > inode->i_size ||
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004562 offset + len > EXT4_I(inode)->i_disksize)) {
4563 new_size = offset + len;
4564 ret = inode_newsize_ok(inode, new_size);
4565 if (ret)
4566 goto out_mutex;
4567 }
4568
4569 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004570
4571 /* Wait all existing dio workers, newcomers will block on i_mutex */
4572 inode_dio_wait(inode);
4573
4574 /* Preallocate the range including the unaligned edges */
4575 if (partial_begin || partial_end) {
4576 ret = ext4_alloc_file_blocks(file,
4577 round_down(offset, 1 << blkbits) >> blkbits,
4578 (round_up((offset + len), 1 << blkbits) -
4579 round_down(offset, 1 << blkbits)) >> blkbits,
4580 new_size, flags);
4581 if (ret)
4582 goto out_mutex;
4583
4584 }
4585
4586 /* Zero range excluding the unaligned edges */
4587 if (max_blocks > 0) {
4588 flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4589 EXT4_EX_NOCACHE);
4590
4591 /*
4592 * Prevent page faults from reinstantiating pages we have
4593 * released from page cache.
4594 */
4595 down_write(&EXT4_I(inode)->i_mmap_sem);
4596
4597 ret = ext4_break_layouts(inode);
4598 if (ret) {
4599 up_write(&EXT4_I(inode)->i_mmap_sem);
4600 goto out_mutex;
4601 }
4602
4603 ret = ext4_update_disksize_before_punch(inode, offset, len);
4604 if (ret) {
4605 up_write(&EXT4_I(inode)->i_mmap_sem);
4606 goto out_mutex;
4607 }
4608 /* Now release the pages and zero block aligned part of pages */
4609 truncate_pagecache_range(inode, start, end - 1);
4610 inode->i_mtime = inode->i_ctime = current_time(inode);
4611
4612 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4613 flags);
4614 up_write(&EXT4_I(inode)->i_mmap_sem);
4615 if (ret)
4616 goto out_mutex;
4617 }
4618 if (!partial_begin && !partial_end)
4619 goto out_mutex;
4620
4621 /*
4622	 * In the worst case we have to write out two nonadjacent unwritten
4623	 * blocks and update the inode.
4624 */
4625 credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
4626 if (ext4_should_journal_data(inode))
4627 credits += 2;
4628 handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
4629 if (IS_ERR(handle)) {
4630 ret = PTR_ERR(handle);
4631 ext4_std_error(inode->i_sb, ret);
4632 goto out_mutex;
4633 }
4634
4635 inode->i_mtime = inode->i_ctime = current_time(inode);
Olivier Deprez157378f2022-04-04 15:47:50 +02004636 if (new_size)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004637 ext4_update_inode_size(inode, new_size);
Olivier Deprez157378f2022-04-04 15:47:50 +02004638 ret = ext4_mark_inode_dirty(handle, inode);
4639 if (unlikely(ret))
4640 goto out_handle;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004641 /* Zero out partial block at the edges of the range */
4642 ret = ext4_zero_partial_blocks(handle, inode, offset, len);
4643 if (ret >= 0)
4644 ext4_update_inode_fsync_trans(handle, inode, 1);
4645
4646 if (file->f_flags & O_SYNC)
4647 ext4_handle_sync(handle);
4648
Olivier Deprez157378f2022-04-04 15:47:50 +02004649out_handle:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004650 ext4_journal_stop(handle);
4651out_mutex:
4652 inode_unlock(inode);
4653 return ret;
4654}
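
/*
 * Minimal sketch (userspace, not kernel code) of the edge arithmetic in
 * ext4_zero_range() above: round the start up and the end down to block
 * boundaries; whatever remains at either edge (partial_begin/partial_end)
 * must be zeroed through the page cache instead of being converted to
 * unwritten blocks. The offset/length values are made up for illustration.
 */
#include <stdio.h>

int main(void)
{
	unsigned int blkbits = 12;		/* assumed 4 KiB block size */
	unsigned long long blksz = 1ULL << blkbits;
	unsigned long long offset = 5000, len = 20000;	/* example range */

	unsigned long long start = (offset + blksz - 1) & ~(blksz - 1);
	unsigned long long end = (offset + len) & ~(blksz - 1);
	unsigned long long partial_begin = offset & (blksz - 1);
	unsigned long long partial_end = (offset + len) & (blksz - 1);

	printf("aligned interior: [%llu, %llu)\n", start, end);
	printf("partial_begin = %llu, partial_end = %llu\n",
	       partial_begin, partial_end);
	return 0;
}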
4655
4656/*
4657 * preallocate space for a file. This implements ext4's fallocate file
4658 * operation, which gets called from sys_fallocate system call.
4659 * For block-mapped files, posix_fallocate should fall back to the method
4660 * of writing zeroes to the required new blocks (the same behavior which is
4661 * expected for file systems which do not support fallocate() system call).
4662 */
4663long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4664{
4665 struct inode *inode = file_inode(file);
4666 loff_t new_size = 0;
4667 unsigned int max_blocks;
4668 int ret = 0;
4669 int flags;
4670 ext4_lblk_t lblk;
4671 unsigned int blkbits = inode->i_blkbits;
4672
4673 /*
4674 * Encrypted inodes can't handle collapse range or insert
4675 * range since we would need to re-encrypt blocks with a
4676 * different IV or XTS tweak (which are based on the logical
4677 * block number).
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004678 */
David Brazdil0f672f62019-12-10 10:32:29 +00004679 if (IS_ENCRYPTED(inode) &&
Olivier Deprez157378f2022-04-04 15:47:50 +02004680 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004681 return -EOPNOTSUPP;
4682
4683 /* Return error if mode is not supported */
4684 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4685 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
4686 FALLOC_FL_INSERT_RANGE))
4687 return -EOPNOTSUPP;
4688
Olivier Deprez157378f2022-04-04 15:47:50 +02004689 ext4_fc_start_update(inode);
4690
4691 if (mode & FALLOC_FL_PUNCH_HOLE) {
4692 ret = ext4_punch_hole(inode, offset, len);
4693 goto exit;
4694 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004695
4696 ret = ext4_convert_inline_data(inode);
4697 if (ret)
Olivier Deprez157378f2022-04-04 15:47:50 +02004698 goto exit;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004699
Olivier Deprez157378f2022-04-04 15:47:50 +02004700 if (mode & FALLOC_FL_COLLAPSE_RANGE) {
4701 ret = ext4_collapse_range(inode, offset, len);
4702 goto exit;
4703 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004704
Olivier Deprez157378f2022-04-04 15:47:50 +02004705 if (mode & FALLOC_FL_INSERT_RANGE) {
4706 ret = ext4_insert_range(inode, offset, len);
4707 goto exit;
4708 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004709
Olivier Deprez157378f2022-04-04 15:47:50 +02004710 if (mode & FALLOC_FL_ZERO_RANGE) {
4711 ret = ext4_zero_range(file, offset, len, mode);
4712 goto exit;
4713 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004714 trace_ext4_fallocate_enter(inode, offset, len, mode);
4715 lblk = offset >> blkbits;
4716
4717 max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4718 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004719
4720 inode_lock(inode);
4721
4722 /*
4723	 * We only support preallocation for extent-based files
4724 */
4725 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4726 ret = -EOPNOTSUPP;
4727 goto out;
4728 }
4729
4730 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
Olivier Deprez157378f2022-04-04 15:47:50 +02004731 (offset + len > inode->i_size ||
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004732 offset + len > EXT4_I(inode)->i_disksize)) {
4733 new_size = offset + len;
4734 ret = inode_newsize_ok(inode, new_size);
4735 if (ret)
4736 goto out;
4737 }
4738
4739 /* Wait all existing dio workers, newcomers will block on i_mutex */
4740 inode_dio_wait(inode);
4741
4742 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
4743 if (ret)
4744 goto out;
4745
4746 if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
Olivier Deprez157378f2022-04-04 15:47:50 +02004747 ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
4748 EXT4_I(inode)->i_sync_tid);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004749 }
4750out:
4751 inode_unlock(inode);
4752 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
Olivier Deprez157378f2022-04-04 15:47:50 +02004753exit:
4754 ext4_fc_stop_update(inode);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004755 return ret;
4756}
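
/*
 * Illustrative userspace sketch of the fallocate(2) modes dispatched above;
 * the file name is a placeholder. Note that FALLOC_FL_PUNCH_HOLE must be
 * paired with FALLOC_FL_KEEP_SIZE, matching the mode checks in
 * ext4_fallocate().
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("demo.dat", O_RDWR | O_CREAT, 0644);	/* placeholder */

	if (fd < 0) { perror("open"); return 1; }

	/* Preallocate 1 MiB of unwritten extents without growing i_size. */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
		perror("fallocate(KEEP_SIZE)");

	/* Zero a sub-range; the interior blocks become unwritten extents. */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 4096, 8192) < 0)
		perror("fallocate(ZERO_RANGE)");

	/* Punch a hole; PUNCH_HOLE requires KEEP_SIZE. */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      16384, 4096) < 0)
		perror("fallocate(PUNCH_HOLE)");

	close(fd);
	return 0;
}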
4757
4758/*
4759 * This function converts a range of blocks to written extents.
4760 * The caller of this function will pass the start offset and the size.
4761 * All unwritten extents within this range will be converted to
4762 * written extents.
4763 *
4764 * This function is called from the direct IO end io callback
4765 * function, to convert the fallocated extents after IO is completed.
4766 * Returns 0 on success.
4767 */
4768int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
4769 loff_t offset, ssize_t len)
4770{
4771 unsigned int max_blocks;
Olivier Deprez157378f2022-04-04 15:47:50 +02004772 int ret = 0, ret2 = 0, ret3 = 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004773 struct ext4_map_blocks map;
Olivier Deprez157378f2022-04-04 15:47:50 +02004774 unsigned int blkbits = inode->i_blkbits;
4775 unsigned int credits = 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004776
4777 map.m_lblk = offset >> blkbits;
4778 max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4779
Olivier Deprez157378f2022-04-04 15:47:50 +02004780 if (!handle) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004781 /*
4782 * credits to insert 1 extent into extent tree
4783 */
4784 credits = ext4_chunk_trans_blocks(inode, max_blocks);
4785 }
4786 while (ret >= 0 && ret < max_blocks) {
4787 map.m_lblk += ret;
4788 map.m_len = (max_blocks -= ret);
4789 if (credits) {
4790 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4791 credits);
4792 if (IS_ERR(handle)) {
4793 ret = PTR_ERR(handle);
4794 break;
4795 }
4796 }
4797 ret = ext4_map_blocks(handle, inode, &map,
4798 EXT4_GET_BLOCKS_IO_CONVERT_EXT);
4799 if (ret <= 0)
4800 ext4_warning(inode->i_sb,
4801 "inode #%lu: block %u: len %u: "
4802 "ext4_ext_map_blocks returned %d",
4803 inode->i_ino, map.m_lblk,
4804 map.m_len, ret);
Olivier Deprez157378f2022-04-04 15:47:50 +02004805 ret2 = ext4_mark_inode_dirty(handle, inode);
4806 if (credits) {
4807 ret3 = ext4_journal_stop(handle);
4808 if (unlikely(ret3))
4809 ret2 = ret3;
4810 }
4811
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004812 if (ret <= 0 || ret2)
4813 break;
4814 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004815 return ret > 0 ? ret2 : ret;
4816}
4817
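/*
 * Convert the unwritten extents described by each io_end_vec on @io_end's
 * list to written extents. Returns 0 on success or a negative error code.
 */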
int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
{
	int ret = 0, err = 0;
	struct ext4_io_end_vec *io_end_vec;

	/*
	 * This is somewhat ugly but the idea is clear: when the transaction
	 * is reserved, everything goes into it. Otherwise we'd rather start
	 * several smaller transactions, converting each extent separately.
	 */
	if (handle) {
		handle = ext4_journal_start_reserved(handle,
						     EXT4_HT_EXT_CONVERT);
		if (IS_ERR(handle))
			return PTR_ERR(handle);
	}

	list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
		ret = ext4_convert_unwritten_extents(handle, io_end->inode,
						     io_end_vec->offset,
						     io_end_vec->size);
		if (ret)
			break;
	}

	if (handle)
		err = ext4_journal_stop(handle);

	return ret < 0 ? ret : err;
}

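/*
 * Describe the location of an inode's extended attribute data as an iomap:
 * either the tail of the inode itself (in-inode xattrs, IOMAP_INLINE) or
 * the external xattr block at i_file_acl (IOMAP_MAPPED). Returns -ENOENT
 * when the inode has no xattr storage at all.
 */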
static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap)
{
	__u64 physical = 0;
	__u64 length = 0;
	int blockbits = inode->i_sb->s_blocksize_bits;
	int error = 0;
	u16 iomap_type;

	/* in-inode? */
	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
		struct ext4_iloc iloc;
		int offset;	/* offset of xattr in inode */

		error = ext4_get_inode_loc(inode, &iloc);
		if (error)
			return error;
		physical = (__u64)iloc.bh->b_blocknr << blockbits;
		offset = EXT4_GOOD_OLD_INODE_SIZE +
				EXT4_I(inode)->i_extra_isize;
		physical += offset;
		length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
		brelse(iloc.bh);
		iomap_type = IOMAP_INLINE;
	} else if (EXT4_I(inode)->i_file_acl) { /* external block */
		physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
		length = inode->i_sb->s_blocksize;
		iomap_type = IOMAP_MAPPED;
	} else {
		/* no in-inode or external block for xattr, so return -ENOENT */
		error = -ENOENT;
		goto out;
	}

	iomap->addr = physical;
	iomap->offset = 0;
	iomap->length = length;
	iomap->type = iomap_type;
	iomap->flags = 0;
out:
	return error;
}

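/*
 * iomap_begin callback that reports the xattr mapping produced by
 * ext4_iomap_xattr_fiemap(). An offset at or beyond the end of the
 * mapping is reported as -ENOENT.
 */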
static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset,
				  loff_t length, unsigned flags,
				  struct iomap *iomap, struct iomap *srcmap)
{
	int error;

	error = ext4_iomap_xattr_fiemap(inode, iomap);
	if (error == 0 && (offset >= iomap->length))
		error = -ENOENT;
	return error;
}

static const struct iomap_ops ext4_iomap_xattr_ops = {
	.iomap_begin		= ext4_iomap_xattr_begin,
};

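/*
 * Validate a fiemap request against the inode's maximum file size and clamp
 * @len so that start + len does not extend past it. Bitmap-mapped
 * (non-extent) files have a smaller limit than s_maxbytes.
 */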
static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len)
{
	u64 maxbytes;

	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		maxbytes = inode->i_sb->s_maxbytes;
	else
		maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;

	if (*len == 0)
		return -EINVAL;
	if (start > maxbytes)
		return -EFBIG;

	/*
	 * Shrink request scope to what the fs can actually handle.
	 */
	if (*len > maxbytes || (maxbytes - *len) < start)
		*len = maxbytes - start;
	return 0;
}

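/*
 * Report the inode's extent mappings (or, with FIEMAP_FLAG_XATTR, the
 * location of its xattr storage) through the iomap fiemap helpers.
 */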
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	int error = 0;

	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		error = ext4_ext_precache(inode);
		if (error)
			return error;
		fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
	}

	/*
	 * For bitmap files the maximum size limit could be smaller than
	 * s_maxbytes, so check len here manually instead of just relying on the
	 * generic check.
	 */
	error = ext4_fiemap_check_ranges(inode, start, &len);
	if (error)
		return error;

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
		return iomap_fiemap(inode, fieinfo, start, len,
				    &ext4_iomap_xattr_ops);
	}

	return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops);
}

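/*
 * Fill @fieinfo from the inode's extent status cache rather than the
 * on-disk extent tree. Inline-data inodes are skipped, and the requested
 * range is clamped the same way as for a regular fiemap call.
 */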
int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
		      __u64 start, __u64 len)
{
	ext4_lblk_t start_blk, len_blks;
	__u64 last_blk;
	int error = 0;

	if (ext4_has_inline_data(inode)) {
		int has_inline;

		down_read(&EXT4_I(inode)->xattr_sem);
		has_inline = ext4_has_inline_data(inode);
		up_read(&EXT4_I(inode)->xattr_sem);
		if (has_inline)
			return 0;
	}

	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		error = ext4_ext_precache(inode);
		if (error)
			return error;
		fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
	}

	error = fiemap_prep(inode, fieinfo, start, &len, 0);
	if (error)
		return error;

	error = ext4_fiemap_check_ranges(inode, start, &len);
	if (error)
		return error;

	start_blk = start >> inode->i_sb->s_blocksize_bits;
	last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
	if (last_blk >= EXT_MAX_BLOCKS)
		last_blk = EXT_MAX_BLOCKS-1;
	len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;

	/*
	 * Walk the extent tree gathering extent information
	 * and pushing extents back to the user.
	 */
	return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
}

/*
 * ext4_ext_shift_path_extents:
 * Shift the extents of a path structure lying between path[depth].p_ext
 * and EXT_LAST_EXTENT(path[depth].p_hdr) by @shift blocks. @SHIFT tells
 * whether this is a right or a left shift operation.
 */
static int
ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
			    struct inode *inode, handle_t *handle,
			    enum SHIFT_DIRECTION SHIFT)
{
	int depth, err = 0;
	struct ext4_extent *ex_start, *ex_last;
	bool update = false;
	int credits, restart_credits;

	depth = path->p_depth;

	while (depth >= 0) {
		if (depth == path->p_depth) {
			ex_start = path[depth].p_ext;
			if (!ex_start)
				return -EFSCORRUPTED;

			ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
			/* leaf + sb + inode */
			credits = 3;
			if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
				update = true;
				/* extent tree + sb + inode */
				credits = depth + 2;
			}

			restart_credits = ext4_writepage_trans_blocks(inode);
			err = ext4_datasem_ensure_credits(handle, inode, credits,
					restart_credits, 0);
			if (err) {
				if (err > 0)
					err = -EAGAIN;
				goto out;
			}

			err = ext4_ext_get_access(handle, inode, path + depth);
			if (err)
				goto out;

			while (ex_start <= ex_last) {
				if (SHIFT == SHIFT_LEFT) {
					le32_add_cpu(&ex_start->ee_block,
						-shift);
					/* Try to merge to the left. */
					if ((ex_start >
					    EXT_FIRST_EXTENT(path[depth].p_hdr))
					    &&
					    ext4_ext_try_to_merge_right(inode,
						path, ex_start - 1))
						ex_last--;
					else
						ex_start++;
				} else {
					le32_add_cpu(&ex_last->ee_block, shift);
					ext4_ext_try_to_merge_right(inode, path,
						ex_last);
					ex_last--;
				}
			}
			err = ext4_ext_dirty(handle, inode, path + depth);
			if (err)
				goto out;

			if (--depth < 0 || !update)
				break;
		}

		/* Update index too */
		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			goto out;

		if (SHIFT == SHIFT_LEFT)
			le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
		else
			le32_add_cpu(&path[depth].p_idx->ei_block, shift);
		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
			goto out;

		/* we are done if current index is not a starting index */
		if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
			break;

		depth--;
	}

out:
	return err;
}

/*
 * ext4_ext_shift_extents:
 * All the extents which lie in the range from @start to the last allocated
 * block for the @inode are shifted either towards left or right (depending
 * upon @SHIFT) by @shift blocks.
 * On success, 0 is returned, error otherwise.
 */
static int
ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
		       ext4_lblk_t start, ext4_lblk_t shift,
		       enum SHIFT_DIRECTION SHIFT)
{
	struct ext4_ext_path *path;
	int ret = 0, depth;
	struct ext4_extent *extent;
	ext4_lblk_t stop, *iterator, ex_start, ex_end;
	ext4_lblk_t tmp = EXT_MAX_BLOCKS;

	/* Let path point to the last extent */
	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
				EXT4_EX_NOCACHE);
	if (IS_ERR(path))
		return PTR_ERR(path);

	depth = path->p_depth;
	extent = path[depth].p_ext;
	if (!extent)
		goto out;

	stop = le32_to_cpu(extent->ee_block);

	/*
	 * For left shifts, make sure the hole on the left is big enough to
	 * accommodate the shift. For right shifts, make sure the last extent
	 * won't be shifted beyond EXT_MAX_BLOCKS.
	 */
	if (SHIFT == SHIFT_LEFT) {
		path = ext4_find_extent(inode, start - 1, &path,
					EXT4_EX_NOCACHE);
		if (IS_ERR(path))
			return PTR_ERR(path);
		depth = path->p_depth;
		extent = path[depth].p_ext;
		if (extent) {
			ex_start = le32_to_cpu(extent->ee_block);
			ex_end = le32_to_cpu(extent->ee_block) +
				ext4_ext_get_actual_len(extent);
		} else {
			ex_start = 0;
			ex_end = 0;
		}

		if ((start == ex_start && shift > ex_start) ||
		    (shift > start - ex_end)) {
			ret = -EINVAL;
			goto out;
		}
	} else {
		if (shift > EXT_MAX_BLOCKS -
		    (stop + ext4_ext_get_actual_len(extent))) {
			ret = -EINVAL;
			goto out;
		}
	}

	/*
	 * In case of left shift, iterator points to start and it is increased
	 * till we reach stop. In case of right shift, iterator points to stop
	 * and it is decreased till we reach start.
	 */
again:
	if (SHIFT == SHIFT_LEFT)
		iterator = &start;
	else
		iterator = &stop;

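	/* Resume from the position saved before a restarted pass */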
	if (tmp != EXT_MAX_BLOCKS)
		*iterator = tmp;

	/*
	 * It's safe to start updating extents. Start and stop are unsigned, so
	 * in case of right shift if extent with 0 block is reached, iterator
	 * becomes NULL to indicate the end of the loop.
	 */
	while (iterator && start <= stop) {
		path = ext4_find_extent(inode, *iterator, &path,
					EXT4_EX_NOCACHE);
		if (IS_ERR(path))
			return PTR_ERR(path);
		depth = path->p_depth;
		extent = path[depth].p_ext;
		if (!extent) {
			EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
					 (unsigned long) *iterator);
			return -EFSCORRUPTED;
		}
		if (SHIFT == SHIFT_LEFT && *iterator >
		    le32_to_cpu(extent->ee_block)) {
			/* Hole, move to the next extent */
			if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
				path[depth].p_ext++;
			} else {
				*iterator = ext4_ext_next_allocated_block(path);
				continue;
			}
		}

		tmp = *iterator;
		if (SHIFT == SHIFT_LEFT) {
			extent = EXT_LAST_EXTENT(path[depth].p_hdr);
			*iterator = le32_to_cpu(extent->ee_block) +
					ext4_ext_get_actual_len(extent);
		} else {
			extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
			if (le32_to_cpu(extent->ee_block) > 0)
				*iterator = le32_to_cpu(extent->ee_block) - 1;
			else
				/* Beginning is reached, end of the loop */
				iterator = NULL;
			/* Update path extent in case we need to stop */
			while (le32_to_cpu(extent->ee_block) < start)
				extent++;
			path[depth].p_ext = extent;
		}
		ret = ext4_ext_shift_path_extents(path, shift, inode,
						  handle, SHIFT);
		/* -EAGAIN means the shift restarted the transaction; rescan */
		if (ret == -EAGAIN)
			goto again;
		if (ret)
			break;
	}
out:
	ext4_ext_drop_refs(path);
	kfree(path);
	return ret;
}

/*
 * ext4_collapse_range:
 * This implements the fallocate's collapse range functionality for ext4.
 * Returns 0 on success, a negative error code on failure.
 */
static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct super_block *sb = inode->i_sb;
	ext4_lblk_t punch_start, punch_stop;
	handle_t *handle;
	unsigned int credits;
	loff_t new_size, ioffset;
	int ret;

	/*
	 * We need to test this early because xfstests assumes that a
	 * collapse range of (0, 1) will return EOPNOTSUPP if the file
	 * system does not support collapse range.
	 */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		return -EOPNOTSUPP;

	/* Collapse range works only on fs cluster size aligned regions. */
	if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
		return -EINVAL;

	trace_ext4_collapse_range(inode, offset, len);

	punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
	punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);

	/* Call ext4_force_commit to flush all data in case of data=journal. */
	if (ext4_should_journal_data(inode)) {
		ret = ext4_force_commit(inode->i_sb);
		if (ret)
			return ret;
	}

	inode_lock(inode);
	/*
	 * A collapse range that reaches or extends past EOF is effectively
	 * a truncate operation and is not supported here.
	 */
	if (offset + len >= inode->i_size) {
		ret = -EINVAL;
		goto out_mutex;
	}

	/* Currently just for extent based files */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		ret = -EOPNOTSUPP;
		goto out_mutex;
	}

	/* Wait for existing dio to complete */
	inode_dio_wait(inode);

	/*
	 * Prevent page faults from reinstantiating pages we have released from
	 * page cache.
	 */
	down_write(&EXT4_I(inode)->i_mmap_sem);

	ret = ext4_break_layouts(inode);
	if (ret)
		goto out_mmap;

	/*
	 * Need to round down offset to be aligned with page size boundary
	 * for page size > block size.
	 */
	ioffset = round_down(offset, PAGE_SIZE);
	/*
	 * Write tail of the last page before removed range since it will get
	 * removed from the page cache below.
	 */
	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
	if (ret)
		goto out_mmap;
	/*
	 * Write data that will be shifted to preserve it when discarding
	 * page cache below. We are also protected from pages becoming dirty
	 * by i_mmap_sem.
	 */
	ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
					   LLONG_MAX);
	if (ret)
		goto out_mmap;
	truncate_pagecache(inode, ioffset);

	credits = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_mmap;
	}
	ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode, 0);

	ret = ext4_es_remove_extent(inode, punch_start,
				    EXT_MAX_BLOCKS - punch_start);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}
	ext4_discard_preallocations(inode, 0);

	ret = ext4_ext_shift_extents(inode, handle, punch_stop,
				     punch_stop - punch_start, SHIFT_LEFT);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	new_size = inode->i_size - len;
	i_size_write(inode, new_size);
	EXT4_I(inode)->i_disksize = new_size;

	up_write(&EXT4_I(inode)->i_data_sem);
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);
	inode->i_mtime = inode->i_ctime = current_time(inode);
	ret = ext4_mark_inode_dirty(handle, inode);
	ext4_update_inode_fsync_trans(handle, inode, 1);

out_stop:
	ext4_journal_stop(handle);
	ext4_fc_stop_ineligible(sb);
out_mmap:
	up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
	inode_unlock(inode);
	return ret;
}

/*
 * ext4_insert_range:
 * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
 * The data blocks starting from @offset to the EOF are shifted by @len
 * blocks to the right to create a hole in the @inode. The inode size is
 * increased by @len bytes.
 * Returns 0 on success, error otherwise.
 */
static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct super_block *sb = inode->i_sb;
	handle_t *handle;
	struct ext4_ext_path *path;
	struct ext4_extent *extent;
	ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
	unsigned int credits, ee_len;
	int ret = 0, depth, split_flag = 0;
	loff_t ioffset;

	/*
	 * We need to test this early because xfstests assumes that an
	 * insert range of (0, 1) will return EOPNOTSUPP if the file
	 * system does not support insert range.
	 */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		return -EOPNOTSUPP;

	/* Insert range works only on fs cluster size aligned regions. */
	if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
		return -EINVAL;

	trace_ext4_insert_range(inode, offset, len);

	offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
	len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);

	/* Call ext4_force_commit to flush all data in case of data=journal */
	if (ext4_should_journal_data(inode)) {
		ret = ext4_force_commit(inode->i_sb);
		if (ret)
			return ret;
	}

	inode_lock(inode);
	/* Currently just for extent based files */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		ret = -EOPNOTSUPP;
		goto out_mutex;
	}

	/* Check whether the maximum file size would be exceeded */
	if (len > inode->i_sb->s_maxbytes - inode->i_size) {
		ret = -EFBIG;
		goto out_mutex;
	}

	/* Offset must be less than i_size */
	if (offset >= inode->i_size) {
		ret = -EINVAL;
		goto out_mutex;
	}

	/* Wait for existing dio to complete */
	inode_dio_wait(inode);

	/*
	 * Prevent page faults from reinstantiating pages we have released from
	 * page cache.
	 */
	down_write(&EXT4_I(inode)->i_mmap_sem);

	ret = ext4_break_layouts(inode);
	if (ret)
		goto out_mmap;

	/*
	 * Need to round down to align start offset to page size boundary
	 * for page size > block size.
	 */
	ioffset = round_down(offset, PAGE_SIZE);
	/* Write out all dirty pages */
	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
					   LLONG_MAX);
	if (ret)
		goto out_mmap;
	truncate_pagecache(inode, ioffset);

	credits = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_mmap;
	}
	ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);

	/* Expand file to avoid data loss if there is error while shifting */
	inode->i_size += len;
	EXT4_I(inode)->i_disksize += len;
	inode->i_mtime = inode->i_ctime = current_time(inode);
	ret = ext4_mark_inode_dirty(handle, inode);
	if (ret)
		goto out_stop;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode, 0);

	path = ext4_find_extent(inode, offset_lblk, NULL, 0);
	if (IS_ERR(path)) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	depth = ext_depth(inode);
	extent = path[depth].p_ext;
	if (extent) {
		ee_start_lblk = le32_to_cpu(extent->ee_block);
		ee_len = ext4_ext_get_actual_len(extent);

		/*
		 * If offset_lblk is not the starting block of extent, split
		 * the extent @offset_lblk
		 */
		if ((offset_lblk > ee_start_lblk) &&
				(offset_lblk < (ee_start_lblk + ee_len))) {
			if (ext4_ext_is_unwritten(extent))
				split_flag = EXT4_EXT_MARK_UNWRIT1 |
					EXT4_EXT_MARK_UNWRIT2;
			ret = ext4_split_extent_at(handle, inode, &path,
					offset_lblk, split_flag,
					EXT4_EX_NOCACHE |
					EXT4_GET_BLOCKS_PRE_IO |
					EXT4_GET_BLOCKS_METADATA_NOFAIL);
		}

		ext4_ext_drop_refs(path);
		kfree(path);
		if (ret < 0) {
			up_write(&EXT4_I(inode)->i_data_sem);
			goto out_stop;
		}
	} else {
		ext4_ext_drop_refs(path);
		kfree(path);
	}

	ret = ext4_es_remove_extent(inode, offset_lblk,
			EXT_MAX_BLOCKS - offset_lblk);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	/*
	 * if offset_lblk lies in a hole which is at start of file, use
	 * ee_start_lblk to shift extents
	 */
	ret = ext4_ext_shift_extents(inode, handle,
		ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
		len_lblk, SHIFT_RIGHT);

	up_write(&EXT4_I(inode)->i_data_sem);
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);
	if (ret >= 0)
		ext4_update_inode_fsync_trans(handle, inode, 1);

out_stop:
	ext4_journal_stop(handle);
	ext4_fc_stop_ineligible(sb);
out_mmap:
	up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
	inode_unlock(inode);
	return ret;
}

/**
 * ext4_swap_extents() - Swap extents between two inodes
 * @handle: handle for this transaction
 * @inode1:	First inode
 * @inode2:	Second inode
 * @lblk1:	Start block for first inode
 * @lblk2:	Start block for second inode
 * @count:	Number of blocks to swap
 * @unwritten: Mark second inode's extents as unwritten after swap
 * @erp:	Pointer to save error value
 *
 * This helper routine does exactly what its name promises: it swaps extents.
 * All other stuff such as page-cache locking consistency, bh mapping
 * consistency or extent's data copying must be performed by caller.
 * Locking:
 *		i_mutex is held for both inodes
 * 		i_data_sem is locked for write for both inodes
 * Assumptions:
 *		All pages from requested range are locked for both inodes
 */
int
ext4_swap_extents(handle_t *handle, struct inode *inode1,
		  struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
		  ext4_lblk_t count, int unwritten, int *erp)
{
	struct ext4_ext_path *path1 = NULL;
	struct ext4_ext_path *path2 = NULL;
	int replaced_count = 0;

	BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
	BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
	BUG_ON(!inode_is_locked(inode1));
	BUG_ON(!inode_is_locked(inode2));

	*erp = ext4_es_remove_extent(inode1, lblk1, count);
	if (unlikely(*erp))
		return 0;
	*erp = ext4_es_remove_extent(inode2, lblk2, count);
	if (unlikely(*erp))
		return 0;

	while (count) {
		struct ext4_extent *ex1, *ex2, tmp_ex;
		ext4_lblk_t e1_blk, e2_blk;
		int e1_len, e2_len, len;
		int split = 0;

		path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
		if (IS_ERR(path1)) {
			*erp = PTR_ERR(path1);
			path1 = NULL;
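			/*
			 * 'finish' ends the loop early: zero the remaining
			 * count and fall through to 'repeat' so both paths
			 * are released.
			 */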
		finish:
			count = 0;
			goto repeat;
		}
		path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
		if (IS_ERR(path2)) {
			*erp = PTR_ERR(path2);
			path2 = NULL;
			goto finish;
		}
		ex1 = path1[path1->p_depth].p_ext;
		ex2 = path2[path2->p_depth].p_ext;
		/* Do we have something to swap ? */
		if (unlikely(!ex2 || !ex1))
			goto finish;

		e1_blk = le32_to_cpu(ex1->ee_block);
		e2_blk = le32_to_cpu(ex2->ee_block);
		e1_len = ext4_ext_get_actual_len(ex1);
		e2_len = ext4_ext_get_actual_len(ex2);

		/* Hole handling */
		if (!in_range(lblk1, e1_blk, e1_len) ||
		    !in_range(lblk2, e2_blk, e2_len)) {
			ext4_lblk_t next1, next2;

			/* if hole after extent, then go to next extent */
			next1 = ext4_ext_next_allocated_block(path1);
			next2 = ext4_ext_next_allocated_block(path2);
			/* If hole before extent, then shift to that extent */
			if (e1_blk > lblk1)
				next1 = e1_blk;
			if (e2_blk > lblk2)
				next2 = e2_blk;
			/* Do we have something to swap */
			if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
				goto finish;
			/* Move to the rightmost boundary */
			len = next1 - lblk1;
			if (len < next2 - lblk2)
				len = next2 - lblk2;
			if (len > count)
				len = count;
			lblk1 += len;
			lblk2 += len;
			count -= len;
			goto repeat;
		}

		/* Prepare left boundary */
		if (e1_blk < lblk1) {
			split = 1;
			*erp = ext4_force_split_extent_at(handle, inode1,
						&path1, lblk1, 0);
			if (unlikely(*erp))
				goto finish;
		}
		if (e2_blk < lblk2) {
			split = 1;
			*erp = ext4_force_split_extent_at(handle, inode2,
						&path2, lblk2, 0);
			if (unlikely(*erp))
				goto finish;
		}
		/* ext4_split_extent_at() may result in leaf extent split,
		 * path must be revalidated. */
		if (split)
			goto repeat;

		/* Prepare right boundary */
		len = count;
		if (len > e1_blk + e1_len - lblk1)
			len = e1_blk + e1_len - lblk1;
		if (len > e2_blk + e2_len - lblk2)
			len = e2_blk + e2_len - lblk2;

		if (len != e1_len) {
			split = 1;
			*erp = ext4_force_split_extent_at(handle, inode1,
						&path1, lblk1 + len, 0);
			if (unlikely(*erp))
				goto finish;
		}
		if (len != e2_len) {
			split = 1;
			*erp = ext4_force_split_extent_at(handle, inode2,
						&path2, lblk2 + len, 0);
			if (*erp)
				goto finish;
		}
		/* ext4_split_extent_at() may result in leaf extent split,
		 * path must be revalidated. */
		if (split)
			goto repeat;

		BUG_ON(e2_len != e1_len);
		*erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
		if (unlikely(*erp))
			goto finish;
		*erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
		if (unlikely(*erp))
			goto finish;

		/* Both extents are fully inside boundaries. Swap it now */
		tmp_ex = *ex1;
		ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
		ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
		ex1->ee_len = cpu_to_le16(e2_len);
		ex2->ee_len = cpu_to_le16(e1_len);
		if (unwritten)
			ext4_ext_mark_unwritten(ex2);
		if (ext4_ext_is_unwritten(&tmp_ex))
			ext4_ext_mark_unwritten(ex1);

		ext4_ext_try_to_merge(handle, inode2, path2, ex2);
		ext4_ext_try_to_merge(handle, inode1, path1, ex1);
		*erp = ext4_ext_dirty(handle, inode2, path2 +
				      path2->p_depth);
		if (unlikely(*erp))
			goto finish;
		*erp = ext4_ext_dirty(handle, inode1, path1 +
				      path1->p_depth);
		/*
		 * This looks scary, since the second inode already points to
		 * the new blocks and was successfully dirtied. But an error
		 * here can only be a journal error, in which case the whole
		 * transaction will be aborted anyway.
		 */
		if (unlikely(*erp))
			goto finish;
		lblk1 += len;
		lblk2 += len;
		replaced_count += len;
		count -= len;

	repeat:
		ext4_ext_drop_refs(path1);
		kfree(path1);
		ext4_ext_drop_refs(path2);
		kfree(path2);
		path1 = path2 = NULL;
	}
	return replaced_count;
}

/*
 * ext4_clu_mapped - determine whether any block in a logical cluster has
 *                   been mapped to a physical cluster
 *
 * @inode - file containing the logical cluster
 * @lclu - logical cluster of interest
 *
 * Returns 1 if any block in the logical cluster is mapped, signifying
 * that a physical cluster has been allocated for it.  Otherwise,
 * returns 0.  Can also return negative error codes.  Derived from
 * ext4_ext_map_blocks().
 */
int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_ext_path *path;
	int depth, mapped = 0, err = 0;
	struct ext4_extent *extent;
	ext4_lblk_t first_lblk, first_lclu, last_lclu;

	/* search for the extent closest to the first block in the cluster */
	path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
	if (IS_ERR(path)) {
		err = PTR_ERR(path);
		path = NULL;
		goto out;
	}

	depth = ext_depth(inode);

	/*
	 * A consistent leaf must not be empty.  This situation is possible,
	 * though, _during_ tree modification, and it's why an assert can't
	 * be put in ext4_find_extent().
	 */
	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
		EXT4_ERROR_INODE(inode,
		    "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
				 (unsigned long) EXT4_C2B(sbi, lclu),
				 depth, path[depth].p_block);
		err = -EFSCORRUPTED;
		goto out;
	}

	extent = path[depth].p_ext;

	/* can't be mapped if the extent tree is empty */
	if (extent == NULL)
		goto out;

	first_lblk = le32_to_cpu(extent->ee_block);
	first_lclu = EXT4_B2C(sbi, first_lblk);

	/*
	 * Three possible outcomes at this point - found extent spanning
	 * the target cluster, to the left of the target cluster, or to the
	 * right of the target cluster.  The first two cases are handled here.
	 * The last case indicates the target cluster is not mapped.
	 */
	if (lclu >= first_lclu) {
		last_lclu = EXT4_B2C(sbi, first_lblk +
				     ext4_ext_get_actual_len(extent) - 1);
		if (lclu <= last_lclu) {
			mapped = 1;
		} else {
			first_lblk = ext4_ext_next_allocated_block(path);
			first_lclu = EXT4_B2C(sbi, first_lblk);
			if (lclu == first_lclu)
				mapped = 1;
		}
	}

out:
	ext4_ext_drop_refs(path);
	kfree(path);

	return err ? err : mapped;
}

/*
 * Update the physical block address and unwritten status of the extent
 * starting at logical block @start with length @len. If such an extent
 * doesn't exist, this function splits the extent tree appropriately to
 * create one. This function is called in the fast commit replay path.
 * Returns 0 on success and an error code on failure.
 */
int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
			      int len, int unwritten, ext4_fsblk_t pblk)
{
	struct ext4_ext_path *path = NULL, *ppath;
	struct ext4_extent *ex;
	int ret;

	path = ext4_find_extent(inode, start, NULL, 0);
	if (IS_ERR(path))
		return PTR_ERR(path);
	ex = path[path->p_depth].p_ext;
	if (!ex) {
		ret = -EFSCORRUPTED;
		goto out;
	}

	if (le32_to_cpu(ex->ee_block) != start ||
		ext4_ext_get_actual_len(ex) != len) {
		/* We need to split this extent to match our extent first */
		ppath = path;
		down_write(&EXT4_I(inode)->i_data_sem);
		ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1);
		up_write(&EXT4_I(inode)->i_data_sem);
		if (ret)
			goto out;
		kfree(path);
		path = ext4_find_extent(inode, start, NULL, 0);
		if (IS_ERR(path))
			return -1;
		ppath = path;
		ex = path[path->p_depth].p_ext;
		WARN_ON(le32_to_cpu(ex->ee_block) != start);
		if (ext4_ext_get_actual_len(ex) != len) {
			down_write(&EXT4_I(inode)->i_data_sem);
			ret = ext4_force_split_extent_at(NULL, inode, &ppath,
							 start + len, 1);
			up_write(&EXT4_I(inode)->i_data_sem);
			if (ret)
				goto out;
			kfree(path);
			path = ext4_find_extent(inode, start, NULL, 0);
			if (IS_ERR(path))
				return -EINVAL;
			ex = path[path->p_depth].p_ext;
		}
	}
	if (unwritten)
		ext4_ext_mark_unwritten(ex);
	else
		ext4_ext_mark_initialized(ex);
	ext4_ext_store_pblock(ex, pblk);
	down_write(&EXT4_I(inode)->i_data_sem);
	ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
	up_write(&EXT4_I(inode)->i_data_sem);
out:
	ext4_ext_drop_refs(path);
	kfree(path);
	ext4_mark_inode_dirty(NULL, inode);
	return ret;
}

/* Try to shrink the extent tree */
void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_extent *ex;
	ext4_lblk_t old_cur, cur = 0;

	while (cur < end) {
		path = ext4_find_extent(inode, cur, NULL, 0);
		if (IS_ERR(path))
			return;
		ex = path[path->p_depth].p_ext;
		if (!ex) {
			ext4_ext_drop_refs(path);
			kfree(path);
			ext4_mark_inode_dirty(NULL, inode);
			return;
		}
		old_cur = cur;
		cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
		if (cur <= old_cur)
			cur = old_cur + 1;
		ext4_ext_try_to_merge(NULL, inode, path, ex);
		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
		up_write(&EXT4_I(inode)->i_data_sem);
		ext4_mark_inode_dirty(NULL, inode);
		ext4_ext_drop_refs(path);
		kfree(path);
	}
}

/* Check if *cur is a hole and if it is, skip it */
static int skip_hole(struct inode *inode, ext4_lblk_t *cur)
{
	int ret;
	struct ext4_map_blocks map;

	map.m_lblk = *cur;
	map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;

	ret = ext4_map_blocks(NULL, inode, &map, 0);
	if (ret < 0)
		return ret;
	if (ret != 0)
		return 0;
	*cur = *cur + map.m_len;
	return 0;
}

/* Count number of blocks used by this inode and update i_blocks */
int ext4_ext_replay_set_iblocks(struct inode *inode)
{
	struct ext4_ext_path *path = NULL, *path2 = NULL;
	struct ext4_extent *ex;
	ext4_lblk_t cur = 0, end;
	int numblks = 0, i, ret = 0;
	ext4_fsblk_t cmp1, cmp2;
	struct ext4_map_blocks map;

	/* Determine the size of the file first */
	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
					EXT4_EX_NOCACHE);
	if (IS_ERR(path))
		return PTR_ERR(path);
	ex = path[path->p_depth].p_ext;
	if (!ex) {
		ext4_ext_drop_refs(path);
		kfree(path);
		goto out;
	}
	end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
	ext4_ext_drop_refs(path);
	kfree(path);

	/* Count the number of data blocks */
	cur = 0;
	while (cur < end) {
		map.m_lblk = cur;
		map.m_len = end - cur;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			break;
		if (ret > 0)
			numblks += ret;
		cur = cur + map.m_len;
	}

	/*
	 * Count the number of extent tree blocks. We do it by looking up
	 * two successive extents and determining the difference between
	 * their paths. When path is different for 2 successive extents
	 * we compare the blocks in the path at each level and increment
	 * iblocks by total number of differences found.
	 */
	cur = 0;
	ret = skip_hole(inode, &cur);
	if (ret < 0)
		goto out;
	path = ext4_find_extent(inode, cur, NULL, 0);
	if (IS_ERR(path))
		goto out;
	numblks += path->p_depth;
	ext4_ext_drop_refs(path);
	kfree(path);
	while (cur < end) {
		path = ext4_find_extent(inode, cur, NULL, 0);
		if (IS_ERR(path))
			break;
		ex = path[path->p_depth].p_ext;
		if (!ex) {
			ext4_ext_drop_refs(path);
			kfree(path);
			return 0;
		}
		cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
					ext4_ext_get_actual_len(ex));
		ret = skip_hole(inode, &cur);
		if (ret < 0) {
			ext4_ext_drop_refs(path);
			kfree(path);
			break;
		}
		path2 = ext4_find_extent(inode, cur, NULL, 0);
		if (IS_ERR(path2)) {
			ext4_ext_drop_refs(path);
			kfree(path);
			break;
		}
		ex = path2[path2->p_depth].p_ext;
		for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
			cmp1 = cmp2 = 0;
			if (i <= path->p_depth)
				cmp1 = path[i].p_bh ?
					path[i].p_bh->b_blocknr : 0;
			if (i <= path2->p_depth)
				cmp2 = path2[i].p_bh ?
					path2[i].p_bh->b_blocknr : 0;
			if (cmp1 != cmp2 && cmp2 != 0)
				numblks++;
		}
		ext4_ext_drop_refs(path);
		ext4_ext_drop_refs(path2);
		kfree(path);
		kfree(path2);
	}

out:
	inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
	ext4_mark_inode_dirty(NULL, inode);
	return 0;
}

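/*
 * Walk every mapped range of the inode and clear the bits covering its
 * blocks in the in-memory block bitmaps, including the extent tree blocks
 * along each path, recording the regions for fast commit replay.
 */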
int ext4_ext_clear_bb(struct inode *inode)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_extent *ex;
	ext4_lblk_t cur = 0, end;
	int j, ret = 0;
	struct ext4_map_blocks map;

	/* Determine the size of the file first */
	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
					EXT4_EX_NOCACHE);
	if (IS_ERR(path))
		return PTR_ERR(path);
	ex = path[path->p_depth].p_ext;
	if (!ex) {
		ext4_ext_drop_refs(path);
		kfree(path);
		return 0;
	}
	end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
	ext4_ext_drop_refs(path);
	kfree(path);

	cur = 0;
	while (cur < end) {
		map.m_lblk = cur;
		map.m_len = end - cur;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			break;
		if (ret > 0) {
			path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
			if (!IS_ERR_OR_NULL(path)) {
				for (j = 0; j < path->p_depth; j++) {
					ext4_mb_mark_bb(inode->i_sb,
							path[j].p_block, 1, 0);
					ext4_fc_record_regions(inode->i_sb, inode->i_ino,
							0, path[j].p_block, 1, 1);
				}
				ext4_ext_drop_refs(path);
				kfree(path);
			}
			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
			ext4_fc_record_regions(inode->i_sb, inode->i_ino,
					map.m_lblk, map.m_pblk, map.m_len, 1);
		}
		cur = cur + map.m_len;
	}

	return 0;
}