Blame - fs/btrfs/discard.c - hafnium/third_party/linux.git

blob: 9e1a06144e32d8a2b7ae4a9dd80ef3d77427be16 [file] [log] [blame]

Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0
				2
				3	#include <linux/jiffies.h>
				4	#include <linux/kernel.h>
				5	#include <linux/ktime.h>
				6	#include <linux/list.h>
				7	#include <linux/math64.h>
				8	#include <linux/sizes.h>
				9	#include <linux/workqueue.h>
				10	#include "ctree.h"
				11	#include "block-group.h"
				12	#include "discard.h"
				13	#include "free-space-cache.h"
				14
				15	/*
				16	* This contains the logic to handle async discard.
				17	*
				18	* Async discard manages trimming of free space outside of transaction commit.
				19	* Discarding is done by managing the block_groups on a LRU list based on free
				20	* space recency. Two passes are used to first prioritize discarding extents
				21	* and then allow for trimming in the bitmap the best opportunity to coalesce.
				22	* The block_groups are maintained on multiple lists to allow for multiple
				23	* passes with different discard filter requirements. A delayed work item is
				24	* used to manage discarding with timeout determined by a max of the delay
				25	* incurred by the iops rate limit, the byte rate limit, and the max delay of
				26	* BTRFS_DISCARD_MAX_DELAY.
				27	*
				28	* Note, this only keeps track of block_groups that are explicitly for data.
				29	* Mixed block_groups are not supported.
				30	*
				31	* The first list is special to manage discarding of fully free block groups.
				32	* This is necessary because we issue a final trim for a full free block group
				33	* after forgetting it. When a block group becomes unused, instead of directly
				34	* being added to the unused_bgs list, we add it to this first list. Then
				35	* from there, if it becomes fully discarded, we place it onto the unused_bgs
				36	* list.
				37	*
				38	* The in-memory free space cache serves as the backing state for discard.
				39	* Consequently this means there is no persistence. We opt to load all the
				40	* block groups in as not discarded, so the mount case degenerates to the
				41	* crashing case.
				42	*
				43	* As the free space cache uses bitmaps, there exists a tradeoff between
				44	* ease/efficiency for find_free_extent() and the accuracy of discard state.
				45	* Here we opt to let untrimmed regions merge with everything while only letting
				46	* trimmed regions merge with other trimmed regions. This can cause
				47	* overtrimming, but the coalescing benefit seems to be worth it. Additionally,
				48	* bitmap state is tracked as a whole. If we're able to fully trim a bitmap,
				49	* the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in,
				50	* this resets the state and we will retry trimming the whole bitmap. This is a
				51	* tradeoff between discard state accuracy and the cost of accounting.
				52	*/
				53
				54	/* This is an initial delay to give some chance for block reuse */
				55	#define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC)
				56	#define BTRFS_DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC)
				57
				58	/* Target completion latency of discarding all discardable extents */
				59	#define BTRFS_DISCARD_TARGET_MSEC (6 * 60 * 60UL * MSEC_PER_SEC)
				60	#define BTRFS_DISCARD_MIN_DELAY_MSEC (1UL)
				61	#define BTRFS_DISCARD_MAX_DELAY_MSEC (1000UL)
				62	#define BTRFS_DISCARD_MAX_IOPS (10U)
				63
				64	/* Montonically decreasing minimum length filters after index 0 */
				65	static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
				66	0,
				67	BTRFS_ASYNC_DISCARD_MAX_FILTER,
				68	BTRFS_ASYNC_DISCARD_MIN_FILTER
				69	};
				70
				71	static struct list_head get_discard_list(struct btrfs_discard_ctl discard_ctl,
				72	struct btrfs_block_group *block_group)
				73	{
				74	return &discard_ctl->discard_list[block_group->discard_index];
				75	}
				76
				77	static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				78	struct btrfs_block_group *block_group)
				79	{
				80	if (!btrfs_run_discard_work(discard_ctl))
				81	return;
				82
				83	if (list_empty(&block_group->discard_list) \|\|
				84	block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
				85	if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
				86	block_group->discard_index = BTRFS_DISCARD_INDEX_START;
				87	block_group->discard_eligible_time = (ktime_get_ns() +
				88	BTRFS_DISCARD_DELAY);
				89	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
				90	}
				91
				92	list_move_tail(&block_group->discard_list,
				93	get_discard_list(discard_ctl, block_group));
				94	}
				95
				96	static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				97	struct btrfs_block_group *block_group)
				98	{
				99	if (!btrfs_is_block_group_data_only(block_group))
				100	return;
				101
				102	spin_lock(&discard_ctl->lock);
				103	__add_to_discard_list(discard_ctl, block_group);
				104	spin_unlock(&discard_ctl->lock);
				105	}
				106
				107	static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
				108	struct btrfs_block_group *block_group)
				109	{
				110	spin_lock(&discard_ctl->lock);
				111
				112	if (!btrfs_run_discard_work(discard_ctl)) {
				113	spin_unlock(&discard_ctl->lock);
				114	return;
				115	}
				116
				117	list_del_init(&block_group->discard_list);
				118
				119	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
				120	block_group->discard_eligible_time = (ktime_get_ns() +
				121	BTRFS_DISCARD_UNUSED_DELAY);
				122	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
				123	list_add_tail(&block_group->discard_list,
				124	&discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
				125
				126	spin_unlock(&discard_ctl->lock);
				127	}
				128
				129	static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
				130	struct btrfs_block_group *block_group)
				131	{
				132	bool running = false;
				133
				134	spin_lock(&discard_ctl->lock);
				135
				136	if (block_group == discard_ctl->block_group) {
				137	running = true;
				138	discard_ctl->block_group = NULL;
				139	}
				140
				141	block_group->discard_eligible_time = 0;
				142	list_del_init(&block_group->discard_list);
				143
				144	spin_unlock(&discard_ctl->lock);
				145
				146	return running;
				147	}
				148
				149	/**
				150	* find_next_block_group - find block_group that's up next for discarding
				151	* @discard_ctl: discard control
				152	* @now: current time
				153	*
				154	* Iterate over the discard lists to find the next block_group up for
				155	* discarding checking the discard_eligible_time of block_group.
				156	*/
				157	static struct btrfs_block_group *find_next_block_group(
				158	struct btrfs_discard_ctl *discard_ctl,
				159	u64 now)
				160	{
				161	struct btrfs_block_group ret_block_group = NULL, block_group;
				162	int i;
				163
				164	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
				165	struct list_head *discard_list = &discard_ctl->discard_list[i];
				166
				167	if (!list_empty(discard_list)) {
				168	block_group = list_first_entry(discard_list,
				169	struct btrfs_block_group,
				170	discard_list);
				171
				172	if (!ret_block_group)
				173	ret_block_group = block_group;
				174
				175	if (ret_block_group->discard_eligible_time < now)
				176	break;
				177
				178	if (ret_block_group->discard_eligible_time >
				179	block_group->discard_eligible_time)
				180	ret_block_group = block_group;
				181	}
				182	}
				183
				184	return ret_block_group;
				185	}
				186
				187	/**
				188	* peek_discard_list - wrap find_next_block_group()
				189	* @discard_ctl: discard control
				190	* @discard_state: the discard_state of the block_group after state management
				191	* @discard_index: the discard_index of the block_group after state management
				192	*
				193	* This wraps find_next_block_group() and sets the block_group to be in use.
				194	* discard_state's control flow is managed here. Variables related to
				195	* discard_state are reset here as needed (eg discard_cursor). @discard_state
				196	* and @discard_index are remembered as it may change while we're discarding,
				197	* but we want the discard to execute in the context determined here.
				198	*/
				199	static struct btrfs_block_group *peek_discard_list(
				200	struct btrfs_discard_ctl *discard_ctl,
				201	enum btrfs_discard_state *discard_state,
				202	int *discard_index, u64 now)
				203	{
				204	struct btrfs_block_group *block_group;
				205
				206	spin_lock(&discard_ctl->lock);
				207	again:
				208	block_group = find_next_block_group(discard_ctl, now);
				209
				210	if (block_group && now >= block_group->discard_eligible_time) {
				211	if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
				212	block_group->used != 0) {
				213	if (btrfs_is_block_group_data_only(block_group))
				214	__add_to_discard_list(discard_ctl, block_group);
				215	else
				216	list_del_init(&block_group->discard_list);
				217	goto again;
				218	}
				219	if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
				220	block_group->discard_cursor = block_group->start;
				221	block_group->discard_state = BTRFS_DISCARD_EXTENTS;
				222	}
				223	discard_ctl->block_group = block_group;
				224	}
				225	if (block_group) {
				226	*discard_state = block_group->discard_state;
				227	*discard_index = block_group->discard_index;
				228	}
				229	spin_unlock(&discard_ctl->lock);
				230
				231	return block_group;
				232	}
				233
				234	/**
				235	* btrfs_discard_check_filter - updates a block groups filters
				236	* @block_group: block group of interest
				237	* @bytes: recently freed region size after coalescing
				238	*
				239	* Async discard maintains multiple lists with progressively smaller filters
				240	* to prioritize discarding based on size. Should a free space that matches
				241	* a larger filter be returned to the free_space_cache, prioritize that discard
				242	* by moving @block_group to the proper filter.
				243	*/
				244	void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
				245	u64 bytes)
				246	{
				247	struct btrfs_discard_ctl *discard_ctl;
				248
				249	if (!block_group \|\|
				250	!btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
				251	return;
				252
				253	discard_ctl = &block_group->fs_info->discard_ctl;
				254
				255	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
				256	bytes >= discard_minlen[block_group->discard_index - 1]) {
				257	int i;
				258
				259	remove_from_discard_list(discard_ctl, block_group);
				260
				261	for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
				262	i++) {
				263	if (bytes >= discard_minlen[i]) {
				264	block_group->discard_index = i;
				265	add_to_discard_list(discard_ctl, block_group);
				266	break;
				267	}
				268	}
				269	}
				270	}
				271
				272	/**
				273	* btrfs_update_discard_index - moves a block group along the discard lists
				274	* @discard_ctl: discard control
				275	* @block_group: block_group of interest
				276	*
				277	* Increment @block_group's discard_index. If it falls of the list, let it be.
				278	* Otherwise add it back to the appropriate list.
				279	*/
				280	static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
				281	struct btrfs_block_group *block_group)
				282	{
				283	block_group->discard_index++;
				284	if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
				285	block_group->discard_index = 1;
				286	return;
				287	}
				288
				289	add_to_discard_list(discard_ctl, block_group);
				290	}
				291
				292	/**
				293	* btrfs_discard_cancel_work - remove a block_group from the discard lists
				294	* @discard_ctl: discard control
				295	* @block_group: block_group of interest
				296	*
				297	* This removes @block_group from the discard lists. If necessary, it waits on
				298	* the current work and then reschedules the delayed work.
				299	*/
				300	void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
				301	struct btrfs_block_group *block_group)
				302	{
				303	if (remove_from_discard_list(discard_ctl, block_group)) {
				304	cancel_delayed_work_sync(&discard_ctl->work);
				305	btrfs_discard_schedule_work(discard_ctl, true);
				306	}
				307	}
				308
				309	/**
				310	* btrfs_discard_queue_work - handles queuing the block_groups
				311	* @discard_ctl: discard control
				312	* @block_group: block_group of interest
				313	*
				314	* This maintains the LRU order of the discard lists.
				315	*/
				316	void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
				317	struct btrfs_block_group *block_group)
				318	{
				319	if (!block_group \|\| !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
				320	return;
				321
				322	if (block_group->used == 0)
				323	add_to_discard_unused_list(discard_ctl, block_group);
				324	else
				325	add_to_discard_list(discard_ctl, block_group);
				326
				327	if (!delayed_work_pending(&discard_ctl->work))
				328	btrfs_discard_schedule_work(discard_ctl, false);
				329	}
				330
				331	static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				332	u64 now, bool override)
				333	{
				334	struct btrfs_block_group *block_group;
				335
				336	if (!btrfs_run_discard_work(discard_ctl))
				337	return;
				338	if (!override && delayed_work_pending(&discard_ctl->work))
				339	return;
				340
				341	block_group = find_next_block_group(discard_ctl, now);
				342	if (block_group) {
				343	unsigned long delay = discard_ctl->delay;
				344	u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
				345
				346	/*
				347	* A single delayed workqueue item is responsible for
				348	* discarding, so we can manage the bytes rate limit by keeping
				349	* track of the previous discard.
				350	*/
				351	if (kbps_limit && discard_ctl->prev_discard) {
				352	u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
				353	u64 bps_delay = div64_u64(discard_ctl->prev_discard *
				354	MSEC_PER_SEC, bps_limit);
				355
				356	delay = max(delay, msecs_to_jiffies(bps_delay));
				357	}
				358
				359	/*
				360	* This timeout is to hopefully prevent immediate discarding
				361	* in a recently allocated block group.
				362	*/
				363	if (now < block_group->discard_eligible_time) {
				364	u64 bg_timeout = block_group->discard_eligible_time - now;
				365
				366	delay = max(delay, nsecs_to_jiffies(bg_timeout));
				367	}
				368
				369	mod_delayed_work(discard_ctl->discard_workers,
				370	&discard_ctl->work, delay);
				371	}
				372	}
				373
				374	/*
				375	* btrfs_discard_schedule_work - responsible for scheduling the discard work
				376	* @discard_ctl: discard control
				377	* @override: override the current timer
				378	*
				379	* Discards are issued by a delayed workqueue item. @override is used to
				380	* update the current delay as the baseline delay interval is reevaluated on
				381	* transaction commit. This is also maxed with any other rate limit.
				382	*/
				383	void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				384	bool override)
				385	{
				386	const u64 now = ktime_get_ns();
				387
				388	spin_lock(&discard_ctl->lock);
				389	__btrfs_discard_schedule_work(discard_ctl, now, override);
				390	spin_unlock(&discard_ctl->lock);
				391	}
				392
				393	/**
				394	* btrfs_finish_discard_pass - determine next step of a block_group
				395	* @discard_ctl: discard control
				396	* @block_group: block_group of interest
				397	*
				398	* This determines the next step for a block group after it's finished going
				399	* through a pass on a discard list. If it is unused and fully trimmed, we can
				400	* mark it unused and send it to the unused_bgs path. Otherwise, pass it onto
				401	* the appropriate filter list or let it fall off.
				402	*/
				403	static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
				404	struct btrfs_block_group *block_group)
				405	{
				406	remove_from_discard_list(discard_ctl, block_group);
				407
				408	if (block_group->used == 0) {
				409	if (btrfs_is_free_space_trimmed(block_group))
				410	btrfs_mark_bg_unused(block_group);
				411	else
				412	add_to_discard_unused_list(discard_ctl, block_group);
				413	} else {
				414	btrfs_update_discard_index(discard_ctl, block_group);
				415	}
				416	}
				417
				418	/**
				419	* btrfs_discard_workfn - discard work function
				420	* @work: work
				421	*
				422	* This finds the next block_group to start discarding and then discards a
				423	* single region. It does this in a two-pass fashion: first extents and second
				424	* bitmaps. Completely discarded block groups are sent to the unused_bgs path.
				425	*/
				426	static void btrfs_discard_workfn(struct work_struct *work)
				427	{
				428	struct btrfs_discard_ctl *discard_ctl;
				429	struct btrfs_block_group *block_group;
				430	enum btrfs_discard_state discard_state;
				431	int discard_index = 0;
				432	u64 trimmed = 0;
				433	u64 minlen = 0;
				434	u64 now = ktime_get_ns();
				435
				436	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
				437
				438	block_group = peek_discard_list(discard_ctl, &discard_state,
				439	&discard_index, now);
				440	if (!block_group \|\| !btrfs_run_discard_work(discard_ctl))
				441	return;
				442	if (now < block_group->discard_eligible_time) {
				443	btrfs_discard_schedule_work(discard_ctl, false);
				444	return;
				445	}
				446
				447	/* Perform discarding */
				448	minlen = discard_minlen[discard_index];
				449
				450	if (discard_state == BTRFS_DISCARD_BITMAPS) {
				451	u64 maxlen = 0;
				452
				453	/*
				454	* Use the previous levels minimum discard length as the max
				455	* length filter. In the case something is added to make a
				456	* region go beyond the max filter, the entire bitmap is set
				457	* back to BTRFS_TRIM_STATE_UNTRIMMED.
				458	*/
				459	if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
				460	maxlen = discard_minlen[discard_index - 1];
				461
				462	btrfs_trim_block_group_bitmaps(block_group, &trimmed,
				463	block_group->discard_cursor,
				464	btrfs_block_group_end(block_group),
				465	minlen, maxlen, true);
				466	discard_ctl->discard_bitmap_bytes += trimmed;
				467	} else {
				468	btrfs_trim_block_group_extents(block_group, &trimmed,
				469	block_group->discard_cursor,
				470	btrfs_block_group_end(block_group),
				471	minlen, true);
				472	discard_ctl->discard_extent_bytes += trimmed;
				473	}
				474
				475	discard_ctl->prev_discard = trimmed;
				476
				477	/* Determine next steps for a block_group */
				478	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
				479	if (discard_state == BTRFS_DISCARD_BITMAPS) {
				480	btrfs_finish_discard_pass(discard_ctl, block_group);
				481	} else {
				482	block_group->discard_cursor = block_group->start;
				483	spin_lock(&discard_ctl->lock);
				484	if (block_group->discard_state !=
				485	BTRFS_DISCARD_RESET_CURSOR)
				486	block_group->discard_state =
				487	BTRFS_DISCARD_BITMAPS;
				488	spin_unlock(&discard_ctl->lock);
				489	}
				490	}
				491
				492	spin_lock(&discard_ctl->lock);
				493	discard_ctl->block_group = NULL;
				494	__btrfs_discard_schedule_work(discard_ctl, now, false);
				495	spin_unlock(&discard_ctl->lock);
				496	}
				497
				498	/**
				499	* btrfs_run_discard_work - determines if async discard should be running
				500	* @discard_ctl: discard control
				501	*
				502	* Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
				503	*/
				504	bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
				505	{
				506	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
				507	struct btrfs_fs_info,
				508	discard_ctl);
				509
				510	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
				511	test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
				512	}
				513
				514	/**
				515	* btrfs_discard_calc_delay - recalculate the base delay
				516	* @discard_ctl: discard control
				517	*
				518	* Recalculate the base delay which is based off the total number of
				519	* discardable_extents. Clamp this between the lower_limit (iops_limit or 1ms)
				520	* and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
				521	*/
				522	void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
				523	{
				524	s32 discardable_extents;
				525	s64 discardable_bytes;
				526	u32 iops_limit;
				527	unsigned long delay;
				528	unsigned long lower_limit = BTRFS_DISCARD_MIN_DELAY_MSEC;
				529
				530	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
				531	if (!discardable_extents)
				532	return;
				533
				534	spin_lock(&discard_ctl->lock);
				535
				536	/*
				537	* The following is to fix a potential -1 discrepenancy that we're not
				538	* sure how to reproduce. But given that this is the only place that
				539	* utilizes these numbers and this is only called by from
				540	* btrfs_finish_extent_commit() which is synchronized, we can correct
				541	* here.
				542	*/
				543	if (discardable_extents < 0)
				544	atomic_add(-discardable_extents,
				545	&discard_ctl->discardable_extents);
				546
				547	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
				548	if (discardable_bytes < 0)
				549	atomic64_add(-discardable_bytes,
				550	&discard_ctl->discardable_bytes);
				551
				552	if (discardable_extents <= 0) {
				553	spin_unlock(&discard_ctl->lock);
				554	return;
				555	}
				556
				557	iops_limit = READ_ONCE(discard_ctl->iops_limit);
				558	if (iops_limit)
				559	lower_limit = max_t(unsigned long, lower_limit,
				560	MSEC_PER_SEC / iops_limit);
				561
				562	delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
				563	delay = clamp(delay, lower_limit, BTRFS_DISCARD_MAX_DELAY_MSEC);
				564	discard_ctl->delay = msecs_to_jiffies(delay);
				565
				566	spin_unlock(&discard_ctl->lock);
				567	}
				568
				569	/**
				570	* btrfs_discard_update_discardable - propagate discard counters
				571	* @block_group: block_group of interest
				572	* @ctl: free_space_ctl of @block_group
				573	*
				574	* This propagates deltas of counters up to the discard_ctl. It maintains a
				575	* current counter and a previous counter passing the delta up to the global
				576	* stat. Then the current counter value becomes the previous counter value.
				577	*/
				578	void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
				579	struct btrfs_free_space_ctl *ctl)
				580	{
				581	struct btrfs_discard_ctl *discard_ctl;
				582	s32 extents_delta;
				583	s64 bytes_delta;
				584
				585	if (!block_group \|\|
				586	!btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) \|\|
				587	!btrfs_is_block_group_data_only(block_group))
				588	return;
				589
				590	discard_ctl = &block_group->fs_info->discard_ctl;
				591
				592	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
				593	ctl->discardable_extents[BTRFS_STAT_PREV];
				594	if (extents_delta) {
				595	atomic_add(extents_delta, &discard_ctl->discardable_extents);
				596	ctl->discardable_extents[BTRFS_STAT_PREV] =
				597	ctl->discardable_extents[BTRFS_STAT_CURR];
				598	}
				599
				600	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
				601	ctl->discardable_bytes[BTRFS_STAT_PREV];
				602	if (bytes_delta) {
				603	atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
				604	ctl->discardable_bytes[BTRFS_STAT_PREV] =
				605	ctl->discardable_bytes[BTRFS_STAT_CURR];
				606	}
				607	}
				608
				609	/**
				610	* btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
				611	* @fs_info: fs_info of interest
				612	*
				613	* The unused_bgs list needs to be punted to the discard lists because the
				614	* order of operations is changed. In the normal sychronous discard path, the
				615	* block groups are trimmed via a single large trim in transaction commit. This
				616	* is ultimately what we are trying to avoid with asynchronous discard. Thus,
				617	* it must be done before going down the unused_bgs path.
				618	*/
				619	void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
				620	{
				621	struct btrfs_block_group block_group, next;
				622
				623	spin_lock(&fs_info->unused_bgs_lock);
				624	/* We enabled async discard, so punt all to the queue */
				625	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				626	bg_list) {
				627	list_del_init(&block_group->bg_list);
				628	btrfs_put_block_group(block_group);
				629	btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
				630	}
				631	spin_unlock(&fs_info->unused_bgs_lock);
				632	}
				633
				634	/**
				635	* btrfs_discard_purge_list - purge discard lists
				636	* @discard_ctl: discard control
				637	*
				638	* If we are disabling async discard, we may have intercepted block groups that
				639	* are completely free and ready for the unused_bgs path. As discarding will
				640	* now happen in transaction commit or not at all, we can safely mark the
				641	* corresponding block groups as unused and they will be sent on their merry
				642	* way to the unused_bgs list.
				643	*/
				644	static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
				645	{
				646	struct btrfs_block_group block_group, next;
				647	int i;
				648
				649	spin_lock(&discard_ctl->lock);
				650	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
				651	list_for_each_entry_safe(block_group, next,
				652	&discard_ctl->discard_list[i],
				653	discard_list) {
				654	list_del_init(&block_group->discard_list);
				655	spin_unlock(&discard_ctl->lock);
				656	if (block_group->used == 0)
				657	btrfs_mark_bg_unused(block_group);
				658	spin_lock(&discard_ctl->lock);
				659	}
				660	}
				661	spin_unlock(&discard_ctl->lock);
				662	}
				663
				664	void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
				665	{
				666	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
				667	btrfs_discard_cleanup(fs_info);
				668	return;
				669	}
				670
				671	btrfs_discard_punt_unused_bgs_list(fs_info);
				672
				673	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
				674	}
				675
				676	void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
				677	{
				678	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
				679	}
				680
				681	void btrfs_discard_init(struct btrfs_fs_info *fs_info)
				682	{
				683	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
				684	int i;
				685
				686	spin_lock_init(&discard_ctl->lock);
				687	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);
				688
				689	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
				690	INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
				691
				692	discard_ctl->prev_discard = 0;
				693	atomic_set(&discard_ctl->discardable_extents, 0);
				694	atomic64_set(&discard_ctl->discardable_bytes, 0);
				695	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
				696	discard_ctl->delay = BTRFS_DISCARD_MAX_DELAY_MSEC;
				697	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
				698	discard_ctl->kbps_limit = 0;
				699	discard_ctl->discard_extent_bytes = 0;
				700	discard_ctl->discard_bitmap_bytes = 0;
				701	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
				702	}
				703
				704	void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
				705	{
				706	btrfs_discard_stop(fs_info);
				707	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
				708	btrfs_discard_purge_list(&fs_info->discard_ctl);
				709	}