Blame - drivers/mtd/ubi/wl.c - hafnium/third_party/linux.git

blob: 3fcdefe2714d0167770e26fe522db7e5b00bd540 [file] [log] [blame]

David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-or-later
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2	/*
				3	* Copyright (c) International Business Machines Corp., 2006
				4	*
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5	* Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
				6	*/
				7
				8	/*
				9	* UBI wear-leveling sub-system.
				10	*
				11	* This sub-system is responsible for wear-leveling. It works in terms of
				12	* physical eraseblocks and erase counters and knows nothing about logical
				13	* eraseblocks, volumes, etc. From this sub-system's perspective all physical
				14	* eraseblocks are of two types - used and free. Used physical eraseblocks are
				15	* those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
				16	* eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
				17	*
				18	* Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
				19	* header. The rest of the physical eraseblock contains only %0xFF bytes.
				20	*
				21	* When physical eraseblocks are returned to the WL sub-system by means of the
				22	* 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
				23	* done asynchronously in context of the per-UBI device background thread,
				24	* which is also managed by the WL sub-system.
				25	*
				26	* The wear-leveling is ensured by means of moving the contents of used
				27	* physical eraseblocks with low erase counter to free physical eraseblocks
				28	* with high erase counter.
				29	*
				30	* If the WL sub-system fails to erase a physical eraseblock, it marks it as
				31	* bad.
				32	*
				33	* This sub-system is also responsible for scrubbing. If a bit-flip is detected
				34	* in a physical eraseblock, it has to be moved. Technically this is the same
				35	* as moving it for wear-leveling reasons.
				36	*
				37	* As it was said, for the UBI sub-system all physical eraseblocks are either
				38	* "free" or "used". Free eraseblocks are kept in the @wl->free RB-tree, while
				39	* used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub
				40	* RB-trees, as well as (temporarily) in the @wl->pq queue.
				41	*
				42	* When the WL sub-system returns a physical eraseblock, the physical
				43	* eraseblock is protected from being moved for some "time". For this reason,
				44	* the physical eraseblock is not directly moved from the @wl->free tree to the
				45	* @wl->used tree. There is a protection queue in between where this
				46	* physical eraseblock is temporarily stored (@wl->pq).
				47	*
				48	* All this protection stuff is needed because:
				49	* o we don't want to move physical eraseblocks just after we have given them
				50	* to the user; instead, we first want to let users fill them up with data;
				51	*
				52	* o there is a chance that the user will put the physical eraseblock very
				53	* soon, so it makes sense not to move it for some time, but wait.
				54	*
				55	* Physical eraseblocks stay protected only for limited time. But the "time" is
				56	* measured in erase cycles in this case. This is implemented with help of the
				57	* protection queue. Eraseblocks are put to the tail of this queue when they
				58	* are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
				59	* head of the queue on each erase operation (for any eraseblock). So the
				60	* length of the queue defines how many (global) erase cycles PEBs are protected.
				61	*
				62	* To put it differently, each physical eraseblock has 2 main states: free and
				63	* used. The former state corresponds to the @wl->free tree. The latter state
				64	* is split up on several sub-states:
				65	* o the WL movement is allowed (@wl->used tree);
				66	* o the WL movement is disallowed (@wl->erroneous) because the PEB is
				67	* erroneous - e.g., there was a read error;
				68	* o the WL movement is temporarily prohibited (@wl->pq queue);
				69	* o scrubbing is needed (@wl->scrub tree).
				70	*
				71	* Depending on the sub-state, wear-leveling entries of the used physical
				72	* eraseblocks may be kept in one of those structures.
				73	*
				74	* Note, in this implementation, we keep a small in-RAM object for each physical
				75	* eraseblock. This is surely not a scalable solution. But it appears to be good
				76	* enough for moderately large flashes and it is simple. In future, one may
				77	* re-work this sub-system and make it more scalable.
				78	*
				79	* At the moment this sub-system does not utilize the sequence number, which
				80	* was introduced relatively recently. But it would be wise to do this because
				81	* the sequence number of a logical eraseblock characterizes how old it is. For
				82	* example, when we move a PEB with low erase counter, and we need to pick the
				83	* target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
				84	* pick target PEB with an average EC if our PEB is not very "old". This is a
				85	* room for future re-works of the WL sub-system.
				86	*/
				87
				88	#include <linux/slab.h>
				89	#include <linux/crc32.h>
				90	#include <linux/freezer.h>
				91	#include <linux/kthread.h>
				92	#include "ubi.h"
				93	#include "wl.h"
				94
				95	/* Number of physical eraseblocks reserved for wear-leveling purposes */
				96	#define WL_RESERVED_PEBS 1
				97
				98	/*
				99	* Maximum difference between two erase counters. If this threshold is
				100	* exceeded, the WL sub-system starts moving data from used physical
				101	* eraseblocks with low erase counter to free physical eraseblocks with high
				102	* erase counter.
				103	*/
				104	#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
				105
				106	/*
				107	* When a physical eraseblock is moved, the WL sub-system has to pick the target
				108	* physical eraseblock to move to. The simplest way would be just to pick the
				109	* one with the highest erase counter. But in certain workloads this could lead
				110	* to an unlimited wear of one or few physical eraseblock. Indeed, imagine a
				111	* situation when the picked physical eraseblock is constantly erased after the
				112	* data is written to it. So, we have a constant which limits the highest erase
				113	* counter of the free physical eraseblock to pick. Namely, the WL sub-system
				114	* does not pick eraseblocks with erase counter greater than the lowest erase
				115	* counter plus %WL_FREE_MAX_DIFF.
				116	*/
				117	#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
				118
				119	/*
				120	* Maximum number of consecutive background thread failures which is enough to
				121	* switch to read-only mode.
				122	*/
				123	#define WL_MAX_FAILURES 32
				124
				125	static int self_check_ec(struct ubi_device *ubi, int pnum, int ec);
				126	static int self_check_in_wl_tree(const struct ubi_device *ubi,
				127	struct ubi_wl_entry e, struct rb_root root);
				128	static int self_check_in_pq(const struct ubi_device *ubi,
				129	struct ubi_wl_entry *e);
				130
				131	/**
				132	* wl_tree_add - add a wear-leveling entry to a WL RB-tree.
				133	* @e: the wear-leveling entry to add
				134	* @root: the root of the tree
				135	*
				136	* Note, we use (erase counter, physical eraseblock number) pairs as keys in
				137	* the @ubi->used and @ubi->free RB-trees.
				138	*/
				139	static void wl_tree_add(struct ubi_wl_entry e, struct rb_root root)
				140	{
				141	struct rb_node *p, parent = NULL;
				142
				143	p = &root->rb_node;
				144	while (*p) {
				145	struct ubi_wl_entry *e1;
				146
				147	parent = *p;
				148	e1 = rb_entry(parent, struct ubi_wl_entry, u.rb);
				149
				150	if (e->ec < e1->ec)
				151	p = &(*p)->rb_left;
				152	else if (e->ec > e1->ec)
				153	p = &(*p)->rb_right;
				154	else {
				155	ubi_assert(e->pnum != e1->pnum);
				156	if (e->pnum < e1->pnum)
				157	p = &(*p)->rb_left;
				158	else
				159	p = &(*p)->rb_right;
				160	}
				161	}
				162
				163	rb_link_node(&e->u.rb, parent, p);
				164	rb_insert_color(&e->u.rb, root);
				165	}
				166
				167	/**
				168	* wl_tree_destroy - destroy a wear-leveling entry.
				169	* @ubi: UBI device description object
				170	* @e: the wear-leveling entry to add
				171	*
				172	* This function destroys a wear leveling entry and removes
				173	* the reference from the lookup table.
				174	*/
				175	static void wl_entry_destroy(struct ubi_device ubi, struct ubi_wl_entry e)
				176	{
				177	ubi->lookuptbl[e->pnum] = NULL;
				178	kmem_cache_free(ubi_wl_entry_slab, e);
				179	}
				180
				181	/**
				182	* do_work - do one pending work.
				183	* @ubi: UBI device description object
				184	*
				185	* This function returns zero in case of success and a negative error code in
				186	* case of failure.
				187	*/
				188	static int do_work(struct ubi_device *ubi)
				189	{
				190	int err;
				191	struct ubi_work *wrk;
				192
				193	cond_resched();
				194
				195	/*
				196	* @ubi->work_sem is used to synchronize with the workers. Workers take
				197	* it in read mode, so many of them may be doing works at a time. But
				198	* the queue flush code has to be sure the whole queue of works is
				199	* done, and it takes the mutex in write mode.
				200	*/
				201	down_read(&ubi->work_sem);
				202	spin_lock(&ubi->wl_lock);
				203	if (list_empty(&ubi->works)) {
				204	spin_unlock(&ubi->wl_lock);
				205	up_read(&ubi->work_sem);
				206	return 0;
				207	}
				208
				209	wrk = list_entry(ubi->works.next, struct ubi_work, list);
				210	list_del(&wrk->list);
				211	ubi->works_count -= 1;
				212	ubi_assert(ubi->works_count >= 0);
				213	spin_unlock(&ubi->wl_lock);
				214
				215	/*
				216	* Call the worker function. Do not touch the work structure
				217	* after this call as it will have been freed or reused by that
				218	* time by the worker function.
				219	*/
				220	err = wrk->func(ubi, wrk, 0);
				221	if (err)
				222	ubi_err(ubi, "work failed with error code %d", err);
				223	up_read(&ubi->work_sem);
				224
				225	return err;
				226	}
				227
				228	/**
				229	* in_wl_tree - check if wear-leveling entry is present in a WL RB-tree.
				230	* @e: the wear-leveling entry to check
				231	* @root: the root of the tree
				232	*
				233	* This function returns non-zero if @e is in the @root RB-tree and zero if it
				234	* is not.
				235	*/
				236	static int in_wl_tree(struct ubi_wl_entry e, struct rb_root root)
				237	{
				238	struct rb_node *p;
				239
				240	p = root->rb_node;
				241	while (p) {
				242	struct ubi_wl_entry *e1;
				243
				244	e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
				245
				246	if (e->pnum == e1->pnum) {
				247	ubi_assert(e == e1);
				248	return 1;
				249	}
				250
				251	if (e->ec < e1->ec)
				252	p = p->rb_left;
				253	else if (e->ec > e1->ec)
				254	p = p->rb_right;
				255	else {
				256	ubi_assert(e->pnum != e1->pnum);
				257	if (e->pnum < e1->pnum)
				258	p = p->rb_left;
				259	else
				260	p = p->rb_right;
				261	}
				262	}
				263
				264	return 0;
				265	}
				266
				267	/**
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	268	* in_pq - check if a wear-leveling entry is present in the protection queue.
				269	* @ubi: UBI device description object
				270	* @e: the wear-leveling entry to check
				271	*
				272	* This function returns non-zero if @e is in the protection queue and zero
				273	* if it is not.
				274	*/
				275	static inline int in_pq(const struct ubi_device ubi, struct ubi_wl_entry e)
				276	{
				277	struct ubi_wl_entry *p;
				278	int i;
				279
				280	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
				281	list_for_each_entry(p, &ubi->pq[i], u.list)
				282	if (p == e)
				283	return 1;
				284
				285	return 0;
				286	}
				287
				288	/**
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	289	* prot_queue_add - add physical eraseblock to the protection queue.
				290	* @ubi: UBI device description object
				291	* @e: the physical eraseblock to add
				292	*
				293	* This function adds @e to the tail of the protection queue @ubi->pq, where
				294	* @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be
				295	* temporarily protected from the wear-leveling worker. Note, @wl->lock has to
				296	* be locked.
				297	*/
				298	static void prot_queue_add(struct ubi_device ubi, struct ubi_wl_entry e)
				299	{
				300	int pq_tail = ubi->pq_head - 1;
				301
				302	if (pq_tail < 0)
				303	pq_tail = UBI_PROT_QUEUE_LEN - 1;
				304	ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN);
				305	list_add_tail(&e->u.list, &ubi->pq[pq_tail]);
				306	dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec);
				307	}
				308
				309	/**
				310	* find_wl_entry - find wear-leveling entry closest to certain erase counter.
				311	* @ubi: UBI device description object
				312	* @root: the RB-tree where to look for
				313	* @diff: maximum possible difference from the smallest erase counter
				314	*
				315	* This function looks for a wear leveling entry with erase counter closest to
				316	* min + @diff, where min is the smallest erase counter.
				317	*/
				318	static struct ubi_wl_entry find_wl_entry(struct ubi_device ubi,
				319	struct rb_root *root, int diff)
				320	{
				321	struct rb_node *p;
				322	struct ubi_wl_entry e, prev_e = NULL;
				323	int max;
				324
				325	e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
				326	max = e->ec + diff;
				327
				328	p = root->rb_node;
				329	while (p) {
				330	struct ubi_wl_entry *e1;
				331
				332	e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
				333	if (e1->ec >= max)
				334	p = p->rb_left;
				335	else {
				336	p = p->rb_right;
				337	prev_e = e;
				338	e = e1;
				339	}
				340	}
				341
				342	/* If no fastmap has been written and this WL entry can be used
				343	* as anchor PEB, hold it back and return the second best WL entry
				344	* such that fastmap can use the anchor PEB later. */
				345	if (prev_e && !ubi->fm_disabled &&
				346	!ubi->fm && e->pnum < UBI_FM_MAX_START)
				347	return prev_e;
				348
				349	return e;
				350	}
				351
				352	/**
				353	* find_mean_wl_entry - find wear-leveling entry with medium erase counter.
				354	* @ubi: UBI device description object
				355	* @root: the RB-tree where to look for
				356	*
				357	* This function looks for a wear leveling entry with medium erase counter,
				358	* but not greater or equivalent than the lowest erase counter plus
				359	* %WL_FREE_MAX_DIFF/2.
				360	*/
				361	static struct ubi_wl_entry find_mean_wl_entry(struct ubi_device ubi,
				362	struct rb_root *root)
				363	{
				364	struct ubi_wl_entry e, first, *last;
				365
				366	first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
				367	last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb);
				368
				369	if (last->ec - first->ec < WL_FREE_MAX_DIFF) {
				370	e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb);
				371
				372	/* If no fastmap has been written and this WL entry can be used
				373	* as anchor PEB, hold it back and return the second best
				374	* WL entry such that fastmap can use the anchor PEB later. */
				375	e = may_reserve_for_fm(ubi, e, root);
				376	} else
				377	e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2);
				378
				379	return e;
				380	}
				381
				382	/**
				383	* wl_get_wle - get a mean wl entry to be used by ubi_wl_get_peb() or
				384	* refill_wl_user_pool().
				385	* @ubi: UBI device description object
				386	*
				387	* This function returns a a wear leveling entry in case of success and
				388	* NULL in case of failure.
				389	*/
				390	static struct ubi_wl_entry wl_get_wle(struct ubi_device ubi)
				391	{
				392	struct ubi_wl_entry *e;
				393
				394	e = find_mean_wl_entry(ubi, &ubi->free);
				395	if (!e) {
				396	ubi_err(ubi, "no free eraseblocks");
				397	return NULL;
				398	}
				399
				400	self_check_in_wl_tree(ubi, e, &ubi->free);
				401
				402	/*
				403	* Move the physical eraseblock to the protection queue where it will
				404	* be protected from being moved for some time.
				405	*/
				406	rb_erase(&e->u.rb, &ubi->free);
				407	ubi->free_count--;
				408	dbg_wl("PEB %d EC %d", e->pnum, e->ec);
				409
				410	return e;
				411	}
				412
				413	/**
				414	* prot_queue_del - remove a physical eraseblock from the protection queue.
				415	* @ubi: UBI device description object
				416	* @pnum: the physical eraseblock to remove
				417	*
				418	* This function deletes PEB @pnum from the protection queue and returns zero
				419	* in case of success and %-ENODEV if the PEB was not found.
				420	*/
				421	static int prot_queue_del(struct ubi_device *ubi, int pnum)
				422	{
				423	struct ubi_wl_entry *e;
				424
				425	e = ubi->lookuptbl[pnum];
				426	if (!e)
				427	return -ENODEV;
				428
				429	if (self_check_in_pq(ubi, e))
				430	return -ENODEV;
				431
				432	list_del(&e->u.list);
				433	dbg_wl("deleted PEB %d from the protection queue", e->pnum);
				434	return 0;
				435	}
				436
				437	/**
				438	* sync_erase - synchronously erase a physical eraseblock.
				439	* @ubi: UBI device description object
				440	* @e: the the physical eraseblock to erase
				441	* @torture: if the physical eraseblock has to be tortured
				442	*
				443	* This function returns zero in case of success and a negative error code in
				444	* case of failure.
				445	*/
				446	static int sync_erase(struct ubi_device ubi, struct ubi_wl_entry e,
				447	int torture)
				448	{
				449	int err;
				450	struct ubi_ec_hdr *ec_hdr;
				451	unsigned long long ec = e->ec;
				452
				453	dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);
				454
				455	err = self_check_ec(ubi, e->pnum, e->ec);
				456	if (err)
				457	return -EINVAL;
				458
				459	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
				460	if (!ec_hdr)
				461	return -ENOMEM;
				462
				463	err = ubi_io_sync_erase(ubi, e->pnum, torture);
				464	if (err < 0)
				465	goto out_free;
				466
				467	ec += err;
				468	if (ec > UBI_MAX_ERASECOUNTER) {
				469	/*
				470	* Erase counter overflow. Upgrade UBI and use 64-bit
				471	* erase counters internally.
				472	*/
				473	ubi_err(ubi, "erase counter overflow at PEB %d, EC %llu",
				474	e->pnum, ec);
				475	err = -EINVAL;
				476	goto out_free;
				477	}
				478
				479	dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);
				480
				481	ec_hdr->ec = cpu_to_be64(ec);
				482
				483	err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
				484	if (err)
				485	goto out_free;
				486
				487	e->ec = ec;
				488	spin_lock(&ubi->wl_lock);
				489	if (e->ec > ubi->max_ec)
				490	ubi->max_ec = e->ec;
				491	spin_unlock(&ubi->wl_lock);
				492
				493	out_free:
				494	kfree(ec_hdr);
				495	return err;
				496	}
				497
				498	/**
				499	* serve_prot_queue - check if it is time to stop protecting PEBs.
				500	* @ubi: UBI device description object
				501	*
				502	* This function is called after each erase operation and removes PEBs from the
				503	* tail of the protection queue. These PEBs have been protected for long enough
				504	* and should be moved to the used tree.
				505	*/
				506	static void serve_prot_queue(struct ubi_device *ubi)
				507	{
				508	struct ubi_wl_entry e, tmp;
				509	int count;
				510
				511	/*
				512	* There may be several protected physical eraseblock to remove,
				513	* process them all.
				514	*/
				515	repeat:
				516	count = 0;
				517	spin_lock(&ubi->wl_lock);
				518	list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) {
				519	dbg_wl("PEB %d EC %d protection over, move to used tree",
				520	e->pnum, e->ec);
				521
				522	list_del(&e->u.list);
				523	wl_tree_add(e, &ubi->used);
				524	if (count++ > 32) {
				525	/*
				526	* Let's be nice and avoid holding the spinlock for
				527	* too long.
				528	*/
				529	spin_unlock(&ubi->wl_lock);
				530	cond_resched();
				531	goto repeat;
				532	}
				533	}
				534
				535	ubi->pq_head += 1;
				536	if (ubi->pq_head == UBI_PROT_QUEUE_LEN)
				537	ubi->pq_head = 0;
				538	ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN);
				539	spin_unlock(&ubi->wl_lock);
				540	}
				541
				542	/**
				543	* __schedule_ubi_work - schedule a work.
				544	* @ubi: UBI device description object
				545	* @wrk: the work to schedule
				546	*
				547	* This function adds a work defined by @wrk to the tail of the pending works
				548	* list. Can only be used if ubi->work_sem is already held in read mode!
				549	*/
				550	static void __schedule_ubi_work(struct ubi_device ubi, struct ubi_work wrk)
				551	{
				552	spin_lock(&ubi->wl_lock);
				553	list_add_tail(&wrk->list, &ubi->works);
				554	ubi_assert(ubi->works_count >= 0);
				555	ubi->works_count += 1;
				556	if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi))
				557	wake_up_process(ubi->bgt_thread);
				558	spin_unlock(&ubi->wl_lock);
				559	}
				560
				561	/**
				562	* schedule_ubi_work - schedule a work.
				563	* @ubi: UBI device description object
				564	* @wrk: the work to schedule
				565	*
				566	* This function adds a work defined by @wrk to the tail of the pending works
				567	* list.
				568	*/
				569	static void schedule_ubi_work(struct ubi_device ubi, struct ubi_work wrk)
				570	{
				571	down_read(&ubi->work_sem);
				572	__schedule_ubi_work(ubi, wrk);
				573	up_read(&ubi->work_sem);
				574	}
				575
				576	static int erase_worker(struct ubi_device ubi, struct ubi_work wl_wrk,
				577	int shutdown);
				578
				579	/**
				580	* schedule_erase - schedule an erase work.
				581	* @ubi: UBI device description object
				582	* @e: the WL entry of the physical eraseblock to erase
				583	* @vol_id: the volume ID that last used this PEB
				584	* @lnum: the last used logical eraseblock number for the PEB
				585	* @torture: if the physical eraseblock has to be tortured
				586	*
				587	* This function returns zero in case of success and a %-ENOMEM in case of
				588	* failure.
				589	*/
				590	static int schedule_erase(struct ubi_device ubi, struct ubi_wl_entry e,
				591	int vol_id, int lnum, int torture, bool nested)
				592	{
				593	struct ubi_work *wl_wrk;
				594
				595	ubi_assert(e);
				596
				597	dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
				598	e->pnum, e->ec, torture);
				599
				600	wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
				601	if (!wl_wrk)
				602	return -ENOMEM;
				603
				604	wl_wrk->func = &erase_worker;
				605	wl_wrk->e = e;
				606	wl_wrk->vol_id = vol_id;
				607	wl_wrk->lnum = lnum;
				608	wl_wrk->torture = torture;
				609
				610	if (nested)
				611	__schedule_ubi_work(ubi, wl_wrk);
				612	else
				613	schedule_ubi_work(ubi, wl_wrk);
				614	return 0;
				615	}
				616
				617	static int __erase_worker(struct ubi_device ubi, struct ubi_work wl_wrk);
				618	/**
				619	* do_sync_erase - run the erase worker synchronously.
				620	* @ubi: UBI device description object
				621	* @e: the WL entry of the physical eraseblock to erase
				622	* @vol_id: the volume ID that last used this PEB
				623	* @lnum: the last used logical eraseblock number for the PEB
				624	* @torture: if the physical eraseblock has to be tortured
				625	*
				626	*/
				627	static int do_sync_erase(struct ubi_device ubi, struct ubi_wl_entry e,
				628	int vol_id, int lnum, int torture)
				629	{
				630	struct ubi_work wl_wrk;
				631
				632	dbg_wl("sync erase of PEB %i", e->pnum);
				633
				634	wl_wrk.e = e;
				635	wl_wrk.vol_id = vol_id;
				636	wl_wrk.lnum = lnum;
				637	wl_wrk.torture = torture;
				638
				639	return __erase_worker(ubi, &wl_wrk);
				640	}
				641
				642	static int ensure_wear_leveling(struct ubi_device *ubi, int nested);
				643	/**
				644	* wear_leveling_worker - wear-leveling worker function.
				645	* @ubi: UBI device description object
				646	* @wrk: the work object
				647	* @shutdown: non-zero if the worker has to free memory and exit
				648	* because the WL-subsystem is shutting down
				649	*
				650	* This function copies the contents of a less worn-out used physical
				651	* eraseblock (or of one that needs scrubbing) to a more worn-out free one.
				652	* Returns zero in case of success and a negative error code in case of failure.
				653	*/
				654	static int wear_leveling_worker(struct ubi_device ubi, struct ubi_work wrk,
				655	int shutdown)
				656	{
				657	int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0;
				658	int erase = 0, keep = 0, vol_id = -1, lnum = -1;
				659	#ifdef CONFIG_MTD_UBI_FASTMAP
				660	int anchor = wrk->anchor;
				661	#endif
				662	struct ubi_wl_entry e1, e2;
				663	struct ubi_vid_io_buf *vidb;
				664	struct ubi_vid_hdr *vid_hdr;
				665	int dst_leb_clean = 0;
				666
				667	kfree(wrk);
				668	if (shutdown)
				669	return 0;
				670
				671	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
				672	if (!vidb)
				673	return -ENOMEM;
				674
				675	vid_hdr = ubi_get_vid_hdr(vidb);
				676
				677	down_read(&ubi->fm_eba_sem);
				678	mutex_lock(&ubi->move_mutex);
				679	spin_lock(&ubi->wl_lock);
				680	ubi_assert(!ubi->move_from && !ubi->move_to);
				681	ubi_assert(!ubi->move_to_put);
				682
				683	if (!ubi->free.rb_node \|\|
				684	(!ubi->used.rb_node && !ubi->scrub.rb_node)) {
				685	/*
				686	* No free physical eraseblocks? Well, they must be waiting in
				687	* the queue to be erased. Cancel movement - it will be
				688	* triggered again when a free physical eraseblock appears.
				689	*
				690	* No used physical eraseblocks? They must be temporarily
				691	* protected from being moved. They will be moved to the
				692	* @ubi->used tree later and the wear-leveling will be
				693	* triggered again.
				694	*/
				695	dbg_wl("cancel WL, a list is empty: free %d, used %d",
				696	!ubi->free.rb_node, !ubi->used.rb_node);
				697	goto out_cancel;
				698	}
				699
				700	#ifdef CONFIG_MTD_UBI_FASTMAP
				701	/* Check whether we need to produce an anchor PEB */
				702	if (!anchor)
				703	anchor = !anchor_pebs_available(&ubi->free);
				704
				705	if (anchor) {
				706	e1 = find_anchor_wl_entry(&ubi->used);
				707	if (!e1)
				708	goto out_cancel;
				709	e2 = get_peb_for_wl(ubi);
				710	if (!e2)
				711	goto out_cancel;
				712
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	713	/*
				714	* Anchor move within the anchor area is useless.
				715	*/
				716	if (e2->pnum < UBI_FM_MAX_START)
				717	goto out_cancel;
				718
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	719	self_check_in_wl_tree(ubi, e1, &ubi->used);
				720	rb_erase(&e1->u.rb, &ubi->used);
				721	dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum);
				722	} else if (!ubi->scrub.rb_node) {
				723	#else
				724	if (!ubi->scrub.rb_node) {
				725	#endif
				726	/*
				727	* Now pick the least worn-out used physical eraseblock and a
				728	* highly worn-out free physical eraseblock. If the erase
				729	* counters differ much enough, start wear-leveling.
				730	*/
				731	e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
				732	e2 = get_peb_for_wl(ubi);
				733	if (!e2)
				734	goto out_cancel;
				735
				736	if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
				737	dbg_wl("no WL needed: min used EC %d, max free EC %d",
				738	e1->ec, e2->ec);
				739
				740	/* Give the unused PEB back */
				741	wl_tree_add(e2, &ubi->free);
				742	ubi->free_count++;
				743	goto out_cancel;
				744	}
				745	self_check_in_wl_tree(ubi, e1, &ubi->used);
				746	rb_erase(&e1->u.rb, &ubi->used);
				747	dbg_wl("move PEB %d EC %d to PEB %d EC %d",
				748	e1->pnum, e1->ec, e2->pnum, e2->ec);
				749	} else {
				750	/* Perform scrubbing */
				751	scrubbing = 1;
				752	e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb);
				753	e2 = get_peb_for_wl(ubi);
				754	if (!e2)
				755	goto out_cancel;
				756
				757	self_check_in_wl_tree(ubi, e1, &ubi->scrub);
				758	rb_erase(&e1->u.rb, &ubi->scrub);
				759	dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
				760	}
				761
				762	ubi->move_from = e1;
				763	ubi->move_to = e2;
				764	spin_unlock(&ubi->wl_lock);
				765
				766	/*
				767	* Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
				768	* We so far do not know which logical eraseblock our physical
				769	* eraseblock (@e1) belongs to. We have to read the volume identifier
				770	* header first.
				771	*
				772	* Note, we are protected from this PEB being unmapped and erased. The
				773	* 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
				774	* which is being moved was unmapped.
				775	*/
				776
				777	err = ubi_io_read_vid_hdr(ubi, e1->pnum, vidb, 0);
				778	if (err && err != UBI_IO_BITFLIPS) {
				779	dst_leb_clean = 1;
				780	if (err == UBI_IO_FF) {
				781	/*
				782	* We are trying to move PEB without a VID header. UBI
				783	* always write VID headers shortly after the PEB was
				784	* given, so we have a situation when it has not yet
				785	* had a chance to write it, because it was preempted.
				786	* So add this PEB to the protection queue so far,
				787	* because presumably more data will be written there
				788	* (including the missing VID header), and then we'll
				789	* move it.
				790	*/
				791	dbg_wl("PEB %d has no VID header", e1->pnum);
				792	protect = 1;
				793	goto out_not_moved;
				794	} else if (err == UBI_IO_FF_BITFLIPS) {
				795	/*
				796	* The same situation as %UBI_IO_FF, but bit-flips were
				797	* detected. It is better to schedule this PEB for
				798	* scrubbing.
				799	*/
				800	dbg_wl("PEB %d has no VID header but has bit-flips",
				801	e1->pnum);
				802	scrubbing = 1;
				803	goto out_not_moved;
				804	} else if (ubi->fast_attach && err == UBI_IO_BAD_HDR_EBADMSG) {
				805	/*
				806	* While a full scan would detect interrupted erasures
				807	* at attach time we can face them here when attached from
				808	* Fastmap.
				809	*/
				810	dbg_wl("PEB %d has ECC errors, maybe from an interrupted erasure",
				811	e1->pnum);
				812	erase = 1;
				813	goto out_not_moved;
				814	}
				815
				816	ubi_err(ubi, "error %d while reading VID header from PEB %d",
				817	err, e1->pnum);
				818	goto out_error;
				819	}
				820
				821	vol_id = be32_to_cpu(vid_hdr->vol_id);
				822	lnum = be32_to_cpu(vid_hdr->lnum);
				823
				824	err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vidb);
				825	if (err) {
				826	if (err == MOVE_CANCEL_RACE) {
				827	/*
				828	* The LEB has not been moved because the volume is
				829	* being deleted or the PEB has been put meanwhile. We
				830	* should prevent this PEB from being selected for
				831	* wear-leveling movement again, so put it to the
				832	* protection queue.
				833	*/
				834	protect = 1;
				835	dst_leb_clean = 1;
				836	goto out_not_moved;
				837	}
				838	if (err == MOVE_RETRY) {
				839	scrubbing = 1;
				840	dst_leb_clean = 1;
				841	goto out_not_moved;
				842	}
				843	if (err == MOVE_TARGET_BITFLIPS \|\| err == MOVE_TARGET_WR_ERR \|\|
				844	err == MOVE_TARGET_RD_ERR) {
				845	/*
				846	* Target PEB had bit-flips or write error - torture it.
				847	*/
				848	torture = 1;
				849	keep = 1;
				850	goto out_not_moved;
				851	}
				852
				853	if (err == MOVE_SOURCE_RD_ERR) {
				854	/*
				855	* An error happened while reading the source PEB. Do
				856	* not switch to R/O mode in this case, and give the
				857	* upper layers a possibility to recover from this,
				858	* e.g. by unmapping corresponding LEB. Instead, just
				859	* put this PEB to the @ubi->erroneous list to prevent
				860	* UBI from trying to move it over and over again.
				861	*/
				862	if (ubi->erroneous_peb_count > ubi->max_erroneous) {
				863	ubi_err(ubi, "too many erroneous eraseblocks (%d)",
				864	ubi->erroneous_peb_count);
				865	goto out_error;
				866	}
				867	dst_leb_clean = 1;
				868	erroneous = 1;
				869	goto out_not_moved;
				870	}
				871
				872	if (err < 0)
				873	goto out_error;
				874
				875	ubi_assert(0);
				876	}
				877
				878	/* The PEB has been successfully moved */
				879	if (scrubbing)
				880	ubi_msg(ubi, "scrubbed PEB %d (LEB %d:%d), data moved to PEB %d",
				881	e1->pnum, vol_id, lnum, e2->pnum);
				882	ubi_free_vid_buf(vidb);
				883
				884	spin_lock(&ubi->wl_lock);
				885	if (!ubi->move_to_put) {
				886	wl_tree_add(e2, &ubi->used);
				887	e2 = NULL;
				888	}
				889	ubi->move_from = ubi->move_to = NULL;
				890	ubi->move_to_put = ubi->wl_scheduled = 0;
				891	spin_unlock(&ubi->wl_lock);
				892
				893	err = do_sync_erase(ubi, e1, vol_id, lnum, 0);
				894	if (err) {
				895	if (e2)
				896	wl_entry_destroy(ubi, e2);
				897	goto out_ro;
				898	}
				899
				900	if (e2) {
				901	/*
				902	* Well, the target PEB was put meanwhile, schedule it for
				903	* erasure.
				904	*/
				905	dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase",
				906	e2->pnum, vol_id, lnum);
				907	err = do_sync_erase(ubi, e2, vol_id, lnum, 0);
				908	if (err)
				909	goto out_ro;
				910	}
				911
				912	dbg_wl("done");
				913	mutex_unlock(&ubi->move_mutex);
				914	up_read(&ubi->fm_eba_sem);
				915	return 0;
				916
				917	/*
				918	* For some reasons the LEB was not moved, might be an error, might be
				919	* something else. @e1 was not changed, so return it back. @e2 might
				920	* have been changed, schedule it for erasure.
				921	*/
				922	out_not_moved:
				923	if (vol_id != -1)
				924	dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)",
				925	e1->pnum, vol_id, lnum, e2->pnum, err);
				926	else
				927	dbg_wl("cancel moving PEB %d to PEB %d (%d)",
				928	e1->pnum, e2->pnum, err);
				929	spin_lock(&ubi->wl_lock);
				930	if (protect)
				931	prot_queue_add(ubi, e1);
				932	else if (erroneous) {
				933	wl_tree_add(e1, &ubi->erroneous);
				934	ubi->erroneous_peb_count += 1;
				935	} else if (scrubbing)
				936	wl_tree_add(e1, &ubi->scrub);
				937	else if (keep)
				938	wl_tree_add(e1, &ubi->used);
				939	if (dst_leb_clean) {
				940	wl_tree_add(e2, &ubi->free);
				941	ubi->free_count++;
				942	}
				943
				944	ubi_assert(!ubi->move_to_put);
				945	ubi->move_from = ubi->move_to = NULL;
				946	ubi->wl_scheduled = 0;
				947	spin_unlock(&ubi->wl_lock);
				948
				949	ubi_free_vid_buf(vidb);
				950	if (dst_leb_clean) {
				951	ensure_wear_leveling(ubi, 1);
				952	} else {
				953	err = do_sync_erase(ubi, e2, vol_id, lnum, torture);
				954	if (err)
				955	goto out_ro;
				956	}
				957
				958	if (erase) {
				959	err = do_sync_erase(ubi, e1, vol_id, lnum, 1);
				960	if (err)
				961	goto out_ro;
				962	}
				963
				964	mutex_unlock(&ubi->move_mutex);
				965	up_read(&ubi->fm_eba_sem);
				966	return 0;
				967
				968	out_error:
				969	if (vol_id != -1)
				970	ubi_err(ubi, "error %d while moving PEB %d to PEB %d",
				971	err, e1->pnum, e2->pnum);
				972	else
				973	ubi_err(ubi, "error %d while moving PEB %d (LEB %d:%d) to PEB %d",
				974	err, e1->pnum, vol_id, lnum, e2->pnum);
				975	spin_lock(&ubi->wl_lock);
				976	ubi->move_from = ubi->move_to = NULL;
				977	ubi->move_to_put = ubi->wl_scheduled = 0;
				978	spin_unlock(&ubi->wl_lock);
				979
				980	ubi_free_vid_buf(vidb);
				981	wl_entry_destroy(ubi, e1);
				982	wl_entry_destroy(ubi, e2);
				983
				984	out_ro:
				985	ubi_ro_mode(ubi);
				986	mutex_unlock(&ubi->move_mutex);
				987	up_read(&ubi->fm_eba_sem);
				988	ubi_assert(err != 0);
				989	return err < 0 ? err : -EIO;
				990
				991	out_cancel:
				992	ubi->wl_scheduled = 0;
				993	spin_unlock(&ubi->wl_lock);
				994	mutex_unlock(&ubi->move_mutex);
				995	up_read(&ubi->fm_eba_sem);
				996	ubi_free_vid_buf(vidb);
				997	return 0;
				998	}
				999
				1000	/**
				1001	* ensure_wear_leveling - schedule wear-leveling if it is needed.
				1002	* @ubi: UBI device description object
				1003	* @nested: set to non-zero if this function is called from UBI worker
				1004	*
				1005	* This function checks if it is time to start wear-leveling and schedules it
				1006	* if yes. This function returns zero in case of success and a negative error
				1007	* code in case of failure.
				1008	*/
				1009	static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
				1010	{
				1011	int err = 0;
				1012	struct ubi_wl_entry *e1;
				1013	struct ubi_wl_entry *e2;
				1014	struct ubi_work *wrk;
				1015
				1016	spin_lock(&ubi->wl_lock);
				1017	if (ubi->wl_scheduled)
				1018	/* Wear-leveling is already in the work queue */
				1019	goto out_unlock;
				1020
				1021	/*
				1022	* If the ubi->scrub tree is not empty, scrubbing is needed, and the
				1023	* the WL worker has to be scheduled anyway.
				1024	*/
				1025	if (!ubi->scrub.rb_node) {
				1026	if (!ubi->used.rb_node \|\| !ubi->free.rb_node)
				1027	/* No physical eraseblocks - no deal */
				1028	goto out_unlock;
				1029
				1030	/*
				1031	* We schedule wear-leveling only if the difference between the
				1032	* lowest erase counter of used physical eraseblocks and a high
				1033	* erase counter of free physical eraseblocks is greater than
				1034	* %UBI_WL_THRESHOLD.
				1035	*/
				1036	e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
				1037	e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
				1038
				1039	if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
				1040	goto out_unlock;
				1041	dbg_wl("schedule wear-leveling");
				1042	} else
				1043	dbg_wl("schedule scrubbing");
				1044
				1045	ubi->wl_scheduled = 1;
				1046	spin_unlock(&ubi->wl_lock);
				1047
				1048	wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
				1049	if (!wrk) {
				1050	err = -ENOMEM;
				1051	goto out_cancel;
				1052	}
				1053
				1054	wrk->anchor = 0;
				1055	wrk->func = &wear_leveling_worker;
				1056	if (nested)
				1057	__schedule_ubi_work(ubi, wrk);
				1058	else
				1059	schedule_ubi_work(ubi, wrk);
				1060	return err;
				1061
				1062	out_cancel:
				1063	spin_lock(&ubi->wl_lock);
				1064	ubi->wl_scheduled = 0;
				1065	out_unlock:
				1066	spin_unlock(&ubi->wl_lock);
				1067	return err;
				1068	}
				1069
				1070	/**
				1071	* __erase_worker - physical eraseblock erase worker function.
				1072	* @ubi: UBI device description object
				1073	* @wl_wrk: the work object
				1074	* @shutdown: non-zero if the worker has to free memory and exit
				1075	* because the WL sub-system is shutting down
				1076	*
				1077	* This function erases a physical eraseblock and perform torture testing if
				1078	* needed. It also takes care about marking the physical eraseblock bad if
				1079	* needed. Returns zero in case of success and a negative error code in case of
				1080	* failure.
				1081	*/
				1082	static int __erase_worker(struct ubi_device ubi, struct ubi_work wl_wrk)
				1083	{
				1084	struct ubi_wl_entry *e = wl_wrk->e;
				1085	int pnum = e->pnum;
				1086	int vol_id = wl_wrk->vol_id;
				1087	int lnum = wl_wrk->lnum;
				1088	int err, available_consumed = 0;
				1089
				1090	dbg_wl("erase PEB %d EC %d LEB %d:%d",
				1091	pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum);
				1092
				1093	err = sync_erase(ubi, e, wl_wrk->torture);
				1094	if (!err) {
				1095	spin_lock(&ubi->wl_lock);
				1096	wl_tree_add(e, &ubi->free);
				1097	ubi->free_count++;
				1098	spin_unlock(&ubi->wl_lock);
				1099
				1100	/*
				1101	* One more erase operation has happened, take care about
				1102	* protected physical eraseblocks.
				1103	*/
				1104	serve_prot_queue(ubi);
				1105
				1106	/* And take care about wear-leveling */
				1107	err = ensure_wear_leveling(ubi, 1);
				1108	return err;
				1109	}
				1110
				1111	ubi_err(ubi, "failed to erase PEB %d, error %d", pnum, err);
				1112
				1113	if (err == -EINTR \|\| err == -ENOMEM \|\| err == -EAGAIN \|\|
				1114	err == -EBUSY) {
				1115	int err1;
				1116
				1117	/* Re-schedule the LEB for erasure */
				1118	err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false);
				1119	if (err1) {
				1120	wl_entry_destroy(ubi, e);
				1121	err = err1;
				1122	goto out_ro;
				1123	}
				1124	return err;
				1125	}
				1126
				1127	wl_entry_destroy(ubi, e);
				1128	if (err != -EIO)
				1129	/*
				1130	* If this is not %-EIO, we have no idea what to do. Scheduling
				1131	* this physical eraseblock for erasure again would cause
				1132	* errors again and again. Well, lets switch to R/O mode.
				1133	*/
				1134	goto out_ro;
				1135
				1136	/* It is %-EIO, the PEB went bad */
				1137
				1138	if (!ubi->bad_allowed) {
				1139	ubi_err(ubi, "bad physical eraseblock %d detected", pnum);
				1140	goto out_ro;
				1141	}
				1142
				1143	spin_lock(&ubi->volumes_lock);
				1144	if (ubi->beb_rsvd_pebs == 0) {
				1145	if (ubi->avail_pebs == 0) {
				1146	spin_unlock(&ubi->volumes_lock);
				1147	ubi_err(ubi, "no reserved/available physical eraseblocks");
				1148	goto out_ro;
				1149	}
				1150	ubi->avail_pebs -= 1;
				1151	available_consumed = 1;
				1152	}
				1153	spin_unlock(&ubi->volumes_lock);
				1154
				1155	ubi_msg(ubi, "mark PEB %d as bad", pnum);
				1156	err = ubi_io_mark_bad(ubi, pnum);
				1157	if (err)
				1158	goto out_ro;
				1159
				1160	spin_lock(&ubi->volumes_lock);
				1161	if (ubi->beb_rsvd_pebs > 0) {
				1162	if (available_consumed) {
				1163	/*
				1164	* The amount of reserved PEBs increased since we last
				1165	* checked.
				1166	*/
				1167	ubi->avail_pebs += 1;
				1168	available_consumed = 0;
				1169	}
				1170	ubi->beb_rsvd_pebs -= 1;
				1171	}
				1172	ubi->bad_peb_count += 1;
				1173	ubi->good_peb_count -= 1;
				1174	ubi_calculate_reserved(ubi);
				1175	if (available_consumed)
				1176	ubi_warn(ubi, "no PEBs in the reserved pool, used an available PEB");
				1177	else if (ubi->beb_rsvd_pebs)
				1178	ubi_msg(ubi, "%d PEBs left in the reserve",
				1179	ubi->beb_rsvd_pebs);
				1180	else
				1181	ubi_warn(ubi, "last PEB from the reserve was used");
				1182	spin_unlock(&ubi->volumes_lock);
				1183
				1184	return err;
				1185
				1186	out_ro:
				1187	if (available_consumed) {
				1188	spin_lock(&ubi->volumes_lock);
				1189	ubi->avail_pebs += 1;
				1190	spin_unlock(&ubi->volumes_lock);
				1191	}
				1192	ubi_ro_mode(ubi);
				1193	return err;
				1194	}
				1195
				1196	static int erase_worker(struct ubi_device ubi, struct ubi_work wl_wrk,
				1197	int shutdown)
				1198	{
				1199	int ret;
				1200
				1201	if (shutdown) {
				1202	struct ubi_wl_entry *e = wl_wrk->e;
				1203
				1204	dbg_wl("cancel erasure of PEB %d EC %d", e->pnum, e->ec);
				1205	kfree(wl_wrk);
				1206	wl_entry_destroy(ubi, e);
				1207	return 0;
				1208	}
				1209
				1210	ret = __erase_worker(ubi, wl_wrk);
				1211	kfree(wl_wrk);
				1212	return ret;
				1213	}
				1214
				1215	/**
				1216	* ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
				1217	* @ubi: UBI device description object
				1218	* @vol_id: the volume ID that last used this PEB
				1219	* @lnum: the last used logical eraseblock number for the PEB
				1220	* @pnum: physical eraseblock to return
				1221	* @torture: if this physical eraseblock has to be tortured
				1222	*
				1223	* This function is called to return physical eraseblock @pnum to the pool of
				1224	* free physical eraseblocks. The @torture flag has to be set if an I/O error
				1225	* occurred to this @pnum and it has to be tested. This function returns zero
				1226	* in case of success, and a negative error code in case of failure.
				1227	*/
				1228	int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum,
				1229	int pnum, int torture)
				1230	{
				1231	int err;
				1232	struct ubi_wl_entry *e;
				1233
				1234	dbg_wl("PEB %d", pnum);
				1235	ubi_assert(pnum >= 0);
				1236	ubi_assert(pnum < ubi->peb_count);
				1237
				1238	down_read(&ubi->fm_protect);
				1239
				1240	retry:
				1241	spin_lock(&ubi->wl_lock);
				1242	e = ubi->lookuptbl[pnum];
				1243	if (e == ubi->move_from) {
				1244	/*
				1245	* User is putting the physical eraseblock which was selected to
				1246	* be moved. It will be scheduled for erasure in the
				1247	* wear-leveling worker.
				1248	*/
				1249	dbg_wl("PEB %d is being moved, wait", pnum);
				1250	spin_unlock(&ubi->wl_lock);
				1251
				1252	/* Wait for the WL worker by taking the @ubi->move_mutex */
				1253	mutex_lock(&ubi->move_mutex);
				1254	mutex_unlock(&ubi->move_mutex);
				1255	goto retry;
				1256	} else if (e == ubi->move_to) {
				1257	/*
				1258	* User is putting the physical eraseblock which was selected
				1259	* as the target the data is moved to. It may happen if the EBA
				1260	* sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
				1261	* but the WL sub-system has not put the PEB to the "used" tree
				1262	* yet, but it is about to do this. So we just set a flag which
				1263	* will tell the WL worker that the PEB is not needed anymore
				1264	* and should be scheduled for erasure.
				1265	*/
				1266	dbg_wl("PEB %d is the target of data moving", pnum);
				1267	ubi_assert(!ubi->move_to_put);
				1268	ubi->move_to_put = 1;
				1269	spin_unlock(&ubi->wl_lock);
				1270	up_read(&ubi->fm_protect);
				1271	return 0;
				1272	} else {
				1273	if (in_wl_tree(e, &ubi->used)) {
				1274	self_check_in_wl_tree(ubi, e, &ubi->used);
				1275	rb_erase(&e->u.rb, &ubi->used);
				1276	} else if (in_wl_tree(e, &ubi->scrub)) {
				1277	self_check_in_wl_tree(ubi, e, &ubi->scrub);
				1278	rb_erase(&e->u.rb, &ubi->scrub);
				1279	} else if (in_wl_tree(e, &ubi->erroneous)) {
				1280	self_check_in_wl_tree(ubi, e, &ubi->erroneous);
				1281	rb_erase(&e->u.rb, &ubi->erroneous);
				1282	ubi->erroneous_peb_count -= 1;
				1283	ubi_assert(ubi->erroneous_peb_count >= 0);
				1284	/* Erroneous PEBs should be tortured */
				1285	torture = 1;
				1286	} else {
				1287	err = prot_queue_del(ubi, e->pnum);
				1288	if (err) {
				1289	ubi_err(ubi, "PEB %d not found", pnum);
				1290	ubi_ro_mode(ubi);
				1291	spin_unlock(&ubi->wl_lock);
				1292	up_read(&ubi->fm_protect);
				1293	return err;
				1294	}
				1295	}
				1296	}
				1297	spin_unlock(&ubi->wl_lock);
				1298
				1299	err = schedule_erase(ubi, e, vol_id, lnum, torture, false);
				1300	if (err) {
				1301	spin_lock(&ubi->wl_lock);
				1302	wl_tree_add(e, &ubi->used);
				1303	spin_unlock(&ubi->wl_lock);
				1304	}
				1305
				1306	up_read(&ubi->fm_protect);
				1307	return err;
				1308	}
				1309
				1310	/**
				1311	* ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing.
				1312	* @ubi: UBI device description object
				1313	* @pnum: the physical eraseblock to schedule
				1314	*
				1315	* If a bit-flip in a physical eraseblock is detected, this physical eraseblock
				1316	* needs scrubbing. This function schedules a physical eraseblock for
				1317	* scrubbing which is done in background. This function returns zero in case of
				1318	* success and a negative error code in case of failure.
				1319	*/
				1320	int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
				1321	{
				1322	struct ubi_wl_entry *e;
				1323
				1324	ubi_msg(ubi, "schedule PEB %d for scrubbing", pnum);
				1325
				1326	retry:
				1327	spin_lock(&ubi->wl_lock);
				1328	e = ubi->lookuptbl[pnum];
				1329	if (e == ubi->move_from \|\| in_wl_tree(e, &ubi->scrub) \|\|
				1330	in_wl_tree(e, &ubi->erroneous)) {
				1331	spin_unlock(&ubi->wl_lock);
				1332	return 0;
				1333	}
				1334
				1335	if (e == ubi->move_to) {
				1336	/*
				1337	* This physical eraseblock was used to move data to. The data
				1338	* was moved but the PEB was not yet inserted to the proper
				1339	* tree. We should just wait a little and let the WL worker
				1340	* proceed.
				1341	*/
				1342	spin_unlock(&ubi->wl_lock);
				1343	dbg_wl("the PEB %d is not in proper tree, retry", pnum);
				1344	yield();
				1345	goto retry;
				1346	}
				1347
				1348	if (in_wl_tree(e, &ubi->used)) {
				1349	self_check_in_wl_tree(ubi, e, &ubi->used);
				1350	rb_erase(&e->u.rb, &ubi->used);
				1351	} else {
				1352	int err;
				1353
				1354	err = prot_queue_del(ubi, e->pnum);
				1355	if (err) {
				1356	ubi_err(ubi, "PEB %d not found", pnum);
				1357	ubi_ro_mode(ubi);
				1358	spin_unlock(&ubi->wl_lock);
				1359	return err;
				1360	}
				1361	}
				1362
				1363	wl_tree_add(e, &ubi->scrub);
				1364	spin_unlock(&ubi->wl_lock);
				1365
				1366	/*
				1367	* Technically scrubbing is the same as wear-leveling, so it is done
				1368	* by the WL worker.
				1369	*/
				1370	return ensure_wear_leveling(ubi, 0);
				1371	}
				1372
				1373	/**
				1374	* ubi_wl_flush - flush all pending works.
				1375	* @ubi: UBI device description object
				1376	* @vol_id: the volume id to flush for
				1377	* @lnum: the logical eraseblock number to flush for
				1378	*
				1379	* This function executes all pending works for a particular volume id /
				1380	* logical eraseblock number pair. If either value is set to %UBI_ALL, then it
				1381	* acts as a wildcard for all of the corresponding volume numbers or logical
				1382	* eraseblock numbers. It returns zero in case of success and a negative error
				1383	* code in case of failure.
				1384	*/
				1385	int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
				1386	{
				1387	int err = 0;
				1388	int found = 1;
				1389
				1390	/*
				1391	* Erase while the pending works queue is not empty, but not more than
				1392	* the number of currently pending works.
				1393	*/
				1394	dbg_wl("flush pending work for LEB %d:%d (%d pending works)",
				1395	vol_id, lnum, ubi->works_count);
				1396
				1397	while (found) {
				1398	struct ubi_work wrk, tmp;
				1399	found = 0;
				1400
				1401	down_read(&ubi->work_sem);
				1402	spin_lock(&ubi->wl_lock);
				1403	list_for_each_entry_safe(wrk, tmp, &ubi->works, list) {
				1404	if ((vol_id == UBI_ALL \|\| wrk->vol_id == vol_id) &&
				1405	(lnum == UBI_ALL \|\| wrk->lnum == lnum)) {
				1406	list_del(&wrk->list);
				1407	ubi->works_count -= 1;
				1408	ubi_assert(ubi->works_count >= 0);
				1409	spin_unlock(&ubi->wl_lock);
				1410
				1411	err = wrk->func(ubi, wrk, 0);
				1412	if (err) {
				1413	up_read(&ubi->work_sem);
				1414	return err;
				1415	}
				1416
				1417	spin_lock(&ubi->wl_lock);
				1418	found = 1;
				1419	break;
				1420	}
				1421	}
				1422	spin_unlock(&ubi->wl_lock);
				1423	up_read(&ubi->work_sem);
				1424	}
				1425
				1426	/*
				1427	* Make sure all the works which have been done in parallel are
				1428	* finished.
				1429	*/
				1430	down_write(&ubi->work_sem);
				1431	up_write(&ubi->work_sem);
				1432
				1433	return err;
				1434	}
				1435
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1436	static bool scrub_possible(struct ubi_device ubi, struct ubi_wl_entry e)
				1437	{
				1438	if (in_wl_tree(e, &ubi->scrub))
				1439	return false;
				1440	else if (in_wl_tree(e, &ubi->erroneous))
				1441	return false;
				1442	else if (ubi->move_from == e)
				1443	return false;
				1444	else if (ubi->move_to == e)
				1445	return false;
				1446
				1447	return true;
				1448	}
				1449
				1450	/**
				1451	* ubi_bitflip_check - Check an eraseblock for bitflips and scrub it if needed.
				1452	* @ubi: UBI device description object
				1453	* @pnum: the physical eraseblock to schedule
				1454	* @force: dont't read the block, assume bitflips happened and take action.
				1455	*
				1456	* This function reads the given eraseblock and checks if bitflips occured.
				1457	* In case of bitflips, the eraseblock is scheduled for scrubbing.
				1458	* If scrubbing is forced with @force, the eraseblock is not read,
				1459	* but scheduled for scrubbing right away.
				1460	*
				1461	* Returns:
				1462	* %EINVAL, PEB is out of range
				1463	* %ENOENT, PEB is no longer used by UBI
				1464	* %EBUSY, PEB cannot be checked now or a check is currently running on it
				1465	* %EAGAIN, bit flips happened but scrubbing is currently not possible
				1466	* %EUCLEAN, bit flips happened and PEB is scheduled for scrubbing
				1467	* %0, no bit flips detected
				1468	*/
				1469	int ubi_bitflip_check(struct ubi_device *ubi, int pnum, int force)
				1470	{
				1471	int err = 0;
				1472	struct ubi_wl_entry *e;
				1473
				1474	if (pnum < 0 \|\| pnum >= ubi->peb_count) {
				1475	err = -EINVAL;
				1476	goto out;
				1477	}
				1478
				1479	/*
				1480	* Pause all parallel work, otherwise it can happen that the
				1481	* erase worker frees a wl entry under us.
				1482	*/
				1483	down_write(&ubi->work_sem);
				1484
				1485	/*
				1486	* Make sure that the wl entry does not change state while
				1487	* inspecting it.
				1488	*/
				1489	spin_lock(&ubi->wl_lock);
				1490	e = ubi->lookuptbl[pnum];
				1491	if (!e) {
				1492	spin_unlock(&ubi->wl_lock);
				1493	err = -ENOENT;
				1494	goto out_resume;
				1495	}
				1496
				1497	/*
				1498	* Does it make sense to check this PEB?
				1499	*/
				1500	if (!scrub_possible(ubi, e)) {
				1501	spin_unlock(&ubi->wl_lock);
				1502	err = -EBUSY;
				1503	goto out_resume;
				1504	}
				1505	spin_unlock(&ubi->wl_lock);
				1506
				1507	if (!force) {
				1508	mutex_lock(&ubi->buf_mutex);
				1509	err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size);
				1510	mutex_unlock(&ubi->buf_mutex);
				1511	}
				1512
				1513	if (force \|\| err == UBI_IO_BITFLIPS) {
				1514	/*
				1515	* Okay, bit flip happened, let's figure out what we can do.
				1516	*/
				1517	spin_lock(&ubi->wl_lock);
				1518
				1519	/*
				1520	* Recheck. We released wl_lock, UBI might have killed the
				1521	* wl entry under us.
				1522	*/
				1523	e = ubi->lookuptbl[pnum];
				1524	if (!e) {
				1525	spin_unlock(&ubi->wl_lock);
				1526	err = -ENOENT;
				1527	goto out_resume;
				1528	}
				1529
				1530	/*
				1531	* Need to re-check state
				1532	*/
				1533	if (!scrub_possible(ubi, e)) {
				1534	spin_unlock(&ubi->wl_lock);
				1535	err = -EBUSY;
				1536	goto out_resume;
				1537	}
				1538
				1539	if (in_pq(ubi, e)) {
				1540	prot_queue_del(ubi, e->pnum);
				1541	wl_tree_add(e, &ubi->scrub);
				1542	spin_unlock(&ubi->wl_lock);
				1543
				1544	err = ensure_wear_leveling(ubi, 1);
				1545	} else if (in_wl_tree(e, &ubi->used)) {
				1546	rb_erase(&e->u.rb, &ubi->used);
				1547	wl_tree_add(e, &ubi->scrub);
				1548	spin_unlock(&ubi->wl_lock);
				1549
				1550	err = ensure_wear_leveling(ubi, 1);
				1551	} else if (in_wl_tree(e, &ubi->free)) {
				1552	rb_erase(&e->u.rb, &ubi->free);
				1553	ubi->free_count--;
				1554	spin_unlock(&ubi->wl_lock);
				1555
				1556	/*
				1557	* This PEB is empty we can schedule it for
				1558	* erasure right away. No wear leveling needed.
				1559	*/
				1560	err = schedule_erase(ubi, e, UBI_UNKNOWN, UBI_UNKNOWN,
				1561	force ? 0 : 1, true);
				1562	} else {
				1563	spin_unlock(&ubi->wl_lock);
				1564	err = -EAGAIN;
				1565	}
				1566
				1567	if (!err && !force)
				1568	err = -EUCLEAN;
				1569	} else {
				1570	err = 0;
				1571	}
				1572
				1573	out_resume:
				1574	up_write(&ubi->work_sem);
				1575	out:
				1576
				1577	return err;
				1578	}
				1579
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1580	/**
				1581	* tree_destroy - destroy an RB-tree.
				1582	* @ubi: UBI device description object
				1583	* @root: the root of the tree to destroy
				1584	*/
				1585	static void tree_destroy(struct ubi_device ubi, struct rb_root root)
				1586	{
				1587	struct rb_node *rb;
				1588	struct ubi_wl_entry *e;
				1589
				1590	rb = root->rb_node;
				1591	while (rb) {
				1592	if (rb->rb_left)
				1593	rb = rb->rb_left;
				1594	else if (rb->rb_right)
				1595	rb = rb->rb_right;
				1596	else {
				1597	e = rb_entry(rb, struct ubi_wl_entry, u.rb);
				1598
				1599	rb = rb_parent(rb);
				1600	if (rb) {
				1601	if (rb->rb_left == &e->u.rb)
				1602	rb->rb_left = NULL;
				1603	else
				1604	rb->rb_right = NULL;
				1605	}
				1606
				1607	wl_entry_destroy(ubi, e);
				1608	}
				1609	}
				1610	}
				1611
				1612	/**
				1613	* ubi_thread - UBI background thread.
				1614	* @u: the UBI device description object pointer
				1615	*/
				1616	int ubi_thread(void *u)
				1617	{
				1618	int failures = 0;
				1619	struct ubi_device *ubi = u;
				1620
				1621	ubi_msg(ubi, "background thread \"%s\" started, PID %d",
				1622	ubi->bgt_name, task_pid_nr(current));
				1623
				1624	set_freezable();
				1625	for (;;) {
				1626	int err;
				1627
				1628	if (kthread_should_stop())
				1629	break;
				1630
				1631	if (try_to_freeze())
				1632	continue;
				1633
				1634	spin_lock(&ubi->wl_lock);
				1635	if (list_empty(&ubi->works) \|\| ubi->ro_mode \|\|
				1636	!ubi->thread_enabled \|\| ubi_dbg_is_bgt_disabled(ubi)) {
				1637	set_current_state(TASK_INTERRUPTIBLE);
				1638	spin_unlock(&ubi->wl_lock);
				1639	schedule();
				1640	continue;
				1641	}
				1642	spin_unlock(&ubi->wl_lock);
				1643
				1644	err = do_work(ubi);
				1645	if (err) {
				1646	ubi_err(ubi, "%s: work failed with error code %d",
				1647	ubi->bgt_name, err);
				1648	if (failures++ > WL_MAX_FAILURES) {
				1649	/*
				1650	* Too many failures, disable the thread and
				1651	* switch to read-only mode.
				1652	*/
				1653	ubi_msg(ubi, "%s: %d consecutive failures",
				1654	ubi->bgt_name, WL_MAX_FAILURES);
				1655	ubi_ro_mode(ubi);
				1656	ubi->thread_enabled = 0;
				1657	continue;
				1658	}
				1659	} else
				1660	failures = 0;
				1661
				1662	cond_resched();
				1663	}
				1664
				1665	dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
				1666	ubi->thread_enabled = 0;
				1667	return 0;
				1668	}
				1669
				1670	/**
				1671	* shutdown_work - shutdown all pending works.
				1672	* @ubi: UBI device description object
				1673	*/
				1674	static void shutdown_work(struct ubi_device *ubi)
				1675	{
				1676	while (!list_empty(&ubi->works)) {
				1677	struct ubi_work *wrk;
				1678
				1679	wrk = list_entry(ubi->works.next, struct ubi_work, list);
				1680	list_del(&wrk->list);
				1681	wrk->func(ubi, wrk, 1);
				1682	ubi->works_count -= 1;
				1683	ubi_assert(ubi->works_count >= 0);
				1684	}
				1685	}
				1686
				1687	/**
				1688	* erase_aeb - erase a PEB given in UBI attach info PEB
				1689	* @ubi: UBI device description object
				1690	* @aeb: UBI attach info PEB
				1691	* @sync: If true, erase synchronously. Otherwise schedule for erasure
				1692	*/
				1693	static int erase_aeb(struct ubi_device ubi, struct ubi_ainf_peb aeb, bool sync)
				1694	{
				1695	struct ubi_wl_entry *e;
				1696	int err;
				1697
				1698	e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
				1699	if (!e)
				1700	return -ENOMEM;
				1701
				1702	e->pnum = aeb->pnum;
				1703	e->ec = aeb->ec;
				1704	ubi->lookuptbl[e->pnum] = e;
				1705
				1706	if (sync) {
				1707	err = sync_erase(ubi, e, false);
				1708	if (err)
				1709	goto out_free;
				1710
				1711	wl_tree_add(e, &ubi->free);
				1712	ubi->free_count++;
				1713	} else {
				1714	err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false);
				1715	if (err)
				1716	goto out_free;
				1717	}
				1718
				1719	return 0;
				1720
				1721	out_free:
				1722	wl_entry_destroy(ubi, e);
				1723
				1724	return err;
				1725	}
				1726
				1727	/**
				1728	* ubi_wl_init - initialize the WL sub-system using attaching information.
				1729	* @ubi: UBI device description object
				1730	* @ai: attaching information
				1731	*
				1732	* This function returns zero in case of success, and a negative error code in
				1733	* case of failure.
				1734	*/
				1735	int ubi_wl_init(struct ubi_device ubi, struct ubi_attach_info ai)
				1736	{
				1737	int err, i, reserved_pebs, found_pebs = 0;
				1738	struct rb_node rb1, rb2;
				1739	struct ubi_ainf_volume *av;
				1740	struct ubi_ainf_peb aeb, tmp;
				1741	struct ubi_wl_entry *e;
				1742
				1743	ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT;
				1744	spin_lock_init(&ubi->wl_lock);
				1745	mutex_init(&ubi->move_mutex);
				1746	init_rwsem(&ubi->work_sem);
				1747	ubi->max_ec = ai->max_ec;
				1748	INIT_LIST_HEAD(&ubi->works);
				1749
				1750	sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num);
				1751
				1752	err = -ENOMEM;
				1753	ubi->lookuptbl = kcalloc(ubi->peb_count, sizeof(void *), GFP_KERNEL);
				1754	if (!ubi->lookuptbl)
				1755	return err;
				1756
				1757	for (i = 0; i < UBI_PROT_QUEUE_LEN; i++)
				1758	INIT_LIST_HEAD(&ubi->pq[i]);
				1759	ubi->pq_head = 0;
				1760
				1761	ubi->free_count = 0;
				1762	list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) {
				1763	cond_resched();
				1764
				1765	err = erase_aeb(ubi, aeb, false);
				1766	if (err)
				1767	goto out_free;
				1768
				1769	found_pebs++;
				1770	}
				1771
				1772	list_for_each_entry(aeb, &ai->free, u.list) {
				1773	cond_resched();
				1774
				1775	e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
				1776	if (!e) {
				1777	err = -ENOMEM;
				1778	goto out_free;
				1779	}
				1780
				1781	e->pnum = aeb->pnum;
				1782	e->ec = aeb->ec;
				1783	ubi_assert(e->ec >= 0);
				1784
				1785	wl_tree_add(e, &ubi->free);
				1786	ubi->free_count++;
				1787
				1788	ubi->lookuptbl[e->pnum] = e;
				1789
				1790	found_pebs++;
				1791	}
				1792
				1793	ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) {
				1794	ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) {
				1795	cond_resched();
				1796
				1797	e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
				1798	if (!e) {
				1799	err = -ENOMEM;
				1800	goto out_free;
				1801	}
				1802
				1803	e->pnum = aeb->pnum;
				1804	e->ec = aeb->ec;
				1805	ubi->lookuptbl[e->pnum] = e;
				1806
				1807	if (!aeb->scrub) {
				1808	dbg_wl("add PEB %d EC %d to the used tree",
				1809	e->pnum, e->ec);
				1810	wl_tree_add(e, &ubi->used);
				1811	} else {
				1812	dbg_wl("add PEB %d EC %d to the scrub tree",
				1813	e->pnum, e->ec);
				1814	wl_tree_add(e, &ubi->scrub);
				1815	}
				1816
				1817	found_pebs++;
				1818	}
				1819	}
				1820
				1821	list_for_each_entry(aeb, &ai->fastmap, u.list) {
				1822	cond_resched();
				1823
				1824	e = ubi_find_fm_block(ubi, aeb->pnum);
				1825
				1826	if (e) {
				1827	ubi_assert(!ubi->lookuptbl[e->pnum]);
				1828	ubi->lookuptbl[e->pnum] = e;
				1829	} else {
				1830	bool sync = false;
				1831
				1832	/*
				1833	* Usually old Fastmap PEBs are scheduled for erasure
				1834	* and we don't have to care about them but if we face
				1835	* an power cut before scheduling them we need to
				1836	* take care of them here.
				1837	*/
				1838	if (ubi->lookuptbl[aeb->pnum])
				1839	continue;
				1840
				1841	/*
				1842	* The fastmap update code might not find a free PEB for
				1843	* writing the fastmap anchor to and then reuses the
				1844	* current fastmap anchor PEB. When this PEB gets erased
				1845	* and a power cut happens before it is written again we
				1846	* must make sure that the fastmap attach code doesn't
				1847	* find any outdated fastmap anchors, hence we erase the
				1848	* outdated fastmap anchor PEBs synchronously here.
				1849	*/
				1850	if (aeb->vol_id == UBI_FM_SB_VOLUME_ID)
				1851	sync = true;
				1852
				1853	err = erase_aeb(ubi, aeb, sync);
				1854	if (err)
				1855	goto out_free;
				1856	}
				1857
				1858	found_pebs++;
				1859	}
				1860
				1861	dbg_wl("found %i PEBs", found_pebs);
				1862
				1863	ubi_assert(ubi->good_peb_count == found_pebs);
				1864
				1865	reserved_pebs = WL_RESERVED_PEBS;
				1866	ubi_fastmap_init(ubi, &reserved_pebs);
				1867
				1868	if (ubi->avail_pebs < reserved_pebs) {
				1869	ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)",
				1870	ubi->avail_pebs, reserved_pebs);
				1871	if (ubi->corr_peb_count)
				1872	ubi_err(ubi, "%d PEBs are corrupted and not used",
				1873	ubi->corr_peb_count);
				1874	err = -ENOSPC;
				1875	goto out_free;
				1876	}
				1877	ubi->avail_pebs -= reserved_pebs;
				1878	ubi->rsvd_pebs += reserved_pebs;
				1879
				1880	/* Schedule wear-leveling if needed */
				1881	err = ensure_wear_leveling(ubi, 0);
				1882	if (err)
				1883	goto out_free;
				1884
				1885	return 0;
				1886
				1887	out_free:
				1888	shutdown_work(ubi);
				1889	tree_destroy(ubi, &ubi->used);
				1890	tree_destroy(ubi, &ubi->free);
				1891	tree_destroy(ubi, &ubi->scrub);
				1892	kfree(ubi->lookuptbl);
				1893	return err;
				1894	}
				1895
				1896	/**
				1897	* protection_queue_destroy - destroy the protection queue.
				1898	* @ubi: UBI device description object
				1899	*/
				1900	static void protection_queue_destroy(struct ubi_device *ubi)
				1901	{
				1902	int i;
				1903	struct ubi_wl_entry e, tmp;
				1904
				1905	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) {
				1906	list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) {
				1907	list_del(&e->u.list);
				1908	wl_entry_destroy(ubi, e);
				1909	}
				1910	}
				1911	}
				1912
				1913	/**
				1914	* ubi_wl_close - close the wear-leveling sub-system.
				1915	* @ubi: UBI device description object
				1916	*/
				1917	void ubi_wl_close(struct ubi_device *ubi)
				1918	{
				1919	dbg_wl("close the WL sub-system");
				1920	ubi_fastmap_close(ubi);
				1921	shutdown_work(ubi);
				1922	protection_queue_destroy(ubi);
				1923	tree_destroy(ubi, &ubi->used);
				1924	tree_destroy(ubi, &ubi->erroneous);
				1925	tree_destroy(ubi, &ubi->free);
				1926	tree_destroy(ubi, &ubi->scrub);
				1927	kfree(ubi->lookuptbl);
				1928	}
				1929
				1930	/**
				1931	* self_check_ec - make sure that the erase counter of a PEB is correct.
				1932	* @ubi: UBI device description object
				1933	* @pnum: the physical eraseblock number to check
				1934	* @ec: the erase counter to check
				1935	*
				1936	* This function returns zero if the erase counter of physical eraseblock @pnum
				1937	* is equivalent to @ec, and a negative error code if not or if an error
				1938	* occurred.
				1939	*/
				1940	static int self_check_ec(struct ubi_device *ubi, int pnum, int ec)
				1941	{
				1942	int err;
				1943	long long read_ec;
				1944	struct ubi_ec_hdr *ec_hdr;
				1945
				1946	if (!ubi_dbg_chk_gen(ubi))
				1947	return 0;
				1948
				1949	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
				1950	if (!ec_hdr)
				1951	return -ENOMEM;
				1952
				1953	err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
				1954	if (err && err != UBI_IO_BITFLIPS) {
				1955	/* The header does not have to exist */
				1956	err = 0;
				1957	goto out_free;
				1958	}
				1959
				1960	read_ec = be64_to_cpu(ec_hdr->ec);
				1961	if (ec != read_ec && read_ec - ec > 1) {
				1962	ubi_err(ubi, "self-check failed for PEB %d", pnum);
				1963	ubi_err(ubi, "read EC is %lld, should be %d", read_ec, ec);
				1964	dump_stack();
				1965	err = 1;
				1966	} else
				1967	err = 0;
				1968
				1969	out_free:
				1970	kfree(ec_hdr);
				1971	return err;
				1972	}
				1973
				1974	/**
				1975	* self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
				1976	* @ubi: UBI device description object
				1977	* @e: the wear-leveling entry to check
				1978	* @root: the root of the tree
				1979	*
				1980	* This function returns zero if @e is in the @root RB-tree and %-EINVAL if it
				1981	* is not.
				1982	*/
				1983	static int self_check_in_wl_tree(const struct ubi_device *ubi,
				1984	struct ubi_wl_entry e, struct rb_root root)
				1985	{
				1986	if (!ubi_dbg_chk_gen(ubi))
				1987	return 0;
				1988
				1989	if (in_wl_tree(e, root))
				1990	return 0;
				1991
				1992	ubi_err(ubi, "self-check failed for PEB %d, EC %d, RB-tree %p ",
				1993	e->pnum, e->ec, root);
				1994	dump_stack();
				1995	return -EINVAL;
				1996	}
				1997
				1998	/**
				1999	* self_check_in_pq - check if wear-leveling entry is in the protection
				2000	* queue.
				2001	* @ubi: UBI device description object
				2002	* @e: the wear-leveling entry to check
				2003	*
				2004	* This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not.
				2005	*/
				2006	static int self_check_in_pq(const struct ubi_device *ubi,
				2007	struct ubi_wl_entry *e)
				2008	{
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2009	if (!ubi_dbg_chk_gen(ubi))
				2010	return 0;
				2011
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	2012	if (in_pq(ubi, e))
				2013	return 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2014
				2015	ubi_err(ubi, "self-check failed for PEB %d, EC %d, Protect queue",
				2016	e->pnum, e->ec);
				2017	dump_stack();
				2018	return -EINVAL;
				2019	}
				2020	#ifndef CONFIG_MTD_UBI_FASTMAP
				2021	static struct ubi_wl_entry get_peb_for_wl(struct ubi_device ubi)
				2022	{
				2023	struct ubi_wl_entry *e;
				2024
				2025	e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
				2026	self_check_in_wl_tree(ubi, e, &ubi->free);
				2027	ubi->free_count--;
				2028	ubi_assert(ubi->free_count >= 0);
				2029	rb_erase(&e->u.rb, &ubi->free);
				2030
				2031	return e;
				2032	}
				2033
				2034	/**
				2035	* produce_free_peb - produce a free physical eraseblock.
				2036	* @ubi: UBI device description object
				2037	*
				2038	* This function tries to make a free PEB by means of synchronous execution of
				2039	* pending works. This may be needed if, for example the background thread is
				2040	* disabled. Returns zero in case of success and a negative error code in case
				2041	* of failure.
				2042	*/
				2043	static int produce_free_peb(struct ubi_device *ubi)
				2044	{
				2045	int err;
				2046
				2047	while (!ubi->free.rb_node && ubi->works_count) {
				2048	spin_unlock(&ubi->wl_lock);
				2049
				2050	dbg_wl("do one work synchronously");
				2051	err = do_work(ubi);
				2052
				2053	spin_lock(&ubi->wl_lock);
				2054	if (err)
				2055	return err;
				2056	}
				2057
				2058	return 0;
				2059	}
				2060
				2061	/**
				2062	* ubi_wl_get_peb - get a physical eraseblock.
				2063	* @ubi: UBI device description object
				2064	*
				2065	* This function returns a physical eraseblock in case of success and a
				2066	* negative error code in case of failure.
				2067	* Returns with ubi->fm_eba_sem held in read mode!
				2068	*/
				2069	int ubi_wl_get_peb(struct ubi_device *ubi)
				2070	{
				2071	int err;
				2072	struct ubi_wl_entry *e;
				2073
				2074	retry:
				2075	down_read(&ubi->fm_eba_sem);
				2076	spin_lock(&ubi->wl_lock);
				2077	if (!ubi->free.rb_node) {
				2078	if (ubi->works_count == 0) {
				2079	ubi_err(ubi, "no free eraseblocks");
				2080	ubi_assert(list_empty(&ubi->works));
				2081	spin_unlock(&ubi->wl_lock);
				2082	return -ENOSPC;
				2083	}
				2084
				2085	err = produce_free_peb(ubi);
				2086	if (err < 0) {
				2087	spin_unlock(&ubi->wl_lock);
				2088	return err;
				2089	}
				2090	spin_unlock(&ubi->wl_lock);
				2091	up_read(&ubi->fm_eba_sem);
				2092	goto retry;
				2093
				2094	}
				2095	e = wl_get_wle(ubi);
				2096	prot_queue_add(ubi, e);
				2097	spin_unlock(&ubi->wl_lock);
				2098
				2099	err = ubi_self_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset,
				2100	ubi->peb_size - ubi->vid_hdr_aloffset);
				2101	if (err) {
				2102	ubi_err(ubi, "new PEB %d does not contain all 0xFF bytes", e->pnum);
				2103	return err;
				2104	}
				2105
				2106	return e->pnum;
				2107	}
				2108	#else
				2109	#include "fastmap-wl.c"
				2110	#endif