Update Linux to v5.10.157
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.157.tar.xz
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
Change-Id: I7b30d9e98d8c465d6b44de8e7433b4a40b3289ba
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fe6dce1..b47c00d 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -2006,8 +2006,7 @@
int i;
struct bkey *k = NULL;
struct btree_iter iter;
- struct btree_check_state *check_state;
- char name[32];
+ struct btree_check_state check_state;
/* check and mark root node keys */
for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
@@ -2018,61 +2017,59 @@
if (c->root->level == 0)
return 0;
- check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
- if (!check_state)
- return -ENOMEM;
+ memset(&check_state, 0, sizeof(struct btree_check_state));
+ check_state.c = c;
+ check_state.total_threads = bch_btree_chkthread_nr();
+ check_state.key_idx = 0;
+ spin_lock_init(&check_state.idx_lock);
+ atomic_set(&check_state.started, 0);
+ atomic_set(&check_state.enough, 0);
+ init_waitqueue_head(&check_state.wait);
- check_state->c = c;
- check_state->total_threads = bch_btree_chkthread_nr();
- check_state->key_idx = 0;
- spin_lock_init(&check_state->idx_lock);
- atomic_set(&check_state->started, 0);
- atomic_set(&check_state->enough, 0);
- init_waitqueue_head(&check_state->wait);
-
+ rw_lock(0, c->root, c->root->level);
/*
* Run multiple threads to check btree nodes in parallel,
- * if check_state->enough is non-zero, it means current
+ * if check_state.enough is non-zero, it means current
* running check threads are enough, unncessary to create
* more.
*/
- for (i = 0; i < check_state->total_threads; i++) {
- /* fetch latest check_state->enough earlier */
+ for (i = 0; i < check_state.total_threads; i++) {
+ /* fetch latest check_state.enough earlier */
smp_mb__before_atomic();
- if (atomic_read(&check_state->enough))
+ if (atomic_read(&check_state.enough))
break;
- check_state->infos[i].result = 0;
- check_state->infos[i].state = check_state;
- snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
- atomic_inc(&check_state->started);
+ check_state.infos[i].result = 0;
+ check_state.infos[i].state = &check_state;
- check_state->infos[i].thread =
+ check_state.infos[i].thread =
kthread_run(bch_btree_check_thread,
- &check_state->infos[i],
- name);
- if (IS_ERR(check_state->infos[i].thread)) {
+ &check_state.infos[i],
+ "bch_btrchk[%d]", i);
+ if (IS_ERR(check_state.infos[i].thread)) {
pr_err("fails to run thread bch_btrchk[%d]\n", i);
for (--i; i >= 0; i--)
- kthread_stop(check_state->infos[i].thread);
+ kthread_stop(check_state.infos[i].thread);
ret = -ENOMEM;
goto out;
}
+ atomic_inc(&check_state.started);
}
- wait_event_interruptible(check_state->wait,
- atomic_read(&check_state->started) == 0 ||
- test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+ /*
+ * Must wait for all threads to stop.
+ */
+ wait_event(check_state.wait, atomic_read(&check_state.started) == 0);
- for (i = 0; i < check_state->total_threads; i++) {
- if (check_state->infos[i].result) {
- ret = check_state->infos[i].result;
+ for (i = 0; i < check_state.total_threads; i++) {
+ if (check_state.infos[i].result) {
+ ret = check_state.infos[i].result;
goto out;
}
}
out:
- kfree(check_state);
+ rw_unlock(0, c->root);
return ret;
}
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 5048210..1b5fdbc 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -226,7 +226,7 @@
int result;
};
-#define BCH_BTR_CHKTHREAD_MAX 64
+#define BCH_BTR_CHKTHREAD_MAX 12
struct btree_check_state {
struct cache_set *c;
int total_threads;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index c6613e8..aea4833 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -407,6 +407,11 @@
return ret;
}
+void bch_journal_space_reserve(struct journal *j)
+{
+ j->do_reserve = true;
+}
+
/* Journalling */
static void btree_flush_write(struct cache_set *c)
@@ -625,12 +630,30 @@
}
}
+static unsigned int free_journal_buckets(struct cache_set *c)
+{
+ struct journal *j = &c->journal;
+ struct cache *ca = c->cache;
+ struct journal_device *ja = &c->cache->journal;
+ unsigned int n;
+
+ /* In case njournal_buckets is not power of 2 */
+ if (ja->cur_idx >= ja->discard_idx)
+ n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx;
+ else
+ n = ja->discard_idx - ja->cur_idx;
+
+ if (n > (1 + j->do_reserve))
+ return n - (1 + j->do_reserve);
+
+ return 0;
+}
+
static void journal_reclaim(struct cache_set *c)
{
struct bkey *k = &c->journal.key;
struct cache *ca = c->cache;
uint64_t last_seq;
- unsigned int next;
struct journal_device *ja = &ca->journal;
atomic_t p __maybe_unused;
@@ -653,12 +676,10 @@
if (c->journal.blocks_free)
goto out;
- next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
- /* No space available on this device */
- if (next == ja->discard_idx)
+ if (!free_journal_buckets(c))
goto out;
- ja->cur_idx = next;
+ ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
k->ptr[0] = MAKE_PTR(0,
bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
ca->sb.nr_this_dev);
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index f2ea34d..cd316b4 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -105,6 +105,7 @@
spinlock_t lock;
spinlock_t flush_write_lock;
bool btree_flushing;
+ bool do_reserve;
/* used when waiting because the journal was full */
struct closure_waitlist wait;
struct closure io;
@@ -182,5 +183,6 @@
void bch_journal_free(struct cache_set *c);
int bch_journal_alloc(struct cache_set *c);
+void bch_journal_space_reserve(struct journal *j);
#endif /* _BCACHE_JOURNAL_H */
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 2143263..9789526 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1109,6 +1109,12 @@
* which would call closure_get(&dc->disk.cl)
*/
ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
+ if (!ddip) {
+ bio->bi_status = BLK_STS_RESOURCE;
+ bio->bi_end_io(bio);
+ return;
+ }
+
ddip->d = d;
/* Count on the bcache device */
ddip->start_time = part_start_io_acct(d->disk, &ddip->part, bio);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 81f1cc5..7f5ea25 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2150,6 +2150,7 @@
flash_devs_run(c);
+ bch_journal_space_reserve(&c->journal);
set_bit(CACHE_SET_RUNNING, &c->flags);
return 0;
err:
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3c74996..3aa73da 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -119,6 +119,53 @@
dc->writeback_rate_target = target;
}
+static bool idle_counter_exceeded(struct cache_set *c)
+{
+ int counter, dev_nr;
+
+ /*
+ * If c->idle_counter is overflow (idel for really long time),
+ * reset as 0 and not set maximum rate this time for code
+ * simplicity.
+ */
+ counter = atomic_inc_return(&c->idle_counter);
+ if (counter <= 0) {
+ atomic_set(&c->idle_counter, 0);
+ return false;
+ }
+
+ dev_nr = atomic_read(&c->attached_dev_nr);
+ if (dev_nr == 0)
+ return false;
+
+ /*
+ * c->idle_counter is increased by writeback thread of all
+ * attached backing devices, in order to represent a rough
+ * time period, counter should be divided by dev_nr.
+ * Otherwise the idle time cannot be larger with more backing
+ * device attached.
+ * The following calculation equals to checking
+ * (counter / dev_nr) < (dev_nr * 6)
+ */
+ if (counter < (dev_nr * dev_nr * 6))
+ return false;
+
+ return true;
+}
+
+/*
+ * Idle_counter is increased every time when update_writeback_rate() is
+ * called. If all backing devices attached to the same cache set have
+ * identical dc->writeback_rate_update_seconds values, it is about 6
+ * rounds of update_writeback_rate() on each backing device before
+ * c->at_max_writeback_rate is set to 1, and then max wrteback rate set
+ * to each dc->writeback_rate.rate.
+ * In order to avoid extra locking cost for counting exact dirty cached
+ * devices number, c->attached_dev_nr is used to calculate the idle
+ * throushold. It might be bigger if not all cached device are in write-
+ * back mode, but it still works well with limited extra rounds of
+ * update_writeback_rate().
+ */
static bool set_at_max_writeback_rate(struct cache_set *c,
struct cached_dev *dc)
{
@@ -129,21 +176,8 @@
/* Don't set max writeback rate if gc is running */
if (!c->gc_mark_valid)
return false;
- /*
- * Idle_counter is increased everytime when update_writeback_rate() is
- * called. If all backing devices attached to the same cache set have
- * identical dc->writeback_rate_update_seconds values, it is about 6
- * rounds of update_writeback_rate() on each backing device before
- * c->at_max_writeback_rate is set to 1, and then max wrteback rate set
- * to each dc->writeback_rate.rate.
- * In order to avoid extra locking cost for counting exact dirty cached
- * devices number, c->attached_dev_nr is used to calculate the idle
- * throushold. It might be bigger if not all cached device are in write-
- * back mode, but it still works well with limited extra rounds of
- * update_writeback_rate().
- */
- if (atomic_inc_return(&c->idle_counter) <
- atomic_read(&c->attached_dev_nr) * 6)
+
+ if (!idle_counter_exceeded(c))
return false;
if (atomic_read(&c->at_max_writeback_rate) != 1)
@@ -157,13 +191,10 @@
dc->writeback_rate_change = 0;
/*
- * Check c->idle_counter and c->at_max_writeback_rate agagain in case
- * new I/O arrives during before set_at_max_writeback_rate() returns.
- * Then the writeback rate is set to 1, and its new value should be
- * decided via __update_writeback_rate().
+ * In case new I/O arrives during before
+ * set_at_max_writeback_rate() returns.
*/
- if ((atomic_read(&c->idle_counter) <
- atomic_read(&c->attached_dev_nr) * 6) ||
+ if (!idle_counter_exceeded(c) ||
!atomic_read(&c->at_max_writeback_rate))
return false;
@@ -756,13 +787,11 @@
/* Init */
#define INIT_KEYS_EACH_TIME 500000
-#define INIT_KEYS_SLEEP_MS 100
struct sectors_dirty_init {
struct btree_op op;
unsigned int inode;
size_t count;
- struct bkey start;
};
static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
@@ -778,11 +807,8 @@
KEY_START(k), KEY_SIZE(k));
op->count++;
- if (atomic_read(&b->c->search_inflight) &&
- !(op->count % INIT_KEYS_EACH_TIME)) {
- bkey_copy_key(&op->start, k);
- return -EAGAIN;
- }
+ if (!(op->count % INIT_KEYS_EACH_TIME))
+ cond_resched();
return MAP_CONTINUE;
}
@@ -797,24 +823,16 @@
bch_btree_op_init(&op.op, -1);
op.inode = d->id;
op.count = 0;
- op.start = KEY(op.inode, 0, 0);
- do {
- ret = bcache_btree(map_keys_recurse,
- k,
- c->root,
- &op.op,
- &op.start,
- sectors_dirty_init_fn,
- 0);
- if (ret == -EAGAIN)
- schedule_timeout_interruptible(
- msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
- else if (ret < 0) {
- pr_warn("sectors dirty init failed, ret=%d!\n", ret);
- break;
- }
- } while (ret == -EAGAIN);
+ ret = bcache_btree(map_keys_recurse,
+ k,
+ c->root,
+ &op.op,
+ &KEY(op.inode, 0, 0),
+ sectors_dirty_init_fn,
+ 0);
+ if (ret < 0)
+ pr_warn("sectors dirty init failed, ret=%d!\n", ret);
return ret;
}
@@ -858,7 +876,6 @@
goto out;
}
skip_nr--;
- cond_resched();
}
if (p) {
@@ -868,7 +885,6 @@
p = NULL;
prev_idx = cur_idx;
- cond_resched();
}
out:
@@ -899,65 +915,56 @@
struct btree_iter iter;
struct sectors_dirty_init op;
struct cache_set *c = d->c;
- struct bch_dirty_init_state *state;
- char name[32];
+ struct bch_dirty_init_state state;
/* Just count root keys if no leaf node */
+ rw_lock(0, c->root, c->root->level);
if (c->root->level == 0) {
bch_btree_op_init(&op.op, -1);
op.inode = d->id;
op.count = 0;
- op.start = KEY(op.inode, 0, 0);
for_each_key_filter(&c->root->keys,
k, &iter, bch_ptr_invalid)
sectors_dirty_init_fn(&op.op, c->root, k);
+
+ rw_unlock(0, c->root);
return;
}
- state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
- if (!state) {
- pr_warn("sectors dirty init failed: cannot allocate memory\n");
- return;
- }
+ memset(&state, 0, sizeof(struct bch_dirty_init_state));
+ state.c = c;
+ state.d = d;
+ state.total_threads = bch_btre_dirty_init_thread_nr();
+ state.key_idx = 0;
+ spin_lock_init(&state.idx_lock);
+ atomic_set(&state.started, 0);
+ atomic_set(&state.enough, 0);
+ init_waitqueue_head(&state.wait);
- state->c = c;
- state->d = d;
- state->total_threads = bch_btre_dirty_init_thread_nr();
- state->key_idx = 0;
- spin_lock_init(&state->idx_lock);
- atomic_set(&state->started, 0);
- atomic_set(&state->enough, 0);
- init_waitqueue_head(&state->wait);
-
- for (i = 0; i < state->total_threads; i++) {
- /* Fetch latest state->enough earlier */
+ for (i = 0; i < state.total_threads; i++) {
+ /* Fetch latest state.enough earlier */
smp_mb__before_atomic();
- if (atomic_read(&state->enough))
+ if (atomic_read(&state.enough))
break;
- state->infos[i].state = state;
- atomic_inc(&state->started);
- snprintf(name, sizeof(name), "bch_dirty_init[%d]", i);
-
- state->infos[i].thread =
- kthread_run(bch_dirty_init_thread,
- &state->infos[i],
- name);
- if (IS_ERR(state->infos[i].thread)) {
+ state.infos[i].state = &state;
+ state.infos[i].thread =
+ kthread_run(bch_dirty_init_thread, &state.infos[i],
+ "bch_dirtcnt[%d]", i);
+ if (IS_ERR(state.infos[i].thread)) {
pr_err("fails to run thread bch_dirty_init[%d]\n", i);
for (--i; i >= 0; i--)
- kthread_stop(state->infos[i].thread);
+ kthread_stop(state.infos[i].thread);
goto out;
}
+ atomic_inc(&state.started);
}
- wait_event_interruptible(state->wait,
- atomic_read(&state->started) == 0 ||
- test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
out:
- kfree(state);
+ /* Must wait for all threads to stop. */
+ wait_event(state.wait, atomic_read(&state.started) == 0);
+ rw_unlock(0, c->root);
}
void bch_cached_dev_writeback_init(struct cached_dev *dc)
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 3f1230e..0f1d969 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -16,7 +16,7 @@
#define BCH_AUTO_GC_DIRTY_THRESHOLD 50
-#define BCH_DIRTY_INIT_THRD_MAX 64
+#define BCH_DIRTY_INIT_THRD_MAX 12
/*
* 14 (16384ths) is chosen here as something that each backing device
* should be a reasonable fraction of the share, and not to blow up
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 2aa4acd..3d975db 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2561,7 +2561,7 @@
static int get_key_size(char **key_string)
{
- return (*key_string[0] == ':') ? -EINVAL : strlen(*key_string) >> 1;
+ return (*key_string[0] == ':') ? -EINVAL : (int)(strlen(*key_string) >> 1);
}
#endif /* CONFIG_KEYS */
@@ -3404,6 +3404,11 @@
return DM_MAPIO_SUBMITTED;
}
+static char hex2asc(unsigned char c)
+{
+ return c + '0' + ((unsigned)(9 - c) >> 4 & 0x27);
+}
+
static void crypt_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
@@ -3422,9 +3427,12 @@
if (cc->key_size > 0) {
if (cc->key_string)
DMEMIT(":%u:%s", cc->key_size, cc->key_string);
- else
- for (i = 0; i < cc->key_size; i++)
- DMEMIT("%02x", cc->key[i]);
+ else {
+ for (i = 0; i < cc->key_size; i++) {
+ DMEMIT("%c%c", hex2asc(cc->key[i] >> 4),
+ hex2asc(cc->key[i] & 0xf));
+ }
+ }
} else
DMEMIT("-");
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index d9ac737..96bad05 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1396,7 +1396,7 @@
static void stop_worker(struct era *era)
{
atomic_set(&era->suspended, 1);
- flush_workqueue(era->wq);
+ drain_workqueue(era->wq);
}
/*----------------------------------------------------------------
@@ -1566,6 +1566,12 @@
}
stop_worker(era);
+
+ r = metadata_commit(era->md);
+ if (r) {
+ DMERR("%s: metadata_commit failed", __func__);
+ /* FIXME: fail mode */
+ }
}
static int era_preresume(struct dm_target *ti)
diff --git a/drivers/md/dm-historical-service-time.c b/drivers/md/dm-historical-service-time.c
index 186f91e..06fe43c 100644
--- a/drivers/md/dm-historical-service-time.c
+++ b/drivers/md/dm-historical-service-time.c
@@ -429,7 +429,7 @@
{
struct selector *s = ps->context;
struct path_info *pi = NULL, *best = NULL;
- u64 time_now = sched_clock();
+ u64 time_now = ktime_get_ns();
struct dm_path *ret = NULL;
unsigned long flags;
@@ -470,7 +470,7 @@
static u64 path_service_time(struct path_info *pi, u64 start_time)
{
- u64 sched_now = ktime_get_ns();
+ u64 now = ktime_get_ns();
/* if a previous disk request has finished after this IO was
* sent to the hardware, pretend the submission happened
@@ -479,11 +479,11 @@
if (time_after64(pi->last_finish, start_time))
start_time = pi->last_finish;
- pi->last_finish = sched_now;
- if (time_before64(sched_now, start_time))
+ pi->last_finish = now;
+ if (time_before64(now, start_time))
return 0;
- return sched_now - start_time;
+ return now - start_time;
}
static int hst_end_io(struct path_selector *ps, struct dm_path *path,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 4c7da1c..2156a2d 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -254,6 +254,7 @@
struct completion crypto_backoff;
+ bool wrote_to_journal;
bool journal_uptodate;
bool just_formatted;
bool recalculate_flag;
@@ -2256,6 +2257,8 @@
if (!commit_sections)
goto release_flush_bios;
+ ic->wrote_to_journal = true;
+
i = commit_start;
for (n = 0; n < commit_sections; n++) {
for (j = 0; j < ic->journal_section_entries; j++) {
@@ -2354,9 +2357,11 @@
dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
sec &= ~(sector_t)(ic->sectors_per_block - 1);
}
+ if (unlikely(sec >= ic->provided_data_sectors)) {
+ journal_entry_set_unused(je);
+ continue;
+ }
}
- if (unlikely(sec >= ic->provided_data_sectors))
- continue;
get_area_and_offset(ic, sec, &area, &offset);
restore_last_bytes(ic, access_journal_data(ic, i, j), je);
for (k = j + 1; k < ic->journal_section_entries; k++) {
@@ -2468,10 +2473,6 @@
unsigned prev_free_sectors;
- /* the following test is not needed, but it tests the replay code */
- if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev)
- return;
-
spin_lock_irq(&ic->endio_wait.lock);
write_start = ic->committed_section;
write_sections = ic->n_committed_sections;
@@ -2978,10 +2979,17 @@
drain_workqueue(ic->commit_wq);
if (ic->mode == 'J') {
- if (ic->meta_dev)
- queue_work(ic->writer_wq, &ic->writer_work);
+ queue_work(ic->writer_wq, &ic->writer_work);
drain_workqueue(ic->writer_wq);
dm_integrity_flush_buffers(ic, true);
+ if (ic->wrote_to_journal) {
+ init_journal(ic, ic->free_section,
+ ic->journal_sections - ic->free_section, ic->commit_seq);
+ if (ic->free_section) {
+ init_journal(ic, 0, ic->free_section,
+ next_commit_seq(ic->commit_seq));
+ }
+ }
}
if (ic->mode == 'B') {
@@ -3009,6 +3017,8 @@
DEBUG_print("resume\n");
+ ic->wrote_to_journal = false;
+
if (ic->provided_data_sectors != old_provided_data_sectors) {
if (ic->provided_data_sectors > old_provided_data_sectors &&
ic->mode == 'B' &&
@@ -4230,6 +4240,7 @@
}
if (ic->internal_hash) {
+ size_t recalc_tags_size;
ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
if (!ic->recalc_wq ) {
ti->error = "Cannot allocate workqueue";
@@ -4243,8 +4254,10 @@
r = -ENOMEM;
goto bad;
}
- ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block,
- ic->tag_size, GFP_KERNEL);
+ recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size;
+ if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size)
+ recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size;
+ ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL);
if (!ic->recalc_tags) {
ti->error = "Cannot allocate tags for recalculating";
r = -ENOMEM;
@@ -4322,8 +4335,6 @@
}
if (should_write_sb) {
- int r;
-
init_journal(ic, 0, ic->journal_sections, 0);
r = dm_integrity_failed(ic);
if (unlikely(r)) {
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 1ca65b4..20171c9 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -17,6 +17,7 @@
#include <linux/dm-ioctl.h>
#include <linux/hdreg.h>
#include <linux/compat.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
@@ -572,7 +573,7 @@
size_t *needed = needed_param;
*needed += sizeof(struct dm_target_versions);
- *needed += strlen(tt->name);
+ *needed += strlen(tt->name) + 1;
*needed += ALIGN_MASK;
}
@@ -637,7 +638,7 @@
iter_info.old_vers = NULL;
iter_info.vers = vers;
iter_info.flags = 0;
- iter_info.end = (char *)vers+len;
+ iter_info.end = (char *)vers + needed;
/*
* Now loop through filling out the names & versions.
@@ -1696,6 +1697,7 @@
if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
return NULL;
+ cmd = array_index_nospec(cmd, ARRAY_SIZE(_ioctls));
*ioctl_flags = _ioctls[cmd].flags;
return _ioctls[cmd].fn;
}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 33e71ea..fe3a947 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -415,8 +415,7 @@
/*
* Work out how many "unsigned long"s we need to hold the bitset.
*/
- bitset_size = dm_round_up(region_count,
- sizeof(*lc->clean_bits) << BYTE_SHIFT);
+ bitset_size = dm_round_up(region_count, BITS_PER_LONG);
bitset_size >>= BYTE_SHIFT;
lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits);
@@ -616,7 +615,7 @@
log_clear_bit(lc, lc->clean_bits, i);
/* clear any old bits -- device has shrunk */
- for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++)
+ for (i = lc->region_count; i % BITS_PER_LONG; i++)
log_clear_bit(lc, lc->clean_bits, i);
/* copy clean across to sync */
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index f5083b4..a2d09c9 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1002,12 +1002,13 @@
static int validate_raid_redundancy(struct raid_set *rs)
{
unsigned int i, rebuild_cnt = 0;
- unsigned int rebuilds_per_group = 0, copies;
+ unsigned int rebuilds_per_group = 0, copies, raid_disks;
unsigned int group_size, last_group_start;
- for (i = 0; i < rs->md.raid_disks; i++)
- if (!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
- !rs->dev[i].rdev.sb_page)
+ for (i = 0; i < rs->raid_disks; i++)
+ if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) &&
+ ((!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
+ !rs->dev[i].rdev.sb_page)))
rebuild_cnt++;
switch (rs->md.level) {
@@ -1047,8 +1048,9 @@
* A A B B C
* C D D E E
*/
+ raid_disks = min(rs->raid_disks, rs->md.raid_disks);
if (__is_raid10_near(rs->md.new_layout)) {
- for (i = 0; i < rs->md.raid_disks; i++) {
+ for (i = 0; i < raid_disks; i++) {
if (!(i % copies))
rebuilds_per_group = 0;
if ((!rs->dev[i].rdev.sb_page ||
@@ -1071,10 +1073,10 @@
* results in the need to treat the last (potentially larger)
* set differently.
*/
- group_size = (rs->md.raid_disks / copies);
- last_group_start = (rs->md.raid_disks / group_size) - 1;
+ group_size = (raid_disks / copies);
+ last_group_start = (raid_disks / group_size) - 1;
last_group_start *= group_size;
- for (i = 0; i < rs->md.raid_disks; i++) {
+ for (i = 0; i < raid_disks; i++) {
if (!(i % copies) && !(i > last_group_start))
rebuilds_per_group = 0;
if ((!rs->dev[i].rdev.sb_page ||
@@ -1589,7 +1591,7 @@
{
int i;
- for (i = 0; i < rs->md.raid_disks; i++) {
+ for (i = 0; i < rs->raid_disks; i++) {
struct md_rdev *rdev = &rs->dev[i].rdev;
if (!test_bit(Journal, &rdev->flags) &&
@@ -3512,7 +3514,7 @@
{
struct raid_set *rs = ti->private;
struct mddev *mddev = &rs->md;
- struct r5conf *conf = mddev->private;
+ struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
unsigned long recovery;
unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
@@ -3732,13 +3734,13 @@
unsigned int i;
int r = 0;
- for (i = 0; !r && i < rs->md.raid_disks; i++)
- if (rs->dev[i].data_dev)
- r = fn(ti,
- rs->dev[i].data_dev,
- 0, /* No offset on data devs */
- rs->md.dev_sectors,
- data);
+ for (i = 0; !r && i < rs->raid_disks; i++) {
+ if (rs->dev[i].data_dev) {
+ r = fn(ti, rs->dev[i].data_dev,
+ 0, /* No offset on data devs */
+ rs->md.dev_sectors, data);
+ }
+ }
return r;
}
@@ -3792,7 +3794,7 @@
memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
- for (i = 0; i < mddev->raid_disks; i++) {
+ for (i = 0; i < rs->raid_disks; i++) {
r = &rs->dev[i].rdev;
/* HM FIXME: enhance journal device recovery processing */
if (test_bit(Journal, &r->flags))
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b1e867f..5f933db 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -397,7 +397,7 @@
}
/* The target has remapped the I/O so dispatch it */
- trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
+ trace_block_rq_remap(clone, disk_devt(dm_disk(md)),
blk_rq_pos(rq));
ret = dm_dispatch_clone_request(clone, rq);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
@@ -492,8 +492,13 @@
if (unlikely(!ti)) {
int srcu_idx;
- struct dm_table *map = dm_get_live_table(md, &srcu_idx);
+ struct dm_table *map;
+ map = dm_get_live_table(md, &srcu_idx);
+ if (unlikely(!map)) {
+ dm_put_live_table(md, srcu_idx);
+ return BLK_STS_RESOURCE;
+ }
ti = dm_table_find_target(map, 0);
dm_put_live_table(md, srcu_idx);
}
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 35d368c..55443a6 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -224,6 +224,7 @@
atomic_read(&shared->in_flight[READ]),
atomic_read(&shared->in_flight[WRITE]));
}
+ cond_resched();
}
dm_stat_free(&s->rcu_head);
}
@@ -313,6 +314,7 @@
for (ni = 0; ni < n_entries; ni++) {
atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
+ cond_resched();
}
if (s->n_histogram_entries) {
@@ -325,6 +327,7 @@
for (ni = 0; ni < n_entries; ni++) {
s->stat_shared[ni].tmp.histogram = hi;
hi += s->n_histogram_entries + 1;
+ cond_resched();
}
}
@@ -345,6 +348,7 @@
for (ni = 0; ni < n_entries; ni++) {
p[ni].histogram = hi;
hi += s->n_histogram_entries + 1;
+ cond_resched();
}
}
}
@@ -474,6 +478,7 @@
}
DMEMIT("\n");
}
+ cond_resched();
}
mutex_unlock(&stats->mutex);
@@ -750,6 +755,7 @@
local_irq_enable();
}
}
+ cond_resched();
}
}
@@ -865,6 +871,8 @@
if (unlikely(sz + 1 >= maxlen))
goto buffer_overflow;
+
+ cond_resched();
}
if (clear)
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 6ebb212..842d79e 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -2058,10 +2058,13 @@
dm_sm_threshold_fn fn,
void *context)
{
- int r;
+ int r = -EINVAL;
pmd_write_lock_in_core(pmd);
- r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
+ if (!pmd->fail_io) {
+ r = dm_sm_register_threshold_callback(pmd->metadata_sm,
+ threshold, fn, context);
+ }
pmd_write_unlock(pmd);
return r;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fff4c50..a196d7c 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3401,8 +3401,10 @@
calc_metadata_threshold(pt),
metadata_low_callback,
pool);
- if (r)
+ if (r) {
+ ti->error = "Error registering metadata threshold";
goto out_flags_changed;
+ }
dm_pool_register_pre_commit_callback(pool->pmd,
metadata_pre_commit_callback, pool);
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 808a98e..c801f6b 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -1242,6 +1242,7 @@
static struct target_type verity_target = {
.name = "verity",
+ .features = DM_TARGET_IMMUTABLE,
.version = {1, 7, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 9d6ae3e..13cc318 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -20,7 +20,7 @@
#define HIGH_WATERMARK 50
#define LOW_WATERMARK 45
-#define MAX_WRITEBACK_JOBS 0
+#define MAX_WRITEBACK_JOBS min(0x10000000 / PAGE_SIZE, totalram_pages() / 16)
#define ENDIO_LATENCY 16
#define WRITEBACK_LATENCY 64
#define AUTOCOMMIT_BLOCKS_SSD 65536
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6030cba..1005abf 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -607,18 +607,19 @@
false, 0, &io->stats_aux);
}
-static void end_io_acct(struct dm_io *io)
+static void end_io_acct(struct mapped_device *md, struct bio *bio,
+ unsigned long start_time, struct dm_stats_aux *stats_aux)
{
- struct mapped_device *md = io->md;
- struct bio *bio = io->orig_bio;
- unsigned long duration = jiffies - io->start_time;
-
- bio_end_io_acct(bio, io->start_time);
+ unsigned long duration = jiffies - start_time;
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, bio_sectors(bio),
- true, duration, &io->stats_aux);
+ true, duration, stats_aux);
+
+ smp_wmb();
+
+ bio_end_io_acct(bio, start_time);
/* nudge anyone waiting on suspend queue */
if (unlikely(wq_has_sleeper(&md->wait)))
@@ -903,6 +904,8 @@
blk_status_t io_error;
struct bio *bio;
struct mapped_device *md = io->md;
+ unsigned long start_time = 0;
+ struct dm_stats_aux stats_aux;
/* Push-back supersedes any I/O errors */
if (unlikely(error)) {
@@ -929,8 +932,10 @@
io_error = io->status;
bio = io->orig_bio;
- end_io_acct(io);
+ start_time = io->start_time;
+ stats_aux = io->stats_aux;
free_io(md, io);
+ end_io_acct(md, bio, start_time, &stats_aux);
if (io_error == BLK_STS_DM_REQUEUE)
return;
@@ -1692,15 +1697,10 @@
struct dm_table *map;
map = dm_get_live_table(md, &srcu_idx);
- if (unlikely(!map)) {
- DMERR_LIMIT("%s: mapping table unavailable, erroring io",
- dm_device_name(md));
- bio_io_error(bio);
- goto out;
- }
- /* If suspended, queue this IO for later */
- if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
+ /* If suspended, or map not yet available, queue this IO for later */
+ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
+ unlikely(!map)) {
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
else if (bio->bi_opf & REQ_RAHEAD)
@@ -2350,6 +2350,8 @@
}
finish_wait(&md->wait, &wait);
+ smp_rmb();
+
return r;
}
@@ -3001,6 +3003,11 @@
goto out;
ti = dm_table_get_target(table, 0);
+ if (dm_suspended_md(md)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
ret = -EINVAL;
if (!ti->type->iterate_devices)
goto out;
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index ea3130e..d377ea0 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -639,14 +639,6 @@
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
write_behind = le32_to_cpu(sb->write_behind);
sectors_reserved = le32_to_cpu(sb->sectors_reserved);
- /* Setup nodes/clustername only if bitmap version is
- * cluster-compatible
- */
- if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
- nodes = le32_to_cpu(sb->nodes);
- strlcpy(bitmap->mddev->bitmap_info.cluster_name,
- sb->cluster_name, 64);
- }
/* verify that the bitmap-specific fields are valid */
if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -668,6 +660,16 @@
goto out;
}
+ /*
+ * Setup nodes/clustername only if bitmap version is
+ * cluster-compatible
+ */
+ if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
+ nodes = le32_to_cpu(sb->nodes);
+ strlcpy(bitmap->mddev->bitmap_info.cluster_name,
+ sb->cluster_name, 64);
+ }
+
/* keep the array size field of the bitmap superblock up to date */
sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
@@ -700,9 +702,9 @@
out:
kunmap_atomic(sb);
- /* Assigning chunksize is required for "re_read" */
- bitmap->mddev->bitmap_info.chunksize = chunksize;
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
+ /* Assigning chunksize is required for "re_read" */
+ bitmap->mddev->bitmap_info.chunksize = chunksize;
err = md_setup_cluster(bitmap->mddev, nodes);
if (err) {
pr_warn("%s: Could not setup cluster service (%d)\n",
@@ -713,18 +715,18 @@
goto re_read;
}
-
out_no_sb:
- if (test_bit(BITMAP_STALE, &bitmap->flags))
- bitmap->events_cleared = bitmap->mddev->events;
- bitmap->mddev->bitmap_info.chunksize = chunksize;
- bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
- bitmap->mddev->bitmap_info.max_write_behind = write_behind;
- bitmap->mddev->bitmap_info.nodes = nodes;
- if (bitmap->mddev->bitmap_info.space == 0 ||
- bitmap->mddev->bitmap_info.space > sectors_reserved)
- bitmap->mddev->bitmap_info.space = sectors_reserved;
- if (err) {
+ if (err == 0) {
+ if (test_bit(BITMAP_STALE, &bitmap->flags))
+ bitmap->events_cleared = bitmap->mddev->events;
+ bitmap->mddev->bitmap_info.chunksize = chunksize;
+ bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+ bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+ bitmap->mddev->bitmap_info.nodes = nodes;
+ if (bitmap->mddev->bitmap_info.space == 0 ||
+ bitmap->mddev->bitmap_info.space > sectors_reserved)
+ bitmap->mddev->bitmap_info.space = sectors_reserved;
+ } else {
md_bitmap_print_sb(bitmap);
if (bitmap->cluster_slot < 0)
md_cluster_stop(bitmap->mddev);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index cc38765..0043dec 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2648,14 +2648,16 @@
static bool does_sb_need_changing(struct mddev *mddev)
{
- struct md_rdev *rdev;
+ struct md_rdev *rdev = NULL, *iter;
struct mdp_superblock_1 *sb;
int role;
/* Find a good rdev */
- rdev_for_each(rdev, mddev)
- if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
+ rdev_for_each(iter, mddev)
+ if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
+ rdev = iter;
break;
+ }
/* No good device found. */
if (!rdev)
@@ -6297,6 +6299,7 @@
/* stop the array and free an attached data structures.
* This is called from dm-raid
*/
+ __md_stop_writes(mddev);
__md_stop(mddev);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
@@ -7968,17 +7971,22 @@
void md_unregister_thread(struct md_thread **threadp)
{
- struct md_thread *thread = *threadp;
- if (!thread)
- return;
- pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
- /* Locking ensures that mddev_unlock does not wake_up a
+ struct md_thread *thread;
+
+ /*
+ * Locking ensures that mddev_unlock does not wake_up a
* non-existent thread
*/
spin_lock(&pers_lock);
+ thread = *threadp;
+ if (!thread) {
+ spin_unlock(&pers_lock);
+ return;
+ }
*threadp = NULL;
spin_unlock(&pers_lock);
+ pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
kthread_stop(thread->tsk);
kfree(thread);
}
@@ -9417,6 +9425,7 @@
wake_up(&resync_wait);
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ sysfs_notify_dirent_safe(mddev->sysfs_completed);
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event(mddev);
if (mddev->event_work.func)
@@ -9728,16 +9737,18 @@
void md_reload_sb(struct mddev *mddev, int nr)
{
- struct md_rdev *rdev;
+ struct md_rdev *rdev = NULL, *iter;
int err;
/* Find the rdev */
- rdev_for_each_rcu(rdev, mddev) {
- if (rdev->desc_nr == nr)
+ rdev_for_each_rcu(iter, mddev) {
+ if (iter->desc_nr == nr) {
+ rdev = iter;
break;
+ }
}
- if (!rdev || rdev->desc_nr != nr) {
+ if (!rdev) {
pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
return;
}
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 35843df..a20332e 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -48,7 +48,7 @@
int len = 0;
for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
- len += snprintf(line+len, 200-len, "%s%s", k?"/":"",
+ len += scnprintf(line+len, 200-len, "%s%s", k?"/":"",
bdevname(conf->devlist[j*raid_disks
+ k]->bdev, b));
pr_debug("md: zone%d=[%s]\n", j, line);
@@ -128,21 +128,6 @@
pr_debug("md/raid0:%s: FINAL %d zones\n",
mdname(mddev), conf->nr_strip_zones);
- if (conf->nr_strip_zones == 1) {
- conf->layout = RAID0_ORIG_LAYOUT;
- } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
- mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
- conf->layout = mddev->layout;
- } else if (default_layout == RAID0_ORIG_LAYOUT ||
- default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
- conf->layout = default_layout;
- } else {
- pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
- mdname(mddev));
- pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
- err = -ENOTSUPP;
- goto abort;
- }
/*
* now since we have the hard sector sizes, we can make sure
* chunk size is a multiple of that sector size
@@ -273,6 +258,22 @@
(unsigned long long)smallest->sectors);
}
+ if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
+ conf->layout = RAID0_ORIG_LAYOUT;
+ } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+ mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+ conf->layout = mddev->layout;
+ } else if (default_layout == RAID0_ORIG_LAYOUT ||
+ default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+ conf->layout = default_layout;
+ } else {
+ pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
+ mdname(mddev));
+ pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
+ err = -EOPNOTSUPP;
+ goto abort;
+ }
+
pr_debug("md/raid0:%s: done.\n", mdname(mddev));
*private_conf = conf;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 70dccc3..0e741a8 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1809,9 +1809,12 @@
int err = 0;
int number = rdev->raid_disk;
struct md_rdev **rdevp;
- struct raid10_info *p = conf->mirrors + number;
+ struct raid10_info *p;
print_conf(conf);
+ if (unlikely(number >= mddev->raid_disks))
+ return 0;
+ p = conf->mirrors + number;
if (rdev == p->rdev)
rdevp = &p->rdev;
else if (rdev == p->replacement)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c82953a..9f114b9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -36,6 +36,7 @@
*/
#include <linux/blkdev.h>
+#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
@@ -686,17 +687,17 @@
return degraded;
}
-static int has_failed(struct r5conf *conf)
+static bool has_failed(struct r5conf *conf)
{
- int degraded;
+ int degraded = conf->mddev->degraded;
- if (conf->mddev->reshape_position == MaxSector)
- return conf->mddev->degraded > conf->max_degraded;
+ if (test_bit(MD_BROKEN, &conf->mddev->flags))
+ return true;
- degraded = raid5_calc_degraded(conf);
- if (degraded > conf->max_degraded)
- return 1;
- return 0;
+ if (conf->mddev->reshape_position != MaxSector)
+ degraded = raid5_calc_degraded(conf);
+
+ return degraded > conf->max_degraded;
}
struct stripe_head *
@@ -2864,10 +2865,10 @@
if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
- raid5_release_stripe(sh);
if (sh->batch_head && sh != sh->batch_head)
raid5_release_stripe(sh->batch_head);
+ raid5_release_stripe(sh);
}
static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
@@ -2877,34 +2878,31 @@
unsigned long flags;
pr_debug("raid456: error called\n");
+ pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
+ mdname(mddev), bdevname(rdev->bdev, b));
+
spin_lock_irqsave(&conf->device_lock, flags);
-
- if (test_bit(In_sync, &rdev->flags) &&
- mddev->degraded == conf->max_degraded) {
- /*
- * Don't allow to achieve failed state
- * Don't try to recover this device
- */
- conf->recovery_disabled = mddev->recovery_disabled;
- spin_unlock_irqrestore(&conf->device_lock, flags);
- return;
- }
-
set_bit(Faulty, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
mddev->degraded = raid5_calc_degraded(conf);
+
+ if (has_failed(conf)) {
+ set_bit(MD_BROKEN, &conf->mddev->flags);
+ conf->recovery_disabled = mddev->recovery_disabled;
+
+ pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
+ mdname(mddev), mddev->degraded, conf->raid_disks);
+ } else {
+ pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
+ mdname(mddev), conf->raid_disks - mddev->degraded);
+ }
+
spin_unlock_irqrestore(&conf->device_lock, flags);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(Blocked, &rdev->flags);
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
- pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
- "md/raid:%s: Operation continuing on %d devices.\n",
- mdname(mddev),
- bdevname(rdev->bdev, b),
- mdname(mddev),
- conf->raid_disks - mddev->degraded);
r5c_update_on_rdev_error(mddev, rdev);
}
@@ -3939,7 +3937,7 @@
* back cache (prexor with orig_page, and then xor with
* page) in the read path
*/
- if (s->injournal && s->failed) {
+ if (s->to_read && s->injournal && s->failed) {
if (test_bit(STRIPE_R5C_CACHING, &sh->state))
r5c_make_stripe_write_out(sh);
goto out;
@@ -6522,7 +6520,18 @@
spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock);
+
+ /*
+ * Waiting on MD_SB_CHANGE_PENDING below may deadlock
+ * seeing md_check_recovery() is needed to clear
+ * the flag when using mdmon.
+ */
+ continue;
}
+
+ wait_event_lock_irq(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
+ conf->device_lock);
}
pr_debug("%d stripes handled\n", handled);
@@ -8007,6 +8016,7 @@
*/
if (rdev->saved_raid_disk >= 0 &&
rdev->saved_raid_disk >= first &&
+ rdev->saved_raid_disk <= last &&
conf->disks[rdev->saved_raid_disk].rdev == NULL)
first = rdev->saved_raid_disk;