Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index d06b8aa..ec10fda 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -142,6 +142,7 @@
size_t metadata_sectors;
size_t n_blocks;
uint64_t seq_count;
+ sector_t data_device_sectors;
void *block_start;
struct wc_entry *entries;
unsigned block_size;
@@ -153,6 +154,7 @@
bool overwrote_committed:1;
bool memory_vmapped:1;
+ bool start_sector_set:1;
bool high_wm_percent_set:1;
bool low_wm_percent_set:1;
bool max_writeback_jobs_set:1;
@@ -161,6 +163,10 @@
bool writeback_fua_set:1;
bool flush_on_suspend:1;
+ unsigned high_wm_percent_value;
+ unsigned low_wm_percent_value;
+ unsigned autocommit_time_value;
+
unsigned writeback_all;
struct workqueue_struct *writeback_wq;
struct work_struct writeback_work;
@@ -224,6 +230,7 @@
pfn_t pfn;
int id;
struct page **pages;
+ sector_t offset;
wc->memory_vmapped = false;
@@ -242,9 +249,16 @@
goto err1;
}
+ offset = get_start_sect(wc->ssd_dev->bdev);
+ if (offset & (PAGE_SIZE / 512 - 1)) {
+ r = -EINVAL;
+ goto err1;
+ }
+ offset >>= PAGE_SHIFT - 9;
+
id = dax_read_lock();
- da = dax_direct_access(wc->ssd_dev->dax_dev, 0, p, &wc->memory_map, &pfn);
+ da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, &wc->memory_map, &pfn);
if (da < 0) {
wc->memory_map = NULL;
r = da;
@@ -266,7 +280,7 @@
i = 0;
do {
long daa;
- daa = dax_direct_access(wc->ssd_dev->dax_dev, i, p - i,
+ daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i, p - i,
NULL, &pfn);
if (daa <= 0) {
r = daa ? daa : -EINVAL;
@@ -279,6 +293,8 @@
while (daa-- && i < p) {
pages[i++] = pfn_t_to_page(pfn);
pfn.val++;
+ if (!(i & 15))
+ cond_resched();
}
} while (i < p);
wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL);
@@ -306,7 +322,7 @@
#else
static int persistent_memory_claim(struct dm_writecache *wc)
{
- BUG();
+ return -EOPNOTSUPP;
}
#endif
@@ -442,7 +458,13 @@
complete(&endio->c);
}
-static void ssd_commit_flushed(struct dm_writecache *wc)
+static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
+{
+ wait_event(wc->bio_in_progress_wait[direction],
+ !atomic_read(&wc->bio_in_progress[direction]));
+}
+
+static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
{
struct dm_io_region region;
struct dm_io_request req;
@@ -488,17 +510,20 @@
writecache_notify_io(0, &endio);
wait_for_completion_io(&endio.c);
+ if (wait_for_ios)
+ writecache_wait_for_ios(wc, WRITE);
+
writecache_disk_flush(wc, wc->ssd_dev);
memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size);
}
-static void writecache_commit_flushed(struct dm_writecache *wc)
+static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
{
if (WC_MODE_PMEM(wc))
wmb();
else
- ssd_commit_flushed(wc);
+ ssd_commit_flushed(wc, wait_for_ios);
}
static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
@@ -522,12 +547,6 @@
writecache_error(wc, r, "error flushing metadata: %d", r);
}
-static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
-{
- wait_event(wc->bio_in_progress_wait[direction],
- !atomic_read(&wc->bio_in_progress[direction]));
-}
-
#define WFE_RETURN_FOLLOWING 1
#define WFE_LOWEST_SEQ 2
@@ -622,6 +641,12 @@
wc->freelist_size++;
}
+static inline void writecache_verify_watermark(struct dm_writecache *wc)
+{
+ if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark))
+ queue_work(wc->writeback_wq, &wc->writeback_work);
+}
+
static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc)
{
struct wc_entry *e;
@@ -643,8 +668,8 @@
list_del(&e->lru);
}
wc->freelist_size--;
- if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark))
- queue_work(wc->writeback_wq, &wc->writeback_work);
+
+ writecache_verify_watermark(wc);
return e;
}
@@ -724,15 +749,12 @@
e = e2;
cond_resched();
}
- writecache_commit_flushed(wc);
-
- if (!WC_MODE_PMEM(wc))
- writecache_wait_for_ios(wc, WRITE);
+ writecache_commit_flushed(wc, true);
wc->seq_count++;
pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
wc->overwrote_committed = false;
@@ -756,7 +778,7 @@
}
if (need_flush_after_free)
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
}
static void writecache_flush_work(struct work_struct *work)
@@ -799,6 +821,8 @@
writecache_wait_for_ios(wc, WRITE);
discarded_something = true;
}
+ if (!writecache_entry_is_committed(wc, e))
+ wc->uncommitted_blocks--;
writecache_free_entry(wc, e);
}
@@ -809,7 +833,7 @@
}
if (discarded_something)
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
}
static bool writecache_wait_for_writeback(struct dm_writecache *wc)
@@ -838,7 +862,7 @@
}
wc_unlock(wc);
- flush_workqueue(wc->writeback_wq);
+ drain_workqueue(wc->writeback_wq);
wc_lock(wc);
if (flush_on_suspend)
@@ -866,11 +890,30 @@
struct wc_entry *e = &wc->entries[b];
e->index = b;
e->write_in_progress = false;
+ cond_resched();
}
return 0;
}
+static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors)
+{
+ struct dm_io_region region;
+ struct dm_io_request req;
+
+ region.bdev = wc->ssd_dev->bdev;
+ region.sector = wc->start_sector;
+ region.count = n_sectors;
+ req.bi_op = REQ_OP_READ;
+ req.bi_op_flags = REQ_SYNC;
+ req.mem.type = DM_IO_VMA;
+ req.mem.ptr.vma = (char *)wc->memory_map;
+ req.client = wc->dm_io;
+ req.notify.fn = NULL;
+
+ return dm_io(&req, 1, &region, NULL);
+}
+
static void writecache_resume(struct dm_target *ti)
{
struct dm_writecache *wc = ti->private;
@@ -881,8 +924,20 @@
wc_lock(wc);
- if (WC_MODE_PMEM(wc))
+ wc->data_device_sectors = i_size_read(wc->dev->bdev->bd_inode) >> SECTOR_SHIFT;
+
+ if (WC_MODE_PMEM(wc)) {
persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size);
+ } else {
+ r = writecache_read_metadata(wc, wc->metadata_sectors);
+ if (r) {
+ size_t sb_entries_offset;
+ writecache_error(wc, r, "unable to read metadata: %d", r);
+ sb_entries_offset = offsetof(struct wc_memory_superblock, entries);
+ memset((char *)wc->memory_map + sb_entries_offset, -1,
+ (wc->metadata_sectors << SECTOR_SHIFT) - sb_entries_offset);
+ }
+ }
wc->tree = RB_ROOT;
INIT_LIST_HEAD(&wc->lru);
@@ -920,6 +975,7 @@
e->original_sector = le64_to_cpu(wme.original_sector);
e->seq_count = le64_to_cpu(wme.seq_count);
}
+ cond_resched();
}
#endif
for (b = 0; b < wc->n_blocks; b++) {
@@ -958,9 +1014,11 @@
if (need_flush) {
writecache_flush_all_metadata(wc);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
}
+ writecache_verify_watermark(wc);
+
wc_unlock(wc);
}
@@ -1218,7 +1276,8 @@
}
} while (bio->bi_iter.bi_size);
- if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks))
+ if (unlikely(bio->bi_opf & REQ_FUA ||
+ wc->uncommitted_blocks >= wc->autocommit_blocks))
writecache_flush(wc);
else
writecache_schedule_autocommit(wc);
@@ -1341,7 +1400,7 @@
wc->writeback_size--;
n_walked++;
if (unlikely(n_walked >= ENDIO_LATENCY)) {
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
wc_unlock(wc);
wc_lock(wc);
n_walked = 0;
@@ -1422,7 +1481,7 @@
writecache_wait_for_ios(wc, READ);
}
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
wc_unlock(wc);
}
@@ -1437,6 +1496,10 @@
void *address = memory_data(wc, e);
persistent_memory_flush_cache(address, block_size);
+
+ if (unlikely(bio_end_sector(&wb->bio) >= wc->data_device_sectors))
+ return true;
+
return bio_add_page(&wb->bio, persistent_memory_page(address),
block_size, persistent_memory_page_offset(address)) != 0;
}
@@ -1508,6 +1571,9 @@
if (writecache_has_error(wc)) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
+ } else if (unlikely(!bio_sectors(bio))) {
+ bio->bi_status = BLK_STS_OK;
+ bio_endio(bio);
} else {
submit_bio(bio);
}
@@ -1551,6 +1617,14 @@
e = f;
}
+ if (unlikely(to.sector + to.count > wc->data_device_sectors)) {
+ if (to.sector >= wc->data_device_sectors) {
+ writecache_copy_endio(0, 0, c);
+ continue;
+ }
+ from.count = to.count = wc->data_device_sectors - to.sector;
+ }
+
dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c);
__writeback_throttle(wc, wbl);
@@ -1761,14 +1835,16 @@
pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks));
pmem_assign(sb(wc)->seq_count, cpu_to_le64(0));
- for (b = 0; b < wc->n_blocks; b++)
+ for (b = 0; b < wc->n_blocks; b++) {
write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
+ cond_resched();
+ }
writecache_flush_all_metadata(wc);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic);
- writecache_commit_flushed(wc);
+ writecache_commit_flushed(wc, false);
return 0;
}
@@ -1836,7 +1912,7 @@
struct wc_memory_superblock s;
static struct dm_arg _args[] = {
- {0, 10, "Invalid number of feature args"},
+ {0, 16, "Invalid number of feature args"},
};
as.argc = argc;
@@ -1971,6 +2047,12 @@
ti->error = "Invalid block size";
goto bad;
}
+ if (wc->block_size < bdev_logical_block_size(wc->dev->bdev) ||
+ wc->block_size < bdev_logical_block_size(wc->ssd_dev->bdev)) {
+ r = -EINVAL;
+ ti->error = "Block size is smaller than device logical block size";
+ goto bad;
+ }
wc->block_size_bits = __ffs(wc->block_size);
wc->max_writeback_jobs = MAX_WRITEBACK_JOBS;
@@ -1992,6 +2074,7 @@
if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1)
goto invalid_optional;
wc->start_sector = start_sector;
+ wc->start_sector_set = true;
if (wc->start_sector != start_sector ||
wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT)
goto invalid_optional;
@@ -2001,6 +2084,7 @@
goto invalid_optional;
if (high_wm_percent < 0 || high_wm_percent > 100)
goto invalid_optional;
+ wc->high_wm_percent_value = high_wm_percent;
wc->high_wm_percent_set = true;
} else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) {
string = dm_shift_arg(&as), opt_params--;
@@ -2008,6 +2092,7 @@
goto invalid_optional;
if (low_wm_percent < 0 || low_wm_percent > 100)
goto invalid_optional;
+ wc->low_wm_percent_value = low_wm_percent;
wc->low_wm_percent_set = true;
} else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) {
string = dm_shift_arg(&as), opt_params--;
@@ -2027,6 +2112,7 @@
if (autocommit_msecs > 3600000)
goto invalid_optional;
wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs);
+ wc->autocommit_time_value = autocommit_msecs;
wc->autocommit_time_set = true;
} else if (!strcasecmp(string, "fua")) {
if (WC_MODE_PMEM(wc)) {
@@ -2053,14 +2139,18 @@
}
if (WC_MODE_PMEM(wc)) {
+ if (!dax_synchronous(wc->ssd_dev->dax_dev)) {
+ r = -EOPNOTSUPP;
+ ti->error = "Asynchronous persistent memory not supported as pmem cache";
+ goto bad;
+ }
+
r = persistent_memory_claim(wc);
if (r) {
ti->error = "Unable to map persistent memory for cache";
goto bad;
}
} else {
- struct dm_io_region region;
- struct dm_io_request req;
size_t n_blocks, n_metadata_blocks;
uint64_t n_bitmap_bits;
@@ -2117,19 +2207,9 @@
goto bad;
}
- region.bdev = wc->ssd_dev->bdev;
- region.sector = wc->start_sector;
- region.count = wc->metadata_sectors;
- req.bi_op = REQ_OP_READ;
- req.bi_op_flags = REQ_SYNC;
- req.mem.type = DM_IO_VMA;
- req.mem.ptr.vma = (char *)wc->memory_map;
- req.client = wc->dm_io;
- req.notify.fn = NULL;
-
- r = dm_io(&req, 1, &region, NULL);
+ r = writecache_read_metadata(wc, wc->block_size >> SECTOR_SHIFT);
if (r) {
- ti->error = "Unable to read metadata";
+ ti->error = "Unable to read first block of metadata";
goto bad;
}
}
@@ -2234,7 +2314,6 @@
struct dm_writecache *wc = ti->private;
unsigned extra_args;
unsigned sz = 0;
- uint64_t x;
switch (type) {
case STATUSTYPE_INFO:
@@ -2246,7 +2325,7 @@
DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's',
wc->dev->name, wc->ssd_dev->name, wc->block_size);
extra_args = 0;
- if (wc->start_sector)
+ if (wc->start_sector_set)
extra_args += 2;
if (wc->high_wm_percent_set)
extra_args += 2;
@@ -2262,26 +2341,18 @@
extra_args++;
DMEMIT("%u", extra_args);
- if (wc->start_sector)
+ if (wc->start_sector_set)
DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector);
- if (wc->high_wm_percent_set) {
- x = (uint64_t)wc->freelist_high_watermark * 100;
- x += wc->n_blocks / 2;
- do_div(x, (size_t)wc->n_blocks);
- DMEMIT(" high_watermark %u", 100 - (unsigned)x);
- }
- if (wc->low_wm_percent_set) {
- x = (uint64_t)wc->freelist_low_watermark * 100;
- x += wc->n_blocks / 2;
- do_div(x, (size_t)wc->n_blocks);
- DMEMIT(" low_watermark %u", 100 - (unsigned)x);
- }
+ if (wc->high_wm_percent_set)
+ DMEMIT(" high_watermark %u", wc->high_wm_percent_value);
+ if (wc->low_wm_percent_set)
+ DMEMIT(" low_watermark %u", wc->low_wm_percent_value);
if (wc->max_writeback_jobs_set)
DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs);
if (wc->autocommit_blocks_set)
DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks);
if (wc->autocommit_time_set)
- DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies));
+ DMEMIT(" autocommit_time %u", wc->autocommit_time_value);
if (wc->writeback_fua_set)
DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
break;