Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 36af7af..b7d1eb3 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -4,6 +4,7 @@
depends on PHYS_ADDR_T_64BIT
depends on HAS_IOMEM
depends on BLK_DEV
+ select MEMREGION
help
Generic support for non-volatile memory devices including
ACPI-6-NFIT defined resources. On platforms that define an
diff --git a/drivers/nvdimm/badrange.c b/drivers/nvdimm/badrange.c
index b9eeefa..aaf6e21 100644
--- a/drivers/nvdimm/badrange.c
+++ b/drivers/nvdimm/badrange.c
@@ -211,7 +211,7 @@
}
static void badblocks_populate(struct badrange *badrange,
- struct badblocks *bb, const struct resource *res)
+ struct badblocks *bb, const struct range *range)
{
struct badrange_entry *bre;
@@ -222,34 +222,34 @@
u64 bre_end = bre->start + bre->length - 1;
/* Discard intervals with no intersection */
- if (bre_end < res->start)
+ if (bre_end < range->start)
continue;
- if (bre->start > res->end)
+ if (bre->start > range->end)
continue;
/* Deal with any overlap after start of the namespace */
- if (bre->start >= res->start) {
+ if (bre->start >= range->start) {
u64 start = bre->start;
u64 len;
- if (bre_end <= res->end)
+ if (bre_end <= range->end)
len = bre->length;
else
- len = res->start + resource_size(res)
+ len = range->start + range_len(range)
- bre->start;
- __add_badblock_range(bb, start - res->start, len);
+ __add_badblock_range(bb, start - range->start, len);
continue;
}
/*
* Deal with overlap for badrange starting before
* the namespace.
*/
- if (bre->start < res->start) {
+ if (bre->start < range->start) {
u64 len;
- if (bre_end < res->end)
- len = bre->start + bre->length - res->start;
+ if (bre_end < range->end)
+ len = bre->start + bre->length - range->start;
else
- len = resource_size(res);
+ len = range_len(range);
__add_badblock_range(bb, 0, len);
}
}
@@ -267,7 +267,7 @@
* and add badblocks entries for all matching sub-ranges
*/
void nvdimm_badblocks_populate(struct nd_region *nd_region,
- struct badblocks *bb, const struct resource *res)
+ struct badblocks *bb, const struct range *range)
{
struct nvdimm_bus *nvdimm_bus;
@@ -279,7 +279,7 @@
nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
nvdimm_bus_lock(&nvdimm_bus->dev);
- badblocks_populate(&nvdimm_bus->badrange, bb, res);
+ badblocks_populate(&nvdimm_bus->badrange, bb, range);
nvdimm_bus_unlock(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 677d6f4..22e5617 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -162,10 +162,10 @@
return err;
}
-static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t nd_blk_submit_bio(struct bio *bio)
{
struct bio_integrity_payload *bip;
- struct nd_namespace_blk *nsblk;
+ struct nd_namespace_blk *nsblk = bio->bi_disk->private_data;
struct bvec_iter iter;
unsigned long start;
struct bio_vec bvec;
@@ -176,9 +176,10 @@
return BLK_QC_T_NONE;
bip = bio_integrity(bio);
- nsblk = q->queuedata;
rw = bio_data_dir(bio);
- do_acct = nd_iostat_start(bio, &start);
+ do_acct = blk_queue_io_stat(bio->bi_disk->queue);
+ if (do_acct)
+ start = bio_start_io_acct(bio);
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
@@ -195,7 +196,7 @@
}
}
if (do_acct)
- nd_iostat_end(bio, start);
+ bio_end_io_acct(bio, start);
bio_endio(bio);
return BLK_QC_T_NONE;
@@ -224,7 +225,7 @@
static const struct block_device_operations nd_blk_fops = {
.owner = THIS_MODULE,
- .revalidate_disk = nvdimm_revalidate_disk,
+ .submit_bio = nd_blk_submit_bio,
};
static void nd_blk_release_queue(void *q)
@@ -249,17 +250,15 @@
internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
- q = blk_alloc_queue(GFP_KERNEL);
+ q = blk_alloc_queue(NUMA_NO_NODE);
if (!q)
return -ENOMEM;
if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
return -ENOMEM;
- blk_queue_make_request(q, nd_blk_make_request);
blk_queue_max_hw_sectors(q, UINT_MAX);
blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- q->queuedata = nsblk;
disk = alloc_disk(0);
if (!disk)
@@ -269,6 +268,7 @@
disk->fops = &nd_blk_fops;
disk->queue = q;
disk->flags = GENHD_FL_EXT_DEVT;
+ disk->private_data = nsblk;
nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name);
if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
@@ -283,7 +283,7 @@
set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
device_add_disk(dev, disk, NULL);
- revalidate_disk(disk);
+ nvdimm_check_and_set_ro(disk);
return 0;
}
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 5129543..12ff6f8 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1439,10 +1439,10 @@
return ret;
}
-static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t btt_submit_bio(struct bio *bio)
{
struct bio_integrity_payload *bip = bio_integrity(bio);
- struct btt *btt = q->queuedata;
+ struct btt *btt = bio->bi_disk->private_data;
struct bvec_iter iter;
unsigned long start;
struct bio_vec bvec;
@@ -1452,7 +1452,9 @@
if (!bio_integrity_prep(bio))
return BLK_QC_T_NONE;
- do_acct = nd_iostat_start(bio, &start);
+ do_acct = blk_queue_io_stat(bio->bi_disk->queue);
+ if (do_acct)
+ start = bio_start_io_acct(bio);
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
@@ -1477,7 +1479,7 @@
}
}
if (do_acct)
- nd_iostat_end(bio, start);
+ bio_end_io_acct(bio, start);
bio_endio(bio);
return BLK_QC_T_NONE;
@@ -1488,10 +1490,8 @@
{
struct btt *btt = bdev->bd_disk->private_data;
int rc;
- unsigned int len;
- len = hpage_nr_pages(page) * PAGE_SIZE;
- rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector);
+ rc = btt_do_bvec(btt, NULL, page, thp_size(page), 0, op, sector);
if (rc == 0)
page_endio(page, op_is_write(op), 0);
@@ -1510,9 +1510,9 @@
static const struct block_device_operations btt_fops = {
.owner = THIS_MODULE,
+ .submit_bio = btt_submit_bio,
.rw_page = btt_rw_page,
.getgeo = btt_getgeo,
- .revalidate_disk = nvdimm_revalidate_disk,
};
static int btt_blk_init(struct btt *btt)
@@ -1521,7 +1521,7 @@
struct nd_namespace_common *ndns = nd_btt->ndns;
/* create a new disk and request queue for btt */
- btt->btt_queue = blk_alloc_queue(GFP_KERNEL);
+ btt->btt_queue = blk_alloc_queue(NUMA_NO_NODE);
if (!btt->btt_queue)
return -ENOMEM;
@@ -1537,14 +1537,10 @@
btt->btt_disk->private_data = btt;
btt->btt_disk->queue = btt->btt_queue;
btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
- btt->btt_disk->queue->backing_dev_info->capabilities |=
- BDI_CAP_SYNCHRONOUS_IO;
- blk_queue_make_request(btt->btt_queue, btt_make_request);
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue);
- btt->btt_queue->queuedata = btt;
if (btt_meta_size(btt)) {
int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
@@ -1559,7 +1555,7 @@
set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL);
btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
- revalidate_disk(btt->btt_disk);
+ nvdimm_check_and_set_ro(btt->btt_disk);
return 0;
}
@@ -1674,7 +1670,8 @@
struct nd_region *nd_region;
struct btt_sb *btt_sb;
struct btt *btt;
- size_t rawsize;
+ size_t size, rawsize;
+ int rc;
if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) {
dev_dbg(&nd_btt->dev, "incomplete btt configuration\n");
@@ -1685,6 +1682,11 @@
if (!btt_sb)
return -ENOMEM;
+ size = nvdimm_namespace_capacity(ndns);
+ rc = devm_namespace_enable(&nd_btt->dev, ndns, size);
+ if (rc)
+ return rc;
+
/*
* If this returns < 0, that is ok as it just means there wasn't
* an existing BTT, and we're creating a new one. We still need to
@@ -1693,7 +1695,7 @@
*/
nd_btt_version(nd_btt, ndns, btt_sb);
- rawsize = nvdimm_namespace_capacity(ndns) - nd_btt->initial_offset;
+ rawsize = size - nd_btt->initial_offset;
if (rawsize < ARENA_MIN_SIZE) {
dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n",
dev_name(&ndns->dev),
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 3508a79..05feb97 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -25,17 +25,6 @@
kfree(nd_btt);
}
-static struct device_type nd_btt_device_type = {
- .name = "nd_btt",
- .release = nd_btt_release,
-};
-
-bool is_nd_btt(struct device *dev)
-{
- return dev->type == &nd_btt_device_type;
-}
-EXPORT_SYMBOL(is_nd_btt);
-
struct nd_btt *to_nd_btt(struct device *dev)
{
struct nd_btt *nd_btt = container_of(dev, struct nd_btt, dev);
@@ -178,6 +167,18 @@
NULL,
};
+static const struct device_type nd_btt_device_type = {
+ .name = "nd_btt",
+ .release = nd_btt_release,
+ .groups = nd_btt_attribute_groups,
+};
+
+bool is_nd_btt(struct device *dev)
+{
+ return dev->type == &nd_btt_device_type;
+}
+EXPORT_SYMBOL(is_nd_btt);
+
static struct device *__nd_btt_create(struct nd_region *nd_region,
unsigned long lbasize, u8 *uuid,
struct nd_namespace_common *ndns)
@@ -204,7 +205,6 @@
dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id);
dev->parent = &nd_region->dev;
dev->type = &nd_btt_device_type;
- dev->groups = nd_btt_attribute_groups;
device_initialize(&nd_btt->dev);
if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) {
dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 5e5c6aa..2304c61 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -300,9 +300,14 @@
kfree(nvdimm_bus);
}
+static const struct device_type nvdimm_bus_dev_type = {
+ .release = nvdimm_bus_release,
+ .groups = nvdimm_bus_attribute_groups,
+};
+
bool is_nvdimm_bus(struct device *dev)
{
- return dev->release == nvdimm_bus_release;
+ return dev->type == &nvdimm_bus_dev_type;
}
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
@@ -355,7 +360,7 @@
badrange_init(&nvdimm_bus->badrange);
nvdimm_bus->nd_desc = nd_desc;
nvdimm_bus->dev.parent = parent;
- nvdimm_bus->dev.release = nvdimm_bus_release;
+ nvdimm_bus->dev.type = &nvdimm_bus_dev_type;
nvdimm_bus->dev.groups = nd_desc->attr_groups;
nvdimm_bus->dev.bus = &nvdimm_bus_type;
nvdimm_bus->dev.of_node = nd_desc->of_node;
@@ -623,7 +628,7 @@
}
EXPORT_SYMBOL(__nd_driver_register);
-int nvdimm_revalidate_disk(struct gendisk *disk)
+void nvdimm_check_and_set_ro(struct gendisk *disk)
{
struct device *dev = disk_to_dev(disk)->parent;
struct nd_region *nd_region = to_nd_region(dev->parent);
@@ -634,16 +639,13 @@
* read-only if the disk is already read-only.
*/
if (disk_ro || nd_region->ro == disk_ro)
- return 0;
+ return;
dev_info(dev, "%s read-only, marking %s read-only\n",
dev_name(&nd_region->dev), disk->disk_name);
set_disk_ro(disk, 1);
-
- return 0;
-
}
-EXPORT_SYMBOL(nvdimm_revalidate_disk);
+EXPORT_SYMBOL(nvdimm_check_and_set_ro);
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -669,10 +671,9 @@
/*
* nd_device_attribute_group - generic attributes for all devices on an nd bus
*/
-struct attribute_group nd_device_attribute_group = {
+const struct attribute_group nd_device_attribute_group = {
.attrs = nd_device_attributes,
};
-EXPORT_SYMBOL_GPL(nd_device_attribute_group);
static ssize_t numa_node_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -681,28 +682,56 @@
}
static DEVICE_ATTR_RO(numa_node);
+static int nvdimm_dev_to_target_node(struct device *dev)
+{
+ struct device *parent = dev->parent;
+ struct nd_region *nd_region = NULL;
+
+ if (is_nd_region(dev))
+ nd_region = to_nd_region(dev);
+ else if (parent && is_nd_region(parent))
+ nd_region = to_nd_region(parent);
+
+ if (!nd_region)
+ return NUMA_NO_NODE;
+ return nd_region->target_node;
+}
+
+static ssize_t target_node_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", nvdimm_dev_to_target_node(dev));
+}
+static DEVICE_ATTR_RO(target_node);
+
static struct attribute *nd_numa_attributes[] = {
&dev_attr_numa_node.attr,
+ &dev_attr_target_node.attr,
NULL,
};
static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a,
int n)
{
+ struct device *dev = container_of(kobj, typeof(*dev), kobj);
+
if (!IS_ENABLED(CONFIG_NUMA))
return 0;
+ if (a == &dev_attr_target_node.attr &&
+ nvdimm_dev_to_target_node(dev) == NUMA_NO_NODE)
+ return 0;
+
return a->mode;
}
/*
* nd_numa_attribute_group - NUMA attributes for all devices on an nd bus
*/
-struct attribute_group nd_numa_attribute_group = {
+const struct attribute_group nd_numa_attribute_group = {
.attrs = nd_numa_attributes,
.is_visible = nd_numa_attr_visible,
};
-EXPORT_SYMBOL_GPL(nd_numa_attribute_group);
int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
{
@@ -1005,9 +1034,25 @@
dimm_name = "bus";
}
+ /* Validate command family support against bus declared support */
if (cmd == ND_CMD_CALL) {
+ unsigned long *mask;
+
if (copy_from_user(&pkg, p, sizeof(pkg)))
return -EFAULT;
+
+ if (nvdimm) {
+ if (pkg.nd_family > NVDIMM_FAMILY_MAX)
+ return -EINVAL;
+ mask = &nd_desc->dimm_family_mask;
+ } else {
+ if (pkg.nd_family > NVDIMM_BUS_FAMILY_MAX)
+ return -EINVAL;
+ mask = &nd_desc->bus_family_mask;
+ }
+
+ if (!test_bit(pkg.nd_family, mask))
+ return -EINVAL;
}
if (!desc ||
@@ -1229,7 +1274,7 @@
.owner = THIS_MODULE,
.open = nd_open,
.unlocked_ioctl = bus_ioctl,
- .compat_ioctl = bus_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
@@ -1237,7 +1282,7 @@
.owner = THIS_MODULE,
.open = nd_open,
.unlocked_ioctl = dimm_ioctl,
- .compat_ioctl = dimm_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 2985ca9..5a7c800 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -268,7 +268,7 @@
if (rw == READ) {
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
return -EIO;
- if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
+ if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
return -EIO;
return 0;
}
@@ -300,15 +300,19 @@
return rc;
}
-int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)
+int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio,
+ resource_size_t size)
{
- struct resource *res = &nsio->res;
struct nd_namespace_common *ndns = &nsio->common;
+ struct range range = {
+ .start = nsio->res.start,
+ .end = nsio->res.end,
+ };
- nsio->size = resource_size(res);
- if (!devm_request_mem_region(dev, res->start, resource_size(res),
+ nsio->size = size;
+ if (!devm_request_mem_region(dev, range.start, size,
dev_name(&ndns->dev))) {
- dev_warn(dev, "could not reserve region %pR\n", res);
+ dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
return -EBUSY;
}
@@ -316,14 +320,12 @@
if (devm_init_badblocks(dev, &nsio->bb))
return -ENOMEM;
nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb,
- &nsio->res);
+ &range);
- nsio->addr = devm_memremap(dev, res->start, resource_size(res),
- ARCH_MEMREMAP_PMEM);
+ nsio->addr = devm_memremap(dev, range.start, size, ARCH_MEMREMAP_PMEM);
return PTR_ERR_OR_ZERO(nsio->addr);
}
-EXPORT_SYMBOL_GPL(devm_nsio_enable);
void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio)
{
@@ -331,6 +333,5 @@
devm_memunmap(dev, nsio->addr);
devm_exit_badblocks(dev, &nsio->bb);
- devm_release_mem_region(dev, res->start, resource_size(res));
+ devm_release_mem_region(dev, res->start, nsio->size);
}
-EXPORT_SYMBOL_GPL(devm_nsio_disable);
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 9204f1e..c21ba06 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -4,6 +4,7 @@
*/
#include <linux/libnvdimm.h>
#include <linux/badblocks.h>
+#include <linux/suspend.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/blkdev.h>
@@ -385,10 +386,162 @@
NULL,
};
-struct attribute_group nvdimm_bus_attribute_group = {
+static const struct attribute_group nvdimm_bus_attribute_group = {
.attrs = nvdimm_bus_attributes,
};
-EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
+
+static ssize_t capability_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ enum nvdimm_fwa_capability cap;
+
+ if (!nd_desc->fw_ops)
+ return -EOPNOTSUPP;
+
+ nvdimm_bus_lock(dev);
+ cap = nd_desc->fw_ops->capability(nd_desc);
+ nvdimm_bus_unlock(dev);
+
+ switch (cap) {
+ case NVDIMM_FWA_CAP_QUIESCE:
+ return sprintf(buf, "quiesce\n");
+ case NVDIMM_FWA_CAP_LIVE:
+ return sprintf(buf, "live\n");
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static DEVICE_ATTR_RO(capability);
+
+static ssize_t activate_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ enum nvdimm_fwa_capability cap;
+ enum nvdimm_fwa_state state;
+
+ if (!nd_desc->fw_ops)
+ return -EOPNOTSUPP;
+
+ nvdimm_bus_lock(dev);
+ cap = nd_desc->fw_ops->capability(nd_desc);
+ state = nd_desc->fw_ops->activate_state(nd_desc);
+ nvdimm_bus_unlock(dev);
+
+ if (cap < NVDIMM_FWA_CAP_QUIESCE)
+ return -EOPNOTSUPP;
+
+ switch (state) {
+ case NVDIMM_FWA_IDLE:
+ return sprintf(buf, "idle\n");
+ case NVDIMM_FWA_BUSY:
+ return sprintf(buf, "busy\n");
+ case NVDIMM_FWA_ARMED:
+ return sprintf(buf, "armed\n");
+ case NVDIMM_FWA_ARM_OVERFLOW:
+ return sprintf(buf, "overflow\n");
+ default:
+ return -ENXIO;
+ }
+}
+
+static int exec_firmware_activate(void *data)
+{
+ struct nvdimm_bus_descriptor *nd_desc = data;
+
+ return nd_desc->fw_ops->activate(nd_desc);
+}
+
+static ssize_t activate_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ enum nvdimm_fwa_state state;
+ bool quiesce;
+ ssize_t rc;
+
+ if (!nd_desc->fw_ops)
+ return -EOPNOTSUPP;
+
+ if (sysfs_streq(buf, "live"))
+ quiesce = false;
+ else if (sysfs_streq(buf, "quiesce"))
+ quiesce = true;
+ else
+ return -EINVAL;
+
+ nvdimm_bus_lock(dev);
+ state = nd_desc->fw_ops->activate_state(nd_desc);
+
+ switch (state) {
+ case NVDIMM_FWA_BUSY:
+ rc = -EBUSY;
+ break;
+ case NVDIMM_FWA_ARMED:
+ case NVDIMM_FWA_ARM_OVERFLOW:
+ if (quiesce)
+ rc = hibernate_quiet_exec(exec_firmware_activate, nd_desc);
+ else
+ rc = nd_desc->fw_ops->activate(nd_desc);
+ break;
+ case NVDIMM_FWA_IDLE:
+ default:
+ rc = -ENXIO;
+ }
+ nvdimm_bus_unlock(dev);
+
+ if (rc == 0)
+ rc = len;
+ return rc;
+}
+
+static DEVICE_ATTR_ADMIN_RW(activate);
+
+static umode_t nvdimm_bus_firmware_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, typeof(*dev), kobj);
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ enum nvdimm_fwa_capability cap;
+
+ /*
+ * Both 'activate' and 'capability' disappear when no ops
+ * detected, or a negative capability is indicated.
+ */
+ if (!nd_desc->fw_ops)
+ return 0;
+
+ nvdimm_bus_lock(dev);
+ cap = nd_desc->fw_ops->capability(nd_desc);
+ nvdimm_bus_unlock(dev);
+
+ if (cap < NVDIMM_FWA_CAP_QUIESCE)
+ return 0;
+
+ return a->mode;
+}
+static struct attribute *nvdimm_bus_firmware_attributes[] = {
+ &dev_attr_activate.attr,
+ &dev_attr_capability.attr,
+ NULL,
+};
+
+static const struct attribute_group nvdimm_bus_firmware_attribute_group = {
+ .name = "firmware",
+ .attrs = nvdimm_bus_firmware_attributes,
+ .is_visible = nvdimm_bus_firmware_visible,
+};
+
+const struct attribute_group *nvdimm_bus_attribute_groups[] = {
+ &nvdimm_bus_attribute_group,
+ &nvdimm_bus_firmware_attribute_group,
+ NULL,
+};
int nvdimm_bus_add_badrange(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
@@ -455,7 +608,6 @@
nd_region_exit();
nvdimm_exit();
nvdimm_bus_exit();
- nd_region_devs_exit();
nvdimm_devs_exit();
}
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index 6d22b0f..9996507 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -23,17 +23,6 @@
kfree(nd_dax);
}
-static struct device_type nd_dax_device_type = {
- .name = "nd_dax",
- .release = nd_dax_release,
-};
-
-bool is_nd_dax(struct device *dev)
-{
- return dev ? dev->type == &nd_dax_device_type : false;
-}
-EXPORT_SYMBOL(is_nd_dax);
-
struct nd_dax *to_nd_dax(struct device *dev)
{
struct nd_dax *nd_dax = container_of(dev, struct nd_dax, nd_pfn.dev);
@@ -43,13 +32,18 @@
}
EXPORT_SYMBOL(to_nd_dax);
-static const struct attribute_group *nd_dax_attribute_groups[] = {
- &nd_pfn_attribute_group,
- &nd_device_attribute_group,
- &nd_numa_attribute_group,
- NULL,
+static const struct device_type nd_dax_device_type = {
+ .name = "nd_dax",
+ .release = nd_dax_release,
+ .groups = nd_pfn_attribute_groups,
};
+bool is_nd_dax(struct device *dev)
+{
+ return dev ? dev->type == &nd_dax_device_type : false;
+}
+EXPORT_SYMBOL(is_nd_dax);
+
static struct nd_dax *nd_dax_alloc(struct nd_region *nd_region)
{
struct nd_pfn *nd_pfn;
@@ -69,7 +63,6 @@
dev = &nd_pfn->dev;
dev_set_name(dev, "dax%d.%d", nd_region->id, nd_pfn->id);
- dev->groups = nd_dax_attribute_groups;
dev->type = &nd_dax_device_type;
dev->parent = &nd_region->dev;
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 64776ed..7d4ddc4 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -99,7 +99,7 @@
if (ndd->ns_current >= 0) {
rc = nd_label_reserve_dpa(ndd);
if (rc == 0)
- nvdimm_set_aliasing(dev);
+ nvdimm_set_labeling(dev);
}
nvdimm_bus_unlock(dev);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index e0f4110..9d20857 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -32,7 +32,7 @@
if (!nvdimm->cmd_mask ||
!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) {
- if (test_bit(NDD_ALIASING, &nvdimm->flags))
+ if (test_bit(NDD_LABELING, &nvdimm->flags))
return -ENXIO;
else
return -ENOTTY;
@@ -173,11 +173,11 @@
return rc;
}
-void nvdimm_set_aliasing(struct device *dev)
+void nvdimm_set_labeling(struct device *dev)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
- set_bit(NDD_ALIASING, &nvdimm->flags);
+ set_bit(NDD_LABELING, &nvdimm->flags);
}
void nvdimm_set_locked(struct device *dev)
@@ -202,16 +202,6 @@
kfree(nvdimm);
}
-static struct device_type nvdimm_device_type = {
- .name = "nvdimm",
- .release = nvdimm_release,
-};
-
-bool is_nvdimm(struct device *dev)
-{
- return dev->type == &nvdimm_device_type;
-}
-
struct nvdimm *to_nvdimm(struct device *dev)
{
struct nvdimm *nvdimm = container_of(dev, struct nvdimm, dev);
@@ -322,8 +312,9 @@
{
struct nvdimm *nvdimm = to_nvdimm(dev);
- return sprintf(buf, "%s%s\n",
+ return sprintf(buf, "%s%s%s\n",
test_bit(NDD_ALIASING, &nvdimm->flags) ? "alias " : "",
+ test_bit(NDD_LABELING, &nvdimm->flags) ? "label " : "",
test_bit(NDD_LOCKED, &nvdimm->flags) ? "lock " : "");
}
static DEVICE_ATTR_RO(flags);
@@ -384,14 +375,14 @@
{
struct nvdimm *nvdimm = to_nvdimm(dev);
+ if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags))
+ return sprintf(buf, "overwrite\n");
if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
return sprintf(buf, "disabled\n");
if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags))
return sprintf(buf, "unlocked\n");
if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags))
return sprintf(buf, "locked\n");
- if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags))
- return sprintf(buf, "overwrite\n");
return -ENOTTY;
}
@@ -462,17 +453,150 @@
return 0;
}
-struct attribute_group nvdimm_attribute_group = {
+static const struct attribute_group nvdimm_attribute_group = {
.attrs = nvdimm_attributes,
.is_visible = nvdimm_visible,
};
-EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
+
+static ssize_t result_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ enum nvdimm_fwa_result result;
+
+ if (!nvdimm->fw_ops)
+ return -EOPNOTSUPP;
+
+ nvdimm_bus_lock(dev);
+ result = nvdimm->fw_ops->activate_result(nvdimm);
+ nvdimm_bus_unlock(dev);
+
+ switch (result) {
+ case NVDIMM_FWA_RESULT_NONE:
+ return sprintf(buf, "none\n");
+ case NVDIMM_FWA_RESULT_SUCCESS:
+ return sprintf(buf, "success\n");
+ case NVDIMM_FWA_RESULT_FAIL:
+ return sprintf(buf, "fail\n");
+ case NVDIMM_FWA_RESULT_NOTSTAGED:
+ return sprintf(buf, "not_staged\n");
+ case NVDIMM_FWA_RESULT_NEEDRESET:
+ return sprintf(buf, "need_reset\n");
+ default:
+ return -ENXIO;
+ }
+}
+static DEVICE_ATTR_ADMIN_RO(result);
+
+static ssize_t activate_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ enum nvdimm_fwa_state state;
+
+ if (!nvdimm->fw_ops)
+ return -EOPNOTSUPP;
+
+ nvdimm_bus_lock(dev);
+ state = nvdimm->fw_ops->activate_state(nvdimm);
+ nvdimm_bus_unlock(dev);
+
+ switch (state) {
+ case NVDIMM_FWA_IDLE:
+ return sprintf(buf, "idle\n");
+ case NVDIMM_FWA_BUSY:
+ return sprintf(buf, "busy\n");
+ case NVDIMM_FWA_ARMED:
+ return sprintf(buf, "armed\n");
+ default:
+ return -ENXIO;
+ }
+}
+
+static ssize_t activate_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ enum nvdimm_fwa_trigger arg;
+ int rc;
+
+ if (!nvdimm->fw_ops)
+ return -EOPNOTSUPP;
+
+ if (sysfs_streq(buf, "arm"))
+ arg = NVDIMM_FWA_ARM;
+ else if (sysfs_streq(buf, "disarm"))
+ arg = NVDIMM_FWA_DISARM;
+ else
+ return -EINVAL;
+
+ nvdimm_bus_lock(dev);
+ rc = nvdimm->fw_ops->arm(nvdimm, arg);
+ nvdimm_bus_unlock(dev);
+
+ if (rc < 0)
+ return rc;
+ return len;
+}
+static DEVICE_ATTR_ADMIN_RW(activate);
+
+static struct attribute *nvdimm_firmware_attributes[] = {
+ &dev_attr_activate.attr,
+ &dev_attr_result.attr,
+ NULL,
+};
+
+static umode_t nvdimm_firmware_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, typeof(*dev), kobj);
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ enum nvdimm_fwa_capability cap;
+
+ if (!nd_desc->fw_ops)
+ return 0;
+ if (!nvdimm->fw_ops)
+ return 0;
+
+ nvdimm_bus_lock(dev);
+ cap = nd_desc->fw_ops->capability(nd_desc);
+ nvdimm_bus_unlock(dev);
+
+ if (cap < NVDIMM_FWA_CAP_QUIESCE)
+ return 0;
+
+ return a->mode;
+}
+
+static const struct attribute_group nvdimm_firmware_attribute_group = {
+ .name = "firmware",
+ .attrs = nvdimm_firmware_attributes,
+ .is_visible = nvdimm_firmware_visible,
+};
+
+static const struct attribute_group *nvdimm_attribute_groups[] = {
+ &nd_device_attribute_group,
+ &nvdimm_attribute_group,
+ &nvdimm_firmware_attribute_group,
+ NULL,
+};
+
+static const struct device_type nvdimm_device_type = {
+ .name = "nvdimm",
+ .release = nvdimm_release,
+ .groups = nvdimm_attribute_groups,
+};
+
+bool is_nvdimm(struct device *dev)
+{
+ return dev->type == &nvdimm_device_type;
+}
struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
void *provider_data, const struct attribute_group **groups,
unsigned long flags, unsigned long cmd_mask, int num_flush,
struct resource *flush_wpq, const char *dimm_id,
- const struct nvdimm_security_ops *sec_ops)
+ const struct nvdimm_security_ops *sec_ops,
+ const struct nvdimm_fw_ops *fw_ops)
{
struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
struct device *dev;
@@ -502,6 +626,7 @@
dev->devt = MKDEV(nvdimm_major, nvdimm->id);
dev->groups = groups;
nvdimm->sec.ops = sec_ops;
+ nvdimm->fw_ops = fw_ops;
nvdimm->sec.overwrite_tmo = 0;
INIT_DELAYED_WORK(&nvdimm->dwork, nvdimm_security_overwrite_query);
/*
@@ -568,6 +693,21 @@
return rc;
}
+static unsigned long dpa_align(struct nd_region *nd_region)
+{
+ struct device *dev = &nd_region->dev;
+
+ if (dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev),
+ "bus lock required for capacity provision\n"))
+ return 0;
+ if (dev_WARN_ONCE(dev, !nd_region->ndr_mappings || nd_region->align
+ % nd_region->ndr_mappings,
+ "invalid region align %#lx mappings: %d\n",
+ nd_region->align, nd_region->ndr_mappings))
+ return 0;
+ return nd_region->align / nd_region->ndr_mappings;
+}
+
int alias_dpa_busy(struct device *dev, void *data)
{
resource_size_t map_end, blk_start, new;
@@ -576,6 +716,7 @@
struct nd_region *nd_region;
struct nvdimm_drvdata *ndd;
struct resource *res;
+ unsigned long align;
int i;
if (!is_memory(dev))
@@ -613,13 +754,21 @@
* Find the free dpa from the end of the last pmem allocation to
* the end of the interleave-set mapping.
*/
+ align = dpa_align(nd_region);
+ if (!align)
+ return 0;
+
for_each_dpa_resource(ndd, res) {
+ resource_size_t start, end;
+
if (strncmp(res->name, "pmem", 4) != 0)
continue;
- if ((res->start >= blk_start && res->start < map_end)
- || (res->end >= blk_start
- && res->end <= map_end)) {
- new = max(blk_start, min(map_end + 1, res->end + 1));
+
+ start = ALIGN_DOWN(res->start, align);
+ end = ALIGN(res->end + 1, align) - 1;
+ if ((start >= blk_start && start < map_end)
+ || (end >= blk_start && end <= map_end)) {
+ new = max(blk_start, min(map_end, end) + 1);
if (new != blk_start) {
blk_start = new;
goto retry;
@@ -659,6 +808,7 @@
.res = NULL,
};
struct resource *res;
+ unsigned long align;
if (!ndd)
return 0;
@@ -666,10 +816,20 @@
device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
/* now account for busy blk allocations in unaliased dpa */
+ align = dpa_align(nd_region);
+ if (!align)
+ return 0;
for_each_dpa_resource(ndd, res) {
+ resource_size_t start, end, size;
+
if (strncmp(res->name, "blk", 3) != 0)
continue;
- info.available -= resource_size(res);
+ start = ALIGN_DOWN(res->start, align);
+ end = ALIGN(res->end + 1, align) - 1;
+ size = end - start + 1;
+ if (size >= info.available)
+ return 0;
+ info.available -= size;
}
return info.available;
@@ -688,19 +848,31 @@
struct nvdimm_bus *nvdimm_bus;
resource_size_t max = 0;
struct resource *res;
+ unsigned long align;
/* if a dimm is disabled the available capacity is zero */
if (!ndd)
return 0;
+ align = dpa_align(nd_region);
+ if (!align)
+ return 0;
+
nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
if (__reserve_free_pmem(&nd_region->dev, nd_mapping->nvdimm))
return 0;
for_each_dpa_resource(ndd, res) {
+ resource_size_t start, end;
+
if (strcmp(res->name, "pmem-reserve") != 0)
continue;
- if (resource_size(res) > max)
- max = resource_size(res);
+ /* trim free space relative to current alignment setting */
+ start = ALIGN(res->start, align);
+ end = ALIGN_DOWN(res->end + 1, align) - 1;
+ if (end < start)
+ continue;
+ if (end - start + 1 > max)
+ max = end - start + 1;
}
release_free_pmem(nvdimm_bus, nd_mapping);
return max;
@@ -728,24 +900,33 @@
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct resource *res;
const char *reason;
+ unsigned long align;
if (!ndd)
return 0;
+ align = dpa_align(nd_region);
+ if (!align)
+ return 0;
+
map_start = nd_mapping->start;
map_end = map_start + nd_mapping->size - 1;
blk_start = max(map_start, map_end + 1 - *overlap);
for_each_dpa_resource(ndd, res) {
- if (res->start >= map_start && res->start < map_end) {
+ resource_size_t start, end;
+
+ start = ALIGN_DOWN(res->start, align);
+ end = ALIGN(res->end + 1, align) - 1;
+ if (start >= map_start && start < map_end) {
if (strncmp(res->name, "blk", 3) == 0)
blk_start = min(blk_start,
- max(map_start, res->start));
- else if (res->end > map_end) {
+ max(map_start, start));
+ else if (end > map_end) {
reason = "misaligned to iset";
goto err;
} else
- busy += resource_size(res);
- } else if (res->end >= map_start && res->end <= map_end) {
+ busy += end - start + 1;
+ } else if (end >= map_start && end <= map_end) {
if (strncmp(res->name, "blk", 3) == 0) {
/*
* If a BLK allocation overlaps the start of
@@ -754,8 +935,8 @@
*/
blk_start = map_start;
} else
- busy += resource_size(res);
- } else if (map_start > res->start && map_start < res->end) {
+ busy += end - start + 1;
+ } else if (map_start > start && map_start < end) {
/* total eclipse of the mapping */
busy += nd_mapping->size;
blk_start = map_start;
@@ -765,7 +946,7 @@
*overlap = map_end + 1 - blk_start;
available = blk_start - map_start;
if (busy < available)
- return available - busy;
+ return ALIGN_DOWN(available - busy, align);
return 0;
err:
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
index 87f72f7..4cd18be 100644
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -7,17 +7,7 @@
#include <linux/memory_hotplug.h>
#include <linux/libnvdimm.h>
#include <linux/module.h>
-
-static const struct attribute_group *e820_pmem_attribute_groups[] = {
- &nvdimm_bus_attribute_group,
- NULL,
-};
-
-static const struct attribute_group *e820_pmem_region_attribute_groups[] = {
- &nd_region_attribute_group,
- &nd_device_attribute_group,
- NULL,
-};
+#include <linux/numa.h>
static int e820_pmem_remove(struct platform_device *pdev)
{
@@ -27,28 +17,16 @@
return 0;
}
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int e820_range_to_nid(resource_size_t addr)
-{
- return memory_add_physaddr_to_nid(addr);
-}
-#else
-static int e820_range_to_nid(resource_size_t addr)
-{
- return NUMA_NO_NODE;
-}
-#endif
-
static int e820_register_one(struct resource *res, void *data)
{
struct nd_region_desc ndr_desc;
struct nvdimm_bus *nvdimm_bus = data;
+ int nid = phys_to_target_node(res->start);
memset(&ndr_desc, 0, sizeof(ndr_desc));
ndr_desc.res = res;
- ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
- ndr_desc.numa_node = e820_range_to_nid(res->start);
- ndr_desc.target_node = ndr_desc.numa_node;
+ ndr_desc.numa_node = numa_map_to_online_node(nid);
+ ndr_desc.target_node = nid;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
return -ENXIO;
@@ -62,7 +40,6 @@
struct nvdimm_bus *nvdimm_bus;
int rc = -ENXIO;
- nd_desc.attr_groups = e820_pmem_attribute_groups;
nd_desc.provider_name = "e820";
nd_desc.module = THIS_MODULE;
nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 4c7b775..956b6d1 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -62,7 +62,7 @@
__le16 major;
__le16 minor;
__le64 checksum;
- u8 free[0];
+ u8 free[];
};
/**
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index e6da7b2..7454782 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -10,6 +10,7 @@
#include <linux/nd.h>
#include "nd-core.h"
#include "pmem.h"
+#include "pfn.h"
#include "nd.h"
static void namespace_io_release(struct device *dev)
@@ -44,35 +45,9 @@
kfree(nsblk);
}
-static const struct device_type namespace_io_device_type = {
- .name = "nd_namespace_io",
- .release = namespace_io_release,
-};
-
-static const struct device_type namespace_pmem_device_type = {
- .name = "nd_namespace_pmem",
- .release = namespace_pmem_release,
-};
-
-static const struct device_type namespace_blk_device_type = {
- .name = "nd_namespace_blk",
- .release = namespace_blk_release,
-};
-
-static bool is_namespace_pmem(const struct device *dev)
-{
- return dev ? dev->type == &namespace_pmem_device_type : false;
-}
-
-static bool is_namespace_blk(const struct device *dev)
-{
- return dev ? dev->type == &namespace_blk_device_type : false;
-}
-
-static bool is_namespace_io(const struct device *dev)
-{
- return dev ? dev->type == &namespace_io_device_type : false;
-}
+static bool is_namespace_pmem(const struct device *dev);
+static bool is_namespace_blk(const struct device *dev);
+static bool is_namespace_io(const struct device *dev);
static int is_uuid_busy(struct device *dev, void *data)
{
@@ -567,6 +542,11 @@
{
bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
+ unsigned long align;
+
+ align = nd_region->align / nd_region->ndr_mappings;
+ valid->start = ALIGN(valid->start, align);
+ valid->end = ALIGN_DOWN(valid->end + 1, align) - 1;
if (valid->start >= valid->end)
goto invalid;
@@ -1006,10 +986,10 @@
return -ENXIO;
}
- div_u64_rem(val, PAGE_SIZE * nd_region->ndr_mappings, &remainder);
+ div_u64_rem(val, nd_region->align, &remainder);
if (remainder) {
dev_dbg(dev, "%llu is not %ldK aligned\n", val,
- (PAGE_SIZE * nd_region->ndr_mappings) / SZ_1K);
+ nd_region->align / SZ_1K);
return -EINVAL;
}
@@ -1329,7 +1309,7 @@
return -ENXIO;
return sprintf(buf, "%#llx\n", (unsigned long long) res->start);
}
-static DEVICE_ATTR_RO(resource);
+static DEVICE_ATTR_ADMIN_RO(resource);
static const unsigned long blk_lbasize_supported[] = { 512, 520, 528,
4096, 4104, 4160, 4224, 0 };
@@ -1510,16 +1490,20 @@
}
static DEVICE_ATTR_RO(holder);
-static ssize_t __holder_class_store(struct device *dev, const char *buf)
+static int __holder_class_store(struct device *dev, const char *buf)
{
struct nd_namespace_common *ndns = to_ndns(dev);
if (dev->driver || ndns->claim)
return -EBUSY;
- if (sysfs_streq(buf, "btt"))
- ndns->claim_class = btt_claim_class(dev);
- else if (sysfs_streq(buf, "pfn"))
+ if (sysfs_streq(buf, "btt")) {
+ int rc = btt_claim_class(dev);
+
+ if (rc < NVDIMM_CCLASS_NONE)
+ return rc;
+ ndns->claim_class = rc;
+ } else if (sysfs_streq(buf, "pfn"))
ndns->claim_class = NVDIMM_CCLASS_PFN;
else if (sysfs_streq(buf, "dax"))
ndns->claim_class = NVDIMM_CCLASS_DAX;
@@ -1528,10 +1512,6 @@
else
return -EINVAL;
- /* btt_claim_class() could've returned an error */
- if (ndns->claim_class < 0)
- return ndns->claim_class;
-
return 0;
}
@@ -1539,7 +1519,7 @@
struct device_attribute *attr, const char *buf, size_t len)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
- ssize_t rc;
+ int rc;
nd_device_lock(dev);
nvdimm_bus_lock(dev);
@@ -1547,7 +1527,7 @@
rc = __holder_class_store(dev, buf);
if (rc >= 0)
rc = nd_namespace_label_update(nd_region, dev);
- dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
+ dev_dbg(dev, "%s(%d)\n", rc < 0 ? "fail " : "", rc);
nvdimm_bus_unlock(dev);
nd_device_unlock(dev);
@@ -1645,11 +1625,8 @@
{
struct device *dev = container_of(kobj, struct device, kobj);
- if (a == &dev_attr_resource.attr) {
- if (is_namespace_blk(dev))
- return 0;
- return 0400;
- }
+ if (a == &dev_attr_resource.attr && is_namespace_blk(dev))
+ return 0;
if (is_namespace_pmem(dev) || is_namespace_blk(dev)) {
if (a == &dev_attr_size.attr)
@@ -1658,11 +1635,11 @@
return a->mode;
}
- if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr
- || a == &dev_attr_holder.attr
- || a == &dev_attr_holder_class.attr
- || a == &dev_attr_force_raw.attr
- || a == &dev_attr_mode.attr)
+ /* base is_namespace_io() attributes */
+ if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr ||
+ a == &dev_attr_holder.attr || a == &dev_attr_holder_class.attr ||
+ a == &dev_attr_force_raw.attr || a == &dev_attr_mode.attr ||
+ a == &dev_attr_resource.attr)
return a->mode;
return 0;
@@ -1680,6 +1657,39 @@
NULL,
};
+static const struct device_type namespace_io_device_type = {
+ .name = "nd_namespace_io",
+ .release = namespace_io_release,
+ .groups = nd_namespace_attribute_groups,
+};
+
+static const struct device_type namespace_pmem_device_type = {
+ .name = "nd_namespace_pmem",
+ .release = namespace_pmem_release,
+ .groups = nd_namespace_attribute_groups,
+};
+
+static const struct device_type namespace_blk_device_type = {
+ .name = "nd_namespace_blk",
+ .release = namespace_blk_release,
+ .groups = nd_namespace_attribute_groups,
+};
+
+static bool is_namespace_pmem(const struct device *dev)
+{
+ return dev ? dev->type == &namespace_pmem_device_type : false;
+}
+
+static bool is_namespace_blk(const struct device *dev)
+{
+ return dev ? dev->type == &namespace_blk_device_type : false;
+}
+
+static bool is_namespace_io(const struct device *dev)
+{
+ return dev ? dev->type == &namespace_io_device_type : false;
+}
+
struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
{
struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
@@ -1735,6 +1745,22 @@
return ERR_PTR(-ENODEV);
}
+ /*
+ * Note, alignment validation for fsdax and devdax mode
+ * namespaces happens in nd_pfn_validate() where infoblock
+ * padding parameters can be applied.
+ */
+ if (pmem_should_map_pages(dev)) {
+ struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+ struct resource *res = &nsio->res;
+
+ if (!IS_ALIGNED(res->start | (res->end + 1),
+ memremap_compat_align())) {
+ dev_err(&ndns->dev, "%pr misaligned, unable to map\n", res);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ }
+
if (is_namespace_pmem(&ndns->dev)) {
struct nd_namespace_pmem *nspm;
@@ -1759,6 +1785,23 @@
}
EXPORT_SYMBOL(nvdimm_namespace_common_probe);
+int devm_namespace_enable(struct device *dev, struct nd_namespace_common *ndns,
+ resource_size_t size)
+{
+ if (is_namespace_blk(&ndns->dev))
+ return 0;
+ return devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev), size);
+}
+EXPORT_SYMBOL_GPL(devm_namespace_enable);
+
+void devm_namespace_disable(struct device *dev, struct nd_namespace_common *ndns)
+{
+ if (is_namespace_blk(&ndns->dev))
+ return;
+ devm_nsio_disable(dev, to_nd_namespace_io(&ndns->dev));
+}
+EXPORT_SYMBOL_GPL(devm_namespace_disable);
+
static struct device **create_namespace_io(struct nd_region *nd_region)
{
struct nd_namespace_io *nsio;
@@ -2078,7 +2121,6 @@
}
dev_set_name(dev, "namespace%d.%d", nd_region->id, nsblk->id);
dev->parent = &nd_region->dev;
- dev->groups = nd_namespace_attribute_groups;
return &nsblk->common.dev;
}
@@ -2109,7 +2151,6 @@
return NULL;
}
dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id);
- dev->groups = nd_namespace_attribute_groups;
nd_namespace_pmem_set_resource(nd_region, nspm, 0);
return dev;
@@ -2502,7 +2543,7 @@
if (!ndd) {
if (test_bit(NDD_LOCKED, &nvdimm->flags))
/* fail, label data may be unreadable */;
- else if (test_bit(NDD_ALIASING, &nvdimm->flags))
+ else if (test_bit(NDD_LABELING, &nvdimm->flags))
/* fail, labels needed to disambiguate dpa */;
else
continue;
@@ -2613,7 +2654,6 @@
if (id < 0)
break;
dev_set_name(dev, "namespace%d.%d", nd_region->id, id);
- dev->groups = nd_namespace_attribute_groups;
nd_device_register(dev);
}
if (i)
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 25fa121..564faa3 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -45,6 +45,7 @@
struct kernfs_node *overwrite_state;
} sec;
struct delayed_work dwork;
+ const struct nvdimm_fw_ops *fw_ops;
};
static inline unsigned long nvdimm_security_flags(
@@ -114,7 +115,6 @@
int __init nvdimm_bus_init(void);
void nvdimm_bus_exit(void);
void nvdimm_devs_exit(void);
-void nd_region_devs_exit(void);
struct nd_region;
void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev);
void nd_region_create_ns_seed(struct nd_region *nd_region);
@@ -124,11 +124,7 @@
int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
void nd_synchronize(void);
-int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus);
-int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus);
-int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus);
void __nd_device_register(struct device *dev);
-int nd_match_dimm(struct device *dev, void *data);
struct nd_label_id;
char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
bool nd_is_uuid_unique(struct device *dev, u8 *uuid);
@@ -171,6 +167,23 @@
struct nd_pfn *to_nd_pfn_safe(struct device *dev);
bool is_nvdimm_bus(struct device *dev);
+#if IS_ENABLED(CONFIG_ND_CLAIM)
+int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio,
+ resource_size_t size);
+void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
+#else
+static inline int devm_nsio_enable(struct device *dev,
+ struct nd_namespace_io *nsio, resource_size_t size)
+{
+ return -ENXIO;
+}
+
+static inline void devm_nsio_disable(struct device *dev,
+ struct nd_namespace_io *nsio)
+{
+}
+#endif
+
#ifdef CONFIG_PROVE_LOCKING
extern struct class *nd_class;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index ee5c040..696b555 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -39,7 +39,7 @@
int ns_count;
int ns_active;
unsigned int hints_shift;
- void __iomem *flush_wpq[0];
+ void __iomem *flush_wpq[];
};
static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd,
@@ -146,6 +146,7 @@
struct device *btt_seed;
struct device *pfn_seed;
struct device *dax_seed;
+ unsigned long align;
u16 ndr_mappings;
u64 ndr_size;
u64 ndr_start;
@@ -156,7 +157,7 @@
struct nd_interleave_set *nd_set;
struct nd_percpu_lane __percpu *lane;
int (*flush)(struct nd_region *nd_region, struct bio *bio);
- struct nd_mapping mapping[0];
+ struct nd_mapping mapping[];
};
struct nd_blk_region {
@@ -212,6 +213,11 @@
struct nd_pfn nd_pfn;
};
+static inline u32 nd_info_block_reserve(void)
+{
+ return ALIGN(SZ_8K, PAGE_SIZE);
+}
+
enum nd_async_mode {
ND_SYNC,
ND_ASYNC,
@@ -234,6 +240,9 @@
void nvdimm_exit(void);
void nd_region_exit(void);
struct nvdimm;
+extern const struct attribute_group nd_device_attribute_group;
+extern const struct attribute_group nd_numa_attribute_group;
+extern const struct attribute_group *nvdimm_bus_attribute_groups[];
struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
int nvdimm_check_config_data(struct device *dev);
int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
@@ -244,7 +253,7 @@
void *buf, size_t len);
long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
unsigned int len);
-void nvdimm_set_aliasing(struct device *dev);
+void nvdimm_set_labeling(struct device *dev);
void nvdimm_set_locked(struct device *dev);
void nvdimm_clear_locked(struct device *dev);
int nvdimm_security_setup_events(struct device *dev);
@@ -297,7 +306,7 @@
struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
struct nd_namespace_common *ndns);
int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig);
-extern struct attribute_group nd_pfn_attribute_group;
+extern const struct attribute_group *nd_pfn_attribute_groups[];
#else
static inline int nd_pfn_probe(struct device *dev,
struct nd_namespace_common *ndns)
@@ -352,7 +361,7 @@
void nvdimm_bus_lock(struct device *dev);
void nvdimm_bus_unlock(struct device *dev);
bool is_nvdimm_bus_locked(struct device *dev);
-int nvdimm_revalidate_disk(struct gendisk *disk);
+void nvdimm_check_and_set_ro(struct gendisk *disk);
void nvdimm_drvdata_release(struct kref *kref);
void put_ndd(struct nvdimm_drvdata *ndd);
int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd);
@@ -368,53 +377,26 @@
const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
char *name);
unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
+struct range;
void nvdimm_badblocks_populate(struct nd_region *nd_region,
- struct badblocks *bb, const struct resource *res);
+ struct badblocks *bb, const struct range *range);
+int devm_namespace_enable(struct device *dev, struct nd_namespace_common *ndns,
+ resource_size_t size);
+void devm_namespace_disable(struct device *dev,
+ struct nd_namespace_common *ndns);
#if IS_ENABLED(CONFIG_ND_CLAIM)
-
/* max struct page size independent of kernel config */
#define MAX_STRUCT_PAGE_SIZE 64
-
int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
-int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio);
-void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
#else
static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
struct dev_pagemap *pgmap)
{
return -ENXIO;
}
-static inline int devm_nsio_enable(struct device *dev,
- struct nd_namespace_io *nsio)
-{
- return -ENXIO;
-}
-static inline void devm_nsio_disable(struct device *dev,
- struct nd_namespace_io *nsio)
-{
-}
#endif
int nd_blk_region_init(struct nd_region *nd_region);
int nd_region_activate(struct nd_region *nd_region);
-void __nd_iostat_start(struct bio *bio, unsigned long *start);
-static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
-{
- struct gendisk *disk = bio->bi_disk;
-
- if (!blk_queue_io_stat(disk->queue))
- return false;
-
- *start = jiffies;
- generic_start_io_acct(disk->queue, bio_op(bio), bio_sectors(bio),
- &disk->part0);
- return true;
-}
-static inline void nd_iostat_end(struct bio *bio, unsigned long start)
-{
- struct gendisk *disk = bio->bi_disk;
-
- generic_end_io_acct(disk->queue, bio_op(bio), &disk->part0, start);
-}
static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
unsigned int len)
{
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 97187d6..10dbdcd 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -9,17 +9,6 @@
#include <linux/ioport.h>
#include <linux/slab.h>
-static const struct attribute_group *region_attr_groups[] = {
- &nd_region_attribute_group,
- &nd_device_attribute_group,
- NULL,
-};
-
-static const struct attribute_group *bus_attr_groups[] = {
- &nvdimm_bus_attribute_group,
- NULL,
-};
-
struct of_pmem_private {
struct nvdimm_bus_descriptor bus_desc;
struct nvdimm_bus *bus;
@@ -41,7 +30,6 @@
if (!priv)
return -ENOMEM;
- priv->bus_desc.attr_groups = bus_attr_groups;
priv->bus_desc.provider_name = kstrdup(pdev->name, GFP_KERNEL);
priv->bus_desc.module = THIS_MODULE;
priv->bus_desc.of_node = np;
@@ -66,7 +54,6 @@
* structures so passing a stack pointer is fine.
*/
memset(&ndr_desc, 0, sizeof(ndr_desc));
- ndr_desc.attr_groups = region_attr_groups;
ndr_desc.numa_node = dev_to_node(&pdev->dev);
ndr_desc.target_node = ndr_desc.numa_node;
ndr_desc.res = &pdev->resource[i];
@@ -75,8 +62,10 @@
if (is_volatile)
region = nvdimm_volatile_region_create(bus, &ndr_desc);
- else
+ else {
+ set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
region = nvdimm_pmem_region_create(bus, &ndr_desc);
+ }
if (!region)
dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
@@ -101,6 +90,7 @@
static const struct of_device_id of_pmem_region_match[] = {
{ .compatible = "pmem-region" },
+ { .compatible = "pmem-region-v2" },
{ },
};
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index acb1951..37cb1b8 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -24,6 +24,18 @@
__le64 npfns;
__le32 mode;
/* minor-version-1 additions for section alignment */
+ /**
+ * @start_pad: Deprecated attribute to pad start-misaligned namespaces
+ *
+ * start_pad is deprecated because the original definition did
+ * not comprehend that dataoff is relative to the base address
+ * of the namespace not the start_pad adjusted base. The result
+ * is that the dax path is broken, but the block-I/O path is
+ * not. The kernel will no longer create namespaces using start
+ * padding, but it still supports block-I/O for legacy
+ * configurations mainly to allow a backup, reconfigure the
+ * namespace, and restore flow to repair dax operation.
+ */
__le32 start_pad;
__le32 end_trunc;
/* minor-version-2 record the base alignment of the mapping */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 60d81fa..b499df6 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -26,17 +26,6 @@
kfree(nd_pfn);
}
-static struct device_type nd_pfn_device_type = {
- .name = "nd_pfn",
- .release = nd_pfn_release,
-};
-
-bool is_nd_pfn(struct device *dev)
-{
- return dev ? dev->type == &nd_pfn_device_type : false;
-}
-EXPORT_SYMBOL(is_nd_pfn);
-
struct nd_pfn *to_nd_pfn(struct device *dev)
{
struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev);
@@ -229,7 +218,7 @@
return rc;
}
-static DEVICE_ATTR_RO(resource);
+static DEVICE_ATTR_ADMIN_RO(resource);
static ssize_t size_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -280,25 +269,29 @@
NULL,
};
-static umode_t pfn_visible(struct kobject *kobj, struct attribute *a, int n)
-{
- if (a == &dev_attr_resource.attr)
- return 0400;
- return a->mode;
-}
-
-struct attribute_group nd_pfn_attribute_group = {
+static struct attribute_group nd_pfn_attribute_group = {
.attrs = nd_pfn_attributes,
- .is_visible = pfn_visible,
};
-static const struct attribute_group *nd_pfn_attribute_groups[] = {
+const struct attribute_group *nd_pfn_attribute_groups[] = {
&nd_pfn_attribute_group,
&nd_device_attribute_group,
&nd_numa_attribute_group,
NULL,
};
+static const struct device_type nd_pfn_device_type = {
+ .name = "nd_pfn",
+ .release = nd_pfn_release,
+ .groups = nd_pfn_attribute_groups,
+};
+
+bool is_nd_pfn(struct device *dev)
+{
+ return dev ? dev->type == &nd_pfn_device_type : false;
+}
+EXPORT_SYMBOL(is_nd_pfn);
+
struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
struct nd_namespace_common *ndns)
{
@@ -337,7 +330,6 @@
dev = &nd_pfn->dev;
dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id);
- dev->groups = nd_pfn_attribute_groups;
dev->type = &nd_pfn_device_type;
dev->parent = &nd_region->dev;
@@ -382,6 +374,15 @@
meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9;
meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start;
+ /*
+ * re-enable the namespace with correct size so that we can access
+ * the device memmap area.
+ */
+ devm_namespace_disable(&nd_pfn->dev, ndns);
+ rc = devm_namespace_enable(&nd_pfn->dev, ndns, le64_to_cpu(pfn_sb->dataoff));
+ if (rc)
+ return rc;
+
do {
unsigned long zero_len;
u64 nsoff;
@@ -445,6 +446,7 @@
int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{
u64 checksum, offset;
+ struct resource *res;
enum nd_pfn_mode mode;
struct nd_namespace_io *nsio;
unsigned long align, start_pad;
@@ -560,14 +562,14 @@
dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
nd_pfn->align, align, nd_pfn->mode,
mode);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
}
if (align > nvdimm_namespace_capacity(ndns)) {
dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
align, nvdimm_namespace_capacity(ndns));
- return -EINVAL;
+ return -EOPNOTSUPP;
}
/*
@@ -577,21 +579,34 @@
* established.
*/
nsio = to_nd_namespace_io(&ndns->dev);
- if (offset >= resource_size(&nsio->res)) {
+ res = &nsio->res;
+ if (offset >= resource_size(res)) {
dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
dev_name(&ndns->dev));
- return -EBUSY;
+ return -EOPNOTSUPP;
}
- if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
+ if ((align && !IS_ALIGNED(res->start + offset + start_pad, align))
|| !IS_ALIGNED(offset, PAGE_SIZE)) {
dev_err(&nd_pfn->dev,
"bad offset: %#llx dax disabled align: %#lx\n",
offset, align);
- return -ENXIO;
+ return -EOPNOTSUPP;
}
- return nd_pfn_clear_memmap_errors(nd_pfn);
+ if (!IS_ALIGNED(res->start + le32_to_cpu(pfn_sb->start_pad),
+ memremap_compat_align())) {
+ dev_err(&nd_pfn->dev, "resource start misaligned\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (!IS_ALIGNED(res->end + 1 - le32_to_cpu(pfn_sb->end_trunc),
+ memremap_compat_align())) {
+ dev_err(&nd_pfn->dev, "resource end misaligned\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
}
EXPORT_SYMBOL(nd_pfn_validate);
@@ -635,11 +650,6 @@
}
EXPORT_SYMBOL(nd_pfn_probe);
-static u32 info_block_reserve(void)
-{
- return ALIGN(SZ_8K, PAGE_SIZE);
-}
-
/*
* We hotplug memory at sub-section granularity, pad the reserved area
* from the previous section base to the namespace base address.
@@ -653,7 +663,7 @@
static unsigned long init_altmap_reserve(resource_size_t base)
{
- unsigned long reserve = info_block_reserve() >> PAGE_SHIFT;
+ unsigned long reserve = nd_info_block_reserve() >> PAGE_SHIFT;
unsigned long base_pfn = PHYS_PFN(base);
reserve += base_pfn - SUBSECTION_ALIGN_DOWN(base_pfn);
@@ -662,13 +672,13 @@
static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
- struct resource *res = &pgmap->res;
+ struct range *range = &pgmap->range;
struct vmem_altmap *altmap = &pgmap->altmap;
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = le64_to_cpu(pfn_sb->dataoff);
u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
- u32 reserve = info_block_reserve();
+ u32 reserve = nd_info_block_reserve();
struct nd_namespace_common *ndns = nd_pfn->ndns;
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
resource_size_t base = nsio->res.start + start_pad;
@@ -679,16 +689,17 @@
.end_pfn = PHYS_PFN(end),
};
- memcpy(res, &nsio->res, sizeof(*res));
- res->start += start_pad;
- res->end -= end_trunc;
-
+ *range = (struct range) {
+ .start = nsio->res.start + start_pad,
+ .end = nsio->res.end - end_trunc,
+ };
+ pgmap->nr_range = 1;
if (nd_pfn->mode == PFN_MODE_RAM) {
if (offset < reserve)
return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
} else if (nd_pfn->mode == PFN_MODE_PMEM) {
- nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset));
+ nd_pfn->npfns = PHYS_PFN((range_len(range) - offset));
if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
dev_info(&nd_pfn->dev,
"number of pfns truncated from %lld to %ld\n",
@@ -729,6 +740,8 @@
sig = PFN_SIG;
rc = nd_pfn_validate(nd_pfn, sig);
+ if (rc == 0)
+ return nd_pfn_clear_memmap_errors(nd_pfn);
if (rc != -ENODEV)
return rc;
@@ -752,7 +765,19 @@
start = nsio->res.start;
size = resource_size(&nsio->res);
npfns = PHYS_PFN(size - SZ_8K);
- align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT));
+ align = max(nd_pfn->align, memremap_compat_align());
+
+ /*
+ * When @start is misaligned fail namespace creation. See
+ * the 'struct nd_pfn_sb' commentary on why ->start_pad is not
+ * an option.
+ */
+ if (!IS_ALIGNED(start, memremap_compat_align())) {
+ dev_err(&nd_pfn->dev, "%s: start %pa misaligned to %#lx\n",
+ dev_name(&ndns->dev), &start,
+ memremap_compat_align());
+ return -EINVAL;
+ }
end_trunc = start + size - ALIGN_DOWN(start + size, align);
if (nd_pfn->mode == PFN_MODE_PMEM) {
/*
@@ -796,6 +821,10 @@
checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
pfn_sb->checksum = cpu_to_le64(checksum);
+ rc = nd_pfn_clear_memmap_errors(nd_pfn);
+ if (rc)
+ return rc;
+
return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0);
}
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index f9f76f6..d5dd79b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -7,7 +7,6 @@
* Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
*/
-#include <asm/cacheflush.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/init.h>
@@ -25,10 +24,11 @@
#include <linux/dax.h>
#include <linux/nd.h>
#include <linux/backing-dev.h>
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
#include "pmem.h"
#include "pfn.h"
#include "nd.h"
-#include "nd-core.h"
static struct device *to_dev(struct pmem_device *pmem)
{
@@ -125,7 +125,7 @@
while (len) {
mem = kmap_atomic(page);
chunk = min_t(unsigned int, len, PAGE_SIZE - off);
- rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
+ rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
if (rem)
return BLK_STS_IOERR;
@@ -137,9 +137,25 @@
return BLK_STS_OK;
}
-static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
- unsigned int len, unsigned int off, unsigned int op,
- sector_t sector)
+static blk_status_t pmem_do_read(struct pmem_device *pmem,
+ struct page *page, unsigned int page_off,
+ sector_t sector, unsigned int len)
+{
+ blk_status_t rc;
+ phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
+ void *pmem_addr = pmem->virt_addr + pmem_off;
+
+ if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
+ return BLK_STS_IOERR;
+
+ rc = read_pmem(page, page_off, pmem_addr, len);
+ flush_dcache_page(page);
+ return rc;
+}
+
+static blk_status_t pmem_do_write(struct pmem_device *pmem,
+ struct page *page, unsigned int page_off,
+ sector_t sector, unsigned int len)
{
blk_status_t rc = BLK_STS_OK;
bool bad_pmem = false;
@@ -149,40 +165,31 @@
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
bad_pmem = true;
- if (!op_is_write(op)) {
- if (unlikely(bad_pmem))
- rc = BLK_STS_IOERR;
- else {
- rc = read_pmem(page, off, pmem_addr, len);
- flush_dcache_page(page);
- }
- } else {
- /*
- * Note that we write the data both before and after
- * clearing poison. The write before clear poison
- * handles situations where the latest written data is
- * preserved and the clear poison operation simply marks
- * the address range as valid without changing the data.
- * In this case application software can assume that an
- * interrupted write will either return the new good
- * data or an error.
- *
- * However, if pmem_clear_poison() leaves the data in an
- * indeterminate state we need to perform the write
- * after clear poison.
- */
- flush_dcache_page(page);
- write_pmem(pmem_addr, page, off, len);
- if (unlikely(bad_pmem)) {
- rc = pmem_clear_poison(pmem, pmem_off, len);
- write_pmem(pmem_addr, page, off, len);
- }
+ /*
+ * Note that we write the data both before and after
+ * clearing poison. The write before clear poison
+ * handles situations where the latest written data is
+ * preserved and the clear poison operation simply marks
+ * the address range as valid without changing the data.
+ * In this case application software can assume that an
+ * interrupted write will either return the new good
+ * data or an error.
+ *
+ * However, if pmem_clear_poison() leaves the data in an
+ * indeterminate state we need to perform the write
+ * after clear poison.
+ */
+ flush_dcache_page(page);
+ write_pmem(pmem_addr, page, page_off, len);
+ if (unlikely(bad_pmem)) {
+ rc = pmem_clear_poison(pmem, pmem_off, len);
+ write_pmem(pmem_addr, page, page_off, len);
}
return rc;
}
-static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t pmem_submit_bio(struct bio *bio)
{
int ret = 0;
blk_status_t rc = 0;
@@ -190,23 +197,29 @@
unsigned long start;
struct bio_vec bvec;
struct bvec_iter iter;
- struct pmem_device *pmem = q->queuedata;
+ struct pmem_device *pmem = bio->bi_disk->private_data;
struct nd_region *nd_region = to_region(pmem);
if (bio->bi_opf & REQ_PREFLUSH)
ret = nvdimm_flush(nd_region, bio);
- do_acct = nd_iostat_start(bio, &start);
+ do_acct = blk_queue_io_stat(bio->bi_disk->queue);
+ if (do_acct)
+ start = bio_start_io_acct(bio);
bio_for_each_segment(bvec, bio, iter) {
- rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
- bvec.bv_offset, bio_op(bio), iter.bi_sector);
+ if (op_is_write(bio_op(bio)))
+ rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
+ iter.bi_sector, bvec.bv_len);
+ else
+ rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
+ iter.bi_sector, bvec.bv_len);
if (rc) {
bio->bi_status = rc;
break;
}
}
if (do_acct)
- nd_iostat_end(bio, start);
+ bio_end_io_acct(bio, start);
if (bio->bi_opf & REQ_FUA)
ret = nvdimm_flush(nd_region, bio);
@@ -221,12 +234,13 @@
static int pmem_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, unsigned int op)
{
- struct pmem_device *pmem = bdev->bd_queue->queuedata;
+ struct pmem_device *pmem = bdev->bd_disk->private_data;
blk_status_t rc;
- rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
- 0, op, sector);
-
+ if (op_is_write(op))
+ rc = pmem_do_write(pmem, page, 0, sector, thp_size(page));
+ else
+ rc = pmem_do_read(pmem, page, 0, sector, thp_size(page));
/*
* The ->rw_page interface is subtle and tricky. The core
* retries on any error, so we can only invoke page_endio() in
@@ -265,10 +279,20 @@
static const struct block_device_operations pmem_fops = {
.owner = THIS_MODULE,
+ .submit_bio = pmem_submit_bio,
.rw_page = pmem_rw_page,
- .revalidate_disk = nvdimm_revalidate_disk,
};
+static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+ size_t nr_pages)
+{
+ struct pmem_device *pmem = dax_get_private(dax_dev);
+
+ return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0,
+ PFN_PHYS(pgoff) >> SECTOR_SHIFT,
+ PAGE_SIZE));
+}
+
static long pmem_dax_direct_access(struct dax_device *dax_dev,
pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
{
@@ -279,7 +303,7 @@
/*
* Use the 'no check' versions of copy_from_iter_flushcache() and
- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
+ * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
* checking, both file offset and device offset, is handled by
* dax_iomap_actor()
*/
@@ -292,7 +316,7 @@
static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
- return _copy_to_iter_mcsafe(addr, bytes, i);
+ return _copy_mc_to_iter(addr, bytes, i);
}
static const struct dax_operations pmem_dax_ops = {
@@ -300,6 +324,7 @@
.dax_supported = generic_fsdax_supported,
.copy_from_iter = pmem_copy_from_iter,
.copy_to_iter = pmem_copy_to_iter,
+ .zero_page_range = pmem_dax_zero_page_range,
};
static const struct attribute_group *pmem_attribute_groups[] = {
@@ -338,13 +363,7 @@
put_disk(pmem->disk);
}
-static void pmem_pagemap_page_free(struct page *page)
-{
- wake_up_var(&page->_refcount);
-}
-
static const struct dev_pagemap_ops fsdax_pagemap_ops = {
- .page_free = pmem_pagemap_page_free,
.kill = pmem_pagemap_kill,
.cleanup = pmem_pagemap_cleanup,
};
@@ -356,7 +375,7 @@
struct nd_region *nd_region = to_nd_region(dev->parent);
int nid = dev_to_node(dev), fua;
struct resource *res = &nsio->res;
- struct resource bb_res;
+ struct range bb_range;
struct nd_pfn *nd_pfn = NULL;
struct dax_device *dax_dev;
struct nd_pfn_sb *pfn_sb;
@@ -372,6 +391,10 @@
if (!pmem)
return -ENOMEM;
+ rc = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
+ if (rc)
+ return rc;
+
/* while nsio_rw_bytes is active, parse a pfn info block if present */
if (is_nd_pfn(dev)) {
nd_pfn = to_nd_pfn(dev);
@@ -381,7 +404,7 @@
}
/* we're attaching a block device, disable raw namespace access */
- devm_nsio_disable(dev, nsio);
+ devm_namespace_disable(dev, ndns);
dev_set_drvdata(dev, pmem);
pmem->phys_addr = res->start;
@@ -398,7 +421,7 @@
return -EBUSY;
}
- q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
+ q = blk_alloc_queue(dev_to_node(dev));
if (!q)
return -ENOMEM;
@@ -411,24 +434,27 @@
pfn_sb = nd_pfn->pfn_sb;
pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
pmem->pfn_pad = resource_size(res) -
- resource_size(&pmem->pgmap.res);
+ range_len(&pmem->pgmap.range);
pmem->pfn_flags |= PFN_MAP;
- memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
- bb_res.start += pmem->data_offset;
+ bb_range = pmem->pgmap.range;
+ bb_range.start += pmem->data_offset;
} else if (pmem_should_map_pages(dev)) {
- memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
+ pmem->pgmap.range.start = res->start;
+ pmem->pgmap.range.end = res->end;
+ pmem->pgmap.nr_range = 1;
pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
pmem->pgmap.ops = &fsdax_pagemap_ops;
addr = devm_memremap_pages(dev, &pmem->pgmap);
pmem->pfn_flags |= PFN_MAP;
- memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
+ bb_range = pmem->pgmap.range;
} else {
+ addr = devm_memremap(dev, pmem->phys_addr,
+ pmem->size, ARCH_MEMREMAP_PMEM);
if (devm_add_action_or_reset(dev, pmem_release_queue,
&pmem->pgmap))
return -ENOMEM;
- addr = devm_memremap(dev, pmem->phys_addr,
- pmem->size, ARCH_MEMREMAP_PMEM);
- memcpy(&bb_res, &nsio->res, sizeof(bb_res));
+ bb_range.start = res->start;
+ bb_range.end = res->end;
}
if (IS_ERR(addr))
@@ -436,14 +462,12 @@
pmem->virt_addr = addr;
blk_queue_write_cache(q, true, fua);
- blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_logical_block_size(q, pmem_sector_size(ndns));
blk_queue_max_hw_sectors(q, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
if (pmem->pfn_flags & PFN_MAP)
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
- q->queuedata = pmem;
disk = alloc_disk_node(0, nid);
if (!disk)
@@ -453,21 +477,21 @@
disk->fops = &pmem_fops;
disk->queue = q;
disk->flags = GENHD_FL_EXT_DEVT;
- disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
+ disk->private_data = pmem;
nvdimm_namespace_disk_name(ndns, disk->disk_name);
set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
/ 512);
if (devm_init_badblocks(dev, &pmem->bb))
return -ENOMEM;
- nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
+ nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
disk->bb = &pmem->bb;
if (is_nvdimm_sync(nd_region))
flags = DAXDEV_F_SYNC;
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
- if (!dax_dev) {
+ if (IS_ERR(dax_dev)) {
put_disk(disk);
- return -ENOMEM;
+ return PTR_ERR(dax_dev);
}
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
pmem->dax_dev = dax_dev;
@@ -478,7 +502,7 @@
if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
return -ENOMEM;
- revalidate_disk(disk);
+ nvdimm_check_and_set_ro(disk);
pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
"badblocks");
@@ -497,15 +521,16 @@
if (IS_ERR(ndns))
return PTR_ERR(ndns);
- if (devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev)))
- return -ENXIO;
-
if (is_nd_btt(dev))
return nvdimm_namespace_attach_btt(ndns);
if (is_nd_pfn(dev))
return pmem_attach_disk(dev, ndns);
+ ret = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
+ if (ret)
+ return ret;
+
ret = nd_btt_probe(dev, ndns);
if (ret == 0)
return -ENXIO;
@@ -532,6 +557,10 @@
return -ENXIO;
else if (ret == -EOPNOTSUPP)
return ret;
+
+ /* probe complete, attach handles namespace enabling */
+ devm_namespace_disable(dev, ndns);
+
return pmem_attach_disk(dev, ndns);
}
@@ -565,8 +594,8 @@
resource_size_t offset = 0, end_trunc = 0;
struct nd_namespace_common *ndns;
struct nd_namespace_io *nsio;
- struct resource res;
struct badblocks *bb;
+ struct range range;
struct kernfs_node *bb_state;
if (event != NVDIMM_REVALIDATE_POISON)
@@ -602,9 +631,9 @@
nsio = to_nd_namespace_io(&ndns->dev);
}
- res.start = nsio->res.start + offset;
- res.end = nsio->res.end - end_trunc;
- nvdimm_badblocks_populate(nd_region, bb, &res);
+ range.start = nsio->res.start + offset;
+ range.end = nsio->res.end - end_trunc;
+ nvdimm_badblocks_populate(nd_region, bb, &range);
if (bb_state)
sysfs_notify_dirent(bb_state);
}
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 0f6978e..bfce87e 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -35,7 +35,10 @@
return rc;
if (is_memory(&nd_region->dev)) {
- struct resource ndr_res;
+ struct range range = {
+ .start = nd_region->ndr_start,
+ .end = nd_region->ndr_start + nd_region->ndr_size - 1,
+ };
if (devm_init_badblocks(dev, &nd_region->bb))
return -ENODEV;
@@ -44,9 +47,7 @@
if (!nd_region->bb_state)
dev_warn(&nd_region->dev,
"'badblocks' notification disabled\n");
- ndr_res.start = nd_region->ndr_start;
- ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1;
- nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
+ nvdimm_badblocks_populate(nd_region, &nd_region->bb, &range);
}
rc = nd_region_register_namespaces(nd_region, &err);
@@ -121,14 +122,16 @@
{
if (event == NVDIMM_REVALIDATE_POISON) {
struct nd_region *nd_region = to_nd_region(dev);
- struct resource res;
if (is_memory(&nd_region->dev)) {
- res.start = nd_region->ndr_start;
- res.end = nd_region->ndr_start +
- nd_region->ndr_size - 1;
+ struct range range = {
+ .start = nd_region->ndr_start,
+ .end = nd_region->ndr_start +
+ nd_region->ndr_size - 1,
+ };
+
nvdimm_badblocks_populate(nd_region,
- &nd_region->bb, &res);
+ &nd_region->bb, &range);
if (nd_region->bb_state)
sysfs_notify_dirent(nd_region->bb_state);
}
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index b8236a9..e05cc9f 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -3,6 +3,7 @@
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*/
#include <linux/scatterlist.h>
+#include <linux/memregion.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -19,7 +20,6 @@
*/
#include <linux/io-64-nonatomic-hi-lo.h>
-static DEFINE_IDA(region_ida);
static DEFINE_PER_CPU(int, flush_idx);
static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
@@ -133,43 +133,13 @@
put_device(&nvdimm->dev);
}
free_percpu(nd_region->lane);
- ida_simple_remove(®ion_ida, nd_region->id);
+ memregion_free(nd_region->id);
if (is_nd_blk(dev))
kfree(to_nd_blk_region(dev));
else
kfree(nd_region);
}
-static struct device_type nd_blk_device_type = {
- .name = "nd_blk",
- .release = nd_region_release,
-};
-
-static struct device_type nd_pmem_device_type = {
- .name = "nd_pmem",
- .release = nd_region_release,
-};
-
-static struct device_type nd_volatile_device_type = {
- .name = "nd_volatile",
- .release = nd_region_release,
-};
-
-bool is_nd_pmem(struct device *dev)
-{
- return dev ? dev->type == &nd_pmem_device_type : false;
-}
-
-bool is_nd_blk(struct device *dev)
-{
- return dev ? dev->type == &nd_blk_device_type : false;
-}
-
-bool is_nd_volatile(struct device *dev)
-{
- return dev ? dev->type == &nd_volatile_device_type : false;
-}
-
struct nd_region *to_nd_region(struct device *dev)
{
struct nd_region *nd_region = container_of(dev, struct nd_region, dev);
@@ -225,16 +195,16 @@
int nd_region_to_nstype(struct nd_region *nd_region)
{
if (is_memory(&nd_region->dev)) {
- u16 i, alias;
+ u16 i, label;
- for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
+ for (i = 0, label = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
- if (test_bit(NDD_ALIASING, &nvdimm->flags))
- alias++;
+ if (test_bit(NDD_LABELING, &nvdimm->flags))
+ label++;
}
- if (alias)
+ if (label)
return ND_DEVICE_NAMESPACE_PMEM;
else
return ND_DEVICE_NAMESPACE_IO;
@@ -246,21 +216,25 @@
}
EXPORT_SYMBOL(nd_region_to_nstype);
+static unsigned long long region_size(struct nd_region *nd_region)
+{
+ if (is_memory(&nd_region->dev)) {
+ return nd_region->ndr_size;
+ } else if (nd_region->ndr_mappings == 1) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+
+ return nd_mapping->size;
+ }
+
+ return 0;
+}
+
static ssize_t size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nd_region *nd_region = to_nd_region(dev);
- unsigned long long size = 0;
- if (is_memory(dev)) {
- size = nd_region->ndr_size;
- } else if (nd_region->ndr_mappings == 1) {
- struct nd_mapping *nd_mapping = &nd_region->mapping[0];
-
- size = nd_mapping->size;
- }
-
- return sprintf(buf, "%llu\n", size);
+ return sprintf(buf, "%llu\n", region_size(nd_region));
}
static DEVICE_ATTR_RO(size);
@@ -559,6 +533,54 @@
}
static DEVICE_ATTR_RW(read_only);
+static ssize_t align_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_region *nd_region = to_nd_region(dev);
+
+ return sprintf(buf, "%#lx\n", nd_region->align);
+}
+
+static ssize_t align_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct nd_region *nd_region = to_nd_region(dev);
+ unsigned long val, dpa;
+ u32 remainder;
+ int rc;
+
+ rc = kstrtoul(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ if (!nd_region->ndr_mappings)
+ return -ENXIO;
+
+ /*
+ * Ensure space-align is evenly divisible by the region
+ * interleave-width because the kernel typically has no facility
+ * to determine which DIMM(s), dimm-physical-addresses, would
+ * contribute to the tail capacity in system-physical-address
+ * space for the namespace.
+ */
+ dpa = div_u64_rem(val, nd_region->ndr_mappings, &remainder);
+ if (!is_power_of_2(dpa) || dpa < PAGE_SIZE
+ || val > region_size(nd_region) || remainder)
+ return -EINVAL;
+
+ /*
+ * Given that space allocation consults this value multiple
+ * times ensure it does not change for the duration of the
+ * allocation.
+ */
+ nvdimm_bus_lock(dev);
+ nd_region->align = val;
+ nvdimm_bus_unlock(dev);
+
+ return len;
+}
+static DEVICE_ATTR_RW(align);
+
static ssize_t region_badblocks_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -583,7 +605,7 @@
return sprintf(buf, "%#llx\n", nd_region->ndr_start);
}
-static DEVICE_ATTR_RO(resource);
+static DEVICE_ATTR_ADMIN_RO(resource);
static ssize_t persistence_domain_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -601,6 +623,7 @@
static struct attribute *nd_region_attributes[] = {
&dev_attr_size.attr,
+ &dev_attr_align.attr,
&dev_attr_nstype.attr,
&dev_attr_mappings.attr,
&dev_attr_btt_seed.attr,
@@ -635,12 +658,8 @@
if (!is_memory(dev) && a == &dev_attr_badblocks.attr)
return 0;
- if (a == &dev_attr_resource.attr) {
- if (is_memory(dev))
- return 0400;
- else
- return 0;
- }
+ if (a == &dev_attr_resource.attr && !is_memory(dev))
+ return 0;
if (a == &dev_attr_deep_flush.attr) {
int has_flush = nvdimm_has_flush(nd_region);
@@ -660,6 +679,9 @@
return a->mode;
}
+ if (a == &dev_attr_align.attr)
+ return a->mode;
+
if (a != &dev_attr_set_cookie.attr
&& a != &dev_attr_available_size.attr)
return a->mode;
@@ -674,80 +696,6 @@
return 0;
}
-struct attribute_group nd_region_attribute_group = {
- .attrs = nd_region_attributes,
- .is_visible = region_visible,
-};
-EXPORT_SYMBOL_GPL(nd_region_attribute_group);
-
-u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
- struct nd_namespace_index *nsindex)
-{
- struct nd_interleave_set *nd_set = nd_region->nd_set;
-
- if (!nd_set)
- return 0;
-
- if (nsindex && __le16_to_cpu(nsindex->major) == 1
- && __le16_to_cpu(nsindex->minor) == 1)
- return nd_set->cookie1;
- return nd_set->cookie2;
-}
-
-u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
-{
- struct nd_interleave_set *nd_set = nd_region->nd_set;
-
- if (nd_set)
- return nd_set->altcookie;
- return 0;
-}
-
-void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
-{
- struct nd_label_ent *label_ent, *e;
-
- lockdep_assert_held(&nd_mapping->lock);
- list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
- list_del(&label_ent->list);
- kfree(label_ent);
- }
-}
-
-/*
- * When a namespace is activated create new seeds for the next
- * namespace, or namespace-personality to be configured.
- */
-void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
-{
- nvdimm_bus_lock(dev);
- if (nd_region->ns_seed == dev) {
- nd_region_create_ns_seed(nd_region);
- } else if (is_nd_btt(dev)) {
- struct nd_btt *nd_btt = to_nd_btt(dev);
-
- if (nd_region->btt_seed == dev)
- nd_region_create_btt_seed(nd_region);
- if (nd_region->ns_seed == &nd_btt->ndns->dev)
- nd_region_create_ns_seed(nd_region);
- } else if (is_nd_pfn(dev)) {
- struct nd_pfn *nd_pfn = to_nd_pfn(dev);
-
- if (nd_region->pfn_seed == dev)
- nd_region_create_pfn_seed(nd_region);
- if (nd_region->ns_seed == &nd_pfn->ndns->dev)
- nd_region_create_ns_seed(nd_region);
- } else if (is_nd_dax(dev)) {
- struct nd_dax *nd_dax = to_nd_dax(dev);
-
- if (nd_region->dax_seed == dev)
- nd_region_create_dax_seed(nd_region);
- if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
- nd_region_create_ns_seed(nd_region);
- }
- nvdimm_bus_unlock(dev);
-}
-
static ssize_t mappingN(struct device *dev, char *buf, int n)
{
struct nd_region *nd_region = to_nd_region(dev);
@@ -855,11 +803,124 @@
NULL,
};
-struct attribute_group nd_mapping_attribute_group = {
+static const struct attribute_group nd_mapping_attribute_group = {
.is_visible = mapping_visible,
.attrs = mapping_attributes,
};
-EXPORT_SYMBOL_GPL(nd_mapping_attribute_group);
+
+static const struct attribute_group nd_region_attribute_group = {
+ .attrs = nd_region_attributes,
+ .is_visible = region_visible,
+};
+
+static const struct attribute_group *nd_region_attribute_groups[] = {
+ &nd_device_attribute_group,
+ &nd_region_attribute_group,
+ &nd_numa_attribute_group,
+ &nd_mapping_attribute_group,
+ NULL,
+};
+
+static const struct device_type nd_blk_device_type = {
+ .name = "nd_blk",
+ .release = nd_region_release,
+ .groups = nd_region_attribute_groups,
+};
+
+static const struct device_type nd_pmem_device_type = {
+ .name = "nd_pmem",
+ .release = nd_region_release,
+ .groups = nd_region_attribute_groups,
+};
+
+static const struct device_type nd_volatile_device_type = {
+ .name = "nd_volatile",
+ .release = nd_region_release,
+ .groups = nd_region_attribute_groups,
+};
+
+bool is_nd_pmem(struct device *dev)
+{
+ return dev ? dev->type == &nd_pmem_device_type : false;
+}
+
+bool is_nd_blk(struct device *dev)
+{
+ return dev ? dev->type == &nd_blk_device_type : false;
+}
+
+bool is_nd_volatile(struct device *dev)
+{
+ return dev ? dev->type == &nd_volatile_device_type : false;
+}
+
+u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
+ struct nd_namespace_index *nsindex)
+{
+ struct nd_interleave_set *nd_set = nd_region->nd_set;
+
+ if (!nd_set)
+ return 0;
+
+ if (nsindex && __le16_to_cpu(nsindex->major) == 1
+ && __le16_to_cpu(nsindex->minor) == 1)
+ return nd_set->cookie1;
+ return nd_set->cookie2;
+}
+
+u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
+{
+ struct nd_interleave_set *nd_set = nd_region->nd_set;
+
+ if (nd_set)
+ return nd_set->altcookie;
+ return 0;
+}
+
+void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
+{
+ struct nd_label_ent *label_ent, *e;
+
+ lockdep_assert_held(&nd_mapping->lock);
+ list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
+ list_del(&label_ent->list);
+ kfree(label_ent);
+ }
+}
+
+/*
+ * When a namespace is activated create new seeds for the next
+ * namespace, or namespace-personality to be configured.
+ */
+void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
+{
+ nvdimm_bus_lock(dev);
+ if (nd_region->ns_seed == dev) {
+ nd_region_create_ns_seed(nd_region);
+ } else if (is_nd_btt(dev)) {
+ struct nd_btt *nd_btt = to_nd_btt(dev);
+
+ if (nd_region->btt_seed == dev)
+ nd_region_create_btt_seed(nd_region);
+ if (nd_region->ns_seed == &nd_btt->ndns->dev)
+ nd_region_create_ns_seed(nd_region);
+ } else if (is_nd_pfn(dev)) {
+ struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+ if (nd_region->pfn_seed == dev)
+ nd_region_create_pfn_seed(nd_region);
+ if (nd_region->ns_seed == &nd_pfn->ndns->dev)
+ nd_region_create_ns_seed(nd_region);
+ } else if (is_nd_dax(dev)) {
+ struct nd_dax *nd_dax = to_nd_dax(dev);
+
+ if (nd_region->dax_seed == dev)
+ nd_region_create_dax_seed(nd_region);
+ if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
+ nd_region_create_ns_seed(nd_region);
+ }
+ nvdimm_bus_unlock(dev);
+}
int nd_blk_region_init(struct nd_region *nd_region)
{
@@ -930,9 +991,44 @@
}
EXPORT_SYMBOL(nd_region_release_lane);
+/*
+ * PowerPC requires this alignment for memremap_pages(). All other archs
+ * should be ok with SUBSECTION_SIZE (see memremap_compat_align()).
+ */
+#define MEMREMAP_COMPAT_ALIGN_MAX SZ_16M
+
+static unsigned long default_align(struct nd_region *nd_region)
+{
+ unsigned long align;
+ int i, mappings;
+ u32 remainder;
+
+ if (is_nd_blk(&nd_region->dev))
+ align = PAGE_SIZE;
+ else
+ align = MEMREMAP_COMPAT_ALIGN_MAX;
+
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+ if (test_bit(NDD_ALIASING, &nvdimm->flags)) {
+ align = MEMREMAP_COMPAT_ALIGN_MAX;
+ break;
+ }
+ }
+
+ mappings = max_t(u16, 1, nd_region->ndr_mappings);
+ div_u64_rem(align, mappings, &remainder);
+ if (remainder)
+ align *= mappings;
+
+ return align;
+}
+
static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
- struct nd_region_desc *ndr_desc, struct device_type *dev_type,
- const char *caller)
+ struct nd_region_desc *ndr_desc,
+ const struct device_type *dev_type, const char *caller)
{
struct nd_region *nd_region;
struct device *dev;
@@ -985,7 +1081,7 @@
if (!region_buf)
return NULL;
- nd_region->id = ida_simple_get(®ion_ida, 0, 0, GFP_KERNEL);
+ nd_region->id = memregion_alloc(GFP_KERNEL);
if (nd_region->id < 0)
goto err_id;
@@ -1034,6 +1130,7 @@
dev->of_node = ndr_desc->of_node;
nd_region->ndr_size = resource_size(ndr_desc->res);
nd_region->ndr_start = ndr_desc->res->start;
+ nd_region->align = default_align(nd_region);
if (ndr_desc->flush)
nd_region->flush = ndr_desc->flush;
else
@@ -1044,7 +1141,7 @@
return nd_region;
err_percpu:
- ida_simple_remove(®ion_ida, nd_region->id);
+ memregion_free(nd_region->id);
err_id:
kfree(region_buf);
return NULL;
@@ -1109,13 +1206,13 @@
idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
/*
- * The first wmb() is needed to 'sfence' all previous writes
- * such that they are architecturally visible for the platform
- * buffer flush. Note that we've already arranged for pmem
+ * The pmem_wmb() is needed to 'sfence' all
+ * previous writes such that they are architecturally visible for
+ * the platform buffer flush. Note that we've already arranged for pmem
* writes to avoid the cache via memcpy_flushcache(). The final
* wmb() ensures ordering for the NVDIMM flush write.
*/
- wmb();
+ pmem_wmb();
for (i = 0; i < nd_region->ndr_mappings; i++)
if (ndrd_get_flush_wpq(ndrd, i, 0))
writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
@@ -1221,8 +1318,3 @@
return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict);
}
-
-void __exit nd_region_devs_exit(void)
-{
- ida_destroy(®ion_ida);
-}
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
index 35d2650..4b80150 100644
--- a/drivers/nvdimm/security.c
+++ b/drivers/nvdimm/security.c
@@ -95,7 +95,7 @@
struct encrypted_key_payload *epayload;
struct device *dev = &nvdimm->dev;
- keyref = lookup_user_key(id, 0, 0);
+ keyref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
if (IS_ERR(keyref))
return NULL;
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 5e3d07b..726c735 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -58,9 +58,9 @@
goto out_err;
}
- virtio_cread(vpmem->vdev, struct virtio_pmem_config,
+ virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
start, &vpmem->start);
- virtio_cread(vpmem->vdev, struct virtio_pmem_config,
+ virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
size, &vpmem->size);
res.start = vpmem->start;