Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 9d36473..36af7af 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
menuconfig LIBNVDIMM
tristate "NVDIMM (Non-Volatile Memory Device) Support"
depends on PHYS_ADDR_T_64BIT
@@ -32,7 +33,7 @@
Documentation/admin-guide/kernel-parameters.rst). This driver converts
these persistent memory ranges into block devices that are
capable of DAX (direct-access) file system mappings. See
- Documentation/nvdimm/nvdimm.txt for more details.
+ Documentation/driver-api/nvdimm/nvdimm.rst for more details.
Say Y if you want to use an NVDIMM
@@ -112,4 +113,21 @@
Select Y if unsure.
+config NVDIMM_KEYS
+ def_bool y
+ depends on ENCRYPTED_KEYS
+ depends on (LIBNVDIMM=ENCRYPTED_KEYS) || LIBNVDIMM=m
+
+config NVDIMM_TEST_BUILD
+ tristate "Build the unit test core"
+ depends on m
+ depends on COMPILE_TEST && X86_64
+ default m if COMPILE_TEST
+ help
+ Build the core of the unit test infrastructure. The result of
+ this build is non-functional for unit test execution, but it
+ otherwise helps catch build errors induced by changes to the
+ core devm_memremap_pages() implementation and other
+ infrastructure.
+
endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index e884704..29203f3 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_ND_BLK) += nd_blk.o
obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
obj-$(CONFIG_OF_PMEM) += of_pmem.o
+obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o
nd_pmem-y := pmem.o
@@ -27,3 +28,8 @@
libnvdimm-$(CONFIG_BTT) += btt_devs.o
libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o
libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o
+libnvdimm-$(CONFIG_NVDIMM_KEYS) += security.o
+
+TOOLS := ../../tools
+TEST_SRC := $(TOOLS)/testing/nvdimm/test
+obj-$(CONFIG_NVDIMM_TEST_BUILD) += $(TEST_SRC)/iomap.o
diff --git a/drivers/nvdimm/badrange.c b/drivers/nvdimm/badrange.c
index e068d72..b9eeefa 100644
--- a/drivers/nvdimm/badrange.c
+++ b/drivers/nvdimm/badrange.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/libnvdimm.h>
#include <linux/badblocks.h>
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 62e9cb1..677d6f4 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* NVDIMM Block Window Driver
* Copyright (c) 2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <linux/blkdev.h>
@@ -290,7 +282,7 @@
}
set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
- device_add_disk(dev, disk);
+ device_add_disk(dev, disk, NULL);
revalidate_disk(disk);
return 0;
}
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 0360c01..3e9f45a 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Block Translation Table
* Copyright (c) 2014-2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <linux/highmem.h>
#include <linux/debugfs.h>
@@ -400,9 +392,9 @@
arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
if (++(arena->freelist[lane].seq) == 4)
arena->freelist[lane].seq = 1;
- if (ent_e_flag(ent->old_map))
+ if (ent_e_flag(le32_to_cpu(ent->old_map)))
arena->freelist[lane].has_err = 1;
- arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map));
+ arena->freelist[lane].block = ent_lba(le32_to_cpu(ent->old_map));
return ret;
}
@@ -541,9 +533,9 @@
static int btt_freelist_init(struct arena_info *arena)
{
- int old, new, ret;
- u32 i, map_entry;
- struct log_entry log_new, log_old;
+ int new, ret;
+ struct log_entry log_new;
+ u32 i, map_entry, log_oldmap, log_newmap;
arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry),
GFP_KERNEL);
@@ -551,24 +543,26 @@
return -ENOMEM;
for (i = 0; i < arena->nfree; i++) {
- old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT);
- if (old < 0)
- return old;
-
new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT);
if (new < 0)
return new;
+ /* old and new map entries with any flags stripped out */
+ log_oldmap = ent_lba(le32_to_cpu(log_new.old_map));
+ log_newmap = ent_lba(le32_to_cpu(log_new.new_map));
+
/* sub points to the next one to be overwritten */
arena->freelist[i].sub = 1 - new;
arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
- arena->freelist[i].block = le32_to_cpu(log_new.old_map);
+ arena->freelist[i].block = log_oldmap;
/*
* FIXME: if error clearing fails during init, we want to make
* the BTT read-only
*/
- if (ent_e_flag(log_new.old_map)) {
+ if (ent_e_flag(le32_to_cpu(log_new.old_map)) &&
+ !ent_normal(le32_to_cpu(log_new.old_map))) {
+ arena->freelist[i].has_err = 1;
ret = arena_clear_freelist_error(arena, i);
if (ret)
dev_err_ratelimited(to_dev(arena),
@@ -576,7 +570,7 @@
}
/* This implies a newly created or untouched flog entry */
- if (log_new.old_map == log_new.new_map)
+ if (log_oldmap == log_newmap)
continue;
/* Check if map recovery is needed */
@@ -584,8 +578,15 @@
NULL, NULL, 0);
if (ret)
return ret;
- if ((le32_to_cpu(log_new.new_map) != map_entry) &&
- (le32_to_cpu(log_new.old_map) == map_entry)) {
+
+ /*
+ * The map_entry from btt_read_map is stripped of any flag bits,
+ * so use the stripped out versions from the log as well for
+ * testing whether recovery is needed. For restoration, use the
+ * 'raw' version of the log entries as that captured what we
+ * were going to write originally.
+ */
+ if ((log_newmap != map_entry) && (log_oldmap == map_entry)) {
/*
* Last transaction wrote the flog, but wasn't able
* to complete the map write. So fix up the map.
@@ -1556,7 +1557,7 @@
}
}
set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
- device_add_disk(&btt->nd_btt->dev, btt->btt_disk);
+ device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL);
btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
revalidate_disk(btt->btt_disk);
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index db3cb6d..2e258be 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -1,15 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Block Translation Table library
* Copyright (c) 2014-2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#ifndef _LINUX_BTT_H
@@ -44,6 +36,8 @@
#define ent_e_flag(ent) (!!(ent & MAP_ERR_MASK))
#define ent_z_flag(ent) (!!(ent & MAP_TRIM_MASK))
#define set_e_flag(ent) (ent |= MAP_ERR_MASK)
+/* 'normal' is both e and z flags set */
+#define ent_normal(ent) (ent_e_flag(ent) && ent_z_flag(ent))
enum btt_init_state {
INIT_UNCHECKED = 0,
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 795ad4f..3508a79 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/blkdev.h>
#include <linux/device.h>
@@ -70,14 +62,14 @@
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
btt_lbasize_supported);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
@@ -99,11 +91,11 @@
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
@@ -128,13 +120,13 @@
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
@@ -146,24 +138,32 @@
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
if (dev->driver)
rc = sprintf(buf, "%llu\n", nd_btt->size);
else {
/* no size to convey if the btt instance is disabled */
rc = -ENXIO;
}
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
static DEVICE_ATTR_RO(size);
+static ssize_t log_zero_flags_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Y\n");
+}
+static DEVICE_ATTR_RO(log_zero_flags);
+
static struct attribute *nd_btt_attributes[] = {
&dev_attr_sector_size.attr,
&dev_attr_namespace.attr,
&dev_attr_uuid.attr,
&dev_attr_size.attr,
+ &dev_attr_log_zero_flags.attr,
NULL,
};
@@ -190,14 +190,15 @@
return NULL;
nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL);
- if (nd_btt->id < 0) {
- kfree(nd_btt);
- return NULL;
- }
+ if (nd_btt->id < 0)
+ goto out_nd_btt;
nd_btt->lbasize = lbasize;
- if (uuid)
+ if (uuid) {
uuid = kmemdup(uuid, 16, GFP_KERNEL);
+ if (!uuid)
+ goto out_put_id;
+ }
nd_btt->uuid = uuid;
dev = &nd_btt->dev;
dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id);
@@ -212,6 +213,13 @@
return NULL;
}
return dev;
+
+out_put_id:
+ ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
+
+out_nd_btt:
+ kfree(nd_btt);
+ return NULL;
}
struct device *nd_btt_create(struct nd_region *nd_region)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 9148015..d47412d 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/libnvdimm.h>
@@ -23,6 +15,7 @@
#include <linux/ndctl.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/cpu.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
@@ -33,7 +26,7 @@
int nvdimm_major;
static int nvdimm_bus_major;
-static struct class *nd_class;
+struct class *nd_class;
static DEFINE_IDA(nd_ida);
static int to_nd_device_type(struct device *dev)
@@ -54,12 +47,6 @@
static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
{
- /*
- * Ensure that region devices always have their numa node set as
- * early as possible.
- */
- if (is_nd_region(dev))
- set_dev_node(dev, to_nd_region(dev)->numa_node);
return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
to_nd_device_type(dev));
}
@@ -86,7 +73,7 @@
{
nvdimm_bus_lock(&nvdimm_bus->dev);
if (--nvdimm_bus->probe_active == 0)
- wake_up(&nvdimm_bus->probe_wait);
+ wake_up(&nvdimm_bus->wait);
nvdimm_bus_unlock(&nvdimm_bus->dev);
}
@@ -104,11 +91,13 @@
dev->driver->name, dev_name(dev));
nvdimm_bus_probe_start(nvdimm_bus);
+ debug_nvdimm_lock(dev);
rc = nd_drv->probe(dev);
- if (rc == 0)
- nd_region_probe_success(nvdimm_bus, dev);
- else
- nd_region_disable(nvdimm_bus, dev);
+ debug_nvdimm_unlock(dev);
+
+ if ((rc == 0 || rc == -EOPNOTSUPP) &&
+ dev->parent && is_nd_region(dev->parent))
+ nd_region_advance_seeds(to_nd_region(dev->parent), dev);
nvdimm_bus_probe_end(nvdimm_bus);
dev_dbg(&nvdimm_bus->dev, "END: %s.probe(%s) = %d\n", dev->driver->name,
@@ -126,9 +115,11 @@
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
int rc = 0;
- if (nd_drv->remove)
+ if (nd_drv->remove) {
+ debug_nvdimm_lock(dev);
rc = nd_drv->remove(dev);
- nd_region_disable(nvdimm_bus, dev);
+ debug_nvdimm_unlock(dev);
+ }
dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
dev_name(dev), rc);
@@ -153,7 +144,7 @@
void nd_device_notify(struct device *dev, enum nvdimm_event event)
{
- device_lock(dev);
+ nd_device_lock(dev);
if (dev->driver) {
struct nd_device_driver *nd_drv;
@@ -161,7 +152,7 @@
if (nd_drv->notify)
nd_drv->notify(dev, event);
}
- device_unlock(dev);
+ nd_device_unlock(dev);
}
EXPORT_SYMBOL(nd_device_notify);
@@ -189,7 +180,7 @@
sector_t sector;
/* make sure device is a region */
- if (!is_nd_pmem(dev))
+ if (!is_memory(dev))
return 0;
nd_region = to_nd_region(dev);
@@ -309,7 +300,7 @@
kfree(nvdimm_bus);
}
-static bool is_nvdimm_bus(struct device *dev)
+bool is_nvdimm_bus(struct device *dev)
{
return dev->release == nvdimm_bus_release;
}
@@ -337,6 +328,12 @@
}
EXPORT_SYMBOL_GPL(to_nvdimm_bus);
+struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm)
+{
+ return to_nvdimm_bus(nvdimm->dev.parent);
+}
+EXPORT_SYMBOL_GPL(nvdimm_to_bus);
+
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nd_desc)
{
@@ -348,14 +345,14 @@
return NULL;
INIT_LIST_HEAD(&nvdimm_bus->list);
INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
- init_waitqueue_head(&nvdimm_bus->probe_wait);
+ init_waitqueue_head(&nvdimm_bus->wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
- mutex_init(&nvdimm_bus->reconfig_mutex);
- badrange_init(&nvdimm_bus->badrange);
if (nvdimm_bus->id < 0) {
kfree(nvdimm_bus);
return NULL;
}
+ mutex_init(&nvdimm_bus->reconfig_mutex);
+ badrange_init(&nvdimm_bus->badrange);
nvdimm_bus->nd_desc = nd_desc;
nvdimm_bus->dev.parent = parent;
nvdimm_bus->dev.release = nvdimm_bus_release;
@@ -393,9 +390,24 @@
* i.e. remove classless children
*/
if (dev->class)
- /* pass */;
- else
- nd_device_unregister(dev, ND_SYNC);
+ return 0;
+
+ if (is_nvdimm(dev)) {
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ bool dev_put = false;
+
+ /* We are shutting down. Make state frozen artificially. */
+ nvdimm_bus_lock(dev);
+ set_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags);
+ if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags))
+ dev_put = true;
+ nvdimm_bus_unlock(dev);
+ cancel_delayed_work_sync(&nvdimm->dwork);
+ if (dev_put)
+ put_device(dev);
+ }
+ nd_device_unregister(dev, ND_SYNC);
+
return 0;
}
@@ -418,6 +430,9 @@
list_del_init(&nvdimm_bus->list);
mutex_unlock(&nvdimm_bus_list_mutex);
+ wait_event(nvdimm_bus->wait,
+ atomic_read(&nvdimm_bus->ioctl_active) == 0);
+
nd_synchronize();
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
@@ -508,12 +523,26 @@
{
if (!dev)
return;
+
+ /*
+ * Ensure that region devices always have their NUMA node set as
+ * early as possible. This way we are able to make certain that
+ * any memory associated with the creation and the creation
+ * itself of the region is associated with the correct node.
+ */
+ if (is_nd_region(dev))
+ set_dev_node(dev, to_nd_region(dev)->numa_node);
+
dev->bus = &nvdimm_bus_type;
- if (dev->parent)
+ if (dev->parent) {
get_device(dev->parent);
+ if (dev_to_node(dev) == NUMA_NO_NODE)
+ set_dev_node(dev, dev_to_node(dev->parent));
+ }
get_device(dev);
- async_schedule_domain(nd_async_device_register, dev,
- &nd_async_domain);
+
+ async_schedule_dev_domain(nd_async_device_register, dev,
+ &nd_async_domain);
}
void nd_device_register(struct device *dev)
@@ -525,13 +554,38 @@
void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
{
+ bool killed;
+
switch (mode) {
case ND_ASYNC:
+ /*
+ * In the async case this is being triggered with the
+ * device lock held and the unregistration work needs to
+ * be moved out of line iff this thread has won the
+ * race to schedule the deletion.
+ */
+ if (!kill_device(dev))
+ return;
+
get_device(dev);
async_schedule_domain(nd_async_device_unregister, dev,
&nd_async_domain);
break;
case ND_SYNC:
+ /*
+ * In the sync case the device is being unregistered due
+ * to a state change of the parent. Claim the kill state
+ * to synchronize against other unregistration requests,
+ * or otherwise let the async path handle it if the
+ * unregistration was already queued.
+ */
+ nd_device_lock(dev);
+ killed = kill_device(dev);
+ nd_device_unlock(dev);
+
+ if (!killed)
+ return;
+
nd_synchronize();
device_unregister(dev);
break;
@@ -551,7 +605,7 @@
struct device_driver *drv = &nd_drv->drv;
if (!nd_drv->type) {
- pr_debug("driver type bitmask not set (%pf)\n",
+ pr_debug("driver type bitmask not set (%ps)\n",
__builtin_return_address(0));
return -EINVAL;
}
@@ -612,7 +666,7 @@
NULL,
};
-/**
+/*
* nd_device_attribute_group - generic attributes for all devices on an nd bus
*/
struct attribute_group nd_device_attribute_group = {
@@ -641,7 +695,7 @@
return a->mode;
}
-/**
+/*
* nd_numa_attribute_group - NUMA attributes for all devices on an nd bus
*/
struct attribute_group nd_numa_attribute_group = {
@@ -837,10 +891,12 @@
do {
if (nvdimm_bus->probe_active == 0)
break;
- nvdimm_bus_unlock(&nvdimm_bus->dev);
- wait_event(nvdimm_bus->probe_wait,
+ nvdimm_bus_unlock(dev);
+ nd_device_unlock(dev);
+ wait_event(nvdimm_bus->wait,
nvdimm_bus->probe_active == 0);
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ nd_device_lock(dev);
+ nvdimm_bus_lock(dev);
} while (true);
}
@@ -898,7 +954,7 @@
/* ask the bus provider if it would like to block this request */
if (nd_desc->clear_to_send) {
- int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd);
+ int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd, data);
if (rc)
return rc;
@@ -923,20 +979,19 @@
int read_only, unsigned int ioctl_cmd, unsigned long arg)
{
struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
- static char out_env[ND_CMD_MAX_ENVELOPE];
- static char in_env[ND_CMD_MAX_ENVELOPE];
const struct nd_cmd_desc *desc = NULL;
unsigned int cmd = _IOC_NR(ioctl_cmd);
struct device *dev = &nvdimm_bus->dev;
void __user *p = (void __user *) arg;
+ char *out_env = NULL, *in_env = NULL;
const char *cmd_name, *dimm_name;
u32 in_len = 0, out_len = 0;
unsigned int func = cmd;
unsigned long cmd_mask;
struct nd_cmd_pkg pkg;
int rc, i, cmd_rc;
+ void *buf = NULL;
u64 buf_len = 0;
- void *buf;
if (nvdimm) {
desc = nd_cmd_dimm_desc(cmd);
@@ -967,7 +1022,7 @@
case ND_CMD_ARS_START:
case ND_CMD_CLEAR_ERROR:
case ND_CMD_CALL:
- dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
+ dev_dbg(dev, "'%s' command while read-only.\n",
nvdimm ? nvdimm_cmd_name(cmd)
: nvdimm_bus_cmd_name(cmd));
return -EPERM;
@@ -976,6 +1031,9 @@
}
/* process an input envelope */
+ in_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
+ if (!in_env)
+ return -ENOMEM;
for (i = 0; i < desc->in_num; i++) {
u32 in_size, copy;
@@ -983,14 +1041,17 @@
if (in_size == UINT_MAX) {
dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
__func__, dimm_name, cmd_name, i);
- return -ENXIO;
+ rc = -ENXIO;
+ goto out;
}
- if (in_len < sizeof(in_env))
- copy = min_t(u32, sizeof(in_env) - in_len, in_size);
+ if (in_len < ND_CMD_MAX_ENVELOPE)
+ copy = min_t(u32, ND_CMD_MAX_ENVELOPE - in_len, in_size);
else
copy = 0;
- if (copy && copy_from_user(&in_env[in_len], p + in_len, copy))
- return -EFAULT;
+ if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) {
+ rc = -EFAULT;
+ goto out;
+ }
in_len += in_size;
}
@@ -1002,6 +1063,12 @@
}
/* process an output envelope */
+ out_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
+ if (!out_env) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
for (i = 0; i < desc->out_num; i++) {
u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
(u32 *) in_env, (u32 *) out_env, 0);
@@ -1010,15 +1077,18 @@
if (out_size == UINT_MAX) {
dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
dimm_name, cmd_name, i);
- return -EFAULT;
+ rc = -EFAULT;
+ goto out;
}
- if (out_len < sizeof(out_env))
- copy = min_t(u32, sizeof(out_env) - out_len, out_size);
+ if (out_len < ND_CMD_MAX_ENVELOPE)
+ copy = min_t(u32, ND_CMD_MAX_ENVELOPE - out_len, out_size);
else
copy = 0;
if (copy && copy_from_user(&out_env[out_len],
- p + in_len + out_len, copy))
- return -EFAULT;
+ p + in_len + out_len, copy)) {
+ rc = -EFAULT;
+ goto out;
+ }
out_len += out_size;
}
@@ -1026,19 +1096,23 @@
if (buf_len > ND_IOCTL_MAX_BUFLEN) {
dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
buf = vmalloc(buf_len);
- if (!buf)
- return -ENOMEM;
+ if (!buf) {
+ rc = -ENOMEM;
+ goto out;
+ }
if (copy_from_user(buf, p, buf_len)) {
rc = -EFAULT;
goto out;
}
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ nd_device_lock(dev);
+ nvdimm_bus_lock(dev);
rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
if (rc)
goto out_unlock;
@@ -1053,39 +1127,24 @@
nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address,
clear_err->cleared);
}
- nvdimm_bus_unlock(&nvdimm_bus->dev);
if (copy_to_user(p, buf, buf_len))
rc = -EFAULT;
- vfree(buf);
- return rc;
-
- out_unlock:
- nvdimm_bus_unlock(&nvdimm_bus->dev);
- out:
+out_unlock:
+ nvdimm_bus_unlock(dev);
+ nd_device_unlock(dev);
+out:
+ kfree(in_env);
+ kfree(out_env);
vfree(buf);
return rc;
}
-static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- long id = (long) file->private_data;
- int rc = -ENXIO, ro;
- struct nvdimm_bus *nvdimm_bus;
-
- ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
- mutex_lock(&nvdimm_bus_list_mutex);
- list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
- if (nvdimm_bus->id == id) {
- rc = __nd_ioctl(nvdimm_bus, NULL, ro, cmd, arg);
- break;
- }
- }
- mutex_unlock(&nvdimm_bus_list_mutex);
-
- return rc;
-}
+enum nd_ioctl_mode {
+ BUS_IOCTL,
+ DIMM_IOCTL,
+};
static int match_dimm(struct device *dev, void *data)
{
@@ -1100,31 +1159,62 @@
return 0;
}
-static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+ enum nd_ioctl_mode mode)
+
{
- int rc = -ENXIO, ro;
- struct nvdimm_bus *nvdimm_bus;
+ struct nvdimm_bus *nvdimm_bus, *found = NULL;
+ long id = (long) file->private_data;
+ struct nvdimm *nvdimm = NULL;
+ int rc, ro;
ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
mutex_lock(&nvdimm_bus_list_mutex);
list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
- struct device *dev = device_find_child(&nvdimm_bus->dev,
- file->private_data, match_dimm);
- struct nvdimm *nvdimm;
+ if (mode == DIMM_IOCTL) {
+ struct device *dev;
- if (!dev)
- continue;
+ dev = device_find_child(&nvdimm_bus->dev,
+ file->private_data, match_dimm);
+ if (!dev)
+ continue;
+ nvdimm = to_nvdimm(dev);
+ found = nvdimm_bus;
+ } else if (nvdimm_bus->id == id) {
+ found = nvdimm_bus;
+ }
- nvdimm = to_nvdimm(dev);
- rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
- put_device(dev);
- break;
+ if (found) {
+ atomic_inc(&nvdimm_bus->ioctl_active);
+ break;
+ }
}
mutex_unlock(&nvdimm_bus_list_mutex);
+ if (!found)
+ return -ENXIO;
+
+ nvdimm_bus = found;
+ rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
+
+ if (nvdimm)
+ put_device(&nvdimm->dev);
+ if (atomic_dec_and_test(&nvdimm_bus->ioctl_active))
+ wake_up(&nvdimm_bus->wait);
+
return rc;
}
+static long bus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return nd_ioctl(file, cmd, arg, BUS_IOCTL);
+}
+
+static long dimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return nd_ioctl(file, cmd, arg, DIMM_IOCTL);
+}
+
static int nd_open(struct inode *inode, struct file *file)
{
long minor = iminor(inode);
@@ -1136,16 +1226,16 @@
static const struct file_operations nvdimm_bus_fops = {
.owner = THIS_MODULE,
.open = nd_open,
- .unlocked_ioctl = nd_ioctl,
- .compat_ioctl = nd_ioctl,
+ .unlocked_ioctl = bus_ioctl,
+ .compat_ioctl = bus_ioctl,
.llseek = noop_llseek,
};
static const struct file_operations nvdimm_fops = {
.owner = THIS_MODULE,
.open = nd_open,
- .unlocked_ioctl = nvdimm_ioctl,
- .compat_ioctl = nvdimm_ioctl,
+ .unlocked_ioctl = dimm_ioctl,
+ .compat_ioctl = dimm_ioctl,
.llseek = noop_llseek,
};
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index fb667bf..2985ca9 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/device.h>
#include <linux/sizes.h>
@@ -263,7 +255,7 @@
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
sector_t sector = offset >> 9;
- int rc = 0;
+ int rc = 0, ret = 0;
if (unlikely(!size))
return 0;
@@ -301,7 +293,9 @@
}
memcpy_flushcache(nsio->addr + offset, buf, size);
- nvdimm_flush(to_nd_region(ndns->dev.parent));
+ ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
+ if (ret)
+ rc = ret;
return rc;
}
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index acce050..9204f1e 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/libnvdimm.h>
#include <linux/badblocks.h>
@@ -254,7 +246,7 @@
*
* Enforce that uuids can only be changed while the device is disabled
* (driver detached)
- * LOCKING: expects device_lock() is held on entry
+ * LOCKING: expects nd_device_lock() is held on entry
*/
int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
size_t len)
@@ -355,15 +347,15 @@
static int flush_namespaces(struct device *dev, void *data)
{
- device_lock(dev);
- device_unlock(dev);
+ nd_device_lock(dev);
+ nd_device_unlock(dev);
return 0;
}
static int flush_regions_dimms(struct device *dev, void *data)
{
- device_lock(dev);
- device_unlock(dev);
+ nd_device_lock(dev);
+ nd_device_unlock(dev);
device_for_each_child(dev, NULL, flush_namespaces);
return 0;
}
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index 0453f49..6d22b0f 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/device.h>
#include <linux/sizes.h>
@@ -126,7 +118,7 @@
nvdimm_bus_unlock(&ndns->dev);
if (!dax_dev)
return -ENOMEM;
- pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
+ pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
nd_pfn->pfn_sb = pfn_sb;
rc = nd_pfn_validate(nd_pfn, DAX_SIG);
dev_dbg(dev, "dax: %s\n", rc == 0 ? dev_name(dax_dev) : "<none>");
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 6c8fb75..64776ed 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/vmalloc.h>
#include <linux/module.h>
@@ -26,6 +18,12 @@
struct nvdimm_drvdata *ndd;
int rc;
+ rc = nvdimm_security_setup_events(dev);
+ if (rc < 0) {
+ dev_err(dev, "security event setup failed: %d\n", rc);
+ return rc;
+ }
+
rc = nvdimm_check_config_data(dev);
if (rc) {
/* not required for non-aliased nvdimm, ex. NVDIMM-N */
@@ -34,7 +32,11 @@
return rc;
}
- /* reset locked, to be validated below... */
+ /*
+ * The locked status bit reflects explicit status codes from the
+ * label reading commands, revalidate it each time the driver is
+ * activated and re-reads the label area.
+ */
nvdimm_clear_locked(dev);
ndd = kzalloc(sizeof(*ndd), GFP_KERNEL);
@@ -52,6 +54,16 @@
kref_init(&ndd->kref);
/*
+ * Attempt to unlock, if the DIMM supports security commands,
+ * otherwise the locked indication is determined by explicit
+ * status codes from the label reading commands.
+ */
+ rc = nvdimm_security_unlock(dev);
+ if (rc < 0)
+ dev_dbg(dev, "failed to unlock dimm: %d\n", rc);
+
+
+ /*
* EACCES failures reading the namespace label-area-properties
* are interpreted as the DIMM capacity being locked but the
* namespace labels themselves being accessible.
@@ -75,7 +87,7 @@
* DIMM capacity. We fail the dimm probe to prevent regions from
* attempting to parse the label area.
*/
- rc = nvdimm_init_config_data(ndd);
+ rc = nd_label_data_init(ndd);
if (rc == -EACCES)
nvdimm_set_locked(dev);
if (rc)
@@ -84,10 +96,6 @@
dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size);
nvdimm_bus_lock(dev);
- ndd->ns_current = nd_label_validate(ndd);
- ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
- nd_label_copy(ndd, to_next_namespace_index(ndd),
- to_current_namespace_index(ndd));
if (ndd->ns_current >= 0) {
rc = nd_label_reserve_dpa(ndd);
if (rc == 0)
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 863cabc..196aa44 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -1,16 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/moduleparam.h>
#include <linux/vmalloc.h>
#include <linux/device.h>
#include <linux/ndctl.h>
@@ -25,6 +18,10 @@
static DEFINE_IDA(dimm_ida);
+static bool noblk;
+module_param(noblk, bool, 0444);
+MODULE_PARM_DESC(noblk, "force disable BLK / local alias support");
+
/*
* Retrieve bus and dimm handle and return if this bus supports
* get_config_data commands
@@ -53,7 +50,7 @@
rc = nvdimm_check_config_data(ndd->dev);
if (rc)
- dev_dbg(ndd->dev, "%pf: %s error: %d\n",
+ dev_dbg(ndd->dev, "%ps: %s error: %d\n",
__builtin_return_address(0), __func__, rc);
return rc;
}
@@ -85,56 +82,48 @@
return cmd_rc;
}
-int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
+int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf,
+ size_t offset, size_t len)
{
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
int rc = validate_dimm(ndd), cmd_rc = 0;
struct nd_cmd_get_config_data_hdr *cmd;
- struct nvdimm_bus_descriptor *nd_desc;
- u32 max_cmd_size, config_size;
- size_t offset;
+ size_t max_cmd_size, buf_offset;
if (rc)
return rc;
- if (ndd->data)
- return 0;
-
- if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0
- || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) {
- dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n",
- ndd->nsarea.max_xfer, ndd->nsarea.config_size);
+ if (offset + len > ndd->nsarea.config_size)
return -ENXIO;
- }
- ndd->data = kvmalloc(ndd->nsarea.config_size, GFP_KERNEL);
- if (!ndd->data)
- return -ENOMEM;
-
- max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer);
- cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL);
+ max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer);
+ cmd = kvzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
- nd_desc = nvdimm_bus->nd_desc;
- for (config_size = ndd->nsarea.config_size, offset = 0;
- config_size; config_size -= cmd->in_length,
- offset += cmd->in_length) {
- cmd->in_length = min(config_size, max_cmd_size);
- cmd->in_offset = offset;
+ for (buf_offset = 0; len;
+ len -= cmd->in_length, buf_offset += cmd->in_length) {
+ size_t cmd_size;
+
+ cmd->in_offset = offset + buf_offset;
+ cmd->in_length = min(max_cmd_size, len);
+
+ cmd_size = sizeof(*cmd) + cmd->in_length;
+
rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
- ND_CMD_GET_CONFIG_DATA, cmd,
- cmd->in_length + sizeof(*cmd), &cmd_rc);
+ ND_CMD_GET_CONFIG_DATA, cmd, cmd_size, &cmd_rc);
if (rc < 0)
break;
if (cmd_rc < 0) {
rc = cmd_rc;
break;
}
- memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
+
+ /* out_buf should be valid, copy it into our output buffer */
+ memcpy(buf + buf_offset, cmd->out_buf, cmd->in_length);
}
- dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
- kfree(cmd);
+ kvfree(cmd);
return rc;
}
@@ -151,15 +140,11 @@
if (rc)
return rc;
- if (!ndd->data)
- return -ENXIO;
-
if (offset + len > ndd->nsarea.config_size)
return -ENXIO;
- max_cmd_size = min_t(u32, PAGE_SIZE, len);
- max_cmd_size = min_t(u32, max_cmd_size, ndd->nsarea.max_xfer);
- cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL);
+ max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer);
+ cmd = kvzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
@@ -183,7 +168,7 @@
break;
}
}
- kfree(cmd);
+ kvfree(cmd);
return rc;
}
@@ -382,23 +367,100 @@
}
static DEVICE_ATTR_RO(available_slots);
+__weak ssize_t security_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+
+ if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
+ return sprintf(buf, "disabled\n");
+ if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags))
+ return sprintf(buf, "unlocked\n");
+ if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags))
+ return sprintf(buf, "locked\n");
+ if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags))
+ return sprintf(buf, "overwrite\n");
+ return -ENOTTY;
+}
+
+static ssize_t frozen_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+
+ return sprintf(buf, "%d\n", test_bit(NVDIMM_SECURITY_FROZEN,
+ &nvdimm->sec.flags));
+}
+static DEVICE_ATTR_RO(frozen);
+
+static ssize_t security_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+
+{
+ ssize_t rc;
+
+ /*
+ * Require all userspace triggered security management to be
+ * done while probing is idle and the DIMM is not in active use
+ * in any region.
+ */
+ nd_device_lock(dev);
+ nvdimm_bus_lock(dev);
+ wait_nvdimm_bus_probe_idle(dev);
+ rc = nvdimm_security_store(dev, buf, len);
+ nvdimm_bus_unlock(dev);
+ nd_device_unlock(dev);
+
+ return rc;
+}
+static DEVICE_ATTR_RW(security);
+
static struct attribute *nvdimm_attributes[] = {
&dev_attr_state.attr,
&dev_attr_flags.attr,
&dev_attr_commands.attr,
&dev_attr_available_slots.attr,
+ &dev_attr_security.attr,
+ &dev_attr_frozen.attr,
NULL,
};
+static umode_t nvdimm_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, typeof(*dev), kobj);
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+
+ if (a != &dev_attr_security.attr && a != &dev_attr_frozen.attr)
+ return a->mode;
+ if (!nvdimm->sec.flags)
+ return 0;
+
+ if (a == &dev_attr_security.attr) {
+ /* Are there any state mutation ops (make writable)? */
+ if (nvdimm->sec.ops->freeze || nvdimm->sec.ops->disable
+ || nvdimm->sec.ops->change_key
+ || nvdimm->sec.ops->erase
+ || nvdimm->sec.ops->overwrite)
+ return a->mode;
+ return 0444;
+ }
+
+ if (nvdimm->sec.ops->freeze)
+ return a->mode;
+ return 0;
+}
+
struct attribute_group nvdimm_attribute_group = {
.attrs = nvdimm_attributes,
+ .is_visible = nvdimm_visible,
};
EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
-struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
- const struct attribute_group **groups, unsigned long flags,
- unsigned long cmd_mask, int num_flush,
- struct resource *flush_wpq)
+struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
+ void *provider_data, const struct attribute_group **groups,
+ unsigned long flags, unsigned long cmd_mask, int num_flush,
+ struct resource *flush_wpq, const char *dimm_id,
+ const struct nvdimm_security_ops *sec_ops)
{
struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
struct device *dev;
@@ -411,7 +473,11 @@
kfree(nvdimm);
return NULL;
}
+
+ nvdimm->dimm_id = dimm_id;
nvdimm->provider_data = provider_data;
+ if (noblk)
+ flags |= 1 << NDD_NOBLK;
nvdimm->flags = flags;
nvdimm->cmd_mask = cmd_mask;
nvdimm->num_flush = num_flush;
@@ -423,11 +489,72 @@
dev->type = &nvdimm_device_type;
dev->devt = MKDEV(nvdimm_major, nvdimm->id);
dev->groups = groups;
+ nvdimm->sec.ops = sec_ops;
+ nvdimm->sec.overwrite_tmo = 0;
+ INIT_DELAYED_WORK(&nvdimm->dwork, nvdimm_security_overwrite_query);
+ /*
+ * Security state must be initialized before device_add() for
+ * attribute visibility.
+ */
+ /* get security state and extended (master) state */
+ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+ nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
nd_device_register(dev);
return nvdimm;
}
-EXPORT_SYMBOL_GPL(nvdimm_create);
+EXPORT_SYMBOL_GPL(__nvdimm_create);
+
+static void shutdown_security_notify(void *data)
+{
+ struct nvdimm *nvdimm = data;
+
+ sysfs_put(nvdimm->sec.overwrite_state);
+}
+
+int nvdimm_security_setup_events(struct device *dev)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+
+ if (!nvdimm->sec.flags || !nvdimm->sec.ops
+ || !nvdimm->sec.ops->overwrite)
+ return 0;
+ nvdimm->sec.overwrite_state = sysfs_get_dirent(dev->kobj.sd, "security");
+ if (!nvdimm->sec.overwrite_state)
+ return -ENOMEM;
+
+ return devm_add_action_or_reset(dev, shutdown_security_notify, nvdimm);
+}
+EXPORT_SYMBOL_GPL(nvdimm_security_setup_events);
+
+int nvdimm_in_overwrite(struct nvdimm *nvdimm)
+{
+ return test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
+}
+EXPORT_SYMBOL_GPL(nvdimm_in_overwrite);
+
+int nvdimm_security_freeze(struct nvdimm *nvdimm)
+{
+ int rc;
+
+ WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev));
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->freeze)
+ return -EOPNOTSUPP;
+
+ if (!nvdimm->sec.flags)
+ return -EIO;
+
+ if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+ dev_warn(&nvdimm->dev, "Overwrite operation in progress.\n");
+ return -EBUSY;
+ }
+
+ rc = nvdimm->sec.ops->freeze(nvdimm);
+ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+
+ return rc;
+}
int alias_dpa_busy(struct device *dev, void *data)
{
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
index 521eaf5..87f72f7 100644
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2015, Christoph Hellwig.
* Copyright (c) 2015, Intel Corporation.
@@ -47,6 +48,7 @@
ndr_desc.res = res;
ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
ndr_desc.numa_node = e820_range_to_nid(res->start);
+ ndr_desc.target_node = ndr_desc.numa_node;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
return -ENXIO;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 1d28cd6..47a4828 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/device.h>
#include <linux/ndctl.h>
@@ -25,6 +17,8 @@
static guid_t nvdimm_pfn_guid;
static guid_t nvdimm_dax_guid;
+static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
+
static u32 best_seq(u32 a, u32 b)
{
a &= NSINDEX_SEQ_MASK;
@@ -75,7 +69,8 @@
/*
* Per UEFI 2.7, the minimum size of the Label Storage Area is large
* enough to hold 2 index blocks and 2 labels. The minimum index
- * block size is 256 bytes, and the minimum label size is 256 bytes.
+ * block size is 256 bytes. The label size is 128 for namespaces
+ * prior to version 1.2 and at minimum 256 for version 1.2 and later.
*/
nslot = nvdimm_num_label_slots(ndd);
space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd);
@@ -183,6 +178,13 @@
__le64_to_cpu(nsindex[i]->otheroff));
continue;
}
+ if (__le64_to_cpu(nsindex[i]->labeloff)
+ != 2 * sizeof_namespace_index(ndd)) {
+ dev_dbg(dev, "nsindex%d labeloff: %#llx invalid\n",
+ i, (unsigned long long)
+ __le64_to_cpu(nsindex[i]->labeloff));
+ continue;
+ }
size = __le64_to_cpu(nsindex[i]->mysize);
if (size > sizeof_namespace_index(ndd)
@@ -227,7 +229,7 @@
return -1;
}
-int nd_label_validate(struct nvdimm_drvdata *ndd)
+static int nd_label_validate(struct nvdimm_drvdata *ndd)
{
/*
* In order to probe for and validate namespace index blocks we
@@ -250,12 +252,12 @@
return -1;
}
-void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
- struct nd_namespace_index *src)
+static void nd_label_copy(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_index *dst,
+ struct nd_namespace_index *src)
{
- if (dst && src)
- /* pass */;
- else
+ /* just exit if either destination or source is NULL */
+ if (!dst || !src)
return;
memcpy(dst, src, sizeof_namespace_index(ndd));
@@ -351,11 +353,6 @@
if (slot != __le32_to_cpu(nd_label->slot))
return false;
- /* check that DPA allocations are page aligned */
- if ((__le64_to_cpu(nd_label->dpa)
- | __le64_to_cpu(nd_label->rawsize)) % SZ_4K)
- return false;
-
/* check checksum */
if (namespace_label_has(ndd, checksum)) {
u64 sum, sum_save;
@@ -384,6 +381,7 @@
return 0; /* no label, nothing to reserve */
for_each_clear_bit_le(slot, free, nslot) {
+ struct nvdimm *nvdimm = to_nvdimm(ndd->dev);
struct nd_namespace_label *nd_label;
struct nd_region *nd_region = NULL;
u8 label_uuid[NSLABEL_UUID_LEN];
@@ -398,6 +396,8 @@
memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
flags = __le32_to_cpu(nd_label->flags);
+ if (test_bit(NDD_NOBLK, &nvdimm->flags))
+ flags &= ~NSLABEL_FLAG_LOCAL;
nd_label_gen_id(&label_id, label_uuid, flags);
res = nvdimm_allocate_dpa(ndd, &label_id,
__le64_to_cpu(nd_label->dpa),
@@ -410,6 +410,128 @@
return 0;
}
+int nd_label_data_init(struct nvdimm_drvdata *ndd)
+{
+ size_t config_size, read_size, max_xfer, offset;
+ struct nd_namespace_index *nsindex;
+ unsigned int i;
+ int rc = 0;
+ u32 nslot;
+
+ if (ndd->data)
+ return 0;
+
+ if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) {
+ dev_dbg(ndd->dev, "failed to init config data area: (%u:%u)\n",
+ ndd->nsarea.max_xfer, ndd->nsarea.config_size);
+ return -ENXIO;
+ }
+
+ /*
+ * We need to determine the maximum index area as this is the section
+ * we must read and validate before we can start processing labels.
+ *
+ * If the area is too small to contain the two indexes and 2 labels
+ * then we abort.
+ *
+ * Start at a label size of 128 as this should result in the largest
+ * possible namespace index size.
+ */
+ ndd->nslabel_size = 128;
+ read_size = sizeof_namespace_index(ndd) * 2;
+ if (!read_size)
+ return -ENXIO;
+
+ /* Allocate config data */
+ config_size = ndd->nsarea.config_size;
+ ndd->data = kvzalloc(config_size, GFP_KERNEL);
+ if (!ndd->data)
+ return -ENOMEM;
+
+ /*
+ * We want to guarantee as few reads as possible while conserving
+ * memory. To do that we figure out how much unused space will be left
+ * in the last read, divide that by the total number of reads it is
+ * going to take given our maximum transfer size, and then reduce our
+ * maximum transfer size based on that result.
+ */
+ max_xfer = min_t(size_t, ndd->nsarea.max_xfer, config_size);
+ if (read_size < max_xfer) {
+ /* trim waste */
+ max_xfer -= ((max_xfer - 1) - (config_size - 1) % max_xfer) /
+ DIV_ROUND_UP(config_size, max_xfer);
+ /* make certain we read indexes in exactly 1 read */
+ if (max_xfer < read_size)
+ max_xfer = read_size;
+ }
+
+ /* Make our initial read size a multiple of max_xfer size */
+ read_size = min(DIV_ROUND_UP(read_size, max_xfer) * max_xfer,
+ config_size);
+
+ /* Read the index data */
+ rc = nvdimm_get_config_data(ndd, ndd->data, 0, read_size);
+ if (rc)
+ goto out_err;
+
+ /* Validate index data, if not valid assume all labels are invalid */
+ ndd->ns_current = nd_label_validate(ndd);
+ if (ndd->ns_current < 0)
+ return 0;
+
+ /* Record our index values */
+ ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
+
+ /* Copy "current" index on top of the "next" index */
+ nsindex = to_current_namespace_index(ndd);
+ nd_label_copy(ndd, to_next_namespace_index(ndd), nsindex);
+
+ /* Determine starting offset for label data */
+ offset = __le64_to_cpu(nsindex->labeloff);
+ nslot = __le32_to_cpu(nsindex->nslot);
+
+ /* Loop through the free list pulling in any active labels */
+ for (i = 0; i < nslot; i++, offset += ndd->nslabel_size) {
+ size_t label_read_size;
+
+ /* zero out the unused labels */
+ if (test_bit_le(i, nsindex->free)) {
+ memset(ndd->data + offset, 0, ndd->nslabel_size);
+ continue;
+ }
+
+ /* if we already read past here then just continue */
+ if (offset + ndd->nslabel_size <= read_size)
+ continue;
+
+ /* if we haven't read in a while reset our read_size offset */
+ if (read_size < offset)
+ read_size = offset;
+
+ /* determine how much more will be read after this next call. */
+ label_read_size = offset + ndd->nslabel_size - read_size;
+ label_read_size = DIV_ROUND_UP(label_read_size, max_xfer) *
+ max_xfer;
+
+ /* truncate last read if needed */
+ if (read_size + label_read_size > config_size)
+ label_read_size = config_size - read_size;
+
+ /* Read the label data */
+ rc = nvdimm_get_config_data(ndd, ndd->data + read_size,
+ read_size, label_read_size);
+ if (rc)
+ goto out_err;
+
+ /* push read_size to next read offset */
+ read_size += label_read_size;
+ }
+
+ dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
+out_err:
+ return rc;
+}
+
int nd_label_active_count(struct nvdimm_drvdata *ndd)
{
struct nd_namespace_index *nsindex;
@@ -623,16 +745,27 @@
return &guid_null;
}
+static void reap_victim(struct nd_mapping *nd_mapping,
+ struct nd_label_ent *victim)
+{
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ u32 slot = to_slot(ndd, victim->label);
+
+ dev_dbg(ndd->dev, "free: %d\n", slot);
+ nd_label_free_slot(ndd, slot);
+ victim->label = NULL;
+}
+
static int __pmem_label_update(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
- int pos)
+ int pos, unsigned long flags)
{
struct nd_namespace_common *ndns = &nspm->nsio.common;
struct nd_interleave_set *nd_set = nd_region->nd_set;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- struct nd_label_ent *label_ent, *victim = NULL;
struct nd_namespace_label *nd_label;
struct nd_namespace_index *nsindex;
+ struct nd_label_ent *label_ent;
struct nd_label_id label_id;
struct resource *res;
unsigned long *free;
@@ -666,7 +799,7 @@
memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN);
if (nspm->alt_name)
memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN);
- nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_UPDATING);
+ nd_label->flags = __cpu_to_le32(flags);
nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings);
nd_label->position = __cpu_to_le16(pos);
nd_label->isetcookie = __cpu_to_le64(cookie);
@@ -701,18 +834,10 @@
list_for_each_entry(label_ent, &nd_mapping->labels, list) {
if (!label_ent->label)
continue;
- if (memcmp(nspm->uuid, label_ent->label->uuid,
- NSLABEL_UUID_LEN) != 0)
- continue;
- victim = label_ent;
- list_move_tail(&victim->list, &nd_mapping->labels);
- break;
- }
- if (victim) {
- dev_dbg(ndd->dev, "free: %d\n", slot);
- slot = to_slot(ndd, victim->label);
- nd_label_free_slot(ndd, slot);
- victim->label = NULL;
+ if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags)
+ || memcmp(nspm->uuid, label_ent->label->uuid,
+ NSLABEL_UUID_LEN) == 0)
+ reap_victim(nd_mapping, label_ent);
}
/* update index */
@@ -814,8 +939,7 @@
victims = 0;
if (old_num_resources) {
/* convert old local-label-map to dimm-slot victim-map */
- victim_map = kcalloc(BITS_TO_LONGS(nslot), sizeof(long),
- GFP_KERNEL);
+ victim_map = bitmap_zalloc(nslot, GFP_KERNEL);
if (!victim_map)
return -ENOMEM;
@@ -838,7 +962,7 @@
/* don't allow updates that consume the last label */
if (nfree - alloc < 0 || nfree - alloc + victims < 1) {
dev_info(&nsblk->common.dev, "insufficient label space\n");
- kfree(victim_map);
+ bitmap_free(victim_map);
return -ENOSPC;
}
/* from here on we need to abort on error */
@@ -1010,7 +1134,7 @@
out:
kfree(old_res_list);
- kfree(victim_map);
+ bitmap_free(victim_map);
return rc;
abort:
@@ -1120,13 +1244,13 @@
int nd_pmem_namespace_label_update(struct nd_region *nd_region,
struct nd_namespace_pmem *nspm, resource_size_t size)
{
- int i;
+ int i, rc;
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct resource *res;
- int rc, count = 0;
+ int count = 0;
if (size == 0) {
rc = del_labels(nd_mapping, nspm->uuid);
@@ -1144,7 +1268,20 @@
if (rc < 0)
return rc;
- rc = __pmem_label_update(nd_region, nd_mapping, nspm, i);
+ rc = __pmem_label_update(nd_region, nd_mapping, nspm, i,
+ NSLABEL_FLAG_UPDATING);
+ if (rc)
+ return rc;
+ }
+
+ if (size == 0)
+ return 0;
+
+ /* Clear the UPDATING flag per UEFI 2.7 expectations */
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+
+ rc = __pmem_label_update(nd_region, nd_mapping, nspm, i, 0);
if (rc)
return rc;
}
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 18bbe18..4c7b775 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#ifndef __LABEL_H__
#define __LABEL_H__
@@ -38,8 +30,6 @@
ND_NSINDEX_INIT = 0x1,
};
-static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
-
/**
* struct nd_namespace_index - label set superblock
* @sig: NAMESPACE_INDEX\0
@@ -138,9 +128,7 @@
}
struct nvdimm_drvdata;
-int nd_label_validate(struct nvdimm_drvdata *ndd);
-void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
- struct nd_namespace_index *src);
+int nd_label_data_init(struct nvdimm_drvdata *ndd);
size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd);
int nd_label_active_count(struct nvdimm_drvdata *ndd);
struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 4a42662..cca0a3b 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/module.h>
#include <linux/device.h>
@@ -138,6 +130,7 @@
bool pmem_should_map_pages(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
+ struct nd_namespace_common *ndns = to_ndns(dev);
struct nd_namespace_io *nsio;
if (!IS_ENABLED(CONFIG_ZONE_DEVICE))
@@ -149,6 +142,9 @@
if (is_nd_pfn(dev) || is_nd_btt(dev))
return false;
+ if (ndns->force_raw)
+ return false;
+
nsio = to_nd_namespace_io(dev);
if (region_intersects(nsio->res.start, resource_size(&nsio->res),
IORESOURCE_SYSTEM_RAM,
@@ -270,11 +266,10 @@
if (dev->driver || to_ndns(dev)->claim)
return -EBUSY;
- input = kmemdup(buf, len + 1, GFP_KERNEL);
+ input = kstrndup(buf, len, GFP_KERNEL);
if (!input)
return -ENOMEM;
- input[len] = '\0';
pos = strim(input);
if (strlen(pos) + 1 > NSLABEL_NAME_LEN) {
rc = -EINVAL;
@@ -415,7 +410,7 @@
struct nd_region *nd_region = to_nd_region(dev->parent);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __alt_name_store(dev, buf, len);
@@ -423,7 +418,7 @@
rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc < 0 ? rc : len;
}
@@ -1011,10 +1006,10 @@
return -ENXIO;
}
- div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder);
+ div_u64_rem(val, PAGE_SIZE * nd_region->ndr_mappings, &remainder);
if (remainder) {
- dev_dbg(dev, "%llu is not %dK aligned\n", val,
- (SZ_4K * nd_region->ndr_mappings) / SZ_1K);
+ dev_dbg(dev, "%llu is not %ldK aligned\n", val,
+ (PAGE_SIZE * nd_region->ndr_mappings) / SZ_1K);
return -EINVAL;
}
@@ -1082,7 +1077,7 @@
if (rc)
return rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __size_store(dev, val);
@@ -1108,7 +1103,7 @@
dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc);
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc < 0 ? rc : len;
}
@@ -1244,12 +1239,27 @@
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct nd_label_ent *label_ent;
struct resource *res;
for_each_dpa_resource(ndd, res)
if (strcmp(res->name, old_label_id.id) == 0)
sprintf((void *) res->name, "%s",
new_label_id.id);
+
+ mutex_lock(&nd_mapping->lock);
+ list_for_each_entry(label_ent, &nd_mapping->labels, list) {
+ struct nd_namespace_label *nd_label = label_ent->label;
+ struct nd_label_id label_id;
+
+ if (!nd_label)
+ continue;
+ nd_label_gen_id(&label_id, nd_label->uuid,
+ __le32_to_cpu(nd_label->flags));
+ if (strcmp(old_label_id.id, label_id.id) == 0)
+ set_bit(ND_LABEL_REAP, &label_ent->flags);
+ }
+ mutex_unlock(&nd_mapping->lock);
}
kfree(*old_uuid);
out:
@@ -1276,7 +1286,7 @@
} else
return -ENXIO;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
if (to_ndns(dev)->claim)
@@ -1292,7 +1302,7 @@
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc < 0 ? rc : len;
}
@@ -1366,7 +1376,7 @@
} else
return -ENXIO;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
if (to_ndns(dev)->claim)
rc = -EBUSY;
@@ -1377,7 +1387,7 @@
dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote",
buf, buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
@@ -1492,9 +1502,9 @@
struct nd_namespace_common *ndns = to_ndns(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : "");
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
@@ -1507,13 +1517,13 @@
if (dev->driver || ndns->claim)
return -EBUSY;
- if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0)
+ if (sysfs_streq(buf, "btt"))
ndns->claim_class = btt_claim_class(dev);
- else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0)
+ else if (sysfs_streq(buf, "pfn"))
ndns->claim_class = NVDIMM_CCLASS_PFN;
- else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0)
+ else if (sysfs_streq(buf, "dax"))
ndns->claim_class = NVDIMM_CCLASS_DAX;
- else if (strcmp(buf, "") == 0 || strcmp(buf, "\n") == 0)
+ else if (sysfs_streq(buf, ""))
ndns->claim_class = NVDIMM_CCLASS_NONE;
else
return -EINVAL;
@@ -1531,7 +1541,7 @@
struct nd_region *nd_region = to_nd_region(dev->parent);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __holder_class_store(dev, buf);
@@ -1539,7 +1549,7 @@
rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc < 0 ? rc : len;
}
@@ -1550,7 +1560,7 @@
struct nd_namespace_common *ndns = to_ndns(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
if (ndns->claim_class == NVDIMM_CCLASS_NONE)
rc = sprintf(buf, "\n");
else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) ||
@@ -1562,7 +1572,7 @@
rc = sprintf(buf, "dax\n");
else
rc = sprintf(buf, "<unknown>\n");
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
@@ -1576,7 +1586,7 @@
char *mode;
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
claim = ndns->claim;
if (claim && is_nd_btt(claim))
mode = "safe";
@@ -1589,7 +1599,7 @@
else
mode = "raw";
rc = sprintf(buf, "%s\n", mode);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
@@ -1693,8 +1703,8 @@
* Flush any in-progess probes / removals in the driver
* for the raw personality of this namespace.
*/
- device_lock(&ndns->dev);
- device_unlock(&ndns->dev);
+ nd_device_lock(&ndns->dev);
+ nd_device_unlock(&ndns->dev);
if (ndns->dev.driver) {
dev_dbg(&ndns->dev, "is active, can't bind %s\n",
dev_name(dev));
@@ -1812,8 +1822,8 @@
&& !guid_equal(&nd_set->type_guid,
&nd_label->type_guid)) {
dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
- nd_set->type_guid.b,
- nd_label->type_guid.b);
+ &nd_set->type_guid,
+ &nd_label->type_guid);
continue;
}
@@ -1977,7 +1987,7 @@
nd_mapping = &nd_region->mapping[i];
label_ent = list_first_entry_or_null(&nd_mapping->labels,
typeof(*label_ent), list);
- label0 = label_ent ? label_ent->label : 0;
+ label0 = label_ent ? label_ent->label : NULL;
if (!label0) {
WARN_ON(1);
@@ -2099,7 +2109,6 @@
return NULL;
}
dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id);
- dev->parent = &nd_region->dev;
dev->groups = nd_namespace_attribute_groups;
nd_namespace_pmem_set_resource(nd_region, nspm, 0);
@@ -2218,8 +2227,8 @@
if (namespace_label_has(ndd, type_guid)) {
if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) {
dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
- nd_set->type_guid.b,
- nd_label->type_guid.b);
+ &nd_set->type_guid,
+ &nd_label->type_guid);
return ERR_PTR(-EAGAIN);
}
@@ -2247,9 +2256,12 @@
if (!nsblk->uuid)
goto blk_err;
memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
- if (name[0])
+ if (name[0]) {
nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
GFP_KERNEL);
+ if (!nsblk->alt_name)
+ goto blk_err;
+ }
res = nsblk_add_resource(nd_region, ndd, nsblk,
__le64_to_cpu(nd_label->dpa));
if (!res)
@@ -2310,8 +2322,9 @@
continue;
/* skip labels that describe extents outside of the region */
- if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end)
- continue;
+ if (__le64_to_cpu(nd_label->dpa) < nd_mapping->start ||
+ __le64_to_cpu(nd_label->dpa) > map_end)
+ continue;
i = add_namespace_resource(nd_region, nd_label, devs, count);
if (i < 0)
@@ -2450,6 +2463,27 @@
return devs;
}
+static void deactivate_labels(void *region)
+{
+ struct nd_region *nd_region = region;
+ int i;
+
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm_drvdata *ndd = nd_mapping->ndd;
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+ mutex_lock(&nd_mapping->lock);
+ nd_mapping_free_labels(nd_mapping);
+ mutex_unlock(&nd_mapping->lock);
+
+ put_ndd(ndd);
+ nd_mapping->ndd = NULL;
+ if (ndd)
+ atomic_dec(&nvdimm->busy);
+ }
+}
+
static int init_active_labels(struct nd_region *nd_region)
{
int i;
@@ -2494,6 +2528,12 @@
if (!label_ent)
break;
label = nd_label_active(ndd, j);
+ if (test_bit(NDD_NOBLK, &nvdimm->flags)) {
+ u32 flags = __le32_to_cpu(label->flags);
+
+ flags &= ~NSLABEL_FLAG_LOCAL;
+ label->flags = __cpu_to_le32(flags);
+ }
label_ent->label = label;
mutex_lock(&nd_mapping->lock);
@@ -2501,16 +2541,17 @@
mutex_unlock(&nd_mapping->lock);
}
- if (j >= count)
- continue;
+ if (j < count)
+ break;
+ }
- mutex_lock(&nd_mapping->lock);
- nd_mapping_free_labels(nd_mapping);
- mutex_unlock(&nd_mapping->lock);
+ if (i < nd_region->ndr_mappings) {
+ deactivate_labels(nd_region);
return -ENOMEM;
}
- return 0;
+ return devm_add_action_or_reset(&nd_region->dev, deactivate_labels,
+ nd_region);
}
int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 5ff254d..25fa121 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -1,34 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#ifndef __ND_CORE_H__
#define __ND_CORE_H__
#include <linux/libnvdimm.h>
#include <linux/device.h>
-#include <linux/libnvdimm.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/nd.h>
+#include "nd.h"
extern struct list_head nvdimm_bus_list;
extern struct mutex nvdimm_bus_list_mutex;
extern int nvdimm_major;
+extern struct workqueue_struct *nvdimm_wq;
struct nvdimm_bus {
struct nvdimm_bus_descriptor *nd_desc;
- wait_queue_head_t probe_wait;
+ wait_queue_head_t wait;
struct list_head list;
struct device dev;
int id, probe_active;
+ atomic_t ioctl_active;
struct list_head mapping_list;
struct mutex reconfig_mutex;
struct badrange badrange;
@@ -42,8 +36,51 @@
atomic_t busy;
int id, num_flush;
struct resource *flush_wpq;
+ const char *dimm_id;
+ struct {
+ const struct nvdimm_security_ops *ops;
+ unsigned long flags;
+ unsigned long ext_flags;
+ unsigned int overwrite_tmo;
+ struct kernfs_node *overwrite_state;
+ } sec;
+ struct delayed_work dwork;
};
+static inline unsigned long nvdimm_security_flags(
+ struct nvdimm *nvdimm, enum nvdimm_passphrase_type ptype)
+{
+ u64 flags;
+ const u64 state_flags = 1UL << NVDIMM_SECURITY_DISABLED
+ | 1UL << NVDIMM_SECURITY_LOCKED
+ | 1UL << NVDIMM_SECURITY_UNLOCKED
+ | 1UL << NVDIMM_SECURITY_OVERWRITE;
+
+ if (!nvdimm->sec.ops)
+ return 0;
+
+ flags = nvdimm->sec.ops->get_flags(nvdimm, ptype);
+ /* disabled, locked, unlocked, and overwrite are mutually exclusive */
+ dev_WARN_ONCE(&nvdimm->dev, hweight64(flags & state_flags) > 1,
+ "reported invalid security state: %#llx\n",
+ (unsigned long long) flags);
+ return flags;
+}
+int nvdimm_security_freeze(struct nvdimm *nvdimm);
+#if IS_ENABLED(CONFIG_NVDIMM_KEYS)
+ssize_t nvdimm_security_store(struct device *dev, const char *buf, size_t len);
+void nvdimm_security_overwrite_query(struct work_struct *work);
+#else
+static inline ssize_t nvdimm_security_store(struct device *dev,
+ const char *buf, size_t len)
+{
+ return -EOPNOTSUPP;
+}
+static inline void nvdimm_security_overwrite_query(struct work_struct *work)
+{
+}
+#endif
+
/**
* struct blk_alloc_info - tracking info for BLK dpa scanning
* @nd_mapping: blk region mapping boundaries
@@ -78,13 +115,12 @@
void nvdimm_bus_exit(void);
void nvdimm_devs_exit(void);
void nd_region_devs_exit(void);
-void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
struct nd_region;
+void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev);
void nd_region_create_ns_seed(struct nd_region *nd_region);
void nd_region_create_btt_seed(struct nd_region *nd_region);
void nd_region_create_pfn_seed(struct nd_region *nd_region);
void nd_region_create_dax_seed(struct nd_region *nd_region);
-void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev);
int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
void nd_synchronize(void);
@@ -133,4 +169,71 @@
struct nd_namespace_common **_ndns, const char *buf,
size_t len);
struct nd_pfn *to_nd_pfn_safe(struct device *dev);
+bool is_nvdimm_bus(struct device *dev);
+
+#ifdef CONFIG_PROVE_LOCKING
+extern struct class *nd_class;
+
+enum {
+ LOCK_BUS,
+ LOCK_NDCTL,
+ LOCK_REGION,
+ LOCK_DIMM = LOCK_REGION,
+ LOCK_NAMESPACE,
+ LOCK_CLAIM,
+};
+
+static inline void debug_nvdimm_lock(struct device *dev)
+{
+ if (is_nd_region(dev))
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_REGION);
+ else if (is_nvdimm(dev))
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_DIMM);
+ else if (is_nd_btt(dev) || is_nd_pfn(dev) || is_nd_dax(dev))
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_CLAIM);
+ else if (dev->parent && (is_nd_region(dev->parent)))
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_NAMESPACE);
+ else if (is_nvdimm_bus(dev))
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_BUS);
+ else if (dev->class && dev->class == nd_class)
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_NDCTL);
+ else
+ dev_WARN(dev, "unknown lock level\n");
+}
+
+static inline void debug_nvdimm_unlock(struct device *dev)
+{
+ mutex_unlock(&dev->lockdep_mutex);
+}
+
+static inline void nd_device_lock(struct device *dev)
+{
+ device_lock(dev);
+ debug_nvdimm_lock(dev);
+}
+
+static inline void nd_device_unlock(struct device *dev)
+{
+ debug_nvdimm_unlock(dev);
+ device_unlock(dev);
+}
+#else
+static inline void nd_device_lock(struct device *dev)
+{
+ device_lock(dev);
+}
+
+static inline void nd_device_unlock(struct device *dev)
+{
+ device_unlock(dev);
+}
+
+static inline void debug_nvdimm_lock(struct device *dev)
+{
+}
+
+static inline void debug_nvdimm_unlock(struct device *dev)
+{
+}
+#endif
#endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 98317e7..ee5c040 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#ifndef __ND_H__
#define __ND_H__
@@ -113,8 +105,12 @@
spinlock_t lock;
};
+enum nd_label_flags {
+ ND_LABEL_REAP,
+};
struct nd_label_ent {
struct list_head list;
+ unsigned long flags;
struct nd_namespace_label *label;
};
@@ -153,12 +149,13 @@
u16 ndr_mappings;
u64 ndr_size;
u64 ndr_start;
- int id, num_lanes, ro, numa_node;
+ int id, num_lanes, ro, numa_node, target_node;
void *provider_data;
struct kernfs_node *bb_state;
struct badblocks bb;
struct nd_interleave_set *nd_set;
struct nd_percpu_lane __percpu *lane;
+ int (*flush)(struct nd_region *nd_region, struct bio *bio);
struct nd_mapping mapping[0];
};
@@ -241,6 +238,8 @@
int nvdimm_check_config_data(struct device *dev);
int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
+int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf,
+ size_t offset, size_t len);
int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
void *buf, size_t len);
long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
@@ -248,6 +247,15 @@
void nvdimm_set_aliasing(struct device *dev);
void nvdimm_set_locked(struct device *dev);
void nvdimm_clear_locked(struct device *dev);
+int nvdimm_security_setup_events(struct device *dev);
+#if IS_ENABLED(CONFIG_NVDIMM_KEYS)
+int nvdimm_security_unlock(struct device *dev);
+#else
+static inline int nvdimm_security_unlock(struct device *dev)
+{
+ return 0;
+}
+#endif
struct nd_btt *to_nd_btt(struct device *dev);
struct nd_gen_sb {
@@ -281,11 +289,7 @@
struct nd_pfn *to_nd_pfn(struct device *dev);
#if IS_ENABLED(CONFIG_NVDIMM_PFN)
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE
-#else
-#define PFN_DEFAULT_ALIGNMENT PAGE_SIZE
-#endif
+#define MAX_NVDIMM_ALIGN 4
int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns);
bool is_nd_pfn(struct device *dev);
@@ -367,6 +371,10 @@
void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res);
#if IS_ENABLED(CONFIG_ND_CLAIM)
+
+/* max struct page size independent of kernel config */
+#define MAX_STRUCT_PAGE_SIZE 64
+
int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio);
void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
new file mode 100644
index 0000000..10351d5
--- /dev/null
+++ b/drivers/nvdimm/nd_virtio.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * nd_virtio.c: Virtio pmem Driver
+ *
+ * Discovers persistent memory range information
+ * from host and provides a virtio based flushing
+ * interface.
+ */
+#include "virtio_pmem.h"
+#include "nd.h"
+
+ /* The interrupt handler */
+void virtio_pmem_host_ack(struct virtqueue *vq)
+{
+ struct virtio_pmem *vpmem = vq->vdev->priv;
+ struct virtio_pmem_request *req_data, *req_buf;
+ unsigned long flags;
+ unsigned int len;
+
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
+ req_data->done = true;
+ wake_up(&req_data->host_acked);
+
+ if (!list_empty(&vpmem->req_list)) {
+ req_buf = list_first_entry(&vpmem->req_list,
+ struct virtio_pmem_request, list);
+ req_buf->wq_buf_avail = true;
+ wake_up(&req_buf->wq_buf);
+ list_del(&req_buf->list);
+ }
+ }
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+}
+EXPORT_SYMBOL_GPL(virtio_pmem_host_ack);
+
+ /* The request submission function */
+static int virtio_pmem_flush(struct nd_region *nd_region)
+{
+ struct virtio_device *vdev = nd_region->provider_data;
+ struct virtio_pmem *vpmem = vdev->priv;
+ struct virtio_pmem_request *req_data;
+ struct scatterlist *sgs[2], sg, ret;
+ unsigned long flags;
+ int err, err1;
+
+ might_sleep();
+ req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
+ if (!req_data)
+ return -ENOMEM;
+
+ req_data->done = false;
+ init_waitqueue_head(&req_data->host_acked);
+ init_waitqueue_head(&req_data->wq_buf);
+ INIT_LIST_HEAD(&req_data->list);
+ req_data->req.type = cpu_to_le32(VIRTIO_PMEM_REQ_TYPE_FLUSH);
+ sg_init_one(&sg, &req_data->req, sizeof(req_data->req));
+ sgs[0] = &sg;
+ sg_init_one(&ret, &req_data->resp.ret, sizeof(req_data->resp));
+ sgs[1] = &ret;
+
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ /*
+ * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual
+ * queue does not have free descriptor. We add the request
+ * to req_list and wait for host_ack to wake us up when free
+ * slots are available.
+ */
+ while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req_data,
+ GFP_ATOMIC)) == -ENOSPC) {
+
+ dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
+ req_data->wq_buf_avail = false;
+ list_add_tail(&req_data->list, &vpmem->req_list);
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+
+ /* A host response results in "host_ack" getting called */
+ wait_event(req_data->wq_buf, req_data->wq_buf_avail);
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ }
+ err1 = virtqueue_kick(vpmem->req_vq);
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+ /*
+ * virtqueue_add_sgs failed with error different than -ENOSPC, we can't
+ * do anything about that.
+ */
+ if (err || !err1) {
+ dev_info(&vdev->dev, "failed to send command to virtio pmem device\n");
+ err = -EIO;
+ } else {
+ /* A host response results in "host_ack" getting called */
+ wait_event(req_data->host_acked, req_data->done);
+ err = le32_to_cpu(req_data->resp.ret);
+ }
+
+ kfree(req_data);
+ return err;
+};
+
+/* The asynchronous flush callback function */
+int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
+{
+ /*
+ * Create child bio for asynchronous flush and chain with
+ * parent bio. Otherwise directly call nd_region flush.
+ */
+ if (bio && bio->bi_iter.bi_sector != -1) {
+ struct bio *child = bio_alloc(GFP_ATOMIC, 0);
+
+ if (!child)
+ return -ENOMEM;
+ bio_copy_dev(child, bio);
+ child->bi_opf = REQ_PREFLUSH;
+ child->bi_iter.bi_sector = -1;
+ bio_chain(child, bio);
+ submit_bio(child);
+ return 0;
+ }
+ if (virtio_pmem_flush(nd_region))
+ return -EIO;
+
+ return 0;
+};
+EXPORT_SYMBOL_GPL(async_pmem_flush);
+MODULE_LICENSE("GPL");
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 0a70183..97187d6 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -42,7 +42,7 @@
return -ENOMEM;
priv->bus_desc.attr_groups = bus_attr_groups;
- priv->bus_desc.provider_name = "of_pmem";
+ priv->bus_desc.provider_name = kstrdup(pdev->name, GFP_KERNEL);
priv->bus_desc.module = THIS_MODULE;
priv->bus_desc.of_node = np;
@@ -68,6 +68,7 @@
memset(&ndr_desc, 0, sizeof(ndr_desc));
ndr_desc.attr_groups = region_attr_groups;
ndr_desc.numa_node = dev_to_node(&pdev->dev);
+ ndr_desc.target_node = ndr_desc.numa_node;
ndr_desc.res = &pdev->resource[i];
ndr_desc.of_node = np;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
@@ -108,7 +109,6 @@
.remove = of_pmem_region_remove,
.driver = {
.name = "of_pmem",
- .owner = THIS_MODULE,
.of_match_table = of_pmem_region_match,
},
};
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index dde9853..acb1951 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2014-2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#ifndef __NVDIMM_PFN_H
@@ -36,22 +28,12 @@
__le32 end_trunc;
/* minor-version-2 record the base alignment of the mapping */
__le32 align;
- u8 padding[4000];
+ /* minor-version-3 guarantee the padding and flags are zero */
+ /* minor-version-4 record the page size and struct page size */
+ __le32 page_size;
+ __le16 page_struct_size;
+ u8 padding[3994];
__le64 checksum;
};
-#ifdef CONFIG_SPARSEMEM
-#define PFN_SECTION_ALIGN_DOWN(x) SECTION_ALIGN_DOWN(x)
-#define PFN_SECTION_ALIGN_UP(x) SECTION_ALIGN_UP(x)
-#else
-/*
- * In this case ZONE_DEVICE=n and we will disable 'pfn' device support,
- * but we still want pmem to compile.
- */
-#define PFN_SECTION_ALIGN_DOWN(x) (x)
-#define PFN_SECTION_ALIGN_UP(x) (x)
-#endif
-
-#define PHYS_SECTION_ALIGN_DOWN(x) PFN_PHYS(PFN_SECTION_ALIGN_DOWN(PHYS_PFN(x)))
-#define PHYS_SECTION_ALIGN_UP(x) PFN_PHYS(PFN_SECTION_ALIGN_UP(PHYS_PFN(x)))
#endif /* __NVDIMM_PFN_H */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 7fe84bf..60d81fa 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/memremap.h>
#include <linux/blkdev.h>
@@ -75,7 +67,7 @@
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc = 0;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
if (dev->driver)
rc = -EBUSY;
@@ -97,7 +89,7 @@
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
@@ -111,43 +103,46 @@
return sprintf(buf, "%ld\n", nd_pfn->align);
}
-static const unsigned long *nd_pfn_supported_alignments(void)
+static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments)
{
- /*
- * This needs to be a non-static variable because the *_SIZE
- * macros aren't always constants.
- */
- const unsigned long supported_alignments[] = {
- PAGE_SIZE,
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- HPAGE_PMD_SIZE,
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
- HPAGE_PUD_SIZE,
-#endif
-#endif
- 0,
- };
- static unsigned long data[ARRAY_SIZE(supported_alignments)];
- memcpy(data, supported_alignments, sizeof(data));
+ alignments[0] = PAGE_SIZE;
- return data;
+ if (has_transparent_hugepage()) {
+ alignments[1] = HPAGE_PMD_SIZE;
+ if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
+ alignments[2] = HPAGE_PUD_SIZE;
+ }
+
+ return alignments;
+}
+
+/*
+ * Use pmd mapping if supported as default alignment
+ */
+static unsigned long nd_pfn_default_alignment(void)
+{
+
+ if (has_transparent_hugepage())
+ return HPAGE_PMD_SIZE;
+ return PAGE_SIZE;
}
static ssize_t align_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
+ unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_size_select_store(dev, buf, &nd_pfn->align,
- nd_pfn_supported_alignments());
+ nd_pfn_supported_alignments(aligns));
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
@@ -169,11 +164,11 @@
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
@@ -198,13 +193,13 @@
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
@@ -216,7 +211,7 @@
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
if (dev->driver) {
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = __le64_to_cpu(pfn_sb->dataoff);
@@ -230,7 +225,7 @@
/* no address to convey if the pfn instance is disabled */
rc = -ENXIO;
}
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
@@ -242,7 +237,7 @@
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
if (dev->driver) {
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = __le64_to_cpu(pfn_sb->dataoff);
@@ -258,7 +253,7 @@
/* no size to convey if the pfn instance is disabled */
rc = -ENXIO;
}
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
@@ -267,7 +262,10 @@
static ssize_t supported_alignments_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return nd_size_select_show(0, nd_pfn_supported_alignments(), buf);
+ unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };
+
+ return nd_size_select_show(0,
+ nd_pfn_supported_alignments(aligns), buf);
}
static DEVICE_ATTR_RO(supported_alignments);
@@ -310,7 +308,7 @@
return NULL;
nd_pfn->mode = PFN_MODE_NONE;
- nd_pfn->align = PFN_DEFAULT_ALIGNMENT;
+ nd_pfn->align = nd_pfn_default_alignment();
dev = &nd_pfn->dev;
device_initialize(&nd_pfn->dev);
if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
@@ -361,6 +359,89 @@
return dev;
}
+/*
+ * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap
+ * space associated with the namespace. If the memmap is set to DRAM, then
+ * this is a no-op. Since the memmap area is freshly initialized during
+ * probe, we have an opportunity to clear any badblocks in this area.
+ */
+static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
+{
+ struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent);
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+ void *zero_page = page_address(ZERO_PAGE(0));
+ struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+ int num_bad, meta_num, rc, bb_present;
+ sector_t first_bad, meta_start;
+ struct nd_namespace_io *nsio;
+
+ if (nd_pfn->mode != PFN_MODE_PMEM)
+ return 0;
+
+ nsio = to_nd_namespace_io(&ndns->dev);
+ meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9;
+ meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start;
+
+ do {
+ unsigned long zero_len;
+ u64 nsoff;
+
+ bb_present = badblocks_check(&nd_region->bb, meta_start,
+ meta_num, &first_bad, &num_bad);
+ if (bb_present) {
+ dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %llx\n",
+ num_bad, first_bad);
+ nsoff = ALIGN_DOWN((nd_region->ndr_start
+ + (first_bad << 9)) - nsio->res.start,
+ PAGE_SIZE);
+ zero_len = ALIGN(num_bad << 9, PAGE_SIZE);
+ while (zero_len) {
+ unsigned long chunk = min(zero_len, PAGE_SIZE);
+
+ rc = nvdimm_write_bytes(ndns, nsoff, zero_page,
+ chunk, 0);
+ if (rc)
+ break;
+
+ zero_len -= chunk;
+ nsoff += chunk;
+ }
+ if (rc) {
+ dev_err(&nd_pfn->dev,
+ "error clearing %x badblocks at %llx\n",
+ num_bad, first_bad);
+ return rc;
+ }
+ }
+ } while (bb_present);
+
+ return 0;
+}
+
+static bool nd_supported_alignment(unsigned long align)
+{
+ int i;
+ unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, };
+
+ if (align == 0)
+ return false;
+
+ nd_pfn_supported_alignments(supported);
+ for (i = 0; supported[i]; i++)
+ if (align == supported[i])
+ return true;
+ return false;
+}
+
+/**
+ * nd_pfn_validate - read and validate info-block
+ * @nd_pfn: fsdax namespace runtime state / properties
+ * @sig: 'devdax' or 'fsdax' signature
+ *
+ * Upon return the info-block buffer contents (->pfn_sb) are
+ * indeterminate when validation fails, and a coherent info-block
+ * otherwise.
+ */
int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{
u64 checksum, offset;
@@ -400,6 +481,11 @@
if (__le16_to_cpu(pfn_sb->version_minor) < 2)
pfn_sb->align = 0;
+ if (__le16_to_cpu(pfn_sb->version_minor) < 4) {
+ pfn_sb->page_struct_size = cpu_to_le16(64);
+ pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
+ }
+
switch (le32_to_cpu(pfn_sb->mode)) {
case PFN_MODE_RAM:
case PFN_MODE_PMEM:
@@ -415,6 +501,34 @@
align = 1UL << ilog2(offset);
mode = le32_to_cpu(pfn_sb->mode);
+ if ((le32_to_cpu(pfn_sb->page_size) > PAGE_SIZE) &&
+ (mode == PFN_MODE_PMEM)) {
+ dev_err(&nd_pfn->dev,
+ "init failed, page size mismatch %d\n",
+ le32_to_cpu(pfn_sb->page_size));
+ return -EOPNOTSUPP;
+ }
+
+ if ((le16_to_cpu(pfn_sb->page_struct_size) < sizeof(struct page)) &&
+ (mode == PFN_MODE_PMEM)) {
+ dev_err(&nd_pfn->dev,
+ "init failed, struct page size mismatch %d\n",
+ le16_to_cpu(pfn_sb->page_struct_size));
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * Check whether we support the alignment. For Dax if the
+ * superblock alignment is not matching, we won't initialize
+ * the device.
+ */
+ if (!nd_supported_alignment(align) &&
+ !memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) {
+ dev_err(&nd_pfn->dev, "init failed, alignment mismatch: "
+ "%ld:%ld\n", nd_pfn->align, align);
+ return -EOPNOTSUPP;
+ }
+
if (!nd_pfn->uuid) {
/*
* When probing a namepace via nd_pfn_probe() the uuid
@@ -477,7 +591,7 @@
return -ENXIO;
}
- return 0;
+ return nd_pfn_clear_memmap_errors(nd_pfn);
}
EXPORT_SYMBOL(nd_pfn_validate);
@@ -506,7 +620,7 @@
nvdimm_bus_unlock(&ndns->dev);
if (!pfn_dev)
return -ENOMEM;
- pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
+ pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
nd_pfn = to_nd_pfn(pfn_dev);
nd_pfn->pfn_sb = pfn_sb;
rc = nd_pfn_validate(nd_pfn, PFN_SIG);
@@ -521,23 +635,28 @@
}
EXPORT_SYMBOL(nd_pfn_probe);
+static u32 info_block_reserve(void)
+{
+ return ALIGN(SZ_8K, PAGE_SIZE);
+}
+
/*
- * We hotplug memory at section granularity, pad the reserved area from
- * the previous section base to the namespace base address.
+ * We hotplug memory at sub-section granularity, pad the reserved area
+ * from the previous sub-section base to the namespace base address.
*/
static unsigned long init_altmap_base(resource_size_t base)
{
unsigned long base_pfn = PHYS_PFN(base);
- return PFN_SECTION_ALIGN_DOWN(base_pfn);
+ return SUBSECTION_ALIGN_DOWN(base_pfn);
}
static unsigned long init_altmap_reserve(resource_size_t base)
{
- unsigned long reserve = PHYS_PFN(SZ_8K);
+ unsigned long reserve = info_block_reserve() >> PAGE_SHIFT;
unsigned long base_pfn = PHYS_PFN(base);
- reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
+ reserve += base_pfn - SUBSECTION_ALIGN_DOWN(base_pfn);
return reserve;
}
@@ -549,12 +668,15 @@
u64 offset = le64_to_cpu(pfn_sb->dataoff);
u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
+ u32 reserve = info_block_reserve();
struct nd_namespace_common *ndns = nd_pfn->ndns;
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
resource_size_t base = nsio->res.start + start_pad;
+ resource_size_t end = nsio->res.end - end_trunc;
struct vmem_altmap __altmap = {
.base_pfn = init_altmap_base(base),
.reserve = init_altmap_reserve(base),
+ .end_pfn = PHYS_PFN(end),
};
memcpy(res, &nsio->res, sizeof(*res));
@@ -562,83 +684,41 @@
res->end -= end_trunc;
if (nd_pfn->mode == PFN_MODE_RAM) {
- if (offset < SZ_8K)
+ if (offset < reserve)
return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
- pgmap->altmap_valid = false;
} else if (nd_pfn->mode == PFN_MODE_PMEM) {
- nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
- - offset) / PAGE_SIZE);
+ nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset));
if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
dev_info(&nd_pfn->dev,
"number of pfns truncated from %lld to %ld\n",
le64_to_cpu(nd_pfn->pfn_sb->npfns),
nd_pfn->npfns);
memcpy(altmap, &__altmap, sizeof(*altmap));
- altmap->free = PHYS_PFN(offset - SZ_8K);
+ altmap->free = PHYS_PFN(offset - reserve);
altmap->alloc = 0;
- pgmap->altmap_valid = true;
+ pgmap->flags |= PGMAP_ALTMAP_VALID;
} else
return -ENXIO;
return 0;
}
-static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
-{
- return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
- ALIGN_DOWN(phys, nd_pfn->align));
-}
-
-/*
- * Check if pmem collides with 'System RAM', or other regions when
- * section aligned. Trim it accordingly.
- */
-static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc)
-{
- struct nd_namespace_common *ndns = nd_pfn->ndns;
- struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
- struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent);
- const resource_size_t start = nsio->res.start;
- const resource_size_t end = start + resource_size(&nsio->res);
- resource_size_t adjust, size;
-
- *start_pad = 0;
- *end_trunc = 0;
-
- adjust = start - PHYS_SECTION_ALIGN_DOWN(start);
- size = resource_size(&nsio->res) + adjust;
- if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM,
- IORES_DESC_NONE) == REGION_MIXED
- || nd_region_conflict(nd_region, start - adjust, size))
- *start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
-
- /* Now check that end of the range does not collide. */
- adjust = PHYS_SECTION_ALIGN_UP(end) - end;
- size = resource_size(&nsio->res) + adjust;
- if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
- IORES_DESC_NONE) == REGION_MIXED
- || !IS_ALIGNED(end, nd_pfn->align)
- || nd_region_conflict(nd_region, start, size + adjust))
- *end_trunc = end - phys_pmem_align_down(nd_pfn, end);
-}
-
static int nd_pfn_init(struct nd_pfn *nd_pfn)
{
- u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
struct nd_namespace_common *ndns = nd_pfn->ndns;
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
resource_size_t start, size;
struct nd_region *nd_region;
- u32 start_pad, end_trunc;
+ unsigned long npfns, align;
+ u32 end_trunc;
struct nd_pfn_sb *pfn_sb;
- unsigned long npfns;
phys_addr_t offset;
const char *sig;
u64 checksum;
int rc;
- pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
+ pfn_sb = devm_kmalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
if (!pfn_sb)
return -ENOMEM;
@@ -647,11 +727,14 @@
sig = DAX_SIG;
else
sig = PFN_SIG;
+
rc = nd_pfn_validate(nd_pfn, sig);
if (rc != -ENODEV)
return rc;
/* no info block, do init */;
+ memset(pfn_sb, 0, sizeof(*pfn_sb));
+
nd_region = to_nd_region(nd_pfn->dev.parent);
if (nd_region->ro) {
dev_info(&nd_pfn->dev,
@@ -660,44 +743,44 @@
return -ENXIO;
}
- memset(pfn_sb, 0, sizeof(*pfn_sb));
-
- trim_pfn_device(nd_pfn, &start_pad, &end_trunc);
- if (start_pad + end_trunc)
- dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
- dev_name(&ndns->dev), start_pad + end_trunc);
-
/*
* Note, we use 64 here for the standard size of struct page,
* debugging options may cause it to be larger in which case the
* implementation will limit the pfns advertised through
* ->direct_access() to those that are included in the memmap.
*/
- start = nsio->res.start + start_pad;
+ start = nsio->res.start;
size = resource_size(&nsio->res);
- npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K)
- / PAGE_SIZE);
+ npfns = PHYS_PFN(size - SZ_8K);
+ align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT));
+ end_trunc = start + size - ALIGN_DOWN(start + size, align);
if (nd_pfn->mode == PFN_MODE_PMEM) {
/*
* The altmap should be padded out to the block size used
* when populating the vmemmap. This *should* be equal to
* PMD_SIZE for most architectures.
+ *
+ * Also make sure the size of struct page does not exceed 64. We
+ * want to make sure we use large enough size here so that
+ * we don't have a dynamic reserve space depending on
+ * struct page size. But we also want to make sure we notice
+ * when we end up adding new elements to struct page.
*/
- offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve,
- max(nd_pfn->align, PMD_SIZE)) - start;
+ BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE);
+ offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align)
+ - start;
} else if (nd_pfn->mode == PFN_MODE_RAM)
- offset = ALIGN(start + SZ_8K + dax_label_reserve,
- nd_pfn->align) - start;
+ offset = ALIGN(start + SZ_8K, align) - start;
else
return -ENXIO;
- if (offset + start_pad + end_trunc >= size) {
+ if (offset >= size) {
dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
dev_name(&ndns->dev));
return -ENXIO;
}
- npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
+ npfns = PHYS_PFN(size - offset - end_trunc);
pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
pfn_sb->dataoff = cpu_to_le64(offset);
pfn_sb->npfns = cpu_to_le64(npfns);
@@ -705,10 +788,11 @@
memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
pfn_sb->version_major = cpu_to_le16(1);
- pfn_sb->version_minor = cpu_to_le16(2);
- pfn_sb->start_pad = cpu_to_le32(start_pad);
+ pfn_sb->version_minor = cpu_to_le16(4);
pfn_sb->end_trunc = cpu_to_le32(end_trunc);
pfn_sb->align = cpu_to_le32(nd_pfn->align);
+ pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE);
+ pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
pfn_sb->checksum = cpu_to_le64(checksum);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 2082ae0..f9f76f6 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -1,18 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Persistent Memory Driver
*
* Copyright (c) 2014-2015, Intel Corporation.
* Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
* Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <asm/cacheflush.h>
@@ -113,13 +105,13 @@
while (len) {
mem = kmap_atomic(page);
- chunk = min_t(unsigned int, len, PAGE_SIZE);
+ chunk = min_t(unsigned int, len, PAGE_SIZE - off);
memcpy_flushcache(pmem_addr, mem + off, chunk);
kunmap_atomic(mem);
len -= chunk;
off = 0;
page++;
- pmem_addr += PAGE_SIZE;
+ pmem_addr += chunk;
}
}
@@ -132,7 +124,7 @@
while (len) {
mem = kmap_atomic(page);
- chunk = min_t(unsigned int, len, PAGE_SIZE);
+ chunk = min_t(unsigned int, len, PAGE_SIZE - off);
rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
if (rem)
@@ -140,7 +132,7 @@
len -= chunk;
off = 0;
page++;
- pmem_addr += PAGE_SIZE;
+ pmem_addr += chunk;
}
return BLK_STS_OK;
}
@@ -192,6 +184,7 @@
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
+ int ret = 0;
blk_status_t rc = 0;
bool do_acct;
unsigned long start;
@@ -201,7 +194,7 @@
struct nd_region *nd_region = to_region(pmem);
if (bio->bi_opf & REQ_PREFLUSH)
- nvdimm_flush(nd_region);
+ ret = nvdimm_flush(nd_region, bio);
do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter) {
@@ -216,7 +209,10 @@
nd_iostat_end(bio, start);
if (bio->bi_opf & REQ_FUA)
- nvdimm_flush(nd_region);
+ ret = nvdimm_flush(nd_region, bio);
+
+ if (ret)
+ bio->bi_status = errno_to_blk_status(ret);
bio_endio(bio);
return BLK_QC_T_NONE;
@@ -281,20 +277,27 @@
return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
}
+/*
+ * Use the 'no check' versions of copy_from_iter_flushcache() and
+ * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
+ * checking, both file offset and device offset, is handled by
+ * dax_iomap_actor()
+ */
static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
- return copy_from_iter_flushcache(addr, bytes, i);
+ return _copy_from_iter_flushcache(addr, bytes, i);
}
static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
- return copy_to_iter_mcsafe(addr, bytes, i);
+ return _copy_to_iter_mcsafe(addr, bytes, i);
}
static const struct dax_operations pmem_dax_ops = {
.direct_access = pmem_dax_direct_access,
+ .dax_supported = generic_fsdax_supported,
.copy_from_iter = pmem_copy_from_iter,
.copy_to_iter = pmem_copy_to_iter,
};
@@ -304,13 +307,24 @@
NULL,
};
-static void pmem_release_queue(void *q)
+static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap)
{
+ struct request_queue *q =
+ container_of(pgmap->ref, struct request_queue, q_usage_counter);
+
blk_cleanup_queue(q);
}
-static void pmem_freeze_queue(void *q)
+static void pmem_release_queue(void *pgmap)
{
+ pmem_pagemap_cleanup(pgmap);
+}
+
+static void pmem_pagemap_kill(struct dev_pagemap *pgmap)
+{
+ struct request_queue *q =
+ container_of(pgmap->ref, struct request_queue, q_usage_counter);
+
blk_freeze_queue_start(q);
}
@@ -324,26 +338,16 @@
put_disk(pmem->disk);
}
-static void pmem_release_pgmap_ops(void *__pgmap)
-{
- dev_pagemap_put_ops();
-}
-
-static void fsdax_pagefree(struct page *page, void *data)
+static void pmem_pagemap_page_free(struct page *page)
{
wake_up_var(&page->_refcount);
}
-static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
-{
- dev_pagemap_get_ops();
- if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
- return -ENOMEM;
- pgmap->type = MEMORY_DEVICE_FS_DAX;
- pgmap->page_free = fsdax_pagefree;
-
- return 0;
-}
+static const struct dev_pagemap_ops fsdax_pagemap_ops = {
+ .page_free = pmem_pagemap_page_free,
+ .kill = pmem_pagemap_kill,
+ .cleanup = pmem_pagemap_cleanup,
+};
static int pmem_attach_disk(struct device *dev,
struct nd_namespace_common *ndns)
@@ -362,6 +366,7 @@
struct gendisk *disk;
void *addr;
int rc;
+ unsigned long flags = 0UL;
pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
if (!pmem)
@@ -393,18 +398,15 @@
return -EBUSY;
}
- q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL);
+ q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
if (!q)
return -ENOMEM;
- if (devm_add_action_or_reset(dev, pmem_release_queue, q))
- return -ENOMEM;
-
pmem->pfn_flags = PFN_DEV;
pmem->pgmap.ref = &q->q_usage_counter;
if (is_nd_pfn(dev)) {
- if (setup_pagemap_fsdax(dev, &pmem->pgmap))
- return -ENOMEM;
+ pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
+ pmem->pgmap.ops = &fsdax_pagemap_ops;
addr = devm_memremap_pages(dev, &pmem->pgmap);
pfn_sb = nd_pfn->pfn_sb;
pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
@@ -415,25 +417,20 @@
bb_res.start += pmem->data_offset;
} else if (pmem_should_map_pages(dev)) {
memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
- pmem->pgmap.altmap_valid = false;
- if (setup_pagemap_fsdax(dev, &pmem->pgmap))
- return -ENOMEM;
+ pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
+ pmem->pgmap.ops = &fsdax_pagemap_ops;
addr = devm_memremap_pages(dev, &pmem->pgmap);
pmem->pfn_flags |= PFN_MAP;
memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
} else {
+ if (devm_add_action_or_reset(dev, pmem_release_queue,
+ &pmem->pgmap))
+ return -ENOMEM;
addr = devm_memremap(dev, pmem->phys_addr,
pmem->size, ARCH_MEMREMAP_PMEM);
memcpy(&bb_res, &nsio->res, sizeof(bb_res));
}
- /*
- * At release time the queue must be frozen before
- * devm_memremap_pages is unwound
- */
- if (devm_add_action_or_reset(dev, pmem_freeze_queue, q))
- return -ENOMEM;
-
if (IS_ERR(addr))
return PTR_ERR(addr);
pmem->virt_addr = addr;
@@ -465,18 +462,19 @@
nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
disk->bb = &pmem->bb;
- dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
+ if (is_nvdimm_sync(nd_region))
+ flags = DAXDEV_F_SYNC;
+ dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
if (!dax_dev) {
put_disk(disk);
return -ENOMEM;
}
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
pmem->dax_dev = dax_dev;
-
gendev = disk_to_dev(disk);
gendev->groups = pmem_attribute_groups;
- device_add_disk(dev, disk);
+ device_add_disk(dev, disk, NULL);
if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
return -ENOMEM;
@@ -492,6 +490,7 @@
static int nd_pmem_probe(struct device *dev)
{
+ int ret;
struct nd_namespace_common *ndns;
ndns = nvdimm_namespace_common_probe(dev);
@@ -507,12 +506,32 @@
if (is_nd_pfn(dev))
return pmem_attach_disk(dev, ndns);
- /* if we find a valid info-block we'll come back as that personality */
- if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0
- || nd_dax_probe(dev, ndns) == 0)
+ ret = nd_btt_probe(dev, ndns);
+ if (ret == 0)
return -ENXIO;
- /* ...otherwise we're just a raw pmem device */
+ /*
+ * We have two failure conditions here, there is no
+ * info reserve block or we found a valid info reserve block
+ * but failed to initialize the pfn superblock.
+ *
+ * For the first case consider namespace as a raw pmem namespace
+ * and attach a disk.
+ *
+ * For the latter, consider this a success and advance the namespace
+ * seed.
+ */
+ ret = nd_pfn_probe(dev, ndns);
+ if (ret == 0)
+ return -ENXIO;
+ else if (ret == -EOPNOTSUPP)
+ return ret;
+
+ ret = nd_dax_probe(dev, ndns);
+ if (ret == 0)
+ return -ENXIO;
+ else if (ret == -EOPNOTSUPP)
+ return ret;
return pmem_attach_disk(dev, ndns);
}
@@ -524,20 +543,20 @@
nvdimm_namespace_detach_btt(to_nd_btt(dev));
else {
/*
- * Note, this assumes device_lock() context to not race
- * nd_pmem_notify()
+ * Note, this assumes nd_device_lock() context to not
+ * race nd_pmem_notify()
*/
sysfs_put(pmem->bb_state);
pmem->bb_state = NULL;
}
- nvdimm_flush(to_nd_region(dev->parent));
+ nvdimm_flush(to_nd_region(dev->parent), NULL);
return 0;
}
static void nd_pmem_shutdown(struct device *dev)
{
- nvdimm_flush(to_nd_region(dev->parent));
+ nvdimm_flush(to_nd_region(dev->parent), NULL);
}
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index b9ca003..0f6978e 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/cpumask.h>
#include <linux/module.h>
@@ -42,18 +34,7 @@
if (rc)
return rc;
- rc = nd_region_register_namespaces(nd_region, &err);
- if (rc < 0)
- return rc;
-
- ndrd = dev_get_drvdata(dev);
- ndrd->ns_active = rc;
- ndrd->ns_count = rc + err;
-
- if (rc && err && rc == err)
- return -ENODEV;
-
- if (is_nd_pmem(&nd_region->dev)) {
+ if (is_memory(&nd_region->dev)) {
struct resource ndr_res;
if (devm_init_badblocks(dev, &nd_region->bb))
@@ -68,6 +49,17 @@
nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
}
+ rc = nd_region_register_namespaces(nd_region, &err);
+ if (rc < 0)
+ return rc;
+
+ ndrd = dev_get_drvdata(dev);
+ ndrd->ns_active = rc;
+ ndrd->ns_count = rc + err;
+
+ if (rc && err && rc == err)
+ return -ENODEV;
+
nd_region->btt_seed = nd_btt_create(nd_region);
nd_region->pfn_seed = nd_pfn_create(nd_region);
nd_region->dax_seed = nd_dax_create(nd_region);
@@ -110,7 +102,7 @@
nvdimm_bus_unlock(dev);
/*
- * Note, this assumes device_lock() context to not race
+ * Note, this assumes nd_device_lock() context to not race
* nd_region_notify()
*/
sysfs_put(nd_region->bb_state);
@@ -131,7 +123,7 @@
struct nd_region *nd_region = to_nd_region(dev);
struct resource res;
- if (is_nd_pmem(&nd_region->dev)) {
+ if (is_memory(&nd_region->dev)) {
res.start = nd_region->ndr_start;
res.end = nd_region->ndr_start +
nd_region->ndr_size - 1;
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index e7377f1..ef423ba 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/scatterlist.h>
#include <linux/highmem.h>
@@ -79,6 +71,11 @@
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
+ if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+ nvdimm_bus_unlock(&nd_region->dev);
+ return -EBUSY;
+ }
+
/* at least one null hint slot per-dimm for the "no-hint" case */
flush_data_size += sizeof(void *);
num_flush = min_not_zero(num_flush, nvdimm->num_flush);
@@ -290,7 +287,9 @@
return rc;
if (!flush)
return -EINVAL;
- nvdimm_flush(nd_region);
+ rc = nvdimm_flush(nd_region, NULL);
+ if (rc)
+ return rc;
return len;
}
@@ -332,7 +331,7 @@
* the v1.1 namespace label cookie definition. To read all this
* data we need to wait for probing to settle.
*/
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
if (nd_region->ndr_mappings) {
@@ -349,7 +348,7 @@
}
}
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
if (rc)
return rc;
@@ -425,10 +424,12 @@
* memory nvdimm_bus_lock() is dropped, but that's userspace's
* problem to not race itself.
*/
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
available = nd_region_available_dpa(nd_region);
nvdimm_bus_unlock(dev);
+ nd_device_unlock(dev);
return sprintf(buf, "%llu\n", available);
}
@@ -440,10 +441,12 @@
struct nd_region *nd_region = to_nd_region(dev);
unsigned long long available = 0;
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
available = nd_region_allocatable_dpa(nd_region);
nvdimm_bus_unlock(dev);
+ nd_device_unlock(dev);
return sprintf(buf, "%llu\n", available);
}
@@ -562,12 +565,12 @@
struct nd_region *nd_region = to_nd_region(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
if (dev->driver)
rc = badblocks_show(&nd_region->bb, buf, 0);
else
rc = -ENXIO;
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
@@ -629,11 +632,11 @@
if (!is_memory(dev) && a == &dev_attr_dax_seed.attr)
return 0;
- if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr)
+ if (!is_memory(dev) && a == &dev_attr_badblocks.attr)
return 0;
if (a == &dev_attr_resource.attr) {
- if (is_nd_pmem(dev))
+ if (is_memory(dev))
return 0400;
else
return 0;
@@ -712,85 +715,37 @@
}
/*
- * Upon successful probe/remove, take/release a reference on the
- * associated interleave set (if present), and plant new btt + namespace
- * seeds. Also, on the removal of a BLK region, notify the provider to
- * disable the region.
+ * When a namespace is activated create new seeds for the next
+ * namespace, or namespace-personality to be configured.
*/
-static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
- struct device *dev, bool probe)
+void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
{
- struct nd_region *nd_region;
-
- if (!probe && is_nd_region(dev)) {
- int i;
-
- nd_region = to_nd_region(dev);
- for (i = 0; i < nd_region->ndr_mappings; i++) {
- struct nd_mapping *nd_mapping = &nd_region->mapping[i];
- struct nvdimm_drvdata *ndd = nd_mapping->ndd;
- struct nvdimm *nvdimm = nd_mapping->nvdimm;
-
- mutex_lock(&nd_mapping->lock);
- nd_mapping_free_labels(nd_mapping);
- mutex_unlock(&nd_mapping->lock);
-
- put_ndd(ndd);
- nd_mapping->ndd = NULL;
- if (ndd)
- atomic_dec(&nvdimm->busy);
- }
- }
- if (dev->parent && is_nd_region(dev->parent) && probe) {
- nd_region = to_nd_region(dev->parent);
- nvdimm_bus_lock(dev);
- if (nd_region->ns_seed == dev)
- nd_region_create_ns_seed(nd_region);
- nvdimm_bus_unlock(dev);
- }
- if (is_nd_btt(dev) && probe) {
+ nvdimm_bus_lock(dev);
+ if (nd_region->ns_seed == dev) {
+ nd_region_create_ns_seed(nd_region);
+ } else if (is_nd_btt(dev)) {
struct nd_btt *nd_btt = to_nd_btt(dev);
- nd_region = to_nd_region(dev->parent);
- nvdimm_bus_lock(dev);
if (nd_region->btt_seed == dev)
nd_region_create_btt_seed(nd_region);
if (nd_region->ns_seed == &nd_btt->ndns->dev)
nd_region_create_ns_seed(nd_region);
- nvdimm_bus_unlock(dev);
- }
- if (is_nd_pfn(dev) && probe) {
+ } else if (is_nd_pfn(dev)) {
struct nd_pfn *nd_pfn = to_nd_pfn(dev);
- nd_region = to_nd_region(dev->parent);
- nvdimm_bus_lock(dev);
if (nd_region->pfn_seed == dev)
nd_region_create_pfn_seed(nd_region);
if (nd_region->ns_seed == &nd_pfn->ndns->dev)
nd_region_create_ns_seed(nd_region);
- nvdimm_bus_unlock(dev);
- }
- if (is_nd_dax(dev) && probe) {
+ } else if (is_nd_dax(dev)) {
struct nd_dax *nd_dax = to_nd_dax(dev);
- nd_region = to_nd_region(dev->parent);
- nvdimm_bus_lock(dev);
if (nd_region->dax_seed == dev)
nd_region_create_dax_seed(nd_region);
if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
nd_region_create_ns_seed(nd_region);
- nvdimm_bus_unlock(dev);
}
-}
-
-void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev)
-{
- nd_region_notify_driver_action(nvdimm_bus, dev, true);
-}
-
-void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev)
-{
- nd_region_notify_driver_action(nvdimm_bus, dev, false);
+ nvdimm_bus_unlock(dev);
}
static ssize_t mappingN(struct device *dev, char *buf, int n)
@@ -989,15 +944,22 @@
struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
struct nvdimm *nvdimm = mapping->nvdimm;
- if ((mapping->start | mapping->size) % SZ_4K) {
- dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n",
- caller, dev_name(&nvdimm->dev), i);
-
+ if ((mapping->start | mapping->size) % PAGE_SIZE) {
+ dev_err(&nvdimm_bus->dev,
+ "%s: %s mapping%d is not %ld aligned\n",
+ caller, dev_name(&nvdimm->dev), i, PAGE_SIZE);
return NULL;
}
if (test_bit(NDD_UNARMED, &nvdimm->flags))
ro = 1;
+
+ if (test_bit(NDD_NOBLK, &nvdimm->flags)
+ && dev_type == &nd_blk_device_type) {
+ dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n",
+ caller, dev_name(&nvdimm->dev), i);
+ return NULL;
+ }
}
if (dev_type == &nd_blk_device_type) {
@@ -1015,10 +977,9 @@
}
region_buf = ndbr;
} else {
- nd_region = kzalloc(sizeof(struct nd_region)
- + sizeof(struct nd_mapping)
- * ndr_desc->num_mappings,
- GFP_KERNEL);
+ nd_region = kzalloc(struct_size(nd_region, mapping,
+ ndr_desc->num_mappings),
+ GFP_KERNEL);
region_buf = nd_region;
}
@@ -1060,6 +1021,7 @@
nd_region->flags = ndr_desc->flags;
nd_region->ro = ro;
nd_region->numa_node = ndr_desc->numa_node;
+ nd_region->target_node = ndr_desc->target_node;
ida_init(&nd_region->ns_ida);
ida_init(&nd_region->btt_ida);
ida_init(&nd_region->pfn_ida);
@@ -1072,6 +1034,11 @@
dev->of_node = ndr_desc->of_node;
nd_region->ndr_size = resource_size(ndr_desc->res);
nd_region->ndr_start = ndr_desc->res->start;
+ if (ndr_desc->flush)
+ nd_region->flush = ndr_desc->flush;
+ else
+ nd_region->flush = NULL;
+
nd_device_register(dev);
return nd_region;
@@ -1112,11 +1079,24 @@
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
+int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
+{
+ int rc = 0;
+
+ if (!nd_region->flush)
+ rc = generic_nvdimm_flush(nd_region);
+ else {
+ if (nd_region->flush(nd_region, bio))
+ rc = -EIO;
+ }
+
+ return rc;
+}
/**
* nvdimm_flush - flush any posted write queues between the cpu and pmem media
* @nd_region: blk or interleaved pmem region
*/
-void nvdimm_flush(struct nd_region *nd_region)
+int generic_nvdimm_flush(struct nd_region *nd_region)
{
struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
int i, idx;
@@ -1140,6 +1120,8 @@
if (ndrd_get_flush_wpq(ndrd, i, 0))
writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
wmb();
+
+ return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_flush);
@@ -1184,6 +1166,16 @@
}
EXPORT_SYMBOL_GPL(nvdimm_has_cache);
+bool is_nvdimm_sync(struct nd_region *nd_region)
+{
+ if (is_nd_volatile(&nd_region->dev))
+ return true;
+
+ return is_nd_pmem(&nd_region->dev) &&
+ !test_bit(ND_REGION_ASYNC, &nd_region->flags);
+}
+EXPORT_SYMBOL_GPL(is_nvdimm_sync);
+
struct conflict_context {
struct nd_region *nd_region;
resource_size_t start, size;
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
new file mode 100644
index 0000000..89b8597
--- /dev/null
+++ b/drivers/nvdimm/security.c
@@ -0,0 +1,553 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. All rights reserved. */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/ndctl.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/cred.h>
+#include <linux/key.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
+#include <keys/encrypted-type.h>
+#include "nd-core.h"
+#include "nd.h"
+
+#define NVDIMM_BASE_KEY 0
+#define NVDIMM_NEW_KEY 1
+
+static bool key_revalidate = true;
+module_param(key_revalidate, bool, 0444);
+MODULE_PARM_DESC(key_revalidate, "Require key validation at init.");
+
+static const char zero_key[NVDIMM_PASSPHRASE_LEN];
+
+static void *key_data(struct key *key)
+{
+ struct encrypted_key_payload *epayload = dereference_key_locked(key);
+
+ lockdep_assert_held_read(&key->sem);
+
+ return epayload->decrypted_data;
+}
+
+static void nvdimm_put_key(struct key *key)
+{
+ if (!key)
+ return;
+
+ up_read(&key->sem);
+ key_put(key);
+}
+
+/*
+ * Retrieve kernel key for DIMM and request from user space if
+ * necessary. Returns a key held for read and must be put by
+ * nvdimm_put_key() before the usage goes out of scope.
+ */
+static struct key *nvdimm_request_key(struct nvdimm *nvdimm)
+{
+ struct key *key = NULL;
+ static const char NVDIMM_PREFIX[] = "nvdimm:";
+ char desc[NVDIMM_KEY_DESC_LEN + sizeof(NVDIMM_PREFIX)];
+ struct device *dev = &nvdimm->dev;
+
+ sprintf(desc, "%s%s", NVDIMM_PREFIX, nvdimm->dimm_id);
+ key = request_key(&key_type_encrypted, desc, "");
+ if (IS_ERR(key)) {
+ if (PTR_ERR(key) == -ENOKEY)
+ dev_dbg(dev, "request_key() found no key\n");
+ else
+ dev_dbg(dev, "request_key() upcall failed\n");
+ key = NULL;
+ } else {
+ struct encrypted_key_payload *epayload;
+
+ down_read(&key->sem);
+ epayload = dereference_key_locked(key);
+ if (epayload->decrypted_datalen != NVDIMM_PASSPHRASE_LEN) {
+ up_read(&key->sem);
+ key_put(key);
+ key = NULL;
+ }
+ }
+
+ return key;
+}
+
+static const void *nvdimm_get_key_payload(struct nvdimm *nvdimm,
+ struct key **key)
+{
+ *key = nvdimm_request_key(nvdimm);
+ if (!*key)
+ return zero_key;
+
+ return key_data(*key);
+}
+
+static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm,
+ key_serial_t id, int subclass)
+{
+ key_ref_t keyref;
+ struct key *key;
+ struct encrypted_key_payload *epayload;
+ struct device *dev = &nvdimm->dev;
+
+ keyref = lookup_user_key(id, 0, 0);
+ if (IS_ERR(keyref))
+ return NULL;
+
+ key = key_ref_to_ptr(keyref);
+ if (key->type != &key_type_encrypted) {
+ key_put(key);
+ return NULL;
+ }
+
+ dev_dbg(dev, "%s: key found: %#x\n", __func__, key_serial(key));
+
+ down_read_nested(&key->sem, subclass);
+ epayload = dereference_key_locked(key);
+ if (epayload->decrypted_datalen != NVDIMM_PASSPHRASE_LEN) {
+ up_read(&key->sem);
+ key_put(key);
+ key = NULL;
+ }
+ return key;
+}
+
+static const void *nvdimm_get_user_key_payload(struct nvdimm *nvdimm,
+ key_serial_t id, int subclass, struct key **key)
+{
+ *key = NULL;
+ if (id == 0) {
+ if (subclass == NVDIMM_BASE_KEY)
+ return zero_key;
+ else
+ return NULL;
+ }
+
+ *key = nvdimm_lookup_user_key(nvdimm, id, subclass);
+ if (!*key)
+ return NULL;
+
+ return key_data(*key);
+}
+
+
+static int nvdimm_key_revalidate(struct nvdimm *nvdimm)
+{
+ struct key *key;
+ int rc;
+ const void *data;
+
+ if (!nvdimm->sec.ops->change_key)
+ return -EOPNOTSUPP;
+
+ data = nvdimm_get_key_payload(nvdimm, &key);
+
+ /*
+ * Send the same key to the hardware as new and old key to
+ * verify that the key is good.
+ */
+ rc = nvdimm->sec.ops->change_key(nvdimm, data, data, NVDIMM_USER);
+ if (rc < 0) {
+ nvdimm_put_key(key);
+ return rc;
+ }
+
+ nvdimm_put_key(key);
+ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+ return 0;
+}
+
+static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
+{
+ struct device *dev = &nvdimm->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct key *key;
+ const void *data;
+ int rc;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->unlock
+ || !nvdimm->sec.flags)
+ return -EIO;
+
+ /* No need to go further if security is disabled */
+ if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
+ return 0;
+
+ if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+ dev_dbg(dev, "Security operation in progress.\n");
+ return -EBUSY;
+ }
+
+ /*
+ * If the pre-OS has unlocked the DIMM, attempt to send the key
+ * from request_key() to the hardware for verification. Failure
+ * to revalidate the key against the hardware results in a
+ * freeze of the security configuration. I.e. if the OS does not
+ * have the key, security is being managed pre-OS.
+ */
+ if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) {
+ if (!key_revalidate)
+ return 0;
+
+ return nvdimm_key_revalidate(nvdimm);
+ } else
+ data = nvdimm_get_key_payload(nvdimm, &key);
+
+ rc = nvdimm->sec.ops->unlock(nvdimm, data);
+ dev_dbg(dev, "key: %d unlock: %s\n", key_serial(key),
+ rc == 0 ? "success" : "fail");
+
+ nvdimm_put_key(key);
+ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+ return rc;
+}
+
+int nvdimm_security_unlock(struct device *dev)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ int rc;
+
+ nvdimm_bus_lock(dev);
+ rc = __nvdimm_security_unlock(nvdimm);
+ nvdimm_bus_unlock(dev);
+ return rc;
+}
+
+static int check_security_state(struct nvdimm *nvdimm)
+{
+ struct device *dev = &nvdimm->dev;
+
+ if (test_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags)) {
+ dev_dbg(dev, "Incorrect security state: %#lx\n",
+ nvdimm->sec.flags);
+ return -EIO;
+ }
+
+ if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+ dev_dbg(dev, "Security operation in progress.\n");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int security_disable(struct nvdimm *nvdimm, unsigned int keyid)
+{
+ struct device *dev = &nvdimm->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct key *key;
+ int rc;
+ const void *data;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->disable
+ || !nvdimm->sec.flags)
+ return -EOPNOTSUPP;
+
+ rc = check_security_state(nvdimm);
+ if (rc)
+ return rc;
+
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
+ return -ENOKEY;
+
+ rc = nvdimm->sec.ops->disable(nvdimm, data);
+ dev_dbg(dev, "key: %d disable: %s\n", key_serial(key),
+ rc == 0 ? "success" : "fail");
+
+ nvdimm_put_key(key);
+ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+ return rc;
+}
+
+static int security_update(struct nvdimm *nvdimm, unsigned int keyid,
+ unsigned int new_keyid,
+ enum nvdimm_passphrase_type pass_type)
+{
+ struct device *dev = &nvdimm->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct key *key, *newkey;
+ int rc;
+ const void *data, *newdata;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->change_key
+ || !nvdimm->sec.flags)
+ return -EOPNOTSUPP;
+
+ rc = check_security_state(nvdimm);
+ if (rc)
+ return rc;
+
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
+ return -ENOKEY;
+
+ newdata = nvdimm_get_user_key_payload(nvdimm, new_keyid,
+ NVDIMM_NEW_KEY, &newkey);
+ if (!newdata) {
+ nvdimm_put_key(key);
+ return -ENOKEY;
+ }
+
+ rc = nvdimm->sec.ops->change_key(nvdimm, data, newdata, pass_type);
+ dev_dbg(dev, "key: %d %d update%s: %s\n",
+ key_serial(key), key_serial(newkey),
+ pass_type == NVDIMM_MASTER ? "(master)" : "(user)",
+ rc == 0 ? "success" : "fail");
+
+ nvdimm_put_key(newkey);
+ nvdimm_put_key(key);
+ if (pass_type == NVDIMM_MASTER)
+ nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm,
+ NVDIMM_MASTER);
+ else
+ nvdimm->sec.flags = nvdimm_security_flags(nvdimm,
+ NVDIMM_USER);
+ return rc;
+}
+
+static int security_erase(struct nvdimm *nvdimm, unsigned int keyid,
+ enum nvdimm_passphrase_type pass_type)
+{
+ struct device *dev = &nvdimm->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct key *key = NULL;
+ int rc;
+ const void *data;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->erase
+ || !nvdimm->sec.flags)
+ return -EOPNOTSUPP;
+
+ rc = check_security_state(nvdimm);
+ if (rc)
+ return rc;
+
+ if (!test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.ext_flags)
+ && pass_type == NVDIMM_MASTER) {
+ dev_dbg(dev,
+ "Attempt to secure erase in wrong master state.\n");
+ return -EOPNOTSUPP;
+ }
+
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
+ return -ENOKEY;
+
+ rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type);
+ dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key),
+ pass_type == NVDIMM_MASTER ? "(master)" : "(user)",
+ rc == 0 ? "success" : "fail");
+
+ nvdimm_put_key(key);
+ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+ return rc;
+}
+
+static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
+{
+ struct device *dev = &nvdimm->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct key *key = NULL;
+ int rc;
+ const void *data;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->overwrite
+ || !nvdimm->sec.flags)
+ return -EOPNOTSUPP;
+
+ if (dev->driver == NULL) {
+ dev_dbg(dev, "Unable to overwrite while DIMM active.\n");
+ return -EINVAL;
+ }
+
+ rc = check_security_state(nvdimm);
+ if (rc)
+ return rc;
+
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
+ return -ENOKEY;
+
+ rc = nvdimm->sec.ops->overwrite(nvdimm, data);
+ dev_dbg(dev, "key: %d overwrite submission: %s\n", key_serial(key),
+ rc == 0 ? "success" : "fail");
+
+ nvdimm_put_key(key);
+ if (rc == 0) {
+ set_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
+ set_bit(NDD_WORK_PENDING, &nvdimm->flags);
+ set_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags);
+ /*
+ * Make sure we don't lose device while doing overwrite
+ * query.
+ */
+ get_device(dev);
+ queue_delayed_work(system_wq, &nvdimm->dwork, 0);
+ }
+
+ return rc;
+}
+
+/**
+ * __nvdimm_security_overwrite_query - poll a DIMM for overwrite completion
+ * @nvdimm: DIMM whose overwrite delayed work is running
+ *
+ * Must be called with the nvdimm bus lock held (asserted below). While
+ * ->query_overwrite() returns -EBUSY the delayed work is re-queued with a
+ * timeout that grows by 10 seconds per poll; the stored timeout is capped
+ * at 15 minutes. Any other result ends the operation: the overwrite state
+ * dirent is notified, the overwrite / work-pending bits are cleared, the
+ * cached user and master security flags are refreshed and the device
+ * reference is dropped.
+ */
+void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev);
+	int rc;
+	unsigned int tmo;
+
+	/* The bus lock should be held at the top level of the call stack */
+	lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+	/*
+	 * Nothing to do if the work-pending flag is no longer set; it
+	 * means the work has been canceled. Note that this path returns
+	 * without dropping the device reference itself.
+	 */
+	if (!test_bit(NDD_WORK_PENDING, &nvdimm->flags))
+		return;
+
+	tmo = nvdimm->sec.overwrite_tmo;
+
+	if (!nvdimm->sec.ops || !nvdimm->sec.ops->query_overwrite
+			|| !nvdimm->sec.flags)
+		return;
+
+	rc = nvdimm->sec.ops->query_overwrite(nvdimm);
+	if (rc == -EBUSY) {
+		/* setup delayed work again */
+		tmo += 10;
+		queue_delayed_work(system_wq, &nvdimm->dwork, tmo * HZ);
+		nvdimm->sec.overwrite_tmo = min(15U * 60U, tmo);
+		return;
+	}
+
+	if (rc < 0)
+		dev_dbg(&nvdimm->dev, "overwrite failed\n");
+	else
+		dev_dbg(&nvdimm->dev, "overwrite completed\n");
+
+	if (nvdimm->sec.overwrite_state)
+		sysfs_notify_dirent(nvdimm->sec.overwrite_state);
+	nvdimm->sec.overwrite_tmo = 0;
+	clear_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
+	clear_bit(NDD_WORK_PENDING, &nvdimm->flags);
+	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+	/* master passphrase state is cached in ext_flags, as in security_update() */
+	nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
+	/* drop the reference last: nvdimm is still dereferenced above */
+	put_device(&nvdimm->dev);
+}
+
+void nvdimm_security_overwrite_query(struct work_struct *work)
+{
+ struct nvdimm *nvdimm =
+ container_of(work, typeof(*nvdimm), dwork.work);
+
+ nvdimm_bus_lock(&nvdimm->dev);
+ __nvdimm_security_overwrite_query(nvdimm);
+ nvdimm_bus_unlock(&nvdimm->dev);
+}
+
+#define OPS \
+ C( OP_FREEZE, "freeze", 1), \
+ C( OP_DISABLE, "disable", 2), \
+ C( OP_UPDATE, "update", 3), \
+ C( OP_ERASE, "erase", 2), \
+ C( OP_OVERWRITE, "overwrite", 2), \
+ C( OP_MASTER_UPDATE, "master_update", 3), \
+ C( OP_MASTER_ERASE, "master_erase", 2)
+#undef C
+#define C(a, b, c) a
+enum nvdimmsec_op_ids { OPS };
+#undef C
+#define C(a, b, c) { b, c }
+static struct {
+ const char *name;
+ int args;
+} ops[] = { OPS };
+#undef C
+
+#define SEC_CMD_SIZE 32
+#define KEY_ID_SIZE 10
+
+/**
+ * nvdimm_security_store - parse and dispatch a security command string
+ * @dev: nvdimm device the command applies to
+ * @buf: command text of the form "<op> [<keyid> [<new_keyid>]]"
+ * @len: value handed back to the caller when the command succeeds
+ *
+ * The operation name is matched against the ops[] table, which also
+ * records how many whitespace separated tokens (operation included) each
+ * command takes. Key ids are parsed with kstrtouint() base 0. The
+ * security_*() helpers this dispatches to assert that the nvdimm bus lock
+ * is held, so the caller is expected to hold it.
+ *
+ * Return: @len on success; -EINVAL for an unknown operation, a missing
+ * key id or an unparsable key id; -EBUSY when erase/overwrite is
+ * requested while the DIMM is busy; otherwise the error returned by the
+ * selected operation.
+ */
+ssize_t nvdimm_security_store(struct device *dev, const char *buf, size_t len)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	ssize_t rc;
+	char cmd[SEC_CMD_SIZE+1], keystr[KEY_ID_SIZE+1],
+		nkeystr[KEY_ID_SIZE+1];
+	unsigned int key, newkey;
+	int i;
+
+	rc = sscanf(buf, "%"__stringify(SEC_CMD_SIZE)"s"
+			" %"__stringify(KEY_ID_SIZE)"s"
+			" %"__stringify(KEY_ID_SIZE)"s",
+			cmd, keystr, nkeystr);
+	if (rc < 1)
+		return -EINVAL;
+	for (i = 0; i < ARRAY_SIZE(ops); i++)
+		if (sysfs_streq(cmd, ops[i].name))
+			break;
+	if (i >= ARRAY_SIZE(ops))
+		return -EINVAL;
+	/*
+	 * sscanf() only fills the buffers it actually matched. Reject a
+	 * command that is missing key ids instead of handing uninitialized
+	 * stack contents to kstrtouint().
+	 */
+	if (rc < ops[i].args)
+		return -EINVAL;
+	if (ops[i].args > 1)
+		rc = kstrtouint(keystr, 0, &key);
+	if (rc >= 0 && ops[i].args > 2)
+		rc = kstrtouint(nkeystr, 0, &newkey);
+	if (rc < 0)
+		return rc;
+
+	if (i == OP_FREEZE) {
+		dev_dbg(dev, "freeze\n");
+		rc = nvdimm_security_freeze(nvdimm);
+	} else if (i == OP_DISABLE) {
+		dev_dbg(dev, "disable %u\n", key);
+		rc = security_disable(nvdimm, key);
+	} else if (i == OP_UPDATE || i == OP_MASTER_UPDATE) {
+		dev_dbg(dev, "%s %u %u\n", ops[i].name, key, newkey);
+		rc = security_update(nvdimm, key, newkey, i == OP_UPDATE
+				? NVDIMM_USER : NVDIMM_MASTER);
+	} else if (i == OP_ERASE || i == OP_MASTER_ERASE) {
+		dev_dbg(dev, "%s %u\n", ops[i].name, key);
+		/* erase is refused while anything still holds the DIMM busy */
+		if (atomic_read(&nvdimm->busy)) {
+			dev_dbg(dev, "Unable to secure erase while DIMM active.\n");
+			return -EBUSY;
+		}
+		rc = security_erase(nvdimm, key, i == OP_ERASE
+				? NVDIMM_USER : NVDIMM_MASTER);
+	} else if (i == OP_OVERWRITE) {
+		dev_dbg(dev, "overwrite %u\n", key);
+		/* same busy check as erase */
+		if (atomic_read(&nvdimm->busy)) {
+			dev_dbg(dev, "Unable to overwrite while DIMM active.\n");
+			return -EBUSY;
+		}
+		rc = security_overwrite(nvdimm, key);
+	} else
+		return -EINVAL;
+
+	/* sysfs store convention: report the whole buffer as consumed */
+	if (rc == 0)
+		rc = len;
+	return rc;
+}
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
new file mode 100644
index 0000000..5e3d07b
--- /dev/null
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * virtio_pmem.c: Virtio pmem Driver
+ *
+ * Discovers persistent memory range information
+ * from host and registers the virtual pmem device
+ * with libnvdimm core.
+ */
+#include "virtio_pmem.h"
+#include "nd.h"
+
+static const struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_PMEM, VIRTIO_DEV_ANY_ID },	/* the only ID this driver binds */
+	{ 0 },					/* end-of-table sentinel */
+};
+
+/* Find the single request virtqueue, then init pmem_lock and req_list. */
+static int init_vq(struct virtio_pmem *vpmem)
+{
+	/* the only vq, named "flush_queue"; its callback is virtio_pmem_host_ack */
+	vpmem->req_vq = virtio_find_single_vq(vpmem->vdev,
+					virtio_pmem_host_ack, "flush_queue");
+	if (IS_ERR(vpmem->req_vq))
+		return PTR_ERR(vpmem->req_vq);
+
+	spin_lock_init(&vpmem->pmem_lock);
+	INIT_LIST_HEAD(&vpmem->req_list);
+
+	return 0;
+}
+
+/*
+ * Probe: create the request vq, read the pmem range from config space and
+ * register it with libnvdimm as a region. Returns 0 or a negative errno.
+ */
+static int virtio_pmem_probe(struct virtio_device *vdev)
+{
+	struct nd_region_desc ndr_desc = {};
+	int nid = dev_to_node(&vdev->dev);
+	struct nd_region *nd_region;
+	struct virtio_pmem *vpmem;
+	struct resource res;
+	int err = 0;
+
+	if (!vdev->config->get) {
+		dev_err(&vdev->dev, "%s failure: config access disabled\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	vpmem = devm_kzalloc(&vdev->dev, sizeof(*vpmem), GFP_KERNEL);
+	if (!vpmem)
+		return -ENOMEM;
+
+	vpmem->vdev = vdev;
+	vdev->priv = vpmem;
+	err = init_vq(vpmem);
+	if (err) {
+		dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n");
+		goto out_err;
+	}
+
+	virtio_cread(vdev, struct virtio_pmem_config, start, &vpmem->start);
+	virtio_cread(vdev, struct virtio_pmem_config, size, &vpmem->size);
+
+	res.start = vpmem->start;
+	res.end = vpmem->start + vpmem->size - 1;
+	vpmem->nd_desc.provider_name = "virtio-pmem";
+	vpmem->nd_desc.module = THIS_MODULE;
+
+	vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev, &vpmem->nd_desc);
+	if (!vpmem->nvdimm_bus) {
+		dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n");
+		err = -ENXIO;
+		goto out_vq;
+	}
+
+	dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus);
+
+	ndr_desc.res = &res;
+	ndr_desc.numa_node = nid;
+	ndr_desc.flush = async_pmem_flush;
+	/* passed in the descriptor instead of stored after the region exists */
+	ndr_desc.provider_data = vdev;
+	set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+	set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+	nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);
+	if (!nd_region) {
+		dev_err(&vdev->dev, "failed to create nvdimm region\n");
+		err = -ENXIO;
+		goto out_nd;
+	}
+	return 0;
+out_nd:
+	nvdimm_bus_unregister(vpmem->nvdimm_bus);
+out_vq:
+	vdev->config->del_vqs(vdev);
+out_err:
+	return err;
+}
+
+static void virtio_pmem_remove(struct virtio_device *vdev)
+{
+	/* the device's driver data is the nvdimm bus to unregister */
+	nvdimm_bus_unregister(dev_get_drvdata(&vdev->dev));
+	/* delete the virtqueues, then reset the device via its config ops */
+	vdev->config->del_vqs(vdev);
+	vdev->config->reset(vdev);
+}
+
+static struct virtio_driver virtio_pmem_driver = {
+	/* embedded driver identity, then the match table and bind callbacks */
+	.driver = { .name = KBUILD_MODNAME, .owner = THIS_MODULE },
+	.id_table = id_table,
+	.probe = virtio_pmem_probe,
+	.remove = virtio_pmem_remove,
+};
+
+module_virtio_driver(virtio_pmem_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio pmem driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
new file mode 100644
index 0000000..0dddefe
--- /dev/null
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * virtio_pmem.h: virtio pmem Driver
+ *
+ * Discovers persistent memory range information
+ * from host and provides a virtio based flushing
+ * interface.
+ */
+
+#ifndef _LINUX_VIRTIO_PMEM_H
+#define _LINUX_VIRTIO_PMEM_H
+
+#include <linux/module.h>
+#include <uapi/linux/virtio_pmem.h>
+#include <linux/libnvdimm.h>
+#include <linux/spinlock.h>
+
+struct virtio_pmem_request { /* one request/response pair and its wait state */
+ struct virtio_pmem_req req;
+ struct virtio_pmem_resp resp;
+
+ /* Wait queue for work deferred until the host acks this request */
+ wait_queue_head_t host_acked;
+ bool done; /* presumably the host_acked wake condition - TODO confirm */
+
+ /* Wait queue for work deferred until a virtqueue buffer is available */
+ wait_queue_head_t wq_buf;
+ bool wq_buf_avail; /* presumably the wq_buf wake condition - TODO confirm */
+ struct list_head list; /* presumably links into virtio_pmem.req_list - verify */
+};
+
+struct virtio_pmem {
+ struct virtio_device *vdev; /* owning virtio device, set in probe */
+
+ /* Virtio pmem request queue (found under the name "flush_queue") */
+ struct virtqueue *req_vq;
+
+ /* nvdimm bus registered for this device, and the descriptor used for it */
+ struct nvdimm_bus *nvdimm_bus;
+ struct nvdimm_bus_descriptor nd_desc;
+
+ /* List to store deferred work if virtqueue is full */
+ struct list_head req_list;
+
+ /* Synchronize virtqueue data */
+ spinlock_t pmem_lock;
+
+ /* Memory region start and size, read from virtio config space in probe */
+ __u64 start;
+ __u64 size;
+};
+
+void virtio_pmem_host_ack(struct virtqueue *vq);
+int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
+#endif